broom.helpers/ 0000755 0001762 0000144 00000000000 15062503242 013030 5 ustar ligges users broom.helpers/tests/ 0000755 0001762 0000144 00000000000 14737437002 014202 5 ustar ligges users broom.helpers/tests/testthat/ 0000755 0001762 0000144 00000000000 15062503242 016032 5 ustar ligges users broom.helpers/tests/testthat/test-add_n.R 0000644 0001762 0000144 00000021637 14746151337 020224 0 ustar ligges users test_that("tidy_add_n() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 52, 40, 49, 63, 63, 98),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(61, 13, 15, 15, 19, 21, 33),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 193)
expect_equal(attr(res, "N_event"), 61)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.helmert, trt = contr.SAS)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 52, 52, 40, 63, 63, 95),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 193)
expect_equal(attr(res, "N_event"), 61)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.poly, grade = contr.treatment, trt = matrix(c(-3, 2)))
)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 193, 193, 193, 63, 63, 98),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 193)
expect_equal(attr(res, "N_event"), 61)
mod <- glm(
response ~ stage + grade + trt + factor(death),
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, 3), grade = contr.treatment(3, 2),
trt = contr.treatment(2, 2), "factor(death)" = matrix(c(-3, 2))
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 52, 52, 49, 67, 63, 95, 107),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 193)
expect_equal(attr(res, "N_event"), 61)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = "contr.sum", grade = "contr.helmert", trt = "contr.SAS")
)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 52, 52, 40, 63, 63, 95),
ignore_attr = TRUE
)
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = poisson)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(183, 183, 58, 60, 94, 29, 33),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(58, 58, 17, 20, 31, 10, 8),
ignore_attr = TRUE
)
expect_equal(
res$exposure,
c(183, 183, 58, 60, 94, 29, 33),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 183)
expect_equal(attr(res, "N_event"), 58)
expect_equal(attr(res, "Exposure"), 183)
mod <- glm(
response ~ trt * grade + offset(log(ttdeath)),
gtsummary::trial,
family = poisson,
weights = rep_len(1:2, 200)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(292, 151, 94, 92, 49, 49),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(96, 53, 28, 31, 19, 12),
ignore_attr = TRUE
)
expect_equal(
res$exposure,
c(5819.07, 2913.6, 1826.26, 1765.52, 887.22, 915.56),
ignore_attr = TRUE
)
expect_equal(attr(res, "N_obs"), 292)
expect_equal(attr(res, "N_event"), 96)
expect_equal(attr(res, "Exposure"), 5819.07)
})
test_that("test tidy_add_n() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_n())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_n() |> tidy_add_n()
)
})
test_that("tidy_add_n() works with variables having non standard name", {
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- glm(response ~ stage + `grade of kids` + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(193, 52, 40, 49, 63, 63, 98),
ignore_attr = TRUE
)
})
test_that("tidy_add_n() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
mod <- lme4::lmer(marker ~ stage + grade + (1 | group), df)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_n())
})
test_that("tidy_add_n() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
suppressMessages(
mod <- lme4::glmer(response ~ stage + grade + (1 | group), df, family = binomial)
)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_n())
})
test_that("tidy_add_n() works with survival::coxph", {
skip_on_cran()
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(res <- mod |> tidy_and_attach() |> tidy_add_n())
expect_equal(res$n_ind, c(227, 227, 90), ignore_attr = TRUE)
expect_equal(attr(res, "N_ind"), 227)
})
test_that("tidy_add_n() works with survival::survreg", {
skip_on_cran()
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ factor(ecog.ps) + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with nnet::multinom", {
skip_if_not_installed("nnet")
skip_on_cran()
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.sum)
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
res <- mod |>
tidy_and_attach() |>
tidy_add_n()
expect_equal(
res$n_obs,
c(179, 47, 52, 37, 179, 179, 179, 47, 52, 37, 179, 179),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(57, 21, 16, 8, 57, 57, 58, 12, 18, 12, 58, 58),
ignore_attr = TRUE
)
# when y is not coded as a factor
mod <- nnet::multinom(race ~ age + lwt + bwt, data = MASS::birthwt, trace = FALSE)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_n()
)
})
test_that("tidy_add_n() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_n())
})
test_that("tidy_add_n() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(res <- mod |> tidy_and_attach() |> tidy_add_n())
expect_true(all(is.na(res$n)))
})
test_that("model_compute_terms_contributions() with subset", {
mod <- glm(mpg ~ gear, data = mtcars, subset = mpg < 30)
expect_no_warning(
res <- mod |> model_compute_terms_contributions()
)
expect_equal(
nrow(res),
nrow(mtcars[mtcars$mpg < 30, ])
)
})
broom.helpers/tests/testthat/test-add_reference_rows.R 0000644 0001762 0000144 00000016313 14737437002 022766 0 ustar ligges users test_that("tidy_add_reference_rows() works as expected", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.treatment, grade = contr.SAS, trt = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
if ("stage2" %in% names(coef(mod))) {
expect_equal(
res$term,
c(
"(Intercept)", "stage1", "stage2", "stage3", "stage4", "grade1",
"grade2", "grade3", "trt1", "trt2", "grade1:trt1", "grade2:trt1"
)
)
} else {
expect_equal(
res$term,
c(
"(Intercept)", "stageT1", "stageT2", "stageT3", "stageT4",
"gradeI", "gradeII", "gradeIII", "trt1", "trt2", "gradeI:trt1",
"gradeII:trt1"
)
)
}
expect_equal(
res$reference_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE,
NA, NA
)
)
expect_equal(
res$var_class,
c(
NA, "factor", "factor", "factor", "factor", "factor", "factor",
"factor", "character", "character", NA, NA
),
ignore_attr = TRUE
)
expect_equal(
res$var_type,
c(
"intercept", "categorical", "categorical", "categorical", "categorical",
"categorical", "categorical", "categorical", "dichotomous", "dichotomous",
"interaction", "interaction"
)
)
expect_equal(
res$var_nlevels,
c(NA, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L, NA, NA),
ignore_attr = TRUE
)
# no reference row added if other contrasts are used
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.poly, grade = contr.helmert, trt = matrix(c(2, 3)))
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
expect_true(all(is.na(res$reference_row)))
# no reference row for an interaction only variable
mod <- lm(age ~ factor(response):marker, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
expect_equal(
res$reference_row,
c(NA, NA, NA)
)
# no reference row if defined in no_reference_row
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.treatment, grade = contr.SAS, trt = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows(no_reference_row = c("stage", "grade"))
if ("stage2" %in% names(coef(mod))) {
expect_equal(
res$term,
c(
"(Intercept)", "stage2", "stage3", "stage4", "grade1", "grade2",
"trt1", "trt2", "grade1:trt1", "grade2:trt1"
)
)
} else {
expect_equal(
res$term,
c(
"(Intercept)", "stageT2", "stageT3", "stageT4", "gradeI", "gradeII",
"trt1", "trt2", "gradeI:trt1", "gradeII:trt1"
)
)
}
expect_equal(
res$reference_row,
c(NA, NA, NA, NA, NA, NA, FALSE, TRUE, NA, NA)
)
})
test_that("test tidy_add_reference_rows() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_reference_rows())
# warning if applied twice
expect_message(
mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_reference_rows()
)
# message if applied after tidy_add_term_labels()
expect_message(
mod |>
tidy_and_attach() |>
tidy_add_term_labels() |>
tidy_add_reference_rows()
)
# message if applied after tidy_add_n()
expect_message(
mod |>
tidy_and_attach() |>
tidy_add_n() |>
tidy_add_reference_rows()
)
# error if applied after tidy_add_header_rows()
expect_error(
mod |>
tidy_and_attach() |>
tidy_add_header_rows() |>
tidy_add_reference_rows()
)
# message or error if non existing variable in no_reference_row
expect_error(
mod |> tidy_and_attach() |> tidy_add_reference_rows(no_reference_row = "g")
)
})
test_that("tidy_add_reference_rows() works with different values of base in contr.treatment()", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, base = 3),
grade = contr.treatment(3, base = 2),
trt = contr.treatment(2, base = 2)
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
if ("stage2" %in% names(coef(mod))) {
expect_equal(
res$term,
c(
"(Intercept)", "stage1", "stage2", "stage3", "stage4", "grade1",
"grade2", "grade3", "trt1", "trt2", "grade1:trt1", "grade3:trt1"
)
)
} else {
expect_equal(
res$term,
c(
"(Intercept)", "stageT1", "stageT2", "stageT3", "stageT4", "gradeI",
"gradeII", "gradeIII", "trt1", "trt2", "gradeI:trt1", "gradeIII:trt1"
)
)
}
expect_equal(
res$reference_row,
c(
NA, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE,
NA, NA
)
)
})
test_that("tidy_add_reference_rows() use var_label if available", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial
)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels() |>
tidy_add_reference_rows()
expect_equal(
res$var_label,
c(
"(Intercept)", "T Stage", "T Stage", "T Stage", "T Stage",
"Grade", "Grade", "Grade", "Chemotherapy Treatment", "Chemotherapy Treatment",
"Grade * Chemotherapy Treatment", "Grade * Chemotherapy Treatment"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_reference_rows() works with nnet::multinom", {
skip_if_not_installed("nnet")
skip_on_cran()
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
expect_equal(
res$reference_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, NA, NA, NA, TRUE, FALSE, FALSE,
FALSE, NA, NA
)
)
})
test_that("tidy_add_reference_rows() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
skip_if_not_installed("broom.mixed")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
expect_equal(
res[res$reference_row & !is.na(res$reference_row), ]$effect,
"fixed"
)
})
test_that("tidy_add_reference_rows() works with glmmTMB::glmmTMB", {
skip_on_cran()
skip_if_not_installed("glmmTMB")
skip_if_not_installed("broom.mixed")
suppressWarnings(
mod <- glmmTMB::glmmTMB(
count ~ mined + spp,
ziformula = ~mined,
family = poisson,
data = glmmTMB::Salamanders
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
expect_equal(
res$reference_row,
c(
NA, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
NA, TRUE, FALSE
)
)
})
broom.helpers/tests/testthat/test-add_header_rows.R 0000644 0001762 0000144 00000015067 14737437002 022265 0 ustar ligges users test_that("tidy_add_header_rows() works as expected", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.treatment, grade = contr.SAS, trt = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_header_rows()
expect_equal(
res$label,
c(
"(Intercept)", "T Stage", "T2", "T3", "T4", "Grade", "I", "II",
"Chemotherapy Treatment", "Drug A", "Grade * Chemotherapy Treatment",
"I * Drug A", "II * Drug A"
),
ignore_attr = TRUE
)
expect_equal(
res$header_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE
)
)
expect_equal(
res$var_nlevels,
c(NA, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 2L, 2L, NA, NA, NA),
ignore_attr = TRUE
)
# show_single_row has an effect only on variables with one term (2 if a ref term)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_add_header_rows(show_single_row = everything(), quiet = TRUE)
expect_equal(
res$label,
c(
"(Intercept)", "T Stage", "T2", "T3", "T4", "Grade", "I", "II",
"Chemotherapy Treatment", "Grade * Chemotherapy Treatment", "I * Drug A",
"II * Drug A"
),
ignore_attr = TRUE
)
expect_equal(
res$header_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, NA, TRUE,
FALSE, FALSE
)
)
# with reference rows
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_header_rows()
expect_equal(
res$label,
c(
"(Intercept)", "T Stage", "T1", "T2", "T3", "T4", "Grade",
"I", "II", "III", "Chemotherapy Treatment", "Drug A", "Drug B",
"Grade * Chemotherapy Treatment", "I * Drug A", "II * Drug A"
),
ignore_attr = TRUE
)
expect_equal(
res$header_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, TRUE, FALSE, FALSE
)
)
# no warning with an intercept only model
mod <- lm(mpg ~ 1, mtcars)
expect_no_warning(
mod |> tidy_and_attach() |> tidy_add_header_rows()
)
# header row for all categorical variable (even if no reference row)
# and if interaction with a categorical variable
# (except if )
mod <- lm(age ~ factor(response) * marker + trt, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_add_header_rows(show_single_row = "trt")
expect_equal(
res$header_row,
c(NA, TRUE, FALSE, NA, NA, TRUE, FALSE)
)
# show_single_row could be apply to an interaction variable
mod <- lm(age ~ factor(response) * marker, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_add_header_rows(show_single_row = "factor(response):marker")
expect_equal(
res$header_row,
c(NA, TRUE, FALSE, NA, NA)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_header_rows(show_single_row = "factor(response):marker")
expect_equal(
res$header_row,
c(NA, TRUE, FALSE, FALSE, NA, NA)
)
expect_equal(
res$var_label,
c(
"(Intercept)", "factor(response)", "factor(response)", "factor(response)",
"Marker Level (ng/mL)", "factor(response) * Marker Level (ng/mL)"
),
ignore_attr = TRUE
)
# no standard name
mod <- lm(
hp ~ `miles per gallon`,
mtcars |> dplyr::rename(`miles per gallon` = mpg)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_header_rows()
expect_equal(
res$header_row,
c(NA, NA)
)
mod <- lm(
hp ~ `cyl as factor`,
mtcars |> dplyr::mutate(`cyl as factor` = factor(cyl))
)
res <- mod |>
tidy_and_attach() |>
tidy_add_header_rows()
expect_equal(
res$header_row,
c(NA, TRUE, FALSE, FALSE)
)
})
test_that("test tidy_add_header_rows() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_header_rows())
# warning if applied twice
expect_message(
mod |>
tidy_and_attach() |>
tidy_add_header_rows() |>
tidy_add_header_rows()
)
})
test_that("tidy_add_header_rows() works with nnet::multinom", {
skip_if_not_installed("nnet")
skip_on_cran()
mod <- nnet::multinom(grade ~ stage + marker + age + trt, data = gtsummary::trial, trace = FALSE)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_header_rows()
expect_equal(
res$header_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, FALSE, NA, NA, TRUE, FALSE,
FALSE, NA, TRUE, FALSE, FALSE, FALSE, FALSE, NA, NA, TRUE, FALSE,
FALSE
)
)
expect_equal(
res$label,
c(
"(Intercept)", "T Stage", "T1", "T2", "T3", "T4",
"Marker Level (ng/mL)", "Age", "Chemotherapy Treatment",
"Drug A", "Drug B", "(Intercept)", "T Stage", "T1", "T2",
"T3", "T4", "Marker Level (ng/mL)", "Age",
"Chemotherapy Treatment", "Drug A", "Drug B"
),
ignore_attr = TRUE
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_header_rows(show_single_row = everything(), quiet = TRUE)
expect_equal(
res$header_row,
c(
NA, TRUE, FALSE, FALSE, FALSE, FALSE, NA, NA, NA, NA, TRUE,
FALSE, FALSE, FALSE, FALSE, NA, NA, NA
)
)
expect_equal(
res$label,
c(
"(Intercept)", "T Stage", "T1", "T2", "T3", "T4",
"Marker Level (ng/mL)", "Age", "Chemotherapy Treatment",
"(Intercept)", "T Stage", "T1", "T2", "T3", "T4",
"Marker Level (ng/mL)", "Age", "Chemotherapy Treatment"
),
ignore_attr = TRUE
)
})
test_that("test tidy_add_header_rows() bad single row request", {
mod <- lm(mpg ~ hp + factor(cyl) + factor(am), mtcars) |>
tidy_and_attach() |>
tidy_identify_variables()
expect_message(
tidy_add_header_rows(mod, show_single_row = "factor(cyl)")
)
expect_error(
tidy_add_header_rows(mod, show_single_row = "factor(cyl)", strict = TRUE)
)
})
test_that("tidy_add_header_rows() and mixed model", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(
age ~ stage + (stage | grade) + (1 | grade),
gtsummary::trial
)
res <- mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_add_header_rows()
expect_equal(
res |>
dplyr::filter(.data$header_row & .data$var_type == "ran_pars") |>
nrow(),
0L
)
})
broom.helpers/tests/testthat/test-tidy_parameters.R 0000644 0001762 0000144 00000002167 14737437002 022344 0 ustar ligges users test_that("tidy_parameters() works for basic models", {
skip_if_not_installed("parameters")
mod <- lm(Petal.Length ~ Petal.Width, iris)
expect_no_error(
mod |> tidy_parameters()
)
expect_no_error(
mod |> tidy_plus_plus(tidy_fun = tidy_parameters)
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
expect_no_error(
mod |> tidy_parameters()
)
expect_no_error(
res1 <- mod |> tidy_plus_plus(tidy_fun = tidy_parameters)
)
expect_no_error(
res2 <- mod |> tidy_plus_plus(tidy_fun = tidy_parameters, conf.level = .80)
)
expect_false(identical(res1$conf.low, res2$conf.low))
expect_no_error(
res <- mod |> tidy_plus_plus(tidy_fun = tidy_parameters, conf.int = FALSE)
)
expect_false("conf.low" %in% res)
})
test_that("tidy_with_broom_or_parameters() works for basic models", {
skip_if_not_installed("parameters")
mod <- lm(Petal.Length ~ Petal.Width, iris)
expect_no_error(
mod |> tidy_with_broom_or_parameters()
)
expect_error(
suppressWarnings("not a model" |> tidy_with_broom_or_parameters())
)
})
broom.helpers/tests/testthat/test-model_get_n.R 0000644 0001762 0000144 00000034221 15051577134 021421 0 ustar ligges users test_that("model_get_n() works for basic models", {
mod <- lm(Sepal.Length ~ ., iris)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(150, 150, 150, 150, 50, 50, 50),
ignore_attr = TRUE
)
mod <- lm(
Sepal.Length ~ log(Sepal.Width) + Petal.Length^2,
iris
)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(150, 150, 150),
ignore_attr = TRUE
)
# logistic model
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(193, 52, 40, 49, 63, 63, 98, 52, 67, 95),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(61, 13, 15, 15, 19, 21, 33, 18, 21, 28),
ignore_attr = TRUE
)
mod <- glm(
Survived ~ Class * Age + Sex,
data = Titanic |> as.data.frame(),
weights = Freq, family = binomial
)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(2201, 285, 706, 885, 2092, 470, 261, 627, 885, 325, 109, 1731),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(711, 118, 178, 212, 654, 344, 94, 151, 212, 203, 57, 367),
ignore_attr = TRUE
)
# cbind() syntax
d <- dplyr::as_tibble(Titanic) |>
dplyr::group_by(Class, Sex, Age) |>
dplyr::summarise(
n_survived = sum(n * (Survived == "Yes")),
n_dead = sum(n * (Survived == "No"))
)
mod <- glm(
cbind(n_survived, n_dead) ~ Class * Age + Sex,
data = d,
family = binomial,
y = FALSE # should work even if y is not returned
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(
res$n_obs,
c(2201, 285, 706, 885, 109, 1731, 24, 79, 0, 325, 2092, 470),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(711, 118, 178, 212, 57, 367, 24, 27, 0, 203, 654, 344),
ignore_attr = TRUE
)
# Poisson without offset
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = poisson)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(183, 183, 58, 60, 94, 29, 33, 65, 89),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(58, 58, 17, 20, 31, 10, 8, 21, 27),
ignore_attr = TRUE
)
expect_equal(
res$exposure,
c(183, 183, 58, 60, 94, 29, 33, 65, 89),
ignore_attr = TRUE
)
# Poisson with offset
mod <- glm(
response ~ trt * grade + offset(log(ttdeath)),
gtsummary::trial,
family = poisson,
weights = rep_len(1:2, 200)
)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(292, 151, 94, 92, 49, 49, 141, 106),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(96, 53, 28, 31, 19, 12, 43, 37),
ignore_attr = TRUE
)
expect_equal(
res$exposure |> round(),
c(5819, 2914, 1826, 1766, 887, 916, 2905, 2227),
ignore_attr = TRUE
)
# interaction only terms
mod <- glm(
Survived ~ Class:Age,
data = Titanic |> as.data.frame(),
weights = Freq, family = binomial
)
res <- mod |> model_get_n()
expect_equal(
res$n_obs,
c(2201, 6, 24, 79, 0, 319, 261, 627, 885),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(711, 6, 24, 27, 0, 197, 94, 151, 212),
ignore_attr = TRUE
)
})
test_that("model_get_n() handles variables having non standard name", {
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- glm(response ~ stage + `grade of kids` + trt, df,
family = binomial,
contrasts = list(`grade of kids` = contr.sum)
)
expect_no_error(
res <- mod |> model_get_n()
)
})
test_that("model_get_n() works with different contrasts", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.treatment, grade = contr.SAS, trt = contr.SAS)
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
if ("stage2" %in% names(coef(mod))) {
expect_equal(
res$term,
c(
"(Intercept)", "stage2", "stage3", "stage4", "grade1", "grade2",
"trt1", "grade1:trt1", "grade2:trt1", "stage1", "grade3", "trt2"
)
)
} else {
expect_equal(
res$term,
c(
"(Intercept)", "stageT2", "stageT3", "stageT4", "gradeI", "gradeII",
"trtDrug A", "gradeI:trtDrug A", "gradeII:trtDrug A", "stageT1",
"gradeIII", "trtDrug B"
)
)
}
expect_equal(
res$n_obs,
c(193, 52, 40, 49, 67, 63, 95, 35, 30, 52, 63, 98),
ignore_attr = TRUE
)
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.poly, grade = contr.helmert, trt = contr.sum)
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
expect_equal(
res$term,
c(
"(Intercept)", "stage.L", "stage.Q", "stage.C", "grade1", "grade2",
"trt1", "grade1:trt1", "grade2:trt1", "trt2"
)
)
expect_equal(
res$n_obs,
c(193, 193, 193, 193, 63, 63, 95, 62, 95, 98),
ignore_attr = TRUE
)
})
test_that("model_get_n() works with stats::poly()", {
skip_on_cran()
mod <- lm(Sepal.Length ~ poly(Sepal.Width, 3) + poly(Petal.Length, 2), iris)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
expect_equal(
res$term,
c(
"(Intercept)", "poly(Sepal.Width, 3)1", "poly(Sepal.Width, 3)2",
"poly(Sepal.Width, 3)3", "poly(Petal.Length, 2)1",
"poly(Petal.Length, 2)2"
)
)
expect_equal(
res$n_obs,
c(150, 150, 150, 150, 150, 150),
ignore_attr = TRUE
)
})
test_that("model_get_n() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
mod <- lme4::lmer(marker ~ stage + grade + (1 | group), df)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
})
test_that("model_get_n() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
df$response <- factor(df$response)
suppressMessages(
mod <- lme4::glmer(response ~ stage + grade + (1 | group), df, family = binomial)
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
})
test_that("model_get_n() works with survival::coxph", {
skip_on_cran()
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(
survival::Surv(time, status) ~ ph.ecog + age + sex,
data = df
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(
names(res),
c("term", "n_obs", "n_ind", "n_event", "exposure")
)
test <- list(
start = c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8),
stop = c(2, 3, 6, 7, 8, 9, 9, 9, 14, 17),
event = c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0),
x = c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0)
)
mod <- survival::coxph(survival::Surv(start, stop, event) ~ x, test)
expect_no_error(res <- mod |> model_get_n())
expect_equal(
names(res),
c("term", "n_obs", "n_ind", "n_event", "exposure")
)
expect_equal(res$n_obs, c(10, 10), ignore_attr = TRUE)
expect_equal(res$n_ind, c(10, 10), ignore_attr = TRUE)
expect_equal(res$n_event, c(7, 7), ignore_attr = TRUE)
expect_equal(res$exposure, c(43, 43), ignore_attr = TRUE)
# specific case when missing values in the `id`
# should not result in a warning
mod <- survival::coxph(
survival::Surv(ttdeath, death) ~ age + grade,
id = response,
data = gtsummary::trial
)
expect_no_warning(mod |> model_get_n())
})
test_that("model_get_n() works with survival::survreg", {
skip_on_cran()
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ factor(ecog.ps) + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(
names(res),
c("term", "n_obs", "n_ind", "n_event", "exposure")
)
})
test_that("model_get_n() works with nnet::multinom", {
skip_if_not_installed("nnet")
skip_on_cran()
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("y.level", "term", "n_obs", "n_event"))
expect_equal(
res$y.level,
c(
"II", "II", "II", "II", "II", "II", "II", "III", "III", "III",
"III", "III", "III", "III"
)
)
expect_equal(
res$n_obs,
c(179, 52, 37, 43, 179, 179, 47, 179, 52, 37, 43, 179, 179, 47),
ignore_attr = TRUE
)
expect_equal(
res$n_event,
c(57, 16, 8, 12, 57, 57, 21, 58, 18, 12, 16, 58, 58, 12),
ignore_attr = TRUE
)
# when y is not coded as a factor
mod <- nnet::multinom(race ~ age + lwt + bwt, data = MASS::birthwt, trace = FALSE)
expect_true(mod |> model_get_n() |> nrow() > 0)
})
test_that("model_get_n() works with survey::svyglm", {
skip_on_cran()
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
mod <- survey::svyglm(response ~ age + grade + offset(log(ttdeath)), df, family = quasipoisson)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event", "exposure"))
df <- survey::svydesign(
~1,
weights = ~Freq,
data = as.data.frame(Titanic) |> dplyr::filter(Freq > 0)
)
mod <- survey::svyglm(Survived ~ Class + Age * Sex, df, family = quasibinomial)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
expect_equal(
res$n_obs,
c(2201, 285, 706, 885, 2092, 470, 425, 325, 109, 1731),
ignore_attr = TRUE
)
})
test_that("model_get_n() works with ordinal::clm", {
skip_on_cran()
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
# note: no nevent computed for ordinal models
})
test_that("model_get_n() works with ordinal::clmm", {
skip_on_cran()
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
})
test_that("model_get_n() works with MASS::polr", {
skip_on_cran()
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
})
test_that("model_get_n() works with geepack::geeglm", {
skip_on_cran()
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs"))
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson(), corstr = "ar1")
)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event", "exposure"))
})
test_that("model_get_n() works with gam::gam", {
skip_on_cran()
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(res <- mod |> model_get_n())
expect_equal(names(res), c("term", "n_obs", "n_event"))
})
test_that("model_get_n() works with lavaan::lavaan", {
skip_on_cran()
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(res <- mod |> model_get_n())
expect_null(res)
expect_null(mod |> model_get_response())
expect_null(mod |> model_get_weights())
expect_null(mod |> model_get_offset())
expect_null(mod |> model_compute_terms_contributions())
})
test_that("model_get_n() works with tidycmprsk::crr", {
skip_on_cran()
skip_if_not_installed("tidycmprsk")
skip_if_not_installed("survival")
mod <- tidycmprsk::crr(
survival::Surv(ttdeath, death_cr) ~ age + grade,
tidycmprsk::trial
)
res <- mod |> tidy_plus_plus()
expect_equal(
res$n_event,
c(52, 16, 15, 21),
ignore_attr = TRUE
)
})
test_that("tidy_add_n() does not duplicates rows with gam model", {
skip_on_cran()
skip_if_not_installed("mgcv")
skip_if_not_installed("gtsummary")
mod <- mgcv::gam(
marker ~ s(age, bs = "ad", k = -1) + grade + ti(age, by = grade, bs = "fs"),
data = gtsummary::trial,
method = "REML",
family = gaussian
)
res <- mod |>
tidy_and_attach(tidy_fun = gtsummary::tidy_gam) |>
tidy_add_n()
expect_equal(nrow(res), 7L)
})
test_that("model_get_n() works with fixest", {
skip_on_cran()
skip_if_not_installed("fixest")
d <- iris
d$bin_out <- sample(c(0, 1), nrow(iris), replace = TRUE)
d$bin_out[1:50] <- 0 # set setosa to constant outcome and therefore dropped
d$bin_out[150] <- NA # add an NA for comparison
labelled::var_label(d$Sepal.Length) <- "test"
mod <- fixest::feglm(
bin_out ~ Sepal.Length | Species,
data = d,
family = binomial(logit)
)
res <- mod |> tidy_plus_plus()
expect_equal(
res$n_obs,
c(99),
ignore_attr = TRUE
)
# check that variable labels are preserved
expect_equal(res$label, "test", ignore_attr = TRUE)
})
broom.helpers/tests/testthat/test-add_contrasts.R 0000644 0001762 0000144 00000023301 14746151341 021770 0 ustar ligges users test_that("tidy_add_contrast() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.treatment", "contr.treatment", "contr.treatment",
"contr.treatment", "contr.treatment", "contr.treatment"
)
)
expect_equal(
res$contrasts_type,
c(
NA, "treatment", "treatment", "treatment", "treatment", "treatment",
"treatment"
)
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.helmert, trt = contr.SAS)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.sum", "contr.sum", "contr.sum", "contr.helmert",
"contr.helmert", "contr.SAS"
)
)
expect_equal(
res$contrasts_type,
c(NA, "sum", "sum", "sum", "helmert", "helmert", "treatment")
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.poly, grade = contr.treatment, trt = matrix(c(-3, 2)))
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.poly", "contr.poly", "contr.poly", "contr.treatment",
"contr.treatment", "custom"
)
)
expect_equal(
res$contrasts_type,
c(NA, "poly", "poly", "poly", "treatment", "treatment", "other")
)
mod <- glm(
response ~ stage + grade + trt + factor(death),
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, 3), grade = contr.treatment(3, 2),
trt = contr.treatment(2, 2), "factor(death)" = matrix(c(-3, 2))
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.treatment(base=3)", "contr.treatment(base=3)", "contr.treatment(base=3)",
"contr.treatment(base=2)", "contr.treatment(base=2)", "contr.SAS",
"custom"
)
)
expect_equal(
res$contrasts_type,
c(
NA, "treatment", "treatment", "treatment", "treatment", "treatment",
"treatment", "other"
)
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = "contr.sum", grade = "contr.helmert", trt = "contr.SAS")
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.sum", "contr.sum", "contr.sum", "contr.helmert",
"contr.helmert", "contr.SAS"
)
)
expect_equal(
res$contrasts_type,
c(NA, "sum", "sum", "sum", "helmert", "helmert", "treatment")
)
skip_if_not_installed("MASS")
library(MASS)
mod <- glm(
response ~ stage + grade + trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.sdif,
grade = contr.sdif(3),
trt = "contr.sdif"
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(NA, "contr.sdif", "contr.sdif", "contr.sdif", "contr.sdif",
"contr.sdif", "contr.sdif")
)
expect_equal(
res$contrasts_type,
c(NA, "sdif", "sdif", "sdif", "sdif", "sdif", "sdif")
)
})
test_that("test tidy_add_contrasts() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_contrasts())
# could be apply twice (no error)
expect_error(
mod |> tidy_and_attach() |> tidy_add_contrasts() |> tidy_add_contrasts(),
NA
)
})
test_that("tidy_add_contrasts() works with no intercept models", {
mod <- glm(response ~ stage + grade - 1, data = gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts_type,
c(
"no.contrast", "no.contrast", "no.contrast", "no.contrast",
"treatment", "treatment"
)
)
})
test_that("tidy_add_contrasts() works with variables having non standard name", {
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- glm(response ~ stage + `grade of kids` + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.treatment", "contr.treatment", "contr.treatment",
"contr.treatment", "contr.treatment", "contr.treatment"
)
)
mod <- glm(response ~ stage + `grade of kids` + trt, df,
family = binomial,
contrasts = list(`grade of kids` = contr.helmert)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.treatment", "contr.treatment", "contr.treatment",
"contr.helmert", "contr.helmert", "contr.treatment"
)
)
})
test_that("tidy_add_contrasts() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
mod <- lme4::lmer(marker ~ stage + grade + (1 | group), df)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
suppressMessages(
mod <- lme4::glmer(response ~ stage + grade + (1 | group), df, family = binomial)
)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with survival::coxph", {
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with survival::survreg", {
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ factor(ecog.ps) + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with nnet::multinom", {
skip_if_not_installed("nnet")
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.sum)
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
res <- mod |>
tidy_and_attach() |>
tidy_add_contrasts()
expect_equal(
res$contrasts,
c(
NA, "contr.sum", "contr.sum", "contr.sum", NA, NA, NA, "contr.sum",
"contr.sum", "contr.sum", NA, NA
)
)
})
test_that("tidy_add_contrasts() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("tidy_add_contrasts() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_contrasts())
})
test_that("model_get_contrasts() works with rstanarm::stan_glm", {
skip_on_cran()
skip_if_not_installed("broom.mixed")
skip_if_not_installed("rstanarm")
mod <- rstanarm::stan_glm(
response ~ age + grade,
data = gtsummary::trial,
refresh = 0,
family = binomial
)
expect_false(
is.null(mod |> model_get_contrasts())
)
})
broom.helpers/tests/testthat/test-disambiguate_terms.R 0000644 0001762 0000144 00000004065 14737437002 023017 0 ustar ligges users test_that("tidy_disambiguate_terms() changes nothing for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
# no change by default
res2 <- res |> tidy_disambiguate_terms()
expect_equal(res, res2)
expect_false("original_term" %in% names(res2))
})
test_that("tidy_disambiguate_terms() works for mixed models", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
skip_if_not_installed("broom.mixed")
res <- mod |>
tidy_and_attach() |>
tidy_disambiguate_terms(sep = ".")
expect_equal(
res$term,
c(
"(Intercept)", "Days", "Subject.sd__(Intercept)",
"Subject.cor__(Intercept).Days",
"Subject.sd__Days", "Residual.sd__Observation"
)
)
expect_true("original_term" %in% names(res))
res <- mod |>
tidy_and_attach() |>
tidy_disambiguate_terms(sep = "_")
expect_equal(
res$term,
c(
"(Intercept)", "Days", "Subject_sd__(Intercept)",
"Subject_cor__(Intercept).Days",
"Subject_sd__Days", "Residual_sd__Observation"
)
)
})
test_that("test tidy_disambiguate_terms() checks", {
skip_on_cran()
skip_if_not_installed("lme4")
skip_if_not_installed("broom.mixed")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
# expect an error if no model attached
expect_error(mod |> broom.mixed::tidy() |> tidy_disambiguate_terms())
# could be apply twice (no error but a message)
expect_no_error(
mod |>
tidy_and_attach() |>
tidy_disambiguate_terms() |>
tidy_disambiguate_terms()
)
expect_message(
mod |>
tidy_and_attach(tidy_fun = broom::tidy) |>
tidy_disambiguate_terms() |>
tidy_disambiguate_terms()
)
expect_no_message(
mod |>
tidy_and_attach(tidy_fun = broom::tidy) |>
tidy_disambiguate_terms() |>
tidy_disambiguate_terms(quiet = TRUE)
)
})
broom.helpers/tests/testthat/test-add_coefficients_type.R 0000644 0001762 0000144 00000025306 14746151343 023463 0 ustar ligges users library(survival)
library(gtsummary)
test_that("tidy_add_coefficients_type() works for common models", {
mod <- lm(Sepal.Length ~ Sepal.Width, iris)
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "generic")
expect_equal(attr(res, "coefficients_label"), "Beta")
mod <- glm(Sepal.Length ~ Sepal.Width, iris, family = gaussian)
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "generic")
expect_equal(attr(res, "coefficients_label"), "exp(Beta)")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "logistic")
expect_equal(attr(res, "coefficients_label"), "log(OR)")
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "logistic")
expect_equal(attr(res, "coefficients_label"), "OR")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = binomial(probit))
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "generic")
expect_equal(attr(res, "coefficients_label"), "Beta")
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "generic")
expect_equal(attr(res, "coefficients_label"), "exp(Beta)")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = binomial(log))
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "relative_risk")
expect_equal(attr(res, "coefficients_label"), "log(RR)")
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "relative_risk")
expect_equal(attr(res, "coefficients_label"), "RR")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = binomial(cloglog))
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "prop_hazard")
expect_equal(attr(res, "coefficients_label"), "HR")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = poisson)
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "poisson")
expect_equal(attr(res, "coefficients_label"), "log(IRR)")
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "poisson")
expect_equal(attr(res, "coefficients_label"), "IRR")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = poisson("identity"))
res <- mod |>
tidy_and_attach(conf.int = FALSE) |>
tidy_add_coefficients_type()
expect_equal(attr(res, "coefficients_type"), "generic")
expect_equal(attr(res, "coefficients_label"), "Beta")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = quasipoisson)
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type(exponentiate = TRUE)
expect_equal(attr(res, "coefficients_type"), "poisson")
expect_equal(attr(res, "coefficients_label"), "IRR")
mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = quasibinomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_coefficients_type(exponentiate = TRUE)
expect_equal(attr(res, "coefficients_type"), "logistic")
expect_equal(attr(res, "coefficients_label"), "OR")
})
test_that("test tidy_add_coefficients_type() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_coefficients_type(exponentiate = TRUE))
# expect an error if no value for exponentiate
expect_error(mod |> tidy_and_attach() |> tidy_add_coefficients_type(exponentiate = NULL))
expect_error(mod |> broom::tidy() |> tidy_attach_model(mod) |> tidy_add_coefficients_type())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_coefficients_type() |> tidy_add_coefficients_type()
)
})
test_that("model_get_coefficients_type() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
})
test_that("model_identify_variables() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial("probit"), data = lme4::cbpp
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial("log"), data = lme4::cbpp
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "relative_risk")
mod <- lme4::glmer(response ~ trt + (1 | grade), gtsummary::trial, family = poisson)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "poisson")
})
test_that("model_get_coefficients_type() works with survival::coxph", {
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "prop_hazard")
})
test_that("model_get_coefficients_type() works with survival::survreg", {
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ ecog.ps + rx,
survival::ovarian,
dist = "exponential"
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
})
test_that("model_get_coefficients_type() works with survival::clogit", {
skip_if_not_installed("survival")
resp <- levels(survival::logan$occupation)
n <- nrow(survival::logan)
indx <- rep(1:n, length(resp))
logan2 <- data.frame(survival::logan[indx, ],
id = indx,
tocc = factor(rep(resp, each = n))
)
logan2$case <- (logan2$occupation == logan2$tocc)
mod <- survival::clogit(case ~ tocc + tocc:education + strata(id), logan2)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with nnet::multinom", {
skip_if_not_installed("nnet")
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with survey::svycoxph", {
skip_if_not_installed("survey")
skip_if_not_installed("survival")
dpbc <- survey::svydesign(id = ~1, prob = ~1, strata = ~edema, data = survival::pbc)
mod <- survey::svycoxph(
Surv(time, status > 0) ~ log(bili) + protime + albumin,
design = dpbc
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "prop_hazard")
})
test_that("tidy_plus_plus() works with survey::svyolr", {
skip_if_not_installed("survey")
skip_if_not_installed("survival")
data(api, package = "survey")
fpc <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
fpc <- update(fpc, mealcat = cut(meals, c(0, 25, 50, 75, 100)))
mod <- survey::svyolr(mealcat ~ avg.ed + mobility + stype, design = fpc)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
})
test_that("model_get_coefficients_type() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
mod <- MASS::polr(
Sat ~ Infl + Type + Cont,
weights = Freq,
data = MASS::housing,
method = "probit"
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
})
test_that("model_get_coefficients_type() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("log"), corstr = "ar1")
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "poisson")
})
test_that("model_get_coefficients_type() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "logistic")
mod <- suppressWarnings(gam::gam(
Ozone^(1 / 3) ~ gam::lo(Solar.R) + gam::lo(Wind, Temp),
data = datasets::airquality, na = gam::na.gam.replace
))
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
})
test_that("model_get_coefficients_type() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
res <- mod |> model_get_coefficients_type()
expect_equal(res, "generic")
})
broom.helpers/tests/testthat/test-tidy_plus_plus.R 0000644 0001762 0000144 00000065730 15062265365 022237 0 ustar ligges users test_that("tidy_plus_plus() works for basic models", {
mod <- lm(Petal.Length ~ Petal.Width, iris)
expect_no_error(
mod |> tidy_plus_plus()
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
expect_no_error(
mod |> tidy_plus_plus(add_header_rows = TRUE, include = c(stage, grade))
)
# combining custom variable labels with categorical_terms_pattern
# check that the custom variable labels are passed to model_list_terms_levels()
res <- mod |>
tidy_plus_plus(
variable_labels = c(grade = "custom"),
add_reference_rows = FALSE,
categorical_terms_pattern = "{var_label}:{level}/{reference_level}"
)
expect_equal(
res$label,
c(
"T Stage:T2/T1", "T Stage:T3/T1", "T Stage:T4/T1", "custom:II/I",
"custom:III/I", "Chemotherapy Treatment:Drug B/Drug A"
),
ignore_attr = TRUE
)
# works with add_n
res <- mod |> tidy_plus_plus(add_n = TRUE)
expect_true(all(c("n_obs", "n_event") %in% names(res)))
})
test_that("tidy_plus_plus() works with no intercept models", {
mod <- glm(response ~ stage + grade - 1, data = gtsummary::trial, family = binomial)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(
res$variable,
c("stage", "stage", "stage", "stage", "grade", "grade", "grade")
)
expect_equal(
res$label,
c("T1", "T2", "T3", "T4", "I", "II", "III"),
ignore_attr = TRUE
)
expect_equal(
res$contrasts_type,
c(
"no.contrast", "no.contrast", "no.contrast", "no.contrast",
"treatment", "treatment", "treatment"
)
)
})
test_that("tidy_plus_plus() and functionnal programming", {
skip_on_cran()
# works with glm
expect_no_error(
res <- dplyr::tibble(grade = c("I", "II", "III")) |>
dplyr::mutate(
df_model = purrr::map(grade, ~ gtsummary::trial |> dplyr::filter(grade == ..1)),
mv_formula_char = "response ~ trt + age + marker",
mv_formula = purrr::map(mv_formula_char, ~ as.formula(.x)),
mv_model_form =
purrr::map2(
mv_formula, df_model,
~ glm(..1, data = ..2)
),
mv_tbl_form =
purrr::map(
mv_model_form,
~ tidy_plus_plus(..1, exponentiate = TRUE, add_header_rows = TRUE)
)
)
)
# for coxph, identification of variables will not work
# will display a message
# but a result should be returned
skip_if_not_installed("survival")
expect_message(
suppressWarnings(
res <- dplyr::tibble(grade = c("I", "II", "III")) |>
dplyr::mutate(
df_model = purrr::map(grade, ~ gtsummary::trial |> dplyr::filter(grade == ..1)),
mv_formula_char = "survival::Surv(ttdeath, death) ~ trt + age + marker",
mv_formula = purrr::map(mv_formula_char, ~ as.formula(.x)),
mv_model_form =
purrr::map2(
mv_formula, df_model,
~ survival::coxph(..1, data = ..2)
),
mv_tbl_form =
purrr::map(
mv_model_form,
~ tidy_plus_plus(..1, exponentiate = TRUE)
)
)
)
)
})
test_that("tidy_plus_plus() with mice objects", {
skip_on_cran()
skip_if(packageVersion("mice") < "3.12.0")
# impute missing values
imputed_trial <-
suppressWarnings(mice::mice(gtsummary::trial, maxit = 2, m = 2, print = FALSE))
# build regression model
mod <- with(imputed_trial, lm(age ~ marker + grade))
# testing pre-pooled results
expect_no_error(
tidy_plus_plus(
mod,
exponentiate = FALSE,
tidy_fun = function(x, ...) mice::pool(x) |> mice::tidy(...)
)
)
})
test_that("tidy_plus_plus() with tidyselect", {
skip_on_cran()
# build regression model
mod <- lm(age ~ trt + marker + grade, gtsummary::trial)
expect_no_error(
tidy_plus_plus(
mod,
add_header_rows = TRUE,
show_single_row = trt,
no_reference_row = grade
)
)
expect_equal(
tidy_plus_plus(
mod,
add_header_rows = TRUE,
show_single_row = "trt",
no_reference_row = "grade"
),
tidy_plus_plus(
mod,
add_header_rows = TRUE,
show_single_row = trt,
no_reference_row = grade
)
)
})
test_that("tidy_plus_plus() works with stats::aov", {
skip_on_cran()
mod <- aov(yield ~ block + N * P * K, npk)
expect_no_error(
res <- tidy_plus_plus(mod)
)
expect_equal(
res$variable,
c("block", "N", "P", "K", "N:P", "N:K", "P:K")
)
})
test_that("tidy_plus_plus() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
skip_if_not_installed("broom.mixed")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_no_error(
res <- mod |> tidy_plus_plus(tidy_fun = tidy_parameters)
)
})
test_that("tidy_plus_plus() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
skip_if_not_installed("broom.mixed")
expect_no_error(
res <- mod |> tidy_plus_plus()
)
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial("probit"), data = lme4::cbpp
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with lme4::glmer.nb", {
skip_on_cran()
skip_if_not_installed("lme4")
skip_if_not_installed("MASS")
library(lme4)
suppressMessages(
mod <- lme4::glmer.nb(Days ~ Age + Eth + (1 | Sex), data = MASS::quine)
)
skip_if_not_installed("broom.mixed")
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with survival::coxph", {
skip_on_cran()
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with survival::survreg", {
skip_on_cran()
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ ecog.ps + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with survival::clogit", {
skip_on_cran()
skip_if_not_installed("survival")
library(survival)
resp <- levels(survival::logan$occupation)
n <- nrow(survival::logan)
indx <- rep(1:n, length(resp))
logan2 <- data.frame(survival::logan[indx, ],
id = indx,
tocc = factor(rep(resp, each = n))
)
logan2$case <- (logan2$occupation == logan2$tocc)
mod <- survival::clogit(case ~ tocc + tocc:education + strata(id), logan2)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with nnet::multinom", {
skip_on_cran()
suppressMessages(
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial,
trace = FALSE
)
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(
res$y.level,
c(
"II", "II", "II", "II", "II", "II",
"III", "III", "III", "III", "III", "III"
)
)
expect_equal(
res$term,
c(
"stageT1", "stageT2", "stageT3", "stageT4", "marker", "age",
"stageT1", "stageT2", "stageT3", "stageT4", "marker", "age"
)
)
# multinom model with binary outcome
suppressMessages(
mod <- nnet::multinom(
response ~ stage + marker + age,
data = gtsummary::trial,
trace = FALSE
)
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with survey::svyglm", {
skip_on_cran()
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
df_rep <- survey::as.svrepdesign(df)
mod_rep <- survey::svyglm(
response ~ age + grade * trt,
df_rep,
family = quasibinomial
)
expect_no_error(
res <- mod_rep |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with survey::svycoxph", {
skip_on_cran()
skip_if_not_installed("survey")
skip_if_not_installed("labelled")
skip_if_not_installed("survival")
d <- survival::pbc
labelled::var_label(d$albumin) <- "Custom label"
dpbc <- survey::svydesign(id = ~1, prob = ~1, strata = ~edema, data = d)
mod <- survey::svycoxph(
Surv(time, status > 0) ~ log(bili) + protime + albumin,
design = dpbc
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(
res[res$term == "albumin", "var_label"][[1]][1],
"Custom label",
ignore_attr = TRUE
)
})
test_that("tidy_plus_plus() works with survey::svyolr", {
skip_on_cran()
skip_if_not_installed("survey")
data(api, package = "survey")
fpc <- survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc)
fpc <- update(fpc, mealcat = cut(meals, c(0, 25, 50, 75, 100)))
mod <- survey::svyolr(mealcat ~ avg.ed + mobility + stype, design = fpc)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with ordinal::clm", {
skip_on_cran()
skip_if_not_installed("ordinal")
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with ordinal::clmm", {
skip_on_cran()
skip_if_not_installed("ordinal")
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with MASS::polr", {
skip_on_cran()
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with MASS::glm.nb", {
skip_on_cran()
mod <- MASS::glm.nb(Days ~ Sex / (Age + Eth * Lrn), data = MASS::quine)
expect_no_error(
suppressWarnings(res <- mod |> tidy_plus_plus())
)
})
test_that("tidy_plus_plus() works with geepack::geeglm", {
skip_on_cran()
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("log"), corstr = "ar1")
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with gam::gam", {
skip_on_cran()
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with brms::brm", {
skip_on_cran()
skip_if_not_installed("broom.mixed")
skip_if_not_installed("brms")
skip_if(packageVersion("brms") < "2.13")
skip_if_not_installed("rstanarm")
load(system.file("extdata", "brms_example.rda", package = "broom.mixed"))
mod <- brms_crossedRE
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with rstanarm::stan_glm", {
skip_on_cran()
skip_if_not_installed("broom.mixed")
skip_if_not_installed("rstanarm")
mod <- rstanarm::stan_glm(
response ~ age + grade,
data = gtsummary::trial,
refresh = 0,
family = binomial
)
expect_no_error(
res <- mod |> tidy_plus_plus(tidy_fun = broom.mixed::tidy)
)
})
test_that("tidy_plus_plus() works with cmprsk::crr", {
skip_on_cran()
skip_if_not_installed("cmprsk")
skip_if(packageVersion("broom") < "0.7.4")
ftime <- rexp(200)
fstatus <- sample(0:2, 200, replace = TRUE)
cov <- matrix(runif(600), nrow = 200)
dimnames(cov)[[2]] <- c("x1", "x2", "x3")
mod <- cmprsk::crr(ftime, fstatus, cov)
expect_no_error(
res <- mod |> tidy_plus_plus(quiet = TRUE)
)
})
test_that("tidy_plus_plus() works with tidycmprsk::crr", {
skip_on_cran()
skip_if_not_installed("tidycmprsk")
mod <- tidycmprsk::crr(Surv(ttdeath, death_cr) ~ age + grade, tidycmprsk::trial)
expect_no_error(
res <- mod |> tidy_plus_plus(quiet = TRUE)
)
})
test_that("tidy_plus_plus() works with stats::nls", {
skip_on_cran()
mod <- stats::nls(
Petal.Width ~ a * Petal.Length - (Sepal.Width + Sepal.Length) / b + a^2,
data = iris,
start = list(a = 1, b = 1)
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with lavaan::lavaan", {
skip_on_cran()
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with lfe::felm", {
skip_on_cran()
skip_if_not_installed("lfe")
mod <- lfe::felm(marker ~ age + grade | stage | 0, gtsummary::trial)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() error messaging", {
# does not allow for exponentiate, conf.inf, conf.level arguments
bad_tidy <- function(x) {
broom::tidy
}
expect_error(
lm(mpg ~ cyl, mtcars) |>
tidy_plus_plus(tidy_fun = bad_tidy)
)
})
test_that("tidy_plus_plus() works with mgcv::gam", {
skip_on_cran()
skip_if_not_installed("mgcv")
tidy_gam <- function(x, conf.int = FALSE, exponentiate = FALSE, ...) {
broom::tidy(x,
conf.int = conf.int,
exponentiate = exponentiate,
parametric = TRUE, ...
) |>
dplyr::mutate(parametric = TRUE) |>
dplyr::bind_rows(
broom::tidy(x, parametric = FALSE, ...) |>
dplyr::mutate(parametric = FALSE)
) |>
dplyr::relocate(parametric, .after = dplyr::last_col())
}
gam_logistic <- mgcv::gam(
response ~ s(marker, ttdeath) + grade + age,
data = gtsummary::trial,
family = binomial
)
gam_linear <- mgcv::gam(response ~ s(marker, ttdeath) + grade, data = gtsummary::trial)
gam_smooth_only <- mgcv::gam(response ~ s(marker, ttdeath), data = gtsummary::trial)
gam_param_only <- mgcv::gam(response ~ grade, data = gtsummary::trial)
expect_no_error(tbl_gam_logistic <- gam_logistic |> tidy_plus_plus(tidy_fun = tidy_gam))
expect_no_error(gam_logistic |> tidy_plus_plus())
expect_no_error(tbl_gam_linear <- gam_linear |> tidy_plus_plus(tidy_fun = tidy_gam))
expect_no_error(gam_linear |> tidy_plus_plus())
expect_no_error(tbl_gam_smooth_only <- gam_smooth_only |> tidy_plus_plus(tidy_fun = tidy_gam))
expect_no_error(gam_smooth_only |> tidy_plus_plus())
expect_no_error(tbl_gam_param_only <- gam_param_only |> tidy_plus_plus(tidy_fun = tidy_gam))
# the default tidier return a df with no columns and no rows...it fails.
})
test_that("tidy_plus_plus() works with VGAM::vglm", {
skip_on_cran()
skip_if_not_installed("VGAM")
skip_if_not_installed("parameters")
library(VGAM)
df <- data.frame(
treatment = gl(3, 3),
outcome = gl(3, 1, 9),
counts = c(18, 17, 15, 20, 10, 20, 25, 13, 12)
)
mod <- VGAM::vglm(
counts ~ outcome + treatment,
family = VGAM::poissonff,
data = df,
trace = FALSE
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
# multinomial
mod <- vglm(stage ~ grade + age, multinomial, data = gtsummary::trial)
expect_no_error(
res <- mod |> tidy_plus_plus(exponentiate = TRUE)
)
expect_true("y.level" %in% colnames(res))
mod <- vglm(
stage ~ grade + age,
multinomial(parallel = TRUE),
data = gtsummary::trial
)
expect_no_error(
res <- mod |> tidy_plus_plus(exponentiate = TRUE)
)
d <- gtsummary::trial
d$grade <- ordered(d$grade)
mod <- vglm(
grade ~ stage + age,
cumulative(),
data = d
)
expect_no_error(
res <- mod |> tidy_plus_plus(exponentiate = TRUE)
)
expect_true("component" %in% colnames(res))
})
test_that("tidy_plus_plus() works with svyVGAM::svy_vglm", {
skip_on_cran()
skip_if_not_installed("svyVGAM")
skip_if_not_installed("parameters")
skip_if_not_installed("survey")
library(svyVGAM)
mod <- svy_vglm(
stage ~ grade + age,
VGAM::multinomial(),
design = survey::svydesign(~ 1, data = gtsummary::trial, weights = ~ 1)
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_true("y.level" %in% colnames(res))
})
test_that("tidy_plus_plus() works with plm::plm", {
skip_on_cran()
skip_if_not_installed("plm")
data("Grunfeld", package = "plm")
mod <- plm::plm(
inv ~ value + capital,
data = Grunfeld,
model = "within",
index = c("firm", "year")
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with biglm::bigglm", {
skip_on_cran()
skip_if_not_installed("biglm")
skip_if(compareVersion(as.character(getRversion()), "3.6") < 0)
mod <- biglm::bigglm(
response ~ age + trt,
data = as.data.frame(gtsummary::trial),
family = binomial()
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
# check that reference rows are properly added
expect_equal(
res |> dplyr::filter(variable == "trt") |> purrr::pluck("reference_row"),
c(TRUE, FALSE)
)
})
test_that("tidy_plus_plus() works with parsnip::model_fit object", {
skip_on_cran()
skip_if_not_installed("parsnip")
d <- gtsummary::trial
d$response <- as.factor(d$response)
mod1 <- glm(response ~ stage + grade + trt, d, family = binomial)
mod2 <- parsnip::logistic_reg() |>
parsnip::set_engine("glm") |>
parsnip::fit(response ~ stage + grade + trt, data = d)
res1 <- mod1 |> tidy_plus_plus(exponentiate = TRUE)
expect_no_error(
res2 <- mod2 |> tidy_plus_plus(exponentiate = TRUE)
)
expect_equal(res1, res2)
})
test_that("tidy_plus_plus() works with fixest models", {
skip_on_cran()
skip_if_not_installed("fixest")
skip_if(compareVersion(as.character(getRversion()), "4.1") < 0)
data("mtcars")
mod <- fixest::feols(fml = mpg ~ am + factor(carb), data = mtcars)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
data("iris")
mod <- fixest::feglm(
Sepal.Length ~ Sepal.Width + Petal.Length | Species,
iris,
"poisson"
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
mod <- fixest::feols(mpg ~ disp | cyl | wt ~ qsec, data = mtcars)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(nrow(res), 2L)
expect_true(res$instrumental[res$term == "wt"])
res <- mod |> tidy_plus_plus(instrumental_suffix = NULL)
expect_equal(res$var_label[res$term == "wt"], "wt", ignore_attr = TRUE)
res <- mod |> tidy_plus_plus(instrumental_suffix = " (IV)")
expect_equal(res$var_label[res$term == "wt"], "wt (IV)", ignore_attr = TRUE)
mod <- fixest::feols(mpg ~ cyl, data = mtcars[mtcars$carb != 1, ])
expect_no_error(
res <- mod |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with logitr models", {
skip_on_cran()
skip_if_not(.assert_package("logitr", boolean = TRUE))
mod <- logitr::logitr(
data = logitr::yogurt |> head(1000),
outcome = "choice",
obsID = "obsID",
pars = c("feat", "brand"),
scalePar = "price",
randScale = "n",
numMultiStarts = 1
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_true("scalePar" %in% res$variable)
})
test_that("tidy_plus_plus() works with multgee models", {
skip_on_cran()
skip_if_not_installed("multgee")
skip_if_not_installed("parameters")
library(multgee)
h <- housing
h$status <- factor(
h$y,
labels = c("street", "community", "independant")
)
mod <- multgee::nomLORgee(
status ~ factor(time) * sec,
data = h,
id = id,
repeated = time
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(
res$y.level,
c(
"street", "street", "street", "street", "street", "street",
"street", "street", "community", "community", "community", "community",
"community", "community", "community", "community"
)
)
expect_equal(
res$term,
c(
"factor(time)0", "factor(time)6", "factor(time)12", "factor(time)24",
"sec", "factor(time)6:sec", "factor(time)12:sec", "factor(time)24:sec",
"factor(time)0", "factor(time)6", "factor(time)12", "factor(time)24",
"sec", "factor(time)6:sec", "factor(time)12:sec", "factor(time)24:sec"
)
)
mod2 <- ordLORgee(
formula = y ~ factor(time) + factor(trt) + factor(baseline),
data = multgee::arthritis,
id = id,
repeated = time,
LORstr = "uniform"
)
expect_no_error(
res <- mod2 |> tidy_plus_plus()
)
})
test_that("tidy_plus_plus() works with pscl::zeroinfl() & hurdle() models", {
skip_on_cran()
skip_if_not_installed("pscl")
skip_if_not_installed("parameters")
library(pscl)
data("bioChemists", package = "pscl")
m1 <- zeroinfl(art ~ fem + mar + phd | fem + mar + phd, data = bioChemists)
m2 <- zeroinfl(art ~ fem + mar + phd | 1, data = bioChemists, dist = "negbin")
m3 <- zeroinfl(art ~ fem + mar + phd | fem, data = bioChemists)
m4 <- hurdle(art ~ fem + mar + phd | fem, data = bioChemists)
expect_message(
res <- m1 |> tidy_plus_plus()
)
expect_message(
res <- m4 |> tidy_plus_plus()
)
expect_no_error(
res <- m1 |> tidy_plus_plus(exponentiate = TRUE, tidy_fun = tidy_zeroinfl)
)
expect_equal(nrow(res), 10)
expect_no_error(
res <- m1 |> tidy_plus_plus(intercept = TRUE, tidy_fun = tidy_zeroinfl)
)
expect_equal(nrow(res), 12)
expect_no_error(
res <- m2 |> tidy_plus_plus(intercept = TRUE, tidy_fun = tidy_zeroinfl)
)
expect_equal(nrow(res), 7)
expect_no_error(
res <- m3 |> tidy_plus_plus(intercept = TRUE, tidy_fun = tidy_zeroinfl)
)
expect_equal(nrow(res), 9)
expect_no_error(
res <- m4 |> tidy_plus_plus(intercept = TRUE, tidy_fun = tidy_zeroinfl)
)
expect_equal(nrow(res), 9)
expect_error(
m3 |> tidy_plus_plus(add_pairwise_contrasts = TRUE)
)
expect_error(
m4 |> tidy_plus_plus(add_pairwise_contrasts = TRUE)
)
})
test_that("tidy_plus_plus() works with betareg::betareg() models", {
skip_on_cran()
skip_if_not_installed("betareg")
skip_if_not_installed("parameters")
library(betareg)
data("GasolineYield", package = "betareg")
m1 <- betareg(yield ~ batch + temp, data = GasolineYield)
m2 <- betareg(yield ~ batch + temp | temp + pressure, data = GasolineYield)
m3 <- betareg(yield ~ temp | temp + batch, data = GasolineYield)
m4 <- betareg(yield ~ temp + batch | temp + batch, data = GasolineYield)
expect_no_error(
res <- m1 |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 13)
expect_no_error(
res <- m1 |> tidy_plus_plus(exponentiate = TRUE)
)
expect_equal(nrow(res), 11)
expect_no_error(
res <- m1 |> tidy_plus_plus(add_header_rows = TRUE)
)
expect_equal(nrow(res), 12)
expect_no_error(
res <- m2 |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 15)
expect_no_error(
res <- m2 |> tidy_plus_plus(exponentiate = TRUE)
)
expect_equal(nrow(res), 13)
expect_no_error(
res <- m2 |> tidy_plus_plus(component = "conditional")
)
expect_equal(nrow(res), 11)
expect_no_error(
res <- m2 |> tidy_plus_plus(add_header_rows = TRUE)
)
expect_equal(nrow(res), 14)
expect_no_error(
res <- m3 |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 14)
expect_no_error(
res <- m3 |> tidy_plus_plus(exponentiate = TRUE)
)
expect_equal(nrow(res), 12)
expect_no_error(
res <- m3 |> tidy_plus_plus(component = "mean")
)
expect_equal(nrow(res), 1)
expect_error(
m3 |> tidy_plus_plus(add_pairwise_contrasts = TRUE)
)
expect_no_error(
res <- m4 |> tidy_plus_plus(add_header_rows = TRUE)
)
expect_equal(nrow(res), 24)
})
test_that("tidy_plus_plus() works with mmrm::mmrm() models", {
skip_on_cran()
skip_if_not_installed("mmrm")
m1 <- mmrm::mmrm(FEV1 ~ SEX + ARMCD + AVISIT + us(AVISIT | USUBJID), data = mmrm::fev_data)
m2 <- mmrm::mmrm(FEV1 ~ SEX + ARMCD * AVISIT + us(AVISIT | USUBJID), data = mmrm::fev_data)
expect_no_error(
res <- m1 |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 9)
expect_no_error(
res <- m1 |> tidy_plus_plus(add_header_rows = TRUE)
)
expect_equal(nrow(res), 11)
expect_no_error(
res <- m2 |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 12)
expect_no_error(
res <- m2 |> tidy_plus_plus(add_header_rows = TRUE)
)
expect_equal(nrow(res), 15)
})
test_that("tidy_post_fun argument of `tidy_plus_plus()`", {
mod <- lm(Petal.Length ~ Petal.Width + Species, iris)
add_titi <- function(x) {
x$titi <- "titi"
x
}
expect_no_error(
res <- tidy_plus_plus(mod, tidy_post_fun = add_titi)
)
expect_true("titi" %in% names(res))
expect_true(res$titi[1] == "titi")
keep_2_rows <- function(res) {
head(res, n = 2)
}
expect_no_error(
res <- tidy_plus_plus(mod, tidy_post_fun = keep_2_rows)
)
expect_equal(nrow(res), 2L)
})
# test for survival::cch() not working, model.frame() not working
# in the test_that environment for this type of model
test_that("tidy_plus_plus() works with glmtoolbox::glmgee() models", {
skip_on_cran()
skip_if_not_installed("glmtoolbox")
data("spruces", package = "glmtoolbox")
mod <- glmtoolbox::glmgee(
size ~ poly(days, 4) + treat,
id = tree,
family = Gamma(log),
corstr = "AR-M-dependent(1)",
data = spruces
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(nrow(res), 6)
})
test_that("tidy_plus_plus() works with quantreg::rq() models", {
skip_on_cran()
skip_if_not_installed("quantreg")
data("stackloss", package = "datasets")
mod <- quantreg::rq(
stack.loss ~ stack.x,
tau = .5
)
expect_no_error(
res <- mod |> tidy_plus_plus()
)
expect_equal(nrow(res), 3)
mod <- quantreg::rq(
stack.loss ~ stack.x,
tau = 1:3 / 4
)
expect_no_error(
res <- mod |> tidy_plus_plus(intercept = TRUE)
)
expect_equal(nrow(res), 12)
})
broom.helpers/tests/testthat/test-helpers.R 0000644 0001762 0000144 00000000554 14737437002 020610 0 ustar ligges users test_that(".update_vector()", {
# y vector must be named
expect_error(
.update_vector(letters, LETTERS)
)
expect_error(
.update_vector(
c(a = 2, b = 3),
c(a = 1, d = 5, 4)
)
)
})
test_that(".superscript_numbers ()", {
# works with non character vector
expect_no_error(
.superscript_numbers(1:4)
)
})
broom.helpers/tests/testthat/test-select_variables.R 0000644 0001762 0000144 00000004625 14737437002 022460 0 ustar ligges users test_that("tidy_select_variables() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
# no change by default
res2 <- res |> tidy_select_variables()
expect_equal(res, res2)
# include
res2 <- res |> tidy_select_variables(include = "stage")
expect_equal(
res2$variable,
c("(Intercept)", "stage", "stage", "stage")
)
res2 <- res |> tidy_select_variables(include = c("grade", "trt"))
expect_equal(
res2$variable,
c("(Intercept)", "grade", "grade", "trt")
)
res2 <- res |> tidy_select_variables(include = c("trt", "grade"))
expect_equal(
res2$variable,
c("(Intercept)", "trt", "grade", "grade")
)
res2 <- res |> tidy_select_variables(include = c(trt, grade, dplyr::everything()))
expect_equal(
res2$variable,
c("(Intercept)", "trt", "grade", "grade", "stage", "stage", "stage")
)
# select and de-select
expect_equal(
res |> tidy_select_variables(include = stage),
res |> tidy_select_variables(include = -c(grade, trt))
)
# tidyselect fns
expect_equal(
res |> tidy_select_variables(include = contains("tage")),
res |> tidy_select_variables(include = stage)
)
# no error when none selected
expect_no_error(
res |> tidy_select_variables(include = starts_with("zzzzzzz"))
)
expect_no_error(
res |> tidy_select_variables(include = -everything())
)
expect_no_error(
res |> tidy_select_variables(include = where(is.character))
)
# interaction
mod <- glm(response ~ stage + grade * trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
res2 <- res |> tidy_select_variables(include = c(trt, grade, dplyr::everything()))
expect_equal(
res2$variable,
c(
"(Intercept)", "trt", "grade", "grade", "stage", "stage", "stage",
"grade:trt", "grade:trt"
)
)
})
test_that("test tidy_select_variables() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_select_variables())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_select_variables() |> tidy_select_variables()
)
})
broom.helpers/tests/testthat/test-get_response_variable.R 0000644 0001762 0000144 00000001100 14657100641 023471 0 ustar ligges users test_that("model_get_response_variable() works for basic models", {
mod <- lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars)
expect_equal(
mod |> model_get_response_variable(),
"hp"
)
mod <- glm(
Survived ~ Class + Age + Sex,
data = Titanic |> as.data.frame(),
weights = Freq,
family = binomial
)
expect_equal(
mod |> model_get_response_variable(),
"Survived"
)
mod <- lm(Petal.Length ~ Petal.Width + Species, data = iris)
expect_equal(
mod |> model_get_response_variable(),
"Petal.Length"
)
})
broom.helpers/tests/testthat/test-add_estimate_to_reference_rows.R 0000644 0001762 0000144 00000031645 14746151320 025364 0 ustar ligges users test_that("tidy_add_estimate_to_reference_rows() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(0, 0, 0)
)
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(1, 1, 1)
)
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, base = 3),
grade = contr.treatment(3, base = 2),
trt = contr.SAS
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(0, 0, 0)
)
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(1, 1, 1)
)
skip_if_not_installed("emmeans")
mod <- glm(response ~ stage + grade + trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.sum, trt = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
# should be -1 * sum of other coefficients when sum contrasts
expect_equal(
res$estimate[res$reference_row & res$variable == "stage" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "stage"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "grade" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "grade"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "trt" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "trt"], na.rm = TRUE) * -1
)
# p-values and confidence intervals should be populated
expect_false(any(is.na(res$p.value)))
expect_false(any(is.na(res$conf.low)))
expect_false(any(is.na(res$conf.high)))
res2 <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res2$estimate[res2$reference_row & res2$variable == "stage" & !is.na(res2$reference_row)],
exp(sum(res$estimate[!res$reference_row & res$variable == "stage"], na.rm = TRUE) * -1)
)
expect_equal(
res2$estimate[res2$reference_row & res2$variable == "grade" & !is.na(res2$reference_row)],
exp(sum(res$estimate[!res$reference_row & res$variable == "grade"], na.rm = TRUE) * -1)
)
expect_equal(
res2$estimate[res2$reference_row & res2$variable == "trt" & !is.na(res2$reference_row)],
exp(sum(res$estimate[!res$reference_row & res$variable == "trt"], na.rm = TRUE) * -1)
)
## works also when there is an interaction term
mod <- glm(response ~ stage * grade * trt, gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.sum, trt = contr.sum)
)
suppressWarnings(
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
)
# should be -1 * sum of other coefficients when sum contrasts
expect_equal(
res$estimate[res$reference_row & res$variable == "stage" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "stage"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "grade" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "grade"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "trt" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "trt"], na.rm = TRUE) * -1
)
skip_on_cran()
mod <- lm(
Petal.Length ~ Petal.Width + Species,
data = iris,
contrasts = list(Species = contr.sum)
)
expect_no_error(
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
)
expect_no_error(
res2 <- mod |>
tidy_and_attach(conf.level = .8) |>
tidy_add_estimate_to_reference_rows()
)
expect_no_error(
res3 <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows(conf.level = .8)
)
expect_false(res$conf.low[5] == res2$conf.low[5])
expect_true(res2$conf.low[5] == res3$conf.low[5])
})
test_that("test tidy_add_estimate_to_reference_rows() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_estimate_to_reference_rows(exponentiate = TRUE))
# expect an error if no value for exponentiate
expect_error(
mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows(exponentiate = NULL)
)
expect_error(
mod |>
broom::tidy() |>
tidy_attach_model(mod) |>
tidy_add_estimate_to_reference_rows()
)
skip_if_not_installed("emmeans")
# expect a message if this is a model not covered by emmeans
mod <- glm(
response ~ stage + grade + trt, gtsummary::trial,
family = binomial, contrasts = list(grade = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
class(mod) <- "unknown"
expect_message(
res |> tidy_add_estimate_to_reference_rows(model = mod)
)
})
test_that("tidy_add_estimate_to_reference_rows() works with character variables", {
df <- gtsummary::trial |>
dplyr::mutate(dplyr::across(where(is.factor), as.character))
mod <- glm(response ~ stage + grade + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(0, 0, 0)
)
mod <- glm(response ~ stage + grade + trt, df,
family = binomial,
contrasts = list(
stage = contr.treatment(4, base = 3),
grade = contr.treatment(3, base = 2),
trt = contr.SAS
)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
expect_equal(
res$estimate[res$reference_row & !is.na(res$reference_row)],
c(0, 0, 0)
)
skip_if_not_installed("emmeans")
mod <- glm(response ~ stage + grade + trt, df,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.sum, trt = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_estimate_to_reference_rows()
# should be -1 * sum of other coefficients when sum contrasts
expect_equal(
res$estimate[res$reference_row & res$variable == "stage" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "stage"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "grade" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "grade"], na.rm = TRUE) * -1
)
expect_equal(
res$estimate[res$reference_row & res$variable == "trt" & !is.na(res$reference_row)],
sum(res$estimate[!res$reference_row & res$variable == "trt"], na.rm = TRUE) * -1
)
})
test_that("tidy_add_estimate_to_reference_rows() handles variables having non standard name", {
skip_if_not_installed("emmeans")
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- glm(response ~ stage + `grade of kids` + trt, df,
family = binomial,
contrasts = list(`grade of kids` = contr.sum)
)
expect_no_message(
res <- mod |>
tidy_and_attach(tidy_fun = broom::tidy) |>
tidy_add_estimate_to_reference_rows()
)
expect_equal(
res$estimate[res$variable == "grade of kids" & !is.na(res$variable)] |> sum(),
0
)
})
test_that("tidy_add_estimate_to_reference_rows() preserve estimates of continuous variables", {
mod <- glm(response ~ poly(age, 3) + ttdeath, na.omit(gtsummary::trial), family = binomial)
res1 <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows()
res2 <- res1 |> tidy_add_estimate_to_reference_rows()
expect_equal(res1$estimate, res2$estimate)
})
skip_on_cran()
test_that("tidy_add_estimate_to_reference_rows() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
mod <- lme4::lmer(marker ~ stage + grade + (1 | group), df)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_add_estimate_to_reference_rows()
)
})
test_that("tidy_add_estimate_to_reference_rows() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
df <- gtsummary::trial
df$stage <- as.character(df$stage)
df$group <- rep.int(1:2, 100)
suppressMessages(
mod <- lme4::glmer(response ~ stage + grade + (1 | group), df, family = binomial)
)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_add_estimate_to_reference_rows()
)
})
test_that("tidy_add_estimate_to_reference_rows() works with survival::coxph", {
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with survival::survreg", {
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ factor(ecog.ps) + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with nnet::multinom", {
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
# no emmeans for multinom
# should return a warning but not an error
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.sum)
)
expect_message(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
test_that("tidy_add_estimate_to_reference_rows() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_estimate_to_reference_rows())
})
broom.helpers/tests/testthat/test-assert_package.R 0000644 0001762 0000144 00000002320 14737437002 022113 0 ustar ligges users test_that(".assert_package() works", {
# broom will always be installed with broom.helpers
expect_no_error(
.assert_package("broom")
)
expect_true(.assert_package("broom", boolean = TRUE))
expect_false(.assert_package("br000000m", boolean = TRUE))
mv <- c(Suggests = "1.1.28")
attr(mv, "compare") <- ">="
expect_equal(
.get_min_version_required("lme4"),
mv
)
expect_null(
.get_min_version_required("brms", pkg_search = NULL)
)
expect_null(
.get_min_version_required("broom", pkg_search = NULL)
)
expect_no_error(
df_deps <- .get_package_dependencies()
)
expect_true(
df_deps |> inherits("data.frame")
)
expect_equal(
names(df_deps),
c("pkg_search", "pkg_search_version", "dependency_type", "pkg", "version", "compare")
)
expect_no_error(
deps <- .get_all_packages_dependencies()
)
expect_true(nrow(deps) > 100)
skip_if(interactive())
# expect an error msg for pkg that doesn't exist
# note: if interactive(), user will be invited to install the missing pkg
expect_error(
.assert_package("br000000m")
)
expect_error(
.assert_package("br000000m", fn = "test_fun()")
)
})
broom.helpers/tests/testthat/test-marginal_tidiers.R 0000644 0001762 0000144 00000027464 14746125043 022473 0 ustar ligges users test_that("tidy_margins()", {
skip_on_cran()
skip_if_not_installed("margins")
mod <- lm(Petal.Length ~ Petal.Width + Species, data = iris)
expect_no_error(
suppressWarnings(t <- tidy_margins(mod))
)
expect_error(
tidy_margins(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_margins)
)
expect_equal(
nrow(res),
nrow(t) + 1 # due to adding ref row
)
expect_equal(
attr(res, "coefficients_label"),
"Average Marginal Effects"
)
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_margins,
add_pairwise_contrasts = TRUE
)
)
})
test_that("tidy_all_effects()", {
skip_on_cran()
skip_if_not_installed("effects")
mod <- lm(Petal.Length ~ Petal.Width + Species, data = iris)
expect_no_error(
t <- tidy_all_effects(mod)
)
expect_error(
tidy_all_effects(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_all_effects)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Predictions at the Mean"
)
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_all_effects,
add_pairwise_contrasts = TRUE
)
)
})
test_that("tidy_ggpredict()", {
skip_on_cran()
skip_if_not_installed("ggeffects")
mod <- lm(Petal.Length ~ Petal.Width + Species, data = iris)
expect_no_error(
t <- tidy_ggpredict(mod)
)
expect_error(
tidy_ggpredict(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_ggpredict)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Predictions"
)
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_ggpredict,
add_pairwise_contrasts = TRUE
)
)
})
test_that("tidy_marginal_predictions()", {
skip_on_cran()
skip_if_not_installed("marginaleffects")
iris <- iris |> dplyr::arrange(dplyr::desc(Species))
mod <- lm(Petal.Length ~ Petal.Width + Species + Sepal.Length, data = iris)
expect_no_error(
t <- tidy_marginal_predictions(mod)
)
expect_equal(t[t$variable == "Species", "term"], levels(iris$Species))
mod <- lm(Petal.Length ~ Petal.Width * Species + Sepal.Length, data = iris)
expect_no_error(
t <- tidy_marginal_predictions(mod)
)
expect_error(
tidy_marginal_predictions(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_marginal_predictions)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Average Marginal Predictions"
)
expect_true(any(res$var_type == "interaction"))
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_predictions,
add_pairwise_contrasts = TRUE
)
)
expect_no_error(
t <- tidy_marginal_predictions(mod, "no_interaction")
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_predictions,
variables_list = "no_interaction"
)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_false(any(res$var_type == "interaction"))
expect_no_error(
t <- tidy_marginal_predictions(mod, newdata = "mean")
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_predictions,
newdata = "mean"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Predictions at the Mean"
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_predictions,
newdata = "balanced"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Predictions at Marginal Means"
)
expect_type(
p <- plot_marginal_predictions(mod),
"list"
)
expect_length(p, 2)
expect_type(
p <- plot_marginal_predictions(mod, variables_list = "no_interaction"),
"list"
)
expect_length(p, 3)
})
test_that("tidy_avg_slopes()", {
skip_on_cran()
skip_if_not_installed("marginaleffects")
mod <- lm(Petal.Length ~ Petal.Width * Species + Sepal.Length, data = iris)
expect_no_error(
t <- tidy_avg_slopes(mod)
)
expect_error(
tidy_avg_slopes(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_avg_slopes)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Average Marginal Effects"
)
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_avg_slopes,
add_pairwise_contrasts = TRUE
)
)
expect_no_error(
t <- tidy_avg_slopes(mod, newdata = "mean")
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_avg_slopes,
newdata = "mean"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Effects at the Mean"
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_avg_slopes,
newdata = "balanced"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Effects at Marginal Means"
)
})
test_that("tidy_marginal_contrasts()", {
skip_on_cran()
skip_if_not_installed("marginaleffects")
mod <- lm(Petal.Length ~ Petal.Width * Species + Sepal.Length, data = iris)
expect_no_error(
t <- tidy_marginal_contrasts(mod)
)
expect_error(
tidy_marginal_contrasts(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_marginal_contrasts)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Average Marginal Contrasts"
)
expect_true(any(res$var_type == "interaction"))
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_contrasts,
add_pairwise_contrasts = TRUE
)
)
expect_no_error(
t <- tidy_marginal_contrasts(mod, "no_interaction")
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_contrasts,
variables_list = "no_interaction"
)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_false(any(res$var_type == "interaction"))
expect_no_error(
t <- tidy_marginal_contrasts(mod, newdata = "mean")
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_contrasts,
newdata = "mean"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Contrasts at the Mean"
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_marginal_contrasts,
newdata = "balanced"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Contrasts at Marginal Means"
)
})
test_that("tidy_avg_comparisons()", {
skip_on_cran()
skip_if_not_installed("marginaleffects")
mod <- lm(Petal.Length ~ Petal.Width * Species + Sepal.Length, data = iris)
expect_no_error(
t <- tidy_avg_comparisons(mod)
)
expect_error(
tidy_avg_comparisons(mod, exponentiate = TRUE)
)
expect_no_error(
res <- tidy_plus_plus(mod, tidy_fun = tidy_avg_comparisons)
)
expect_equal(
nrow(res),
nrow(t)
)
expect_equal(
attr(res, "coefficients_label"),
"Average Marginal Contrasts"
)
expect_error(
tidy_plus_plus(
mod,
tidy_fun = tidy_avg_comparisons,
add_pairwise_contrasts = TRUE
)
)
expect_no_error(
t <- tidy_avg_comparisons(mod, newdata = "mean"),
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_avg_comparisons,
newdata = "mean"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Contrasts at the Mean"
)
expect_no_error(
res <- tidy_plus_plus(
mod,
tidy_fun = tidy_avg_comparisons,
newdata = "balanced"
)
)
expect_equal(
attr(res, "coefficients_label"),
"Marginal Contrasts at Marginal Means"
)
})
test_that("Marginal tidiers works with nnet::multinom() models", {
skip_on_cran()
skip_if_not_installed("nnet")
skip_if_not_installed("margins")
skip_if_not_installed("effects")
skip_if_not_installed("ggeffects")
skip_if_not_installed("marginaleffects")
suppressMessages(
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial,
trace = FALSE
)
)
# not supported: tidy_margins(mod)
expect_no_error(
res <- tidy_all_effects(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_ggpredict(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_avg_slopes(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_avg_comparisons(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_marginal_predictions(mod)
)
expect_true("y.level" %in% names(res))
expect_type(
p <- plot_marginal_predictions(mod),
"list"
)
expect_length(p, 3)
expect_no_error(
res <- tidy_marginal_contrasts(mod)
)
expect_true("y.level" %in% names(res))
})
test_that("Marginal tidiers works with MASS::polr() models", {
skip_on_cran()
skip_if_not_installed("MASS")
skip_if_not_installed("margins")
skip_if_not_installed("effects")
skip_if_not_installed("ggeffects")
skip_if_not_installed("marginaleffects")
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(
suppressMessages(res <- tidy_margins(mod))
)
# for margins, no result per y.level
expect_no_error(
suppressMessages(res <- tidy_all_effects(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_ggpredict(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_avg_slopes(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_avg_comparisons(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_marginal_predictions(mod))
)
expect_true("y.level" %in% names(res))
expect_type(
suppressMessages(p <- plot_marginal_predictions(mod)),
"list"
)
expect_length(p, 3)
expect_no_error(
suppressMessages(res <- tidy_marginal_contrasts(mod))
)
expect_true("y.level" %in% names(res))
})
test_that("Marginal tidiers works with ordinal::clm() models", {
skip_on_cran()
skip_if_not_installed("ordinal")
library(ordinal)
skip_if_not_installed("margins")
skip_if_not_installed("effects")
skip_if_not_installed("ggeffects")
skip_if_not_installed("marginaleffects")
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
# not supported: tidy_margins(mod)
library(MASS)
expect_no_error(
res <- tidy_all_effects(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
suppressMessages(res <- tidy_ggpredict(mod))
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_avg_slopes(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_avg_comparisons(mod)
)
expect_true("y.level" %in% names(res))
expect_no_error(
res <- tidy_marginal_predictions(mod)
)
expect_true("y.level" %in% names(res))
expect_type(
p <- plot_marginal_predictions(mod),
"list"
)
expect_length(p, 1)
expect_no_error(
res <- tidy_marginal_contrasts(mod)
)
expect_true("y.level" %in% names(res))
})
broom.helpers/tests/testthat/test-attach_and_detach.R 0000644 0001762 0000144 00000002115 14737437002 022537 0 ustar ligges users test_that("Attach and Detach models works", {
mod <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris)
expect_identical(
mod,
mod |> tidy_and_attach(model_matrix_attr = FALSE) |> tidy_get_model()
)
tb <- broom::tidy(mod)
expect_equal(
tb,
tb |> tidy_attach_model(mod) |> tidy_detach_model(),
ignore_attr = TRUE
)
# an error should occur if 'exponentiate = TRUE' for a linear model
expect_error(
mod |> tidy_and_attach(exponentiate = TRUE)
)
})
test_that("tidy_and_attach() handles models without exponentiate arguments", {
skip_if_not_installed("lavaan")
skip_on_cran()
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_error(mod |> tidy_and_attach(exponentiate = TRUE))
expect_no_error(mod |> tidy_and_attach())
})
broom.helpers/tests/testthat/test-remove_intercept.R 0000644 0001762 0000144 00000001310 14737437002 022507 0 ustar ligges users test_that("tidy_remove_intercept() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_remove_intercept()
expect_equal(
res |> dplyr::filter(var_type == "intercept") |> nrow(),
0L
)
})
test_that("test tidy_remove_intercept() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_remove_intercept())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_remove_intercept() |> tidy_remove_intercept()
)
})
broom.helpers/tests/testthat/test-identify_variables.R 0000644 0001762 0000144 00000044040 14746151432 023007 0 ustar ligges users library(survival)
library(gtsummary)
test_that("model_list_variables() tests", {
mod <- glm(response ~ age + grade * trt + death, gtsummary::trial, family = binomial)
res <- mod |> model_list_variables()
expect_equal(
res$variable,
c("response", "age", "grade", "trt", "death", "grade:trt")
)
expect_equal(
res$variable,
mod |> model_list_variables(only_variable = TRUE)
)
expect_equal(
res$var_class,
c(
response = "integer", age = "numeric", grade = "factor", trt = "character",
death = "integer", NA
)
)
mod <- lm(marker ~ as.logical(response), gtsummary::trial)
res <- mod |>
model_list_variables(
labels = list(marker = "MARKER", "as.logical(response)" = "RESPONSE")
)
expect_equal(
res$var_class,
c("numeric", "logical"),
ignore_attr = TRUE
)
expect_equal(
res$var_label,
c("MARKER", "RESPONSE"),
ignore_attr = TRUE
)
expect_equal(
.MFclass2(as.Date("2000-01-01")),
"other"
)
})
test_that("tidy_identify_variables() works for common models", {
mod <- glm(response ~ age + grade * trt + death, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("(Intercept)", "age", "grade", "grade", "trt", "death", "grade:trt", "grade:trt")
)
expect_equal(
res$var_class,
c(NA, "numeric", "factor", "factor", "character", "integer", NA, NA),
ignore_attr = TRUE
)
expect_equal(
res$var_type,
c(
"intercept", "continuous", "categorical", "categorical", "dichotomous",
"continuous", "interaction", "interaction"
)
)
expect_equal(
res$var_nlevels,
c(NA, NA, 3L, 3L, 2L, NA, NA, NA),
ignore_attr = TRUE
)
})
test_that("test tidy_identify_variables() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_identify_variables())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_identify_variables() |> tidy_identify_variables()
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_identify_variables()
expect_true(
all(c("variable", "var_type", "var_class", "var_nlevels") %in% names(res))
)
# cannot be applied after tidy_add_header_rows
expect_error(
mod |> tidy_and_attach() |> tidy_add_header_rows() |> tidy_identify_variables()
)
})
test_that("model_dientify_variables() works well with logical variables", {
mod <- lm(
age ~ response + marker,
data = gtsummary::trial |>
dplyr::mutate(response = as.logical(response))
)
res <- model_identify_variables(mod)
expect_equal(
res |> dplyr::filter(variable == "response") |> purrr::pluck("var_type"),
"dichotomous"
)
expect_equal(
res |> dplyr::filter(variable == "response") |> purrr::pluck("var_nlevels"),
2,
ignore_attr = TRUE
)
expect_equal(
model_get_xlevels(mod)$response,
c("FALSE", "TRUE")
)
})
test_that("model_identify_variables() works with different contrasts", {
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.treatment, grade = contr.SAS, trt = contr.SAS)
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(
NA, "stage", "stage", "stage", "grade", "grade", "trt", "grade:trt",
"grade:trt"
)
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
mod <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.poly, grade = contr.helmert, trt = contr.sum)
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "stage", "stage", "stage", "grade", "grade", "trt", "grade:trt", "grade:trt")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with stats::poly()", {
mod <- lm(Sepal.Length ~ poly(Sepal.Width, 3) + poly(Petal.Length, 2), iris)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(
NA, "Sepal.Width", "Sepal.Width", "Sepal.Width",
"Petal.Length", "Petal.Length"
)
)
expect_no_error(tb <- mod |> tidy_and_attach() |> tidy_identify_variables())
expect_equal(
tb$variable,
c(
"(Intercept)", "Sepal.Width", "Sepal.Width", "Sepal.Width", "Petal.Length",
"Petal.Length"
)
)
})
test_that("tidy_identify_variables() works with variables having non standard name", {
# cf. https://github.com/ddsjoberg/gtsummary/issues/609
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- lm(age ~ marker * `grade of kids`, df)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c(
"(Intercept)", "marker", "grade of kids", "grade of kids", "marker:grade of kids",
"marker:grade of kids"
)
)
expect_equal(
res$var_class,
c(NA, "numeric", "factor", "factor", NA, NA),
ignore_attr = TRUE
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
# interaction only term
mod <- lm(age ~ marker:`grade of kids`, df)
expect_equal(
mod |> model_list_variables(only_variable = TRUE),
c("age", "marker", "grade of kids", "marker:grade of kids")
)
expect_equal(
mod |> model_identify_variables() |> purrr::pluck("variable"),
c(NA, "marker:grade of kids", "marker:grade of kids", "marker:grade of kids")
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("(Intercept)", "marker:grade of kids", "marker:grade of kids", "marker:grade of kids")
)
trial2 <-
gtsummary::trial |>
dplyr::mutate(
`treatment +name` = trt,
`disease stage` = stage
)
mod <- glm(
response ~ `treatment +name` + `disease stage`,
trial2,
family = binomial(link = "logit")
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_remove_intercept()
expect_equal(
res$variable,
c(
"treatment +name", "disease stage",
"disease stage", "disease stage"
)
)
expect_equal(
res$var_type,
c("dichotomous", "categorical", "categorical", "categorical")
)
mod <- lm(
hp ~ factor(`number + cylinders`):`miles :: galon` + factor(`type of transmission`),
mtcars |> dplyr::rename(
`miles :: galon` = mpg, `type of transmission` = am,
`number + cylinders` = cyl
)
)
res <- tidy_plus_plus(mod)
expect_equal(
res$variable,
c(
"factor(`type of transmission`)",
"factor(`type of transmission`)",
"factor(`number + cylinders`):miles :: galon",
"factor(`number + cylinders`):miles :: galon",
"factor(`number + cylinders`):miles :: galon"
)
)
})
test_that("model_identify_variables() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "Days")
)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_identify_variables()
)
mod <- lme4::lmer(
age ~ stage + (stage | grade) + (1 | grade),
gtsummary::trial
)
res <- mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_identify_variables()
expect_equal(
res |>
dplyr::filter(effect == "ran_pars") |>
purrr::pluck("var_type") |>
unique(),
"ran_pars"
)
})
test_that("model_identify_variables() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "period", "period", "period")
)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_identify_variables()
)
})
test_that("model_identify_variables() works with survival::coxph", {
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c("ph.ecog", "age", "sex")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with survival::survreg", {
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ ecog.ps + rx,
survival::ovarian,
dist = "exponential"
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "ecog.ps", "rx")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with nnet::multinom", {
skip_if_not_installed("nnet")
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "stage", "stage", "stage", "marker", "age")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c(
"(Intercept)", "stage", "stage", "stage", "marker", "age", "(Intercept)", "stage",
"stage", "stage", "marker", "age"
)
)
# should work also with sum/SAS contrasts
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c(
"(Intercept)", "stage", "stage", "stage", "marker", "age", "(Intercept)", "stage",
"stage", "stage", "marker", "age"
)
)
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.SAS)
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c(
"(Intercept)", "stage", "stage", "stage", "marker", "age", "(Intercept)", "stage",
"stage", "stage", "marker", "age"
)
)
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial, trace = FALSE,
contrasts = list(stage = contr.helmert)
)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c(
"(Intercept)", "stage", "stage", "stage", "marker", "age",
"(Intercept)", "stage", "stage", "stage", "marker", "age"
)
)
})
test_that("model_identify_variables() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "age", "grade", "grade", "trt", "grade:trt", "grade:trt")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("1|2", "2|3", "3|4", "4|5", "temp", "contact", "temp:contact")
)
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine, threshold = "symmetric")
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("central.1", "central.2", "spacing.1", "temp", "contact", "temp:contact")
)
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine, threshold = "symmetric2")
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("spacing.1", "spacing.2", "temp", "contact", "temp:contact")
)
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine, threshold = "equidistant")
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("threshold.1", "spacing", "temp", "contact", "temp:contact")
)
# nolint start
# wait for https://github.com/runehaubo/ordinal/issues/37
# before testing nominal predictors
# mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine, nominal = ~contact)
# res <- mod |> tidy_and_attach() |> tidy_identify_variables()
# expect_equal(
# res$variable,
# c("1|2.(Intercept)", "2|3.(Intercept)", "3|4.(Intercept)", "4|5.(Intercept)",
# "contact", "contact", "contact", "contact", "temp", "contactyes",
# "temp:contact")
# )
# nolint end
})
test_that("model_identify_variables() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables()
expect_equal(
res$variable,
c("1|2", "2|3", "3|4", "4|5", "temp", "contact", "temp:contact")
)
})
test_that("model_identify_variables() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "Infl", "Infl", "Type", "Type", "Type", "Cont")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "Cu", "Cu", "Time", "Cu:Time", "Cu:Time")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "gam::s(Age, 4)", "Number")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
mod <- suppressWarnings(gam::gam(
Ozone^(1 / 3) ~ gam::lo(Solar.R) + gam::lo(Wind, Temp),
data = datasets::airquality, na = gam::na.gam.replace
))
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(NA, "gam::lo(Solar.R)", "gam::lo(Wind, Temp)", "gam::lo(Wind, Temp)")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
})
test_that("model_identify_variables() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
mod@ParTable$lhs
)
expect_no_error(mod |> tidy_and_attach() |> tidy_identify_variables())
expect_vector(
mod |> model_list_variables(only_variable = TRUE)
)
})
test_that("model_identify_variables() message when failure", {
skip_if_not_installed("survival")
trial <- gtsummary::trial
df_models <-
tibble::tibble(grade = c("I", "II", "III")) |>
dplyr::mutate(
df_model = purrr::map(grade, ~ trial |> dplyr::filter(grade == ..1)),
mv_formula_char = "Surv(ttdeath, death) ~ trt + age + marker",
mv_formula = purrr::map(mv_formula_char, as.formula),
mv_model_form = purrr::map2(
mv_formula, df_model,
~ survival::coxph(..1, data = ..2)
)
)
expect_message(
df_models |>
dplyr::mutate(
mv_tbl_form =
purrr::map(
mv_model_form,
~ tidy_and_attach(.x) |> tidy_identify_variables(quiet = FALSE)
)
)
)
})
test_that("model_identify_variables() works with glmmTMB::glmmTMB", {
skip_if_not_installed("glmmTMB")
skip_if_not_installed("broom.mixed")
skip_on_cran()
mod <- suppressWarnings(
glmmTMB::glmmTMB(
count ~ mined + spp,
ziformula = ~ mined,
family = poisson,
data = glmmTMB::Salamanders
)
)
res <- mod |> model_identify_variables()
expect_equal(
res$variable,
c(
NA, "mined", "spp", "spp", "spp", "spp", "spp", "spp"
)
)
expect_no_error(
mod |>
tidy_and_attach() |>
tidy_identify_variables()
)
})
test_that("model_identify_variables() works with plm::plm", {
skip_if_not_installed("plm")
skip_on_cran()
data("Grunfeld", package = "plm")
mod <- plm::plm(
inv ~ value + capital,
data = Grunfeld,
model = "within",
index = c("firm", "year")
)
res <- mod |> model_identify_variables()
expect_equal(
mod |> model_get_model_matrix() |> colnames(),
c("(Intercept)", "value", "capital")
)
expect_equal(
res$term,
c("(Intercept)", "value", "capital")
)
expect_equal(
res$variable,
c(NA, "value", "capital")
)
})
broom.helpers/tests/testthat/test-add_pairwise_contrasts.R 0000644 0001762 0000144 00000003640 14737437002 023700 0 ustar ligges users test_that("tidy_add_pairwise_contrasts() works for glm", {
skip_on_cran()
skip_if_not_installed("emmeans")
skip_if_not_installed("gtsummary")
mod <- glm(response ~ stage + trt, gtsummary::trial, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_pairwise_contrasts()
expect_equal(
res$term,
c(
"(Intercept)", "T2 - T1", "T3 - T1", "T3 - T2", "T4 - T1",
"T4 - T2", "T4 - T3", "Drug B - Drug A"
)
)
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_pairwise_contrasts()
expect_equal(
res$term,
c(
"(Intercept)", "T2 / T1", "T3 / T1", "T3 / T2", "T4 / T1",
"T4 / T2", "T4 / T3", "Drug B / Drug A"
)
)
expect_equal(
round(res$estimate, digits = 2),
c(0.48, 0.62, 1.12, 1.82, 0.82, 1.33, 0.73, 1.24)
)
expect_equal(
round(res$conf.low, digits = 2),
c(0.25, 0.2, 0.36, 0.56, 0.27, 0.42, 0.23, 0.67)
)
res <- mod |>
tidy_and_attach(exponentiate = TRUE, conf.level = .9) |>
tidy_add_pairwise_contrasts(
variables = stage,
keep_model_terms = TRUE,
pairwise_reverse = FALSE
)
expect_equal(
res$term,
c(
"(Intercept)", "stageT2", "stageT3", "stageT4", "T1 / T2",
"T1 / T3", "T1 / T4", "T2 / T3", "T2 / T4", "T3 / T4", "trtDrug B"
)
)
expect_equal(
round(res$conf.low, digits = 2),
c(0.27, 0.3, 0.54, 0.4, 0.6, 0.33, 0.46, 0.19, 0.27, 0.49, 0.74)
)
res <- mod |>
tidy_plus_plus(exponentiate = TRUE, add_pairwise_contrasts = TRUE)
expect_equal(
res$term,
c(
"T2 / T1", "T3 / T1", "T3 / T2", "T4 / T1", "T4 / T2", "T4 / T3",
"Drug B / Drug A"
)
)
res1 <- mod |>
tidy_plus_plus(add_pairwise_contrasts = TRUE)
res2 <- mod |>
tidy_plus_plus(add_pairwise_contrasts = TRUE, contrasts_adjust = "none")
expect_false(identical(res1, res2))
})
broom.helpers/tests/testthat/test-add_term_labels.R 0000644 0001762 0000144 00000031305 15002155537 022241 0 ustar ligges users test_that("tidy_add_term_labels() works for basic models", {
mod <- lm(Petal.Length ~ Petal.Width, iris)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_term_labels()
)
df <- gtsummary::trial
mod <- glm(response ~ age + grade + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_term_labels()
expect_equal(
res$label,
c("(Intercept)", "Age", "II", "III", "Drug B"),
ignore_attr = TRUE
)
df <- gtsummary::trial
mod <- glm(response ~ age + grade + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels()
expect_equal(
res$label,
c("(Intercept)", "Age", "I", "II", "III", "Drug A", "Drug B"),
ignore_attr = TRUE
)
# if labels provided in `labels`, taken into account
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels(
labels = list(
"(Intercept)" = "the intercept",
"trtDrug A" = "the reference term",
gradeIII = "third grade"
)
)
expect_equal(
res$label,
c(
"the intercept", "Age", "I", "II", "third grade", "the reference term",
"Drug B"
),
ignore_attr = TRUE
)
# no error if providing labels not corresponding to an existing variable
# but display a message
expect_no_error(
mod |>
tidy_and_attach() |>
tidy_add_term_labels(
labels = list(aaa = "aaa", bbb = "bbb", ccc = 44)
)
)
expect_message(
mod |>
tidy_and_attach() |>
tidy_add_term_labels(
labels = list(aaa = "aaa", bbb = "bbb", ccc = 44)
)
)
expect_error(
mod |>
tidy_and_attach() |>
tidy_add_term_labels(
labels = list(aaa = "aaa", bbb = "bbb", ccc = 44),
strict = TRUE
)
)
# model with an interaction term only
mod <- lm(age ~ factor(response):marker, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_add_term_labels()
expect_equal(
res$label,
c("(Intercept)", "0 * Marker Level (ng/mL)", "1 * Marker Level (ng/mL)"),
ignore_attr = TRUE
)
})
test_that("test tidy_add_term_labels() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_term_labels())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_term_labels() |> tidy_add_term_labels()
)
# cannot be applied after tidy_add_header_rows
expect_error(
mod |> tidy_and_attach() |> tidy_add_header_rows() |> tidy_add_term_labels()
)
})
test_that("tidy_add_term_labels() correctly manages interaction terms", {
df <- gtsummary::trial
mod <- glm(response ~ age * grade * trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels()
expect_equal(
res$label,
c(
"(Intercept)", "Age", "I", "II", "III", "Drug A", "Drug B",
"Age * II", "Age * III", "Age * Drug B", "II * Drug B", "III * Drug B",
"Age * II * Drug B", "Age * III * Drug B"
),
ignore_attr = TRUE
)
# custom separator and custom labels for certain interaction terms
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels(
interaction_sep = ":::",
labels = c(
"age:gradeII" = "custom interaction label",
"gradeII:trtDrug B" = "a second custom label"
)
)
expect_equal(
res$label,
c(
"(Intercept)", "Age", "I", "II", "III", "Drug A", "Drug B",
"custom interaction label", "Age:::III", "Age:::Drug B", "a second custom label",
"III:::Drug B", "Age:::II:::Drug B", "Age:::III:::Drug B"
),
ignore_attr = TRUE
)
# case with sum contrasts
mod <- lm(
marker ~ stage:ttdeath + stage,
data = gtsummary::trial,
contrasts = list(stage = "contr.sum")
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels()
expect_equal(
res$label,
c(
"(Intercept)", "T1", "T2", "T3", "T4", "T1 * Months to Death/Censor",
"T2 * Months to Death/Censor", "T3 * Months to Death/Censor",
"T4 * Months to Death/Censor"
),
ignore_attr = TRUE
)
# complex case: model with no intercept and sum contrasts
mod <- lm(
Petal.Length ~ Species * Petal.Width - 1,
data = iris,
contrasts = list(Species = contr.sum)
)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels()
expect_equal(
res$label,
c(
"setosa", "versicolor", "virginica", "Petal.Width",
"setosa * Petal.Width", "versicolor * Petal.Width"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_term_labels() works with poly or helmert contrasts", {
mod <- glm(
response ~ stage + grade + trt,
gtsummary::trial,
family = binomial,
contrasts = list(stage = contr.sum, grade = contr.helmert, trt = contr.SAS)
)
# should not produce an error
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_term_labels()
)
})
test_that("tidy_add_term_labels() works with sdif contrasts", {
skip_if_not_installed("MASS")
mod <- glm(
response ~ stage + grade,
gtsummary::trial,
family = binomial,
contrasts = list(stage = MASS::contr.sdif, grade = MASS::contr.sdif)
)
# should not produce an error
expect_no_error(
res <- mod |> tidy_and_attach() |> tidy_add_term_labels()
)
expect_equal(
res$label,
c(
`(Intercept)` = "(Intercept)", `stageT2-T1` = "T2 - T1",
`stageT3-T2` = "T3 - T2", `stageT4-T3` = "T4 - T3",
`gradeII-I` = "II - I", `gradeIII-II` = "III - II"
),
ignore_attr = TRUE
)
# should not produce an error
expect_no_error(
res <- mod |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_term_labels()
)
expect_equal(
res$label,
c(
`(Intercept)` = "(Intercept)", `stageT2-T1` = "T2 / T1",
`stageT3-T2` = "T3 / T2", `stageT4-T3` = "T4 / T3",
`gradeII-I` = "II / I", `gradeIII-II` = "III / II"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_term_labels() works with variables having non standard name", {
skip_on_cran()
df <- gtsummary::trial |> dplyr::rename(
`grade of kids...` = grade,
`?? treatment ++ response ...` = response
)
mod <- lm(age ~ marker * `grade of kids...` + factor(`?? treatment ++ response ...`), df)
res <- mod |>
tidy_and_attach() |>
tidy_add_reference_rows() |>
tidy_add_term_labels()
expect_equal(
res$label,
c(
"(Intercept)", "Marker Level (ng/mL)", "I", "II", "III", "0",
"1", "Marker Level (ng/mL) * II", "Marker Level (ng/mL) * III"
),
ignore_attr = TRUE
)
expect_equal(
res$variable,
c(
"(Intercept)", "marker", "grade of kids...", "grade of kids...", "grade of kids...",
"factor(`?? treatment ++ response ...`)", "factor(`?? treatment ++ response ...`)",
"marker:grade of kids...", "marker:grade of kids..."
)
)
res <-
lm(
response ~ `age at dx` + `drug type`,
data = gtsummary::trial |>
dplyr::select(response, `age at dx` = age, `drug type` = trt)
) |>
tidy_and_attach() |>
tidy_add_variable_labels(list(`age at dx` = "AGGGGGGGE")) |>
tidy_add_term_labels()
expect_equal(
res$label,
c("(Intercept)", "AGGGGGGGE", "Drug B"),
ignore_attr = TRUE
)
})
test_that("tidy_add_term_labels() works with stats::poly()", {
skip_on_cran()
df <- iris |> labelled::set_variable_labels(Petal.Length = "Length of petal")
mod <- lm(Sepal.Length ~ poly(Sepal.Width, 3) + poly(Petal.Length, 2), df)
res <- mod |>
tidy_and_attach() |>
tidy_add_term_labels()
expect_equal(
res$label,
c(
`(Intercept)` = "(Intercept)",
`poly(Sepal.Width, 3)1` = "poly(Sepal.Width, 3)1",
`poly(Sepal.Width, 3)2` = "poly(Sepal.Width, 3)2",
`poly(Sepal.Width, 3)3` = "poly(Sepal.Width, 3)3",
`poly(Petal.Length, 2)1` = "poly(Petal.Length, 2)1",
`poly(Petal.Length, 2)2` = "poly(Petal.Length, 2)2"
),
ignore_attr = TRUE
)
res <- mod |>
tidy_and_attach() |>
tidy_add_term_labels(relabel_poly = TRUE)
expect_equal(
res$label,
c(
"(Intercept)", "Sepal.Width", "Sepal.Width²", "Sepal.Width³",
"Petal.Length", "Petal.Length²"
),
ignore_attr = TRUE
)
})
skip_on_cran()
test_that("tidy_add_term_labels() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
expect_no_error(mod |> tidy_and_attach(tidy_fun = broom.mixed::tidy) |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with survival::coxph", {
skip_on_cran()
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with survival::survreg", {
skip_on_cran()
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ ecog.ps + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with nnet::multinom", {
skip_on_cran()
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with survey::svyglm", {
skip_on_cran()
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with ordinal::clm", {
skip_on_cran()
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with ordinal::clmm", {
skip_on_cran()
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with MASS::polr", {
skip_on_cran()
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with geepack::geeglm", {
skip_on_cran()
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with gam::gam", {
skip_on_cran()
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
mod <- suppressWarnings(gam::gam(
Ozone^(1 / 3) ~ gam::lo(Solar.R) + gam::lo(Wind, Temp),
data = datasets::airquality, na = gam::na.gam.replace
))
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
test_that("tidy_add_term_labels() works with lavaan::lavaan", {
skip_on_cran()
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_term_labels())
})
broom.helpers/tests/testthat/test-select_helpers.R 0000644 0001762 0000144 00000015232 14737437002 022146 0 ustar ligges users test_that("select_helpers: all_*()", {
mod <- glm(response ~ age * trt + grade, gtsummary::trial, family = binomial)
mod_tidy <- tidy_and_attach(mod)
expect_equal(
tidy_select_variables(mod_tidy, include = all_categorical())$variable |>
na.omit() |>
unique(),
c("(Intercept)", "trt", "grade")
)
expect_equal(
tidy_select_variables(mod_tidy, include = all_categorical(dichotomous = FALSE))$variable |>
na.omit() |>
unique(),
c("(Intercept)", "grade")
)
expect_equal(
tidy_select_variables(mod_tidy, include = all_continuous())$variable |>
na.omit() |>
unique(),
c("(Intercept)", "age")
)
expect_equal(
tidy_select_variables(mod_tidy, include = all_dichotomous())$variable |>
na.omit() |>
unique(),
c("(Intercept)", "trt")
)
expect_equal(
tidy_select_variables(mod_tidy, include = all_interaction())$variable |>
na.omit() |>
unique(),
c("(Intercept)", "age:trt")
)
})
test_that("select_helpers: tidy_plus_plus", {
skip_on_cran()
mod <- glm(response ~ age * trt + grade, gtsummary::trial, family = binomial)
mod2 <- glm(response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.sum,
grade = contr.poly,
trt = contr.helmert
)
)
mod3 <- glm(
response ~ stage + grade + trt + factor(death),
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, 3), grade = contr.treatment(3, 2),
trt = contr.treatment(2, 2), "factor(death)" = matrix(c(-3, 2))
)
)
expect_equal(
tidy_plus_plus(mod3, include = all_contrasts("treatment"))$variable |>
na.omit() |>
unique(),
c("stage", "grade", "trt")
)
expect_equal(
tidy_plus_plus(mod3, include = all_contrasts("other"))$variable |>
na.omit() |>
unique(),
c("factor(death)")
)
expect_equal(
tidy_plus_plus(mod, include = all_contrasts())$variable |>
na.omit() |>
unique(),
c("trt", "grade")
)
expect_equal(
tidy_plus_plus(mod, include = all_categorical())$variable |>
na.omit() |>
unique(),
c("trt", "grade")
)
expect_equal(
tidy_plus_plus(mod, include = all_contrasts("treatment"))$variable |>
na.omit() |>
unique(),
c("trt", "grade")
)
expect_equal(
tidy_plus_plus(mod, include = all_continuous())$variable |>
na.omit() |>
unique(),
c("age")
)
expect_equal(
tidy_plus_plus(mod, include = all_dichotomous())$variable |>
na.omit() |>
unique(),
c("trt")
)
expect_equal(
tidy_plus_plus(mod, include = all_interaction())$variable |>
na.omit() |>
unique(),
c("age:trt")
)
expect_equal(
tidy_plus_plus(mod, include = all_intercepts(), intercept = TRUE)$variable |>
na.omit() |>
unique(),
c("(Intercept)")
)
expect_equal(
tidy_plus_plus(mod,
add_header_rows = TRUE,
show_single_row = all_dichotomous()
)$variable %in% "trt" |>
sum(),
1L
)
skip_if_not_installed("emmeans")
expect_equal(
tidy_plus_plus(mod2, include = all_contrasts("sum"))$variable |>
na.omit() |>
unique(),
c("stage")
)
expect_equal(
tidy_plus_plus(mod2, include = all_contrasts("poly"))$variable |>
na.omit() |>
unique(),
c("grade")
)
expect_equal(
tidy_plus_plus(mod2, include = all_contrasts("helmert"))$variable |>
na.omit() |>
unique(),
c("trt")
)
skip_on_cran()
skip_if_not_installed("lme4")
mod3 <- lme4::lmer(age ~ stage + (stage | grade) + (1 | grade), gtsummary::trial)
res <- mod3 |> tidy_plus_plus(
tidy_fun = broom.mixed::tidy,
include = all_ran_pars()
)
expect_equal(
res$term,
c(
"grade.sd__(Intercept)", "grade.cor__(Intercept).stageT2",
"grade.cor__(Intercept).stageT3", "grade.cor__(Intercept).stageT4",
"grade.sd__stageT2", "grade.cor__stageT2.stageT3", "grade.cor__stageT2.stageT4",
"grade.sd__stageT3", "grade.cor__stageT3.stageT4", "grade.sd__stageT4",
"grade.1.sd__(Intercept)", "Residual.sd__Observation"
)
)
res <- mod3 |> tidy_plus_plus(
tidy_fun = broom.mixed::tidy,
include = all_ran_vals()
)
expect_equal(res |> nrow(), 0L)
})
test_that("select_helpers: tidy_add_header_rows", {
mod <- glm(response ~ age * trt + grade, gtsummary::trial, family = binomial)
mod_tidy <- tidy_and_attach(mod)
expect_equal(
tidy_add_header_rows(mod_tidy, show_single_row = all_dichotomous())$variable %in% "trt" |>
sum(),
1L
)
})
test_that("select_helpers: tidy_add_variable_labels", {
mod <- glm(response ~ age * trt + grade, gtsummary::trial, family = binomial)
mod_tidy <- tidy_and_attach(mod)
expect_no_error(
tidy_add_variable_labels(mod_tidy, labels = where(is.numeric) ~ "NUMERIC")
)
expect_equal(
tidy_add_variable_labels(mod_tidy,
labels = list(
`(Intercept)` ~ "b0",
age ~ "AGE",
trt ~ "Drug",
"grade" ~ "Grade",
contains("age:") ~ "Interaction"
)
) |>
dplyr::pull(var_label) |>
unique(),
c("b0", "AGE", "Drug", "Grade", "Interaction")
)
})
test_that("select helpers are consistent with gtsummary", {
skip_on_cran()
skip_if_not_installed("gtsummary")
mod <- glm(response ~ age * trt + grade, gtsummary::trial, family = binomial)
x <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_add_contrasts() |>
scope_tidy()
expect_equal(
x |> dplyr::select(broom.helpers::all_categorical()) |> colnames(),
x |> dplyr::select(gtsummary::all_categorical()) |> colnames()
)
expect_equal(
x |> dplyr::select(broom.helpers::all_continuous()) |> colnames(),
x |> dplyr::select(gtsummary::all_continuous()) |> colnames()
)
expect_equal(
x |> dplyr::select(broom.helpers::all_contrasts("treatment")) |> colnames(),
x |> dplyr::select(gtsummary::all_contrasts("treatment")) |> colnames()
)
expect_equal(
x |> dplyr::select(broom.helpers::all_dichotomous()) |> colnames(),
x |> dplyr::select(gtsummary::all_dichotomous()) |> colnames()
)
expect_equal(
x |> dplyr::select(broom.helpers::all_interaction()) |> colnames(),
x |> dplyr::select(gtsummary::all_interaction()) |> colnames()
)
expect_equal(
x |> dplyr::select(broom.helpers::all_intercepts()) |> colnames(),
x |> dplyr::select(gtsummary::all_intercepts()) |> colnames()
)
})
broom.helpers/tests/testthat/test-group_by.R 0000644 0001762 0000144 00000004025 14760117574 020776 0 ustar ligges users test_that("tidy_group_by() works for basic models", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
expect_no_error(
res <- mod |> tidy_and_attach() |> tidy_group_by()
)
expect_false("group_by" %in% colnames(res))
expect_no_error(
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_group_by(group_by = var_type)
)
expect_true("group_by" %in% colnames(res))
expect_true(is.factor(res$group_by))
expect_equal(as.character(res$group_by), res$var_type)
})
test_that("tidy_group_by() works with nnet::multinom", {
skip_if_not_installed("nnet")
skip_if_not_installed("gtsummary")
mod <- nnet::multinom(
grade ~ stage + marker + age,
data = gtsummary::trial,
trace = FALSE
)
expect_no_error(
res <- mod |> tidy_and_attach() |> tidy_group_by()
)
expect_true("group_by" %in% colnames(res))
expect_equal(
levels(res$group_by),
c("II", "III")
)
expect_message(
res <- mod |>
tidy_and_attach() |>
tidy_group_by(group_labels = c(IV = "not found"))
)
expect_no_error(
res <- mod |>
tidy_and_attach() |>
tidy_group_by(group_labels = c(III = "group 3"))
)
expect_error(
res <- mod |>
tidy_and_attach() |>
tidy_group_by(group_labels = c("group 3"))
)
expect_equal(
levels(res$group_by),
c("II", "group 3")
)
expect_no_error(
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_group_by(group_by = c(var_type, y.level))
)
expect_equal(
length(levels(res$group_by)),
6
)
x <- mod |> tidy_and_attach() |> tidy_identify_variables()
# by default, keep any pre-existing group_by
expect_equal(
x |> tidy_group_by(group_by = "var_type"),
x |> tidy_group_by(group_by = "var_type") |> tidy_group_by()
)
# NULL to remove any pre-existing group_by
expect_equal(
x,
x |> tidy_group_by() |> tidy_group_by(group_by = NULL)
)
})
broom.helpers/tests/testthat/test-list_higher_order_variables.R 0000644 0001762 0000144 00000001260 14657100641 024662 0 ustar ligges users test_that("model_list_higher_order_variables() works for basic models", {
mod <- lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars)
expect_equal(
mod |> model_list_higher_order_variables(),
c("mpg", "factor(cyl)", "hp:disp")
)
mod <- glm(
Survived ~ Class * Age + Sex:Class,
data = Titanic |> as.data.frame(),
weights = Freq,
family = binomial
)
expect_equal(
mod |> model_list_higher_order_variables(),
c("Class:Age", "Class:Sex")
)
mod <- lm(Petal.Length ~ Petal.Width * Species * Sepal.Length, data = iris)
expect_equal(
mod |> model_list_higher_order_variables(),
"Petal.Width:Species:Sepal.Length"
)
})
broom.helpers/tests/testthat/test-add_variable_labels.R 0000644 0001762 0000144 00000024224 14746151400 023060 0 ustar ligges users test_that("tidy_add_variable_labels() works for basic models", {
# if no variable labels, variable names
# term for intercept
df <- gtsummary::trial
labelled::var_label(df) <- NULL
mod <- glm(response ~ age + grade + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c("(Intercept)", "age", "grade", "grade", "trt"),
ignore_attr = TRUE
)
# if variable labels defined in data, variable labels
df <- gtsummary::trial
mod <- glm(response ~ age + grade + trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c("(Intercept)", "Age", "Grade", "Grade", "Chemotherapy Treatment"),
ignore_attr = TRUE
)
# if labels provided in `labels`, taken into account
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels(
labels = list(`(Intercept)` = "custom intercept", grade = "custom label")
)
expect_equal(
res$var_label,
c(
"custom intercept", "Age", "custom label", "custom label",
"Chemotherapy Treatment"
),
ignore_attr = TRUE
)
# labels can also be a named vector
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels(
labels = c(`(Intercept)` = "custom intercept", grade = "custom label")
)
expect_equal(
res$var_label,
c(
"custom intercept", "Age", "custom label", "custom label",
"Chemotherapy Treatment"
),
ignore_attr = TRUE
)
# model with only an interaction term
mod <- lm(age ~ factor(response):marker, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c(
"(Intercept)",
"factor(response) * Marker Level (ng/mL)",
"factor(response) * Marker Level (ng/mL)"
),
ignore_attr = TRUE
)
# custom label for interaction term
mod <- glm(response ~ age + grade * trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels(labels = c("grade:trt" = "custom label"))
expect_equal(
res$var_label,
c(
"(Intercept)", "Age", "Grade", "Grade", "Chemotherapy Treatment",
"custom label", "custom label"
),
ignore_attr = TRUE
)
})
test_that("test tidy_add_variable_labels() checks", {
mod <- glm(response ~ stage + grade + trt, gtsummary::trial, family = binomial)
# expect an error if no model attached
expect_error(mod |> broom::tidy() |> tidy_add_variable_labels())
# could be apply twice (no error)
expect_no_error(
mod |> tidy_and_attach() |> tidy_add_variable_labels() |> tidy_add_variable_labels()
)
# cannot be applied after tidy_add_header_rows()
expect_error(
mod |>
tidy_and_attach() |>
tidy_add_header_rows() |>
tidy_add_variable_labels()
)
})
test_that("tidy_add_variable_labels() correctly manages interaction terms", {
df <- gtsummary::trial
mod <- glm(response ~ age * grade * trt, df, family = binomial)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c(
"(Intercept)", "Age", "Grade", "Grade", "Chemotherapy Treatment",
"Age * Grade", "Age * Grade", "Age * Chemotherapy Treatment",
"Grade * Chemotherapy Treatment", "Grade * Chemotherapy Treatment",
"Age * Grade * Chemotherapy Treatment", "Age * Grade * Chemotherapy Treatment"
),
ignore_attr = TRUE
)
# custom separator and custom labels for certain interaction terms
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels(
interaction_sep = ":::",
labels = c(
"age:grade" = "custom interaction label",
"grade:trt" = "a second custom label"
)
)
expect_equal(
res$var_label,
c(
"(Intercept)", "Age", "Grade", "Grade", "Chemotherapy Treatment",
"custom interaction label", "custom interaction label", "Age:::Chemotherapy Treatment",
"a second custom label", "a second custom label", "Age:::Grade:::Chemotherapy Treatment",
"Age:::Grade:::Chemotherapy Treatment"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_variable_labels() works with variables having non standard name", {
df <- gtsummary::trial |> dplyr::mutate(`grade of kids` = grade)
mod <- lm(age ~ marker * `grade of kids`, df)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c(
"(Intercept)", "Marker Level (ng/mL)", "Grade", "Grade", "Marker Level (ng/mL) * Grade",
"Marker Level (ng/mL) * Grade"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_variable_labels() works with stats::poly()", {
df <- iris |> labelled::set_variable_labels(Petal.Length = "Length of petal")
mod <- lm(Sepal.Length ~ poly(Sepal.Width, 3) + poly(Petal.Length, 2), df)
res <- mod |>
tidy_and_attach() |>
tidy_add_variable_labels(labels = c(Sepal.Width = "Width of sepal"))
expect_equal(
res$var_label,
c(
"(Intercept)", "Width of sepal", "Width of sepal", "Width of sepal",
"Petal.Length", "Petal.Length"
),
ignore_attr = TRUE
)
})
test_that("tidy_add_variable_labels() works with lme4::lmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_add_variable_labels()
)
})
test_that("tidy_add_variable_labels() works with lme4::glmer", {
skip_on_cran()
skip_if_not_installed("lme4")
mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
family = binomial, data = lme4::cbpp
)
expect_no_error(
mod |>
tidy_and_attach(tidy_fun = broom.mixed::tidy) |>
tidy_add_variable_labels()
)
})
test_that("tidy_add_variable_labels() works with survival::coxph", {
skip_if_not_installed("survival")
df <- survival::lung |> dplyr::mutate(sex = factor(sex))
mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
# check that label attribute in original dataset is preserved
mod <- survival::coxph(survival::Surv(ttdeath, death) ~ grade, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_add_reference_rows() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c("Grade", "Grade", "Grade"),
ignore_attr = TRUE
)
})
test_that("tidy_add_variable_labels() works with survival::survreg", {
skip_if_not_installed("survival")
mod <- survival::survreg(
survival::Surv(futime, fustat) ~ ecog.ps + rx,
survival::ovarian,
dist = "exponential"
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
# check that label attribute in original dataset is preserved
mod <- survival::survreg(survival::Surv(ttdeath, death) ~ grade, gtsummary::trial)
res <- mod |>
tidy_and_attach() |>
tidy_identify_variables() |>
tidy_add_reference_rows() |>
tidy_add_variable_labels()
expect_equal(
res$var_label,
c("(Intercept)", "Grade", "Grade", "Grade", "Log(scale)"),
ignore_attr = TRUE
)
})
test_that("tidy_add_variable_labels() works with nnet::multinom", {
skip_if_not_installed("nnet")
mod <- nnet::multinom(grade ~ stage + marker + age, data = gtsummary::trial, trace = FALSE)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with survey::svyglm", {
skip_if_not_installed("survey")
df <- survey::svydesign(~1, weights = ~1, data = gtsummary::trial)
mod <- survey::svyglm(response ~ age + grade * trt, df, family = quasibinomial)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with ordinal::clm", {
mod <- ordinal::clm(rating ~ temp * contact, data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with ordinal::clmm", {
mod <- ordinal::clmm(rating ~ temp * contact + (1 | judge), data = ordinal::wine)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with MASS::polr", {
mod <- MASS::polr(Sat ~ Infl + Type + Cont, weights = Freq, data = MASS::housing)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with geepack::geeglm", {
skip_if(packageVersion("geepack") < "1.3")
df <- geepack::dietox
df$Cu <- as.factor(df$Cu)
mf <- formula(Weight ~ Cu * Time)
suppressWarnings(
mod <- geepack::geeglm(mf, data = df, id = Pig, family = poisson("identity"), corstr = "ar1")
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with gam::gam", {
skip_if_not_installed("gam")
data(kyphosis, package = "gam")
mod <- gam::gam(Kyphosis ~ gam::s(Age, 4) + Number, family = binomial, data = kyphosis)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
mod <- suppressWarnings(gam::gam(
Ozone^(1 / 3) ~ gam::lo(Solar.R) + gam::lo(Wind, Temp),
data = datasets::airquality, na = gam::na.gam.replace
))
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
test_that("tidy_add_variable_labels() works with lavaan::lavaan", {
skip_if_not_installed("lavaan")
df <- lavaan::HolzingerSwineford1939
df$grade <- factor(df$grade, ordered = TRUE)
HS.model <- "visual =~ x1 + x2 + x3
textual =~ x4 + x5 + x6 + grade
speed =~ x7 + x8 + x9 "
mod <- lavaan::lavaan(HS.model,
data = df,
auto.var = TRUE, auto.fix.first = TRUE,
auto.cov.lv.x = TRUE
)
expect_no_error(mod |> tidy_and_attach() |> tidy_add_variable_labels())
})
broom.helpers/tests/testthat.R 0000644 0001762 0000144 00000000642 14737437002 016167 0 ustar ligges users # This file is part of the standard setup for testthat.
# It is recommended that you do not modify it.
#
# Where should you do additional test configuration?
# Learn more about the roles of various files in:
# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
# * https://testthat.r-lib.org/articles/special-files.html
library(testthat)
library(broom.helpers)
test_check("broom.helpers")
broom.helpers/tests/spelling.R 0000644 0001762 0000144 00000000233 14457457144 016150 0 ustar ligges users if (requireNamespace("spelling", quietly = TRUE)) {
spelling::spell_check_test(
vignettes = TRUE, error = FALSE,
skip_on_cran = TRUE
)
}
broom.helpers/MD5 0000644 0001762 0000144 00000022465 15062503242 013351 0 ustar ligges users 89ce86f707219d9f10042d02525dc51a *DESCRIPTION
15b876cbdc483cdf61d168e9bf24c707 *NAMESPACE
ad4bb40527897bcafa8bcc692f39d8b8 *NEWS.md
c0c2b1483fe244a0247c532c2f1bcdb9 *R/assert_package.R
7c4dcdf2857c4edd501f4befc66e3185 *R/broom.helpers-package.R
129e77b48d676d044c010a664fb51b3a *R/custom_tidiers.R
55c01d0cb58801ae0da3678caab51782 *R/data.R
59c7df87ce1acd7ba9c4f7aad7c4239f *R/helpers.R
4898b975e8837649f56dc6888fe20ed1 *R/marginal_tidiers.R
e47a12b18778cd7c9d8ef26b137c38c9 *R/model_compute_terms_contributions.R
0d1253b575716ea41b788b127bcbe52b *R/model_get_assign.R
e05a1dd7809dcf9f1fcaa186e30a75fa *R/model_get_coefficients_type.R
9ea2ab577ed8398b8bcc0514213e6deb *R/model_get_contrasts.R
2c2e345609771004544112c0d1aacb5f *R/model_get_model.R
3bcbbef3a7e60fa4c0146fcd62880474 *R/model_get_model_frame.R
27b48e93d0daa07551fead778a10ef4d *R/model_get_model_matrix.R
184df543bf96e92acec12e4ee2f09667 *R/model_get_n.R
ab08ba134a0d3046d28a7627dfa84a4f *R/model_get_nlevels.R
d3a14c9624976cb8394dcd863c90270f *R/model_get_offset.R
1b51387e589461772bf258145fbb98db *R/model_get_pairwise_contrasts.R
981f00c549e20d79a39cd837189370c7 *R/model_get_response.R
41699c4bbbb22ad1b8ddc977a8d707d3 *R/model_get_response_variable.R
3a24bbf0dac937ef6f5162ee284b31ca *R/model_get_terms.R
663cc4916446a8f411b527b6cb1993db *R/model_get_weights.R
661d5f45f23b55d9cef7e7eb9bc757c2 *R/model_get_xlevels.R
be3937d2a9dede869809355099412a89 *R/model_identify_variables.R
e4b73e9e5a5b6c0aad59d5c9e72a5ece *R/model_list_contrasts.R
0126e872921ecfb2e8f0118e1642aef0 *R/model_list_higher_order_variables.R
da1349b2ab8d4eefafad6a7dedeff678 *R/model_list_terms_levels.R
60d99f3a5b3c0bb656be8cbf3d612976 *R/model_list_variables.R
4f7183ad35cc4ccb38ba3f3bd8e55f90 *R/reexport.R
6a5bffbf22b5df7edf44cbe3bfd787a9 *R/scope_tidy.R
dc309a500295f12df76568c1dfc63d75 *R/select_helpers.R
29b12bd6276a04e2f1e68b525139b8ef *R/tidy_add_coefficients_type.R
0991b975cdc22c7c63e612d06e5a4417 *R/tidy_add_contrasts.R
d09b9d8e9296fca90d5fcd7fc1490b7e *R/tidy_add_estimate_to_reference_rows.R
4ea186803d2c70a776a6e29770821dc6 *R/tidy_add_header_rows.R
d1ce74e5da7cefcc737b8e67e146f363 *R/tidy_add_n.R
03338b55a3cbb94fc4ea1a8d64ef2b59 *R/tidy_add_pairwise_contrasts.R
f0e1c7246c030af82c80647ea17f3265 *R/tidy_add_reference_rows.R
59c1b63fa3988a7a06a4b70cce828574 *R/tidy_add_term_labels.R
8d29f43752a8a165867246182e4c04a8 *R/tidy_add_variable_labels.R
6973990fd0f016ac15212ec80cad0f93 *R/tidy_and_attach.R
44090eb2120ff79d5fcbd49eb690789c *R/tidy_disambiguate_terms.R
fdc145ee452503e8b9f173d614bbbbbe *R/tidy_group_by.R
7e992a1e1cc7075654ebe70ab8cc8112 *R/tidy_identify_variables.R
4ba97067dbfdaeb42bc39cac5d41762b *R/tidy_plus_plus.R
2c50b62ae7a47d4292a306692e8f0d5b *R/tidy_remove_intercept.R
035d05f01564f8004d0597512dd1fbd2 *R/tidy_select_variables.R
27df1687e56a1e834be338a0c02cdf4a *README.md
bfcb985dd428dcbd57ca39f0f4e495db *build/vignette.rds
fa8fa822b2ce2c16f4a559d1ece361c1 *data/supported_models.rda
6ff7a4dece7f8f17e47172397a4808cd *inst/WORDLIST
dd9041d03b86a54f2174e69f7c672fa2 *inst/doc/broom-helpers.R
56f00bb8874ed7d32c10088071343cd9 *inst/doc/broom-helpers.Rmd
dccc90ef5bc5e783e554d3b7c0dca255 *inst/doc/broom-helpers.html
a21cffa18f02b81b15bfeee342b658d7 *man/assert_package.Rd
6681ea547286b75364f990db9e6e2f05 *man/dot-clean_backticks.Rd
2db273b6845e020c0cf3d1615ed6c412 *man/dot-escape_regex.Rd
30e674427c4e49f6c860aeeb51efcf0e *man/figures/broom.helpers.png
e408d5625c4dc2036468b549ce3c82e8 *man/figures/broom.helpers.svg
cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg
c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg
a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg
c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg
952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg
27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg
6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg
53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg
1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg
6495fbbedc102387d4e590ac98badcd6 *man/model_compute_terms_contributions.Rd
18e29f2b3a1742da5fe2385dae4402a3 *man/model_get_assign.Rd
4c9ef6cc76d7c1edcb210bb96c526565 *man/model_get_coefficients_type.Rd
7275808f6c371b396ec5d4aa8a912ac9 *man/model_get_contrasts.Rd
ee11c0322a8a83fd58c283c79ba4a25d *man/model_get_model.Rd
daed13918653736acbc2a8a78c0da6ec *man/model_get_model_frame.Rd
af2d2ba6f1a65658d1b8ae92c110b839 *man/model_get_model_matrix.Rd
6971fa9f16acd808db40f798d78db791 *man/model_get_n.Rd
62ace2128838a40f3240a7117c300ac7 *man/model_get_nlevels.Rd
9b194fbdf0d1c5336f6668d8aaf4e6ce *man/model_get_offset.Rd
4f581c15df2050b777e0c7ccc886f9ef *man/model_get_pairwise_contrasts.Rd
4314fd41b27617c07db5e737ed6ad93b *man/model_get_response.Rd
9297d54dd9b81d3c1e6b2e4fd78b1e67 *man/model_get_response_variable.Rd
e21bfce55d33124d37c9a8de833368e5 *man/model_get_terms.Rd
3cb14c2c53f3c03625f7026929abd14d *man/model_get_weights.Rd
066b7e7dd88a4553374d379fb0050ea0 *man/model_get_xlevels.Rd
1ab2efde7d89ab28696c6f0aae7ba69e *man/model_identify_variables.Rd
7c1e39a8cff634158fe1c43d994dc70a *man/model_list_contrasts.Rd
183315fa89e8eaf32e9c85e96d61c873 *man/model_list_higher_order_variables.Rd
884fd9b2dc295873305ee004be9b524e *man/model_list_terms_levels.Rd
ca0360e759aef50ff46fce75ca308611 *man/model_list_variables.Rd
449cdc1eb57a0ea7bc9cd001b8e9912e *man/reexports.Rd
9a8c30eac37032901a932097149c60bd *man/scope_tidy.Rd
742f4dbd010e4cbe5ef885386e7cb966 *man/select_helpers.Rd
e6c8e7af99782dc71b155bf0d1fe6bb2 *man/seq_range.Rd
9fe9dc2ef06f44699938e150f0198e4c *man/supported_models.Rd
aacae87348744f8fae3fbc01a0f5844c *man/tidy_add_coefficients_type.Rd
14a129f2cf311372a96a5766f26390ef *man/tidy_add_contrasts.Rd
7fbb591f6a2de0f51de2f6e92e401ce5 *man/tidy_add_estimate_to_reference_rows.Rd
649c8fe41928d73bc9979d8b7e05de23 *man/tidy_add_header_rows.Rd
edc25fadab6359ea5e654569fec0b6a7 *man/tidy_add_n.Rd
200bb64fe0ed6b24a5ca1e294ab4db01 *man/tidy_add_pairwise_contrasts.Rd
94fe267ce7dd74a6d29e25de39038d72 *man/tidy_add_reference_rows.Rd
7a4586f99a4da0fdb827aed49e592ea2 *man/tidy_add_term_labels.Rd
2f2d02c5593d6763fd90eb0e8254b94c *man/tidy_add_variable_labels.Rd
a186e102a3fa7c4e38c6570ba2f087f2 *man/tidy_all_effects.Rd
dbe3adfc11c3b052dab6b507c33da780 *man/tidy_attach_model.Rd
c3499ac107482dfd920a6c9bbef323ab *man/tidy_avg_comparisons.Rd
141de0f747262a1cdb1d6b7524c6d236 *man/tidy_avg_slopes.Rd
a37f45ab2111903ed44984907f1286d5 *man/tidy_broom.Rd
940320d5c314f4dc2986b55ff7ee9a14 *man/tidy_disambiguate_terms.Rd
a708b4f4c1c460b2514e795e0956764e *man/tidy_ggpredict.Rd
74856edaf016d49101341a44cadc2abe *man/tidy_group_by.Rd
a63a0094867295222f16c4d5e907493f *man/tidy_identify_variables.Rd
396584dd4651ffa62b1a8c511cfab9b7 *man/tidy_marginal_contrasts.Rd
733742b5d78f2fabf2c52da35da37f2f *man/tidy_marginal_means.Rd
85842cc22d04d627a1c54c77984fd1db *man/tidy_marginal_predictions.Rd
a8a423f91ea58355442791e32c7b541b *man/tidy_margins.Rd
01ecca5f242987c6d8b1f5e075dd29b2 *man/tidy_multgee.Rd
f10d7f3767a5db312904009fafed1e7d *man/tidy_parameters.Rd
3501af036c08d929c59d4baec2d2ecbc *man/tidy_plus_plus.Rd
7be8752d4a79fb428b707784e1193a25 *man/tidy_remove_intercept.Rd
0b2069d1a01f6322938296e1f7e519e7 *man/tidy_select_variables.Rd
f8b2c88665aaacaf103af67e658f0cfd *man/tidy_svy_vglm.Rd
1af66dba993a5553a67f40323b8a70af *man/tidy_vgam.Rd
0236163ae924a6933c1248c35f09d972 *man/tidy_with_broom_or_parameters.Rd
acc41fb0075557fdb9ebd878d4fb5e8e *man/tidy_zeroinfl.Rd
dbd9bab057163c01c320d0b9d3bb9620 *tests/spelling.R
1dd85cf8fd0cc5201521d49efa4a8c39 *tests/testthat.R
15e605b87b7967eb112378f404cc50db *tests/testthat/test-add_coefficients_type.R
2eab4a92637551c9df12a1b7294034a6 *tests/testthat/test-add_contrasts.R
8c7c80069cfded4641236743283e7dd2 *tests/testthat/test-add_estimate_to_reference_rows.R
dd8b1566fdb90eef81fbd18d807859b9 *tests/testthat/test-add_header_rows.R
1030d972fb9a3d4134981e1b52bd3853 *tests/testthat/test-add_n.R
a73c64c33c9f122bc11eca37a24d42a7 *tests/testthat/test-add_pairwise_contrasts.R
0e6bc04febfed7b1b055f611cd960700 *tests/testthat/test-add_reference_rows.R
6cc3dc7fe161219c543afddeb973336e *tests/testthat/test-add_term_labels.R
64938e7398558893c700323af7f0bbe1 *tests/testthat/test-add_variable_labels.R
85e55546ea237b80bb78499ef11b37d9 *tests/testthat/test-assert_package.R
2ef36e372c59e1223ec4f915edbc4f4b *tests/testthat/test-attach_and_detach.R
1fff0599fb3e66d72b4dc4520377d5f9 *tests/testthat/test-disambiguate_terms.R
c404a048d8f29e7bf3ee8c75e6138c27 *tests/testthat/test-get_response_variable.R
b5e6a9381fa34a0b22b5da36c1ed43db *tests/testthat/test-group_by.R
f4731c67e076c417df8d311184787540 *tests/testthat/test-helpers.R
838d068bac1135541a1dec02baf3350e *tests/testthat/test-identify_variables.R
a5a85d2e76c25c3120244350c8f5f72c *tests/testthat/test-list_higher_order_variables.R
b4ec904d0042ac197b8baf770755c72f *tests/testthat/test-marginal_tidiers.R
7aedeeebb3179b35805f2a3427d3118b *tests/testthat/test-model_get_n.R
e4904e7fb88e62fb86eca53b3c248faa *tests/testthat/test-remove_intercept.R
e173166dba02fe44a3a256b9dde0a1ff *tests/testthat/test-select_helpers.R
cc09f90079b9e5502f3c9d658f9f05b9 *tests/testthat/test-select_variables.R
8852ffaa6f14cab1bcff7081bfff1148 *tests/testthat/test-tidy_parameters.R
d5733347fa26857c83288ad7b05c33be *tests/testthat/test-tidy_plus_plus.R
56f00bb8874ed7d32c10088071343cd9 *vignettes/broom-helpers.Rmd
broom.helpers/R/ 0000755 0001762 0000144 00000000000 15062262374 013241 5 ustar ligges users broom.helpers/R/tidy_disambiguate_terms.R 0000644 0001762 0000144 00000004551 15002155536 020265 0 ustar ligges users #' Disambiguate terms
#'
#' For mixed models, the `term` column returned by `broom.mixed` may have
#' duplicated values for random-effect parameters and random-effect values.
#' In such case, the terms could be disambiguated be prefixing them with the
#' value of the `group` column. `tidy_disambiguate_terms()` will not change
#' any term if there is no `group` column in `x`. The original term value
#' is kept in a new column `original_term`.
#'
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param sep (`string`)\cr
#' Separator added between group name and term.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examples
#' \donttest{
#' if (
#' .assert_package("lme4", boolean = TRUE) &&
#' .assert_package("broom.mixed", boolean = TRUE) &&
#' .assert_package("gtsummary", boolean = TRUE)
#' ) {
#' mod <- lme4::lmer(marker ~ stage + (1 | grade) + (death | response), gtsummary::trial)
#' mod |>
#' tidy_and_attach() |>
#' tidy_disambiguate_terms()
#' }
#' }
tidy_disambiguate_terms <- function(x, sep = ".", model = tidy_get_model(x), quiet = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if ("original_term" %in% names(x)) {
if (
!quiet &&
!inherits(model, "LORgee") && # no alert for multgee models
!inherits(model, "zeroinfl") && # or zeroninfl/hurdle
!inherits(model, "hurdle") &&
!inherits(model, "vgam") && # vgam models
!inherits(model, "vglm") &&
!inherits(model, "svy_vglm")
) {
cli_alert_danger(paste(
"{.code tidy_disambiguate_terms()} has already been applied.",
"x has been returned unchanged."
))
}
return(x)
}
.attributes <- .save_attributes(x)
if ("group" %in% names(x)) {
x <- x |>
dplyr::mutate(
original_term = .data$term,
term = dplyr::if_else(
is.na(.data$group) | .data$group == "",
.data$term,
paste(.data$group, .data$term, sep = sep)
)
)
}
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/model_get_n.R 0000644 0001762 0000144 00000015024 14662130321 015631 0 ustar ligges users #' Get the number of observations
#'
#' For binomial and multinomial logistic models, will also return
#' the number of events.
#'
#' For Poisson models, will return the number of events and exposure time
#' (defined with [stats::offset()]).
#'
#' For Cox models ([survival::coxph()]), will return the number of events,
#' exposure time and the number of individuals.
#'
#' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes
#' into account only the event of interest defined by `failcode.`
#'
#' See [tidy_add_n()] for more details.
#'
#' The total number of observations (`N_obs`), of individuals (`N_ind`), of
#' events (`N_event`) and of exposure time (`Exposure`) are stored as attributes
#' of the returned tibble.
#'
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars) |>
#' model_get_n()
#'
#' mod <- glm(
#' response ~ stage * grade + trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(stage = contr.sum, grade = contr.treatment(3, 2), trt = "contr.SAS")
#' )
#' mod |> model_get_n()
#'
#' \dontrun{
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq, family = binomial
#' )
#' mod |> model_get_n()
#'
#' d <- dplyr::as_tibble(Titanic) |>
#' dplyr::group_by(Class, Sex, Age) |>
#' dplyr::summarise(
#' n_survived = sum(n * (Survived == "Yes")),
#' n_dead = sum(n * (Survived == "No"))
#' )
#' mod <- glm(cbind(n_survived, n_dead) ~ Class * Age + Sex, data = d, family = binomial)
#' mod |> model_get_n()
#'
#' mod <- glm(response ~ age + grade * trt, gtsummary::trial, family = poisson)
#' mod |> model_get_n()
#'
#' mod <- glm(
#' response ~ trt * grade + offset(ttdeath),
#' gtsummary::trial,
#' family = poisson
#' )
#' mod |> model_get_n()
#'
#' dont
#' df <- survival::lung |> dplyr::mutate(sex = factor(sex))
#' mod <- survival::coxph(survival::Surv(time, status) ~ ph.ecog + age + sex, data = df)
#' mod |> model_get_n()
#'
#' mod <- lme4::lmer(Reaction ~ Days + (Days | Subject), lme4::sleepstudy)
#' mod |> model_get_n()
#'
#' mod <- lme4::glmer(response ~ trt * grade + (1 | stage),
#' family = binomial, data = gtsummary::trial
#' )
#' mod |> model_get_n()
#'
#' mod <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
#' family = binomial, data = lme4::cbpp
#' )
#' mod |> model_get_n()
#' }
model_get_n <- function(model) {
UseMethod("model_get_n")
}
#' @export
#' @rdname model_get_n
model_get_n.default <- function(model) {
tcm <- model |> model_compute_terms_contributions()
if (is.null(tcm)) {
return(NULL)
}
w <- model |> model_get_weights()
n <- dplyr::tibble(
term = colnames(tcm),
n_obs = colSums(tcm * w)
)
attr(n, "N_obs") <- sum(w)
n
}
#' @export
#' @rdname model_get_n
model_get_n.glm <- function(model) {
tcm <- model |> model_compute_terms_contributions()
if (is.null(tcm)) {
return(NULL)
} # nocov
w <- model |> model_get_weights()
n <- dplyr::tibble(
term = colnames(tcm),
n_obs = colSums(tcm * w)
)
attr(n, "N_obs") <- sum(w)
ct <- model |> model_get_coefficients_type()
if (ct %in% c("logistic", "poisson")) {
y <- model |> model_get_response()
if (is.factor(y)) {
# the first level denotes failure and all others success
y <- as.integer(y != levels(y)[1])
}
n$n_event <- colSums(tcm * y * w)
attr(n, "N_event") <- sum(y * w)
}
if (ct == "poisson") {
off <- model |> model_get_offset()
if (is.null(off)) off <- 0L
n$exposure <- colSums(tcm * exp(off) * w)
attr(n, "Exposure") <- sum(exp(off) * w)
}
n
}
#' @export
#' @rdname model_get_n
model_get_n.glmerMod <- model_get_n.glm
#' @export
#' @rdname model_get_n
model_get_n.multinom <- function(model) {
tcm <- model |> model_compute_terms_contributions()
if (is.null(tcm)) {
return(NULL)
} # nocov
w <- model |> model_get_weights()
y <- model |> model_get_response()
if (!is.factor(y)) y <- factor(y)
n <- purrr::map_df(
levels(y)[-1],
~ dplyr::tibble(
y.level = .x,
term = colnames(tcm),
n_obs = colSums(tcm * w),
n_event = colSums((y == .x) * tcm * w)
)
)
attr(n, "N_obs") <- sum(w)
attr(n, "N_event") <- sum((y != levels(y)[1]) * w)
n
}
#' @export
#' @rdname model_get_n
model_get_n.LORgee <- function(model) {
if (stringr::str_detect(model$title, "NOMINAL")) {
model_get_n.multinom(model)
} else {
model_get_n.default(model)
}
}
#' @export
#' @rdname model_get_n
model_get_n.coxph <- function(model) {
tcm <- model |> model_compute_terms_contributions()
if (is.null(tcm)) {
return(NULL)
} # nocov
w <- model |> model_get_weights()
n <- dplyr::tibble(
term = colnames(tcm),
n_obs = colSums(tcm * w)
)
attr(n, "N_obs") <- sum(w)
mf <- stats::model.frame(model) # using stats::model.frame() to get (id)
if (!"(id)" %in% names(mf))
mf[["(id)"]] <- seq_len(nrow(mf))
n_obs_per_ind <- mf |>
dplyr::add_count(dplyr::pick("(id)")) |>
dplyr::pull("n")
n$n_ind <- colSums(tcm * w / n_obs_per_ind)
attr(n, "N_ind") <- sum(w / n_obs_per_ind)
y <- model |> model_get_response()
status <- y[, ncol(y)]
if (ncol(y) == 3) {
time <- y[, 2] - y[, 1]
} else {
time <- y[, 1]
}
n$n_event <- colSums(tcm * status * w)
attr(n, "N_event") <- sum(status * w)
n$exposure <- colSums(tcm * time * w)
attr(n, "Exposure") <- sum(time * w)
n
}
#' @export
#' @rdname model_get_n
model_get_n.survreg <- model_get_n.coxph
#' @export
#' @rdname model_get_n
model_get_n.model_fit <- function(model) {
model_get_n(model$fit)
}
#' @export
#' @rdname model_get_n
model_get_n.tidycrr <- function(model) {
tcm <- model |> model_compute_terms_contributions()
if (is.null(tcm)) {
return(NULL)
} # nocov
w <- model |> model_get_weights()
n <- dplyr::tibble(
term = colnames(tcm),
n_obs = colSums(tcm * w)
)
attr(n, "N_obs") <- sum(w)
y <- model |> model_get_response()
time <- y[, 1]
status <- as.integer(y[, 2] == model$failcode)
n$n_event <- colSums(tcm * status * w)
attr(n, "N_event") <- sum(status * w)
n$exposure <- colSums(tcm * time * w)
attr(n, "Exposure") <- sum(time * w)
n
}
broom.helpers/R/custom_tidiers.R 0000644 0001762 0000144 00000047110 15044413405 016415 0 ustar ligges users #' Tidy a model with parameters package
#'
#' Use [parameters::model_parameters()] to tidy a model and apply
#' `parameters::standardize_names(style = "broom")` to the output
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to [parameters::model_parameters()].
#' @note
#' For [betareg::betareg()] models, the component column in the results is
#' standardized with [broom::tidy()], using `"mean"` and `"precision"` values.
#' @examplesIf .assert_package("parameters", boolean = TRUE)
#' \donttest{
#' lm(Sepal.Length ~ Sepal.Width + Species, data = iris) |>
#' tidy_parameters()
#' }
#' @export
#' @family custom_tidiers
tidy_parameters <- function(x, conf.int = TRUE, conf.level = .95, ...) {
.assert_package("parameters", fn = "broom.helpers::tidy_parameters()")
args <- list(...)
if (!conf.int) conf.level <- NULL
args$ci <- conf.level
args$model <- x
if (is.null(args$pretty_names)) args$pretty_names <- FALSE
if (
inherits(x, "betareg") &&
!is.null(args$component) &&
args$component == "mean"
) {
args$component <- "conditional"
}
res <-
do.call(parameters::model_parameters, args) |>
parameters::standardize_names(style = "broom")
if (inherits(x, "multinom")) {
if ("response" %in% colnames(res)) {
res <- res |>
dplyr::rename(y.level = "response")
} else {
# binary
res$y.level <- x$lev |> utils::tail(n = 1)
}
}
if (!is.null(args$component)) {
attr(res, "component") <- args$component
}
# for betareg, need to standardize component with tidy::broom()
if (inherits(x, "betareg")) {
if (is.null(args$component) || args$component == "conditional") {
res$component <- "mean"
}
if (!is.null(args$component) && args$component == "precision") {
res$component <- "precision"
}
if (!is.null(args$component) && args$component == "all") {
res$component[res$component == "conditional"] <- "mean"
}
}
res
}
#' Tidy a model with broom or parameters
#'
#' Try to tidy a model with `broom::tidy()`. If it fails, will try to tidy the
#' model using `parameters::model_parameters()` through `tidy_parameters()`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `broom::tidy()` or
#' `parameters::model_parameters()`.
#' @note
#' For [quantreg::rq()] models, if the result contains several *tau* values,
#' a `"component"` column is added and populated
#' with the value of the `"tau"` column.
#' @export
#' @family custom_tidiers
tidy_with_broom_or_parameters <- function(x, conf.int = TRUE, conf.level = .95, ...) {
exponentiate_later <- FALSE
# load broom.mixed if available
if (any(c("glmerMod", "lmerMod", "glmmTMB", "glmmadmb", "stanreg", "brmsfit") %in% class(x))) {
.assert_package("broom.mixed", fn = "broom.helpers::tidy_with_broom_or_parameters()")
}
if (inherits(x, "LORgee")) {
cli::cli_alert_info("{.pkg multgee} model detected.")
cli::cli_alert_success("{.fn tidy_multgee} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_multgee} to quiet these messages."
)
return(tidy_multgee(x, conf.int = conf.int, conf.level = conf.level, ...))
}
if (inherits(x, "zeroinfl")) {
cli::cli_alert_info("{.cls zeroinfl} model detected.")
cli::cli_alert_success("{.fn tidy_zeroinfl} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_zeroinfl} to quiet these messages."
)
return(tidy_zeroinfl(x, conf.int = conf.int, conf.level = conf.level, ...))
}
if (inherits(x, "hurdle")) {
cli::cli_alert_info("{.cls hurdle} model detected.")
cli::cli_alert_success("{.fn tidy_zeroinfl} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_zeroinfl} to quiet these messages."
)
return(tidy_zeroinfl(x, conf.int = conf.int, conf.level = conf.level, ...))
}
if (inherits(x, "vglm")) {
cli::cli_alert_info("{.cls vglm} model detected.")
cli::cli_alert_success("{.fn tidy_vgam} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_vgam} to quiet these messages."
)
return(tidy_vgam(x, conf.int = conf.int, conf.level = conf.level, ...))
}
if (inherits(x, "vgam")) {
cli::cli_alert_info("{.cls vgam} model detected.")
cli::cli_alert_success("{.fn tidy_vgam} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_vgam} to quiet these messages."
)
return(tidy_vgam(x, conf.int = conf.int, conf.level = conf.level, ...))
}
if (inherits(x, "svy_vglm")) {
cli::cli_alert_info("{.cls svy_vglm} model detected.")
cli::cli_alert_success("{.fn tidy_svy_vglm} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_svy_vglm} to quiet these messages."
)
return(tidy_svy_vglm(x, conf.int = conf.int, conf.level = conf.level, ...))
}
tidy_args <- list(...)
tidy_args$x <- x
tidy_args$conf.int <- conf.int
if (conf.int) tidy_args$conf.level <- conf.level
# class of models known for tidy() not supporting exponentiate argument
# and for ignoring it
mods_no_exp <-
c("fixest", "plm", "felm", "lavaan", "nls", "survreg")
if (any(mods_no_exp %in% class(x))) {
if (isFALSE(tidy_args$exponentiate)) {
tidy_args$exponentiate <- NULL
} else {
cli::cli_abort("'exponentiate = TRUE' is not valid for this type of model.")
}
}
# specific case for cch models
# exponentiate and conf.int not supported by broom::tidy()
if (inherits(x, "cch")) {
if (isTRUE(tidy_args$exponentiate)) {
exponentiate_later <- TRUE
}
tidy_args$exponentiate <- NULL
tidy_args$conf.int <- NULL
}
# specific case for quantreg::rq models
# exponentiate and conf.int not supported by broom::tidy()
if (inherits(x, "rq") || inherits(x, "rqs")) {
tidy_args$exponentiate <- NULL
tidy_args$conf.int <- NULL
}
# for betareg, if exponentiate = TRUE, forcing tidy_parameters,
# by adding `component = "all" to the arguments`
if (inherits(x, "betareg")) {
if (isFALSE(tidy_args$exponentiate)) {
tidy_args$exponentiate <- NULL
} else if (isTRUE(tidy_args$exponentiate)) {
component <- tidy_args$component
cli::cli_alert_info(
"{.code exponentiate = TRUE} not valid for {.cl betareg} with {.fn broom::tidy()}."
)
if (is.null(component)) {
cli::cli_alert_success("{.code tidy_parameters(component = \"all\")} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_parameters} to quiet these messages."
)
return(
tidy_parameters(
x,
conf.int = conf.int,
conf.level = conf.level,
component = "all",
...
)
)
} else {
cli::cli_alert_success("{.code tidy_parameters()} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_parameters} to quiet these messages."
)
return(
tidy_parameters(
x,
conf.int = conf.int,
conf.level = conf.level,
...
)
)
}
}
}
res <- tryCatch(
do.call(tidy_broom, tidy_args),
error = function(e) {
NULL
}
)
# trying without exponentiate
if (is.null(res)) {
tidy_args2 <- tidy_args
tidy_args2$exponentiate <- NULL
res <- tryCatch(
do.call(tidy_broom, tidy_args2),
error = function(e) {
NULL
}
)
if (!is.null(res) && !is.null(tidy_args$exponentiate) && tidy_args$exponentiate) {
# changing to FALSE is managed by tidy_and_attach()
cli::cli_abort("'exponentiate = TRUE' is not valid for this type of model.")
}
}
if (is.null(res)) {
cli::cli_alert_warning("{.code broom::tidy()} failed to tidy the model.")
res <- tryCatch(
do.call(tidy_parameters, tidy_args),
error = function(e) {
cli::cli_alert_warning("{.code tidy_parameters()} also failed.")
cli::cli_alert_danger(e)
NULL
}
)
if (is.null(res)) {
cli::cli_abort("Unable to tidy {.arg x}.")
} else {
# success of parameters
cli::cli_alert_success("{.code tidy_parameters()} used instead.")
cli::cli_alert_info(
"Add {.code tidy_fun = broom.helpers::tidy_parameters} to quiet these messages."
)
}
}
# cleaning in conf.int = FALSE
if (isFALSE(conf.int)) {
res <- res |>
dplyr::select(-dplyr::any_of(c("conf.low", "conf.high")))
}
if (exponentiate_later) {
res <- .exponentiate(res)
}
# specific case for quantreg::rq models
if (
"tau" %in% names(res) &&
!"component" %in% names(res) &&
length(unique(res$tau)) > 1
) {
res <- res |>
dplyr::mutate(component = .data$tau)
}
res
}
#' Tidy with `broom::tidy()` and checks that all arguments are used
#'
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param ... Additional parameters passed to `broom::tidy()`.
#' @family custom_tidiers
#' @export
tidy_broom <- function(x, ...) {
rlang::check_dots_used()
broom::tidy(x, ...)
}
#' Tidy a `multgee` model
#'
#' A tidier for models generated with `multgee::nomLORgee()` or `multgee::ordLORgee()`.
#' Term names will be updated to be consistent with generic models. The original
#' term names are preserved in an `"original_term"` column.
#' @param x (`LORgee`)\cr
#' A `multgee::nomLORgee()` or a `multgee::ordLORgee()` model.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `parameters::model_parameters()`.
#' @details
#' To be noted, for `multgee::nomLORgee()`, the baseline `y` category is the
#' latest modality of `y`.
#'
#' @export
#' @family custom_tidiers
#' @examplesIf .assert_package("multgee", boolean = TRUE)
#' \donttest{
#' library(multgee)
#'
#' h <- housing
#' h$status <- factor(
#' h$y,
#' labels = c("street", "community", "independant")
#' )
#'
#' mod <- multgee::nomLORgee(
#' status ~ factor(time) * sec,
#' data = h,
#' id = id,
#' repeated = time,
#' )
#' mod |> tidy_multgee()
#'
#' mod2 <- ordLORgee(
#' formula = y ~ factor(time) + factor(trt) + factor(baseline),
#' data = multgee::arthritis,
#' id = id,
#' repeated = time,
#' LORstr = "uniform"
#' )
#' mod2 |> tidy_multgee()
#' }
tidy_multgee <- function(x, conf.int = TRUE, conf.level = .95, ...) {
if (!inherits(x, "LORgee")) {
cli::cli_abort(paste(
"Only {.fn multgee::nomLORgee} and {.fn multgee::ordLORgee} models",
"are supported."
))
}
res <- tidy_parameters(x, conf.int = conf.int, conf.level = conf.level, ...)
res$original_term <- res$term
# multinomial model
if (stringr::str_detect(x$title, "NOMINAL")) {
mf <- x |> model_get_model_frame()
if (!is.factor(mf[[1]])) {
mf[[1]] <- factor(mf[[1]])
}
y.levels <- levels(mf[[1]])[-length(levels(mf[[1]]))]
mm <- x |> model_get_model_matrix()
t <- colnames(mm)
res$term <- rep.int(t, times = length(y.levels))
res$y.level <- rep(y.levels, each = length(t))
return(res)
} else {
mm <- x |> model_get_model_matrix()
t <- colnames(mm)
t <- t[t != "(Intercept)"]
b <- res$term[stringr::str_starts(res$term, "beta")]
res$term <- c(b, t)
return(res)
}
}
#' Tidy a `zeroinfl` or a `hurdle` model
#'
#' A tidier for models generated with `pscl::zeroinfl()` or `pscl::hurdle()`.
#' Term names will be updated to be consistent with generic models. The original
#' term names are preserved in an `"original_term"` column.
#' @param x (`zeroinfl` or `hurdle`)\cr
#' A `pscl::zeroinfl()` or a `pscl::hurdle()` model.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param component (`string`)\cr
#' `NULL` or one of `"all"`, `"conditional"`, `"zi"`, or `"zero_inflated"`.
#' @param ... Additional parameters passed to `parameters::model_parameters()`.
#' @export
#' @family custom_tidiers
#' @examplesIf .assert_package("pscl", boolean = TRUE)
#' \donttest{
#' library(pscl)
#' mod <- zeroinfl(
#' art ~ fem + mar + phd,
#' data = pscl::bioChemists
#' )
#'
#' mod |> tidy_zeroinfl(exponentiate = TRUE)
#' }
tidy_zeroinfl <- function(
x,
conf.int = TRUE,
conf.level = .95,
component = NULL,
...) {
if (!inherits(x, "zeroinfl") && !inherits(x, "hurdle")) {
cli::cli_abort("{.arg x} should be of class {.cls zeroinfl} or {.cls hurdle}")
} # nolint
res <- tidy_parameters(
x,
conf.int = conf.int,
conf.level = conf.level,
component = component,
...
)
res$original_term <- res$term
starts_zero <- stringr::str_starts(res$term, "zero_")
res$term[starts_zero] <- stringr::str_sub(res$term[starts_zero], 6)
starts_count <- stringr::str_starts(res$term, "count_")
res$term[starts_count] <- stringr::str_sub(res$term[starts_count], 7)
if (!is.null(component) && component %in% c("conditional", "zero_inflated")) {
res$component <- component
}
if (!is.null(component) && component == "zi") {
res$component <- "zero_inflated"
}
attr(res, "component") <- component
res
}
#' Tidy a `vglm` or a `vgam` model
#'
#' `r lifecycle::badge("experimental")`
#' A tidier for models generated with `VGAM::vglm()` or `VGAM::vgam()`.
#' Term names will be updated to be consistent with generic models. The original
#' term names are preserved in an `"original_term"` column. Depending on the
#' model, additional column `"group"`, `"component"` and/or `"y.level"` may be
#' added to the results.
#' @param x (`vglm` or `vgam`)\cr
#' A `VGAM::vglm()` or a `VGAM::vgam()` model.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `parameters::model_parameters()`.
#' @export
#' @family custom_tidiers
#' @examplesIf .assert_package("VGAM", boolean = TRUE)
#' \donttest{
#' library(VGAM)
#' mod <- vglm(
#' Species ~ Sepal.Length + Sepal.Width,
#' family = multinomial(),
#' data = iris
#' )
#' mod |> tidy_vgam(exponentiate = TRUE)
#' mod <- vglm(
#' Species ~ Sepal.Length + Sepal.Width,
#' family = multinomial(parallel = TRUE),
#' data = iris
#' )
#' mod |> tidy_vgam(exponentiate = TRUE)
#' }
tidy_vgam <- function(
x,
conf.int = TRUE,
conf.level = .95,
...) {
if (!inherits(x, "vgam") && !inherits(x, "vglm")) {
cli::cli_abort("{.arg x} should be of class {.cls vglm} or {.cls vgam}")
} # nolint
res <- tidy_parameters(
x,
conf.int = conf.int,
conf.level = conf.level,
...
)
.process_vgam_tidy_tbl(res, x)
}
.process_vgam_tidy_tbl <- function(res, x) {
res <- res |> dplyr::rename(original_term = dplyr::all_of("term"))
# identify groups
res <- res |>
dplyr::left_join(
.vgam_identify_groups(x) |>
dplyr::select(dplyr::all_of(c("original_term", "term", "group"))),
by = "original_term"
) |>
dplyr::relocate(
dplyr::all_of(c("term", "group")),
.after = dplyr::all_of("original_term")
)
# component names
if (!is.null(x@misc$predictors.names)) {
res$component <- x@misc$predictors.names[as.integer(res$group)]
if (!is.null(x@misc$parallel) && !isFALSE(x@misc$parallel)) {
res$component <- res$component |> tidyr::replace_na("parallel")
} else {
res$component <- res$component |> tidyr::replace_na("")
}
}
# identification of y.level (multinomial models)
if (
!is.null(x@misc$refLevel) &&
length(x@misc$predictors.names) == length(x@misc$ynames) - 1
) {
ylevels <- x@misc$ynames[-x@misc$refLevel]
res$y.level <- ylevels[as.integer(res$group)]
res$y.level[res$component == "parallel"] <- "parallel"
}
# remove component if all empty
if (all(res$component == ""))
res <- res |> dplyr::select(-dplyr::all_of(c("component", "group")))
res
}
# exploring assign and vassign from model.matrix to identify potential groups
.vgam_identify_groups <- function(x) {
# exploring assign and vassign from model.matrix to identify potential groups
mm <- stats::model.matrix(x)
a <- attr(mm, "assign")
a <- dplyr::tibble(variable = names(a), pos = a) |> tidyr::unnest("pos")
va <- attr(mm, "vassign")
va <- dplyr::tibble(vvariable = names(va), pos = va) |> tidyr::unnest("pos")
t <- mm |> colnames()
t <- dplyr::tibble(original_term = t, pos = seq_along(t))
t <- t |> dplyr::full_join(a, by = "pos") |> dplyr::full_join(va, by = "pos")
t$group <- t$vvariable |>
stringr::str_sub(start = stringr::str_length(t$variable) + 2)
t$term <- t$original_term |>
stringr::str_sub(
end = dplyr::if_else(
stringr::str_length(t$group) == 0,
-1L,
-1 * stringr::str_length(t$group) - 2
)
)
t
}
#' Tidy a `svy_vglm` model
#'
#' `r lifecycle::badge("experimental")`
#' A tidier for models generated with `svyVGAM::svy_vglm()`.
#' Term names will be updated to be consistent with generic models. The original
#' term names are preserved in an `"original_term"` column. Depending on the
#' model, additional column `"group"`, `"component"` and/or `"y.level"` may be
#' added to the results.
#' @param x (`svy_vglm`)\cr
#' A `svyVGAM::svy_vglm()` model.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `parameters::model_parameters()`.
#' @export
#' @family custom_tidiers
#' @examplesIf .assert_package("svyVGAM", boolean = TRUE)
#' \donttest{
#' library(svyVGAM)
#'
#' mod <- svy_vglm(
#' Species ~ Sepal.Length + Sepal.Width,
#' family = multinomial(),
#' design = survey::svydesign(~1, data = iris)
#' )
#' mod |> tidy_svy_vglm(exponentiate = TRUE)
#' }
tidy_svy_vglm <- function(
x,
conf.int = TRUE,
conf.level = .95,
...) {
if (!inherits(x, "svy_vglm")) {
cli::cli_abort("{.arg x} should be of class {.cls svy_vglm}.")
} # nolint
res <- tidy_parameters(
x,
conf.int = conf.int,
conf.level = conf.level,
...
)
.process_vgam_tidy_tbl(res, x$fit)
}
broom.helpers/R/tidy_add_estimate_to_reference_rows.R 0000644 0001762 0000144 00000015436 14762101413 022633 0 ustar ligges users #' Add an estimate value to references rows for categorical variables
#'
#' For categorical variables with a treatment contrast
#' ([stats::contr.treatment()]) or a SAS contrast ([stats::contr.SAS()]),
#' will add an estimate equal to `0` (or `1` if `exponentiate = TRUE`)
#' to the reference row.
#'
#' For categorical variables with a sum contrast ([stats::contr.sum()]),
#' the estimate value of the reference row will be equal to the sum of
#' all other coefficients multiplied by `-1` (eventually exponentiated if
#' `exponentiate = TRUE`), and obtained with `emmeans::emmeans()`.
#' The `emmeans` package should therefore be installed.
#' For sum contrasts, the model coefficient corresponds
#' to the difference of each level with the grand mean.
#' For sum contrasts, confidence intervals and p-values will also
#' be computed and added to the reference rows.
#'
#' For other variables, no change will be made.
#'
#' @details
#' If the `reference_row` column is not yet available in `x`,
#' [tidy_add_reference_rows()] will be automatically applied.
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param exponentiate (`logical`)\cr
#' Whether or not to exponentiate the coefficient estimates. It should be
#' consistent with the original call to [broom::tidy()]
#' @param conf.level (`numeric`)\cr
#' Confidence level, by default use the value indicated
#' previously in [tidy_and_attach()], used only for sum contrasts.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examplesIf require("gtsummary") && require("emmeans")
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(dplyr::across(where(is.character), factor))
#'
#' glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.SAS")
#' ) |>
#' tidy_and_attach(exponentiate = TRUE) |>
#' tidy_add_reference_rows() |>
#' tidy_add_estimate_to_reference_rows()
#'
#' glm(
#' response ~ stage + grade * trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(
#' stage = contr.treatment(4, base = 3),
#' grade = contr.treatment(3, base = 2),
#' trt = contr.treatment(2, base = 2)
#' )
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_reference_rows() |>
#' tidy_add_estimate_to_reference_rows()
#' }
tidy_add_estimate_to_reference_rows <- function(
x,
exponentiate = attr(x, "exponentiate"),
conf.level = attr(x, "conf.level"),
model = tidy_get_model(x),
quiet = FALSE) {
if (is.null(exponentiate) || !is.logical(exponentiate)) {
cli::cli_abort("{.arg exponentiate} is not provided. You need to pass it explicitely.")
}
if (is.null(conf.level) || !is.numeric(conf.level)) {
cli::cli_abort("{.arg conf.level} is not provided. You need to pass it explicitely.")
}
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
.attributes$exponentiate <- exponentiate
if (!"reference_row" %in% names(x)) {
x <- x |> tidy_add_reference_rows(model = model)
}
if (!"estimate" %in% names(x)) { # to avoid a problem with certain types of model (e.g. gam)
return(x |> tidy_attach_model(model))
}
# treatment contrasts
x <- x |>
dplyr::mutate(
estimate = dplyr::if_else(
!is.na(.data$reference_row) &
.data$reference_row &
stringr::str_starts(.data$contrasts, "contr.treatment|contr.SAS"),
dplyr::if_else(exponentiate, 1, 0),
.data$estimate
)
)
# sum contrasts
ref_rows_sum <- which(x$reference_row & x$contrasts == "contr.sum")
if (length(ref_rows_sum) > 0) {
for (i in ref_rows_sum) {
est <- .get_ref_row_estimate_contr_sum(
x$variable[i],
model = model,
exponentiate = exponentiate,
conf.level = conf.level,
quiet = quiet
)
x$estimate[i] <- est$estimate
x$std.error[i] <- est$std.error
x$p.value[i] <- est$p.value
if (all(c("conf.low", "conf.high") %in% names(x))) {
x$conf.low[i] <- est$conf.low
x$conf.high[i] <- est$conf.high
}
}
}
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
.get_ref_row_estimate_contr_sum <- function(variable, model, exponentiate = FALSE,
conf.level = .95, quiet = FALSE) {
if (inherits(model, "multinom")) {
dc <- NULL
if (!quiet) {
cli_alert_info(paste0(
"Sum contrasts are not supported for 'multinom' models.\n",
"Reference row of variable '", variable, "' remained unchanged."
))
}
} else if (inherits(model, "LORgee")) {
dc <- NULL
if (!quiet) {
cli_alert_info(paste0(
"Sum contrasts are not supported for {.pkg multgee} models.\n",
"Reference row of variable '", variable, "' remained unchanged."
))
}
} else {
.assert_package("emmeans", fn = "broom.helpers::tidy_add_estimate_to_reference_rows()")
dc <- tryCatch(
suppressMessages(
emmeans::emmeans(model, specs = variable, contr = "eff")
),
error = function(e) {
if (!quiet) {
cli_alert_info(paste0(
"No emmeans() method for this type of model.\n",
"Reference row of variable '", variable, "' remained unchanged."
))
}
NULL
}
)
}
if (is.null(dc)) {
res <- data.frame(
estimate = NA_real_,
std.error = NA_real_,
p.value = NA_real_,
conf.low = NA_real_,
conf.high = NA_real_
)
} else {
res <- dc$contrasts |>
as.data.frame(destroy.annotations = TRUE) |>
dplyr::last() |>
dplyr::select("estimate", std.error = "SE", "p.value")
ci <- dc$contrasts |>
stats::confint(level = conf.level) |>
as.data.frame() |>
dplyr::last()
if ("asymp.LCL" %in% names(ci)) {
res$conf.low <- ci$asymp.LCL
res$conf.high <- ci$asymp.UCL
} else if ("lower.CL" %in% names(ci)) {
res$conf.low <- ci$lower.CL
res$conf.high <- ci$upper.CL
} else if ("lower.PL" %in% names(ci)) {
res$conf.low <- ci$lower.PL
res$conf.high <- ci$upper.PL
} else {
res$conf.low <- NA_real_
res$conf.high <- NA_real_
}
}
if (exponentiate) {
res$estimate <- exp(res$estimate)
res$conf.low <- exp(res$conf.low)
res$conf.high <- exp(res$conf.high)
}
res
}
broom.helpers/R/tidy_add_contrasts.R 0000644 0001762 0000144 00000004006 14662130321 017234 0 ustar ligges users #' Add contrasts type for categorical variables
#'
#' Add a `contrasts` column corresponding to contrasts used for a
#' categorical variable and a `contrasts_type` column equal to
#' "treatment", "sum", "poly", "helmert", "other" or "no.contrast".
#'
#' @details
#' If the `variable` column is not yet available in `x`,
#' [tidy_identify_variables()] will be automatically applied.
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @param quiet (`logical`)\cr
#' Whether broom.helpers should not return a message
#' when `tidy_disambiguate_terms()` was already applied
#' @export
#' @family tidy_helpers
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#'
#' glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.helmert")
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_contrasts()
tidy_add_contrasts <- function(x, model = tidy_get_model(x), quiet = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
if ("contrasts" %in% names(x)) {
x <- x |> dplyr::select(-dplyr::all_of("contrasts"))
}
if (!"variable" %in% names(x)) {
if (!quiet) {
x <- x |> tidy_identify_variables()
}
}
contrasts_list <- model_list_contrasts(model)
if (is.null(contrasts_list)) {
x$contrasts <- NA_character_
x$contrasts_type <- NA_character_
} else {
x <- x |>
dplyr::left_join(
contrasts_list |>
dplyr::select(dplyr::all_of(c("variable", "contrasts", "contrasts_type"))),
by = "variable"
)
}
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/tidy_and_attach.R 0000644 0001762 0000144 00000011546 14662130321 016501 0 ustar ligges users #' Attach a full model to the tibble of model terms
#'
#' To facilitate the use of broom helpers with pipe, it is recommended to
#' attach the original model as an attribute to the tibble of model terms
#' generated by `broom::tidy()`.
#'
#' `tidy_attach_model()` attach the model to a tibble already generated while
#' `tidy_and_attach()` will apply `broom::tidy()` and attach the model.
#'
#' Use `tidy_get_model()` to get the model attached to the tibble and
#' `tidy_detach_model()` to remove the attribute containing the model.
#' @inheritParams tidy_plus_plus
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model_matrix_attr (`logical`)\cr
#' Whether model frame and model matrix should be added as attributes of
#' `model` (respectively named `"model_frame"` and `"model_matrix"`) and
#' passed through
#' @param .attributes (`list`)\cr
#' Named list of additional attributes to be attached to `x`.
#' @param ... Other arguments passed to `tidy_fun()`.
#' @family tidy_helpers
#' @examples
#' mod <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris)
#' tt <- mod |>
#' tidy_and_attach(conf.int = TRUE)
#' tt
#' tidy_get_model(tt)
#' @export
tidy_attach_model <- function(x, model, .attributes = NULL) {
x <- x |>
dplyr::as_tibble() |>
.order_tidy_columns()
class(x) <- c("broom.helpers", class(x))
model <- model_get_model(model)
# if force_contr.treatment
if (isTRUE(attr(x, "force_contr.treatment"))) {
for (v in names(model$contrasts)) {
model$contrasts[[v]] <- "contr.treatment"
}
}
attr(x, "model") <- model
for (a in names(.attributes)) {
if (!is.null(.attributes[[a]])) {
attr(x, a) <- .attributes[[a]]
}
}
x
}
#' @rdname tidy_attach_model
#' @export
tidy_and_attach <- function(
model, tidy_fun = tidy_with_broom_or_parameters,
conf.int = TRUE, conf.level = .95, exponentiate = FALSE,
model_matrix_attr = TRUE, ...) {
# exponentiate cannot be used with lm models
# but broom will not produce an error and will return unexponentiated estimates
if (identical(class(model), "lm") && exponentiate) {
cli::cli_abort("{.code exponentiate = TRUE} is not valid for this type of model.")
}
tidy_args <- list(...)
tidy_args$x <- model
if (model_matrix_attr) {
attr(model, "model_frame") <- model |> model_get_model_frame()
attr(model, "model_matrix") <- model |> model_get_model_matrix()
}
tidy_args$conf.int <- conf.int
if (conf.int) tidy_args$conf.level <- conf.level
tidy_args$exponentiate <- exponentiate
# test if exponentiate can be passed to tidy_fun, and if tidy_fun runs without error
result <-
tryCatch(
do.call(tidy_fun, tidy_args) |>
tidy_attach_model(
model,
.attributes = list(
exponentiate = exponentiate,
conf.level = conf.level
)
),
error = function(e) {
# `tidy_fun()` fails for two primary reasons:
# 1. `tidy_fun()` does not accept the `exponentiate=` arg
# - in this case, we re-run `tidy_fun()` without the `exponentiate=` argument
# 2. Incorrect input or incorrect custom `tidy_fun()` passed
# - in this case, we print a message explaining the likely source of error
# first attempting to run without `exponentiate=` argument
tryCatch(
{
tidy_args$exponentiate <- NULL
xx <-
do.call(tidy_fun, tidy_args) |>
tidy_attach_model(
model,
.attributes = list(exponentiate = FALSE, conf.level = conf.level)
)
if (exponentiate) {
cli::cli_alert_warning(
"`exponentiate = TRUE` is not valid for this type of model and was ignored."
)
}
xx
},
error = function(e) {
# if error persists, then there is a problem with either model input or `tidy_fun=`
paste0(
"There was an error calling {.code tidy_fun()}. ",
"Most likely, this is because the function supplied in {.code tidy_fun=} ",
"was misspelled, does not exist, is not compatible with your object, ",
"or was missing necessary arguments (e.g. {.code conf.level=} ",
"or {.code conf.int=}). See error message below."
) |>
stringr::str_wrap() |>
cli_alert_danger()
cli::cli_abort(as.character(e), call = NULL)
}
)
}
)
# return result
result
}
#' @rdname tidy_attach_model
#' @export
tidy_get_model <- function(x) {
attr(x, "model")
}
#' @rdname tidy_attach_model
#' @export
tidy_detach_model <- function(x) {
attr(x, "model") <- NULL
x
}
broom.helpers/R/model_get_terms.R 0000644 0001762 0000144 00000004526 15051110233 016524 0 ustar ligges users #' Get the terms of a model
#'
#' Return the result of [stats::terms()] applied to the model
#' or `NULL` if it is not possible to get terms from `model`.
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @seealso [stats::terms()]
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_terms()
model_get_terms <- function(model) {
UseMethod("model_get_terms")
}
#' @export
#' @rdname model_get_terms
model_get_terms.default <- function(model) {
tryCatch(
stats::terms(model),
error = function(e) {
NULL
}
)
}
#' @export
#' @rdname model_get_terms
model_get_terms.brmsfit <- function(model) {
model$formula |>
brms::brmsterms(resp_rhs_all = FALSE) |>
purrr::pluck("allvars") |>
stats::terms()
}
#' @export
#' @rdname model_get_terms
#' @details
#' For models fitted with `glmmTMB::glmmTMB()`, it will return a terms object
#' taking into account all components ("cond" and "zi"). For a more
#' restricted terms object, please refer to `glmmTMB::terms.glmmTMB()`.
model_get_terms.glmmTMB <- function(model) {
model$modelInfo$allForm$combForm |> stats::terms()
}
#' @export
#' @rdname model_get_terms
model_get_terms.model_fit <- function(model) {
model_get_terms(model$fit)
}
#' @export
#' @rdname model_get_terms
model_get_terms.betareg <- function(model) {
model_get_terms(model$terms$full)
}
#' @export
#' @rdname model_get_terms
model_get_terms.betareg <- function(model) {
model_get_terms(model$terms$full)
}
#' @export
#' @rdname model_get_terms
model_get_terms.cch <- function(model) {
stats::terms.formula(
model$call$formula |> stats::formula(),
data = model |> model_get_model_frame()
)
}
#' @export
#' @rdname model_get_terms
#' @details
#' For `fixest` models, return a term object combining main variables and
#' instrumental variables.
#'
model_get_terms.fixest <- function(model) {
fml <- model$fml
fiv <- model$iv_endo_fml
if (is.null(fiv)) {
f <- fml
} else {
f <-
paste(
deparse(fml),
"+",
deparse(fiv[[3]])
) |>
stats::as.formula()
}
stats::terms(f)
}
#' @export
#' @rdname model_get_terms
model_get_terms.svy_vglm <- function(model) {
model_get_terms(model$fit)
}
broom.helpers/R/model_get_contrasts.R 0000644 0001762 0000144 00000004477 15062256343 017436 0 ustar ligges users #' Get contrasts used in the model
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' glm(
#' am ~ mpg + factor(cyl),
#' data = mtcars,
#' family = binomial,
#' contrasts = list(`factor(cyl)` = contr.sum)
#' ) |>
#' model_get_contrasts()
model_get_contrasts <- function(model) {
UseMethod("model_get_contrasts")
}
#' @export
model_get_contrasts.default <- function(model) {
# we try 3 different approaches in a row
mc <- model_get_contrasts_1(model)
if (is.null(mc)) {
mc <- model_get_contrasts_2(model)
}
if (is.null(mc)) {
mc <- model_get_contrasts_3(model)
}
mc
}
model_get_contrasts_1 <- function(model) {
tryCatch(
purrr::chuck(model, "contrasts"),
error = function(e) {
NULL
}
)
}
model_get_contrasts_2 <- function(model) {
tryCatch(
attr(model_get_model_matrix(model), "contrasts"),
error = function(e) {
NULL
}
)
}
model_get_contrasts_3 <- function(model) {
tryCatch(
attr(stats::model.matrix(stats::terms(model), stats::model.frame(model)), "contrasts"),
error = function(e) {
NULL
}
)
}
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.model_fit <- function(model) {
model_get_contrasts(model$fit)
}
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.zeroinfl <- function(model) {
mc <- model_get_contrasts_1(model)
res <- mc$count
# merging/combining the two lists
for (v in names(mc$zero)) res[[v]] <- mc$zero[[v]]
res
}
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.hurdle <- model_get_contrasts.zeroinfl
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.betareg <- function(model) {
mc <- model_get_contrasts_1(model)
res <- mc$mean
# merging/combining the two lists
for (v in names(mc$precision)) res[[v]] <- mc$precision[[v]]
res
}
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.svy_vglm <- function(model) {
model_get_contrasts(model$fit)
}
#' @export
#' @rdname model_get_contrasts
model_get_contrasts.fixest <- function(model) {
mm <- stats::model.matrix.default(
model_get_terms(model),
data = eval(model$call$data, model$call_env)
)
attr(mm, "contrasts")
}
broom.helpers/R/select_helpers.R 0000644 0001762 0000144 00000007072 14762100662 016370 0 ustar ligges users #' Select helper functions
#'
#' @description Set of functions to supplement the *tidyselect* set of
#' functions for selecting columns of data frames (and other items as well).
#' - `all_continuous()` selects continuous variables
#' - `all_categorical()` selects categorical (including `"dichotomous"`) variables
#' - `all_dichotomous()` selects only type `"dichotomous"`
#' - `all_interaction()` selects interaction terms from a regression model
#' - `all_intercepts()` selects intercept terms from a regression model
#' - `all_contrasts()` selects variables in regression model based on their type
#' of contrast
#' - `all_ran_pars()` and `all_ran_vals()` for random-effect parameters and
#' values from a mixed model
#' (see `vignette("broom_mixed_intro", package = "broom.mixed")`)
#' @name select_helpers
#' @rdname select_helpers
#' @param dichotomous (`logical`)\cr
#' Whether to include dichotomous variables, default is `TRUE`.
#' @param contrasts_type (`string`)\cr
#' Type of contrast to select. When `NULL`, all variables with a
#' contrast will be selected. Default is `NULL`. Select among contrast types
#' `c("treatment", "sum", "poly", "helmert", "sdif", "other")`.
#' @param continuous2 (`logical`)\cr
#' Whether to include continuous2 variables, default is `TRUE`.
#' For compatibility with `{gtsummary}`), see [`gtsummary::all_continuous2()`].
#'
#' @return A character vector of column names selected.
#' @seealso [scope_tidy()]
#' @examples
#' \donttest{
#' glm(response ~ age * trt + grade, gtsummary::trial, family = binomial) |>
#' tidy_plus_plus(exponentiate = TRUE, include = all_categorical())
#' }
#' @examplesIf .assert_package("emmeans", boolean = TRUE)
#' \donttest{
#' glm(response ~ age + trt + grade + stage,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(trt = contr.SAS, grade = contr.sum, stage = contr.poly)
#' ) |>
#' tidy_plus_plus(
#' exponentiate = TRUE,
#' include = all_contrasts(c("treatment", "sum"))
#' )
#' }
NULL
#' @rdname select_helpers
#' @export
all_continuous <- function(continuous2 = TRUE) {
types <- if (continuous2) c("continuous", "continuous2") else "continuous"
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% types))
}
#' @rdname select_helpers
#' @export
all_categorical <- function(dichotomous = TRUE) {
types <- if (dichotomous) c("categorical", "dichotomous") else "categorical"
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% types))
}
#' @rdname select_helpers
#' @export
all_dichotomous <- function() {
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% "dichotomous"))
}
#' @rdname select_helpers
#' @export
all_interaction <- function() {
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% "interaction"))
}
#' @rdname select_helpers
#' @export
all_ran_pars <- function() {
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% "ran_pars"))
}
#' @rdname select_helpers
#' @export
all_ran_vals <- function() {
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% "ran_vals"))
}
#' @rdname select_helpers
#' @export
all_intercepts <- function() {
where(function(x) isTRUE(attr(x, "gtsummary.var_type") %in% "intercept"))
}
#' @rdname select_helpers
#' @export
all_contrasts <- function(contrasts_type = c("treatment", "sum", "poly", "helmert", "sdif", "other")) { # nolint
contrasts_type <- rlang::arg_match(contrasts_type, multiple = TRUE)
where(function(x) isTRUE(attr(x, "gtsummary.contrasts_type") %in% contrasts_type))
}
broom.helpers/R/model_get_coefficients_type.R 0000644 0001762 0000144 00000013025 15002155536 021101 0 ustar ligges users #' Get coefficient type
#'
#' Indicate the type of coefficient among "generic", "logistic",
#' "poisson", "relative_risk" or "prop_hazard".
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_coefficients_type()
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' glm(Survived ~ Class + Age * Sex, data = df, weights = df$n, family = binomial) |>
#' model_get_coefficients_type()
model_get_coefficients_type <- function(model) {
UseMethod("model_get_coefficients_type")
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.default <- function(model) {
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.glm <- function(model) {
if (!is.null(model$family)) {
if (model$family$family == "binomial" && model$family$link == "logit") {
return("logistic")
}
if (model$family$family == "binomial" && model$family$link == "log") {
return("relative_risk")
}
if (model$family$family == "binomial" && model$family$link == "cloglog") {
return("prop_hazard")
}
if (model$family$family == "poisson" && model$family$link == "log") {
return("poisson")
}
if (model$family$family == "quasibinomial" && model$family$link == "logit") {
return("logistic")
}
if (model$family$family == "quasipoisson" && model$family$link == "log") {
return("poisson")
}
}
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.negbin <- function(model) {
"poisson"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.geeglm <- model_get_coefficients_type.glm
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.fixest <- model_get_coefficients_type.glm
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.biglm <- model_get_coefficients_type.glm
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.glmerMod <- function(model) {
if (model@resp$family$family == "binomial" && model@resp$family$link == "logit") {
return("logistic")
}
if (model@resp$family$family == "binomial" && model@resp$family$link == "log") {
return("relative_risk")
}
if (model@resp$family$family == "binomial" && model@resp$family$link == "cloglog") {
return("prop_hazard")
}
if (model@resp$family$family == "poisson" && model@resp$family$link == "log") {
return("poisson")
}
# "quasi" families cannot be used with in glmer
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.clogit <- function(model) {
"logistic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.polr <- function(model) {
if (model$method == "logistic") {
return("logistic")
}
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.multinom <- function(model) {
"logistic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.svyolr <- function(model) {
"logistic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.clm <- function(model) {
"logistic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.clmm <- function(model) {
"logistic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.coxph <- function(model) {
"prop_hazard"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.crr <- function(model) {
"prop_hazard"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.tidycrr <- function(model) {
"prop_hazard"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.cch <- function(model) {
"prop_hazard"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.model_fit <- function(model) {
model_get_coefficients_type(model$fit)
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.LORgee <- function(model) {
if (stringr::str_detect(
model$link,
stringr::regex("logit", ignore_case = TRUE)
)) {
return("logistic")
}
if (stringr::str_detect(
model$link,
stringr::regex("cloglog", ignore_case = TRUE)
)) {
return("prop_hazard")
}
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.vglm <- function(model) {
if (model@family@vfamily[1] == "binomialff" && model@misc$link[1] == "logitlink")
return("logistic")
if (model@family@vfamily[1] == "poissonff" && model@misc$link[1] == "loglink")
return("poisson")
if (model@family@vfamily[1] == "negbinomial" && model@misc$link[1] == "loglink")
return("poisson")
if (model@family@vfamily[1] == "multinomial" && model@misc$link[1] == "multilogitlink")
return("logistic")
if (model@family@vfamily[1] == "cumulative" && model@misc$link[1] == "logitlink")
return("logistic")
"generic"
}
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.vgam <- model_get_coefficients_type.vglm
#' @export
#' @rdname model_get_coefficients_type
model_get_coefficients_type.svy_vglm <- function(model) {
model_get_coefficients_type(model$fit)
}
broom.helpers/R/model_list_terms_levels.R 0000644 0001762 0000144 00000017644 14662130321 020306 0 ustar ligges users #' List levels of categorical terms
#'
#' Only for categorical variables with treatment,
#' SAS, sum or successive differences contrasts (cf. [MASS::contr.sdif()]), and
#' categorical variables with no contrast.
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @param label_pattern ([`glue pattern`][glue::glue()])\cr
#' A [glue pattern][glue::glue()] for term labels (see examples).
#' @param variable_labels (`list` or `string`)\cr
#' An optional named list or named vector of
#' custom variable labels passed to [model_list_variables()]
#' @param sdif_term_level (`string`)\cr
#' For successive differences contrasts, how should term
#' levels be named? `"diff"` for `"B - A"` (default), `"ratio"` for `"B / A"`.
#' @return
#' A tibble with ten columns:
#' * `variable`: variable
#' * `contrasts_type`: type of contrasts ("sum" or "treatment")
#' * `term`: term name
#' * `level`: term level
#' * `level_rank`: rank of the level
#' * `reference`: logical indicating which term is the reference level
#' * `reference_level`: level of the reference term
#' * `var_label`: variable label obtained with [model_list_variables()]
#' * `var_nlevels`: number of levels in this variable
#' * `dichotomous`: logical indicating if the variable is dichotomous
#' * `label`: term label (by default equal to term level)
#' The first nine columns can be used in `label_pattern`.
#' @export
#' @family model_helpers
#' @examples
#' glm(
#' am ~ mpg + factor(cyl),
#' data = mtcars,
#' family = binomial,
#' contrasts = list(`factor(cyl)` = contr.sum)
#' ) |>
#' model_list_terms_levels()
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#'
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.helmert")
#' )
#' mod |> model_list_terms_levels()
#' mod |> model_list_terms_levels("{level} vs {reference_level}")
#' mod |> model_list_terms_levels("{variable} [{level} - {reference_level}]")
#' mod |> model_list_terms_levels(
#' "{ifelse(reference, level, paste(level, '-', reference_level))}"
#' )
model_list_terms_levels <- function(
model,
label_pattern = "{level}",
variable_labels = NULL,
sdif_term_level = c("diff", "ratio")) {
UseMethod("model_list_terms_levels")
}
#' @export
#' @rdname model_list_terms_levels
model_list_terms_levels.default <- function(
model, label_pattern = "{level}",
variable_labels = NULL,
sdif_term_level = c("diff", "ratio")) {
contrasts_list <- model_list_contrasts(model)
if (is.null(contrasts_list)) {
return(NULL)
}
sdif_term_level <- match.arg(sdif_term_level)
contrasts_list <- contrasts_list |>
# keep only treatment, SAS and sum contrasts
dplyr::filter(
.data$contrasts |>
stringr::str_starts("contr.treatment|contr.SAS|contr.sum|no.contrast|contr.sdif")
)
xlevels <- model_get_xlevels(model)
if (nrow(contrasts_list) == 0 || length(xlevels) == 0) {
return(NULL)
}
model_terms <- model_identify_variables(model) |>
dplyr::filter(!is.na(.data$variable))
if (nrow(model_terms) == 0) {
return(NULL)
}
res <- dplyr::tibble()
for (v in contrasts_list$variable) {
if (v %in% names(xlevels)) {
contrasts_type <- contrasts_list$contrasts_type[contrasts_list$variable == v]
terms_levels <- xlevels[[v]]
observed_terms <- model_terms$term[model_terms$variable == v]
ref <- contrasts_list$reference[contrasts_list$variable == v]
# terms could be named according to two approaches
# plus variations with backticks
s <- seq(1, length(terms_levels))
terms_names1 <- paste0(v, terms_levels)
terms_names2 <- paste0(v, s)
terms_names1b <- paste0("`", v, "`", terms_levels)
terms_names2b <- paste0("`", v, "`", s)
# naming approach for contr.sdif
terms_names3 <- paste0(v, terms_levels, "-", dplyr::lag(terms_levels))
terms_names3 <- terms_names3[-1]
terms_names3b <- paste0("`", v, "`", terms_levels, "-", dplyr::lag(terms_levels))
terms_names3b <- terms_names3b[-1]
terms_names4 <- paste0(v, s, "-", dplyr::lag(s))
terms_names4 <- terms_names4[-1]
terms_names4b <- paste0("`", v, "`", s, "-", dplyr::lag(s))
terms_names4b <- terms_names4b[-1]
# identification of the naming approach
approach <- NA
if (length(observed_terms) && !is.na(ref)) {
approach <- dplyr::case_when(
all(observed_terms %in% terms_names1[-ref]) ~ "1",
all(observed_terms %in% terms_names2[-ref]) ~ "2",
all(observed_terms %in% terms_names3[-ref]) ~ "3",
all(observed_terms %in% terms_names4[-ref]) ~ "4",
all(observed_terms %in% terms_names1b[-ref]) ~ "1b",
all(observed_terms %in% terms_names2b[-ref]) ~ "2b",
all(observed_terms %in% terms_names3b[-ref]) ~ "3b",
all(observed_terms %in% terms_names4b[-ref]) ~ "4b"
)
}
if (length(observed_terms) && is.na(ref)) {
approach <- dplyr::case_when(
all(observed_terms %in% terms_names1) ~ "1",
all(observed_terms %in% terms_names2) ~ "2",
all(observed_terms %in% terms_names3) ~ "3",
all(observed_terms %in% terms_names4) ~ "4",
all(observed_terms %in% terms_names1b) ~ "1b",
all(observed_terms %in% terms_names2b) ~ "2b",
all(observed_terms %in% terms_names3b) ~ "3b",
all(observed_terms %in% terms_names4b) ~ "4b"
)
}
# case of an interaction term only
if (is.na(approach)) {
n1 <- .count_term(model_terms$term, terms_names1)
n2 <- .count_term(model_terms$term, terms_names2)
n1b <- .count_term(model_terms$term, terms_names1b)
n2b <- .count_term(model_terms$term, terms_names2b)
approach <- dplyr::case_when(
(n1b + n2b) > (n1 + n2) & n1b >= n2b ~ "1b",
(n1b + n2b) > (n1 + n2) & n1b < n2b ~ "2b",
n2 > n1 ~ "2",
TRUE ~ "1"
)
}
terms_names <- switch(
approach,
"1" = terms_names1,
"2" = terms_names2,
"3" = terms_names3,
"4" = terms_names4,
"1b" = terms_names1b,
"2b" = terms_names2b,
"3b" = terms_names3b,
"4b" = terms_names4b
)
if (approach %in% c("3", "3b", "4", "4b")) {
sep <- "-"
if (sdif_term_level == "ratio") sep <- "/"
tl <- terms_levels
terms_levels <- paste(tl, sep, dplyr::lag(tl))
terms_levels <- terms_levels[-1]
}
res <- dplyr::bind_rows(
res,
dplyr::tibble(
variable = v,
contrasts_type = contrasts_type,
term = terms_names,
level = terms_levels,
level_rank = seq(1, length(terms_levels)),
reference = seq(1, length(terms_levels)) == ref,
reference_level = terms_levels[ref]
)
)
}
}
res |>
dplyr::left_join(
model |>
model_list_variables(labels = variable_labels) |>
dplyr::select(all_of(c("variable", "var_label"))),
by = "variable"
) |>
dplyr::left_join(
model |>
model_get_nlevels() |>
dplyr::select(all_of(c("variable", "var_nlevels"))),
by = "variable"
) |>
dplyr::mutate(
dichotomous = .data$var_nlevels == 2,
label = stringr::str_glue_data(res, label_pattern)
)
}
# count the total number of times where elements of searched
# are found in observed terms
.count_term <- function(observed, searched) {
total <- 0
for (i in searched) {
total <-
total +
stringr::str_count(
observed,
paste0("(^|:)", .escape_regex(i), "(:|$)")
) |>
sum()
}
total
}
broom.helpers/R/tidy_add_n.R 0000644 0001762 0000144 00000014021 14760117574 015465 0 ustar ligges users #' Add the (weighted) number of observations
#'
#' Add the number of observations in a new column `n_obs`, taking into account any
#' weights if they have been defined.
#'
#' For continuous variables, it corresponds to all valid observations
#' contributing to the model.
#'
#' For categorical variables coded with treatment or sum contrasts,
#' each model term could be associated to only one level of the original
#' categorical variable. Therefore, `n_obs` will correspond to the number of
#' observations associated with that level. `n_obs` will also be computed for
#' reference rows. For polynomial contrasts (defined with [stats::contr.poly()]),
#' all levels will contribute to the computation of each model term. Therefore,
#' `n_obs` will be equal to the total number of observations. For Helmert and custom
#' contrasts, only rows contributing positively (i.e. with a positive contrast)
#' to the computation of a term will be considered for estimating `n_obs`. The
#' result could therefore be difficult to interpret. For a better understanding
#' of which observations are taken into account to compute `n_obs` values, you
#' could look at [model_compute_terms_contributions()].
#'
#' For interaction terms, only rows contributing to all the terms of the
#' interaction will be considered to compute `n_obs`.
#'
#' For binomial logistic models, `tidy_add_n()` will also return the
#' corresponding number of events (`n_event`) for each term, taking into account
#' any defined weights. Observed proportions could be obtained as `n_obs / n_event`.
#'
#' Similarly, a number of events will be computed for multinomial logistic
#' models (`nnet::multinom()`) for each level of the outcome (`y.level`),
#' corresponding to the number of observations equal to that outcome level.
#'
#' For Poisson models, `n_event` will be equal to the number of counts per term.
#' In addition, a third column `exposure` will be computed. If no offset is
#' defined, exposure is assumed to be equal to 1 (eventually multiplied by
#' weights) per observation. If an offset is defined, `exposure` will be equal
#' to the (weighted) sum of the exponential of the offset (as a reminder, to
#' model the effect of `x` on the ratio `y / z`, a Poisson model will be defined
#' as `glm(y ~ x + offset(log(z)), family = poisson)`). Observed rates could be
#' obtained with `n_event / exposure`.
#'
#' For Cox models ([survival::coxph()]), an individual could be coded
#' with several observations (several rows). `n_obs` will correspond to the
#' weighted number of observations which could be different from the number of
#' individuals `n_ind`. `tidy_add_n()` will also compute a (weighted) number of
#' events (`n_event`) according to the definition of the [survival::Surv()]
#' object.
#' Exposure time is also returned in `exposure` column. It is equal to the
#' (weighted) sum of the time variable if only one variable time is passed to
#' [survival::Surv()], and to the (weighted) sum of `time2 - time` if two time
#' variables are defined in [survival::Surv()].
#'
#' For competing risk regression models ([tidycmprsk::crr()]), `n_event` takes
#' into account only the event of interest defined by `failcode.`
#'
#' The (weighted) total number of observations (`N_obs`), of individuals
#' (`N_ind`), of events (`N_event`) and of exposure time (`Exposure`) are
#' stored as attributes of the returned tibble.
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @export
#' @family tidy_helpers
#' @examples
#' \donttest{
#' lm(Petal.Length ~ ., data = iris) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' lm(Petal.Length ~ ., data = iris, contrasts = list(Species = contr.sum)) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' lm(Petal.Length ~ ., data = iris, contrasts = list(Species = contr.poly)) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' lm(Petal.Length ~ poly(Sepal.Length, 2), data = iris) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#'
#' glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.helmert")
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' glm(
#' Survived ~ Class * (Age:Sex),
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.helmert")
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' glm(response ~ age + grade * trt, gtsummary::trial, family = poisson) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#'
#' glm(
#' response ~ trt * grade + offset(log(ttdeath)),
#' gtsummary::trial,
#' family = poisson
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_n()
#' }
tidy_add_n <- function(x, model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
if (any(c("n_obs", "n_event", "exposure") %in% names(x))) {
x <- x |> dplyr::select(-dplyr::any_of(c("n_obs", "n_event", "exposure")))
}
n <- model |> model_get_n()
if (is.null(n)) {
x$n <- NA_real_
} else {
if ("y.level" %in% names(n)) {
x <- x |>
dplyr::left_join(n, by = c("y.level", "term"))
} else {
x <- x |>
dplyr::left_join(n, by = "term")
}
}
if (!is.null(attr(n, "N_obs"))) {
.attributes$N_obs <- attr(n, "N_obs")
}
if (!is.null(attr(n, "N_ind"))) {
.attributes$N_ind <- attr(n, "N_ind")
}
if (!is.null(attr(n, "N_event"))) {
.attributes$N_event <- attr(n, "N_event")
}
if (!is.null(attr(n, "Exposure"))) {
.attributes$Exposure <- attr(n, "Exposure")
}
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/model_identify_variables.R 0000644 0001762 0000144 00000014664 15002155536 020415 0 ustar ligges users #' Identify for each coefficient of a model the corresponding variable
#'
#' It will also identify interaction terms and intercept(s).
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @return
#' A tibble with four columns:
#' * `term`: coefficients of the model
#' * `variable`: the corresponding variable
#' * `var_class`: class of the variable (cf. [stats::.MFclass()])
#' * `var_type`: `"continuous"`, `"dichotomous"` (categorical variable with 2 levels),
#' `"categorical"` (categorical variable with 3 or more levels), `"intercept"`
#' or `"interaction"`
#' * `var_nlevels`: number of original levels for categorical variables
#' @export
#' @family model_helpers
#' @seealso [tidy_identify_variables()]
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' glm(
#' Survived ~ Class + Age * Sex,
#' data = df, weights = df$n,
#' family = binomial
#' ) |>
#' model_identify_variables()
#'
#' lm(
#' Sepal.Length ~ poly(Sepal.Width, 2) + Species,
#' data = iris,
#' contrasts = list(Species = contr.sum)
#' ) |>
#' model_identify_variables()
model_identify_variables <- function(model) {
UseMethod("model_identify_variables")
}
#' @rdname model_identify_variables
#' @export
model_identify_variables.default <- function(model) {
assign <- model |> model_get_assign()
model_matrix <- attr(assign, "model_matrix")
if (is.null(model_matrix) || is.null(assign)) {
# return an empty tibble
return(
dplyr::tibble(
variable = NA_character_,
var_class = NA_character_,
var_type = NA_character_,
var_nlevels = NA_integer_
) |>
dplyr::filter(FALSE)
)
}
assign[assign == 0] <- NA
model_terms <- model_get_terms(model)
variable_names <- model |> model_list_variables(only_variable = TRUE)
variables <- attr(model_terms, "term.labels") |>
.clean_backticks(variable_names = variable_names)
tibble::tibble(
term = colnames(model_matrix),
variable = variables[assign]
) |>
# specific case of polynomial terms defined with poly()
dplyr::mutate(
variable = stringr::str_replace(.data$variable, "^poly\\(([^,]*),(.*)\\)$", "\\1")
) |>
dplyr::left_join(
model_list_variables(model) |>
dplyr::select("variable", "var_class"),
by = "variable"
) |>
dplyr::left_join(
model_get_nlevels(model),
by = "variable"
) |>
.compute_var_type()
}
#' @rdname model_identify_variables
#' @export
model_identify_variables.lavaan <- function(model) {
tibble::tibble(
term = paste(model@ParTable$lhs, model@ParTable$op, model@ParTable$rhs),
variable = .clean_backticks(model@ParTable$lhs)
) |>
dplyr::left_join(
tibble::tibble(
variable = .clean_backticks(model@Data@ov$name),
var_class = model@Data@ov$type,
var_nlevels = model@Data@ov$nlev
),
by = "variable"
) |>
dplyr::mutate(
var_nlevels = dplyr::if_else(
.data$var_nlevels == 0,
NA_integer_,
.data$var_nlevels
),
var_class = dplyr::if_else(
.data$var_class == "ordered",
"factor",
.data$var_class
)
) |>
.compute_var_type()
}
# for stats::aov(), variable is equal to term
#' @rdname model_identify_variables
#' @export
model_identify_variables.aov <- function(model) {
model |>
model_list_variables() |>
dplyr::mutate(term = .data$variable) |>
dplyr::select(dplyr::all_of(c("term", "variable", "var_class"))) |>
dplyr::left_join(
model |> model_get_nlevels(),
by = "variable"
) |>
.compute_var_type()
}
#' @rdname model_identify_variables
#' @export
model_identify_variables.clm <- function(model) {
res <- model_identify_variables.default(model)
if (is.null(model$alpha.mat)) {
res <- dplyr::bind_rows(
res |>
dplyr::filter(.data$term != "(Intercept)"),
dplyr::tibble(
term = names(model$alpha),
var_type = "intercept"
)
)
} else {
y.levels <- colnames(model$alpha.mat)
nominal_terms <- rownames(model$alpha.mat)
res <- dplyr::bind_rows(
res |>
dplyr::filter(!.data$term %in% nominal_terms),
res |>
dplyr::filter(.data$term %in% nominal_terms) |>
tidyr::crossing(y.level = y.levels) |>
dplyr::mutate(term = paste(.data$y.level, .data$term, sep = "."))
)
}
res
}
#' @rdname model_identify_variables
#' @export
model_identify_variables.clmm <- model_identify_variables.clm
#' @rdname model_identify_variables
#' @export
model_identify_variables.gam <- function(model) {
model_identify_variables.default(model) |>
dplyr::bind_rows(
# suppressWarnings to avoid a warning when the result is an empty tibble
suppressWarnings(broom::tidy(model, parametric = FALSE)) |>
dplyr::bind_rows(tibble::tibble(term = character(0))) |>
dplyr::select(dplyr::all_of("term")) |>
dplyr::mutate(variable = .data$term, var_type = "continuous")
)
}
#' @export
#' @rdname model_identify_variables
model_identify_variables.model_fit <- function(model) {
model_identify_variables(model$fit)
}
#' @rdname model_identify_variables
#' @importFrom dplyr add_row
#' @export
model_identify_variables.logitr <- function(model) {
res <- model_identify_variables.default(model)
if (!is.null(model$data$scalePar)) {
res <- res |>
dplyr::add_row(
term = "scalePar",
variable = "scalePar",
var_class = "numeric",
var_nlevels = NA,
var_type = "continuous"
)
}
res
}
## model_identify_variables() helpers --------------------------
.compute_var_type <- function(x) {
cat_classes <- c("factor", "character", "logical")
x |>
dplyr::mutate(
var_type = dplyr::case_when(
is.na(.data$variable) ~ "intercept",
.data$var_class %in% cat_classes & .data$var_nlevels <= 2 ~ "dichotomous",
.data$var_class %in% cat_classes ~ "categorical",
!is.na(.data$var_class) ~ "continuous",
is.na(.data$var_class) & stringr::str_detect(.data$variable, ":") ~ "interaction"
)
)
}
#' @export
#' @rdname model_identify_variables
model_identify_variables.svy_vglm <- function(model) {
model_identify_variables(model$fit)
}
broom.helpers/R/helpers.R 0000644 0001762 0000144 00000003472 14662130321 015023 0 ustar ligges users #' Escapes any characters that would have special
#' meaning in a regular expression
#'
#' This functions has been adapted from `Hmisc::escapeRegex()`
#' @param string (`string`)\cr
#' A character vector.
#' @export
#' @family other_helpers
.escape_regex <- function(string) {
gsub(
"([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1",
string
)
}
#' Remove backticks around variable names
#'
#' @param x (`string`)\cr
#' A character vector to be cleaned.
#' @param variable_names (`string`)\cr
#' Optional vector of variable names, could be obtained with
#' [model_list_variables(only_variable = TRUE)][model_list_variables()],
#' to properly take into account interaction only terms/variables.
#' @export
#' @family other_helpers
.clean_backticks <- function(x, variable_names = x) {
saved_names <- names(x)
variable_names <- variable_names |>
stats::na.omit() |>
unique() |>
.escape_regex()
# cleaning existing backticks in variable_names
variable_names <- ifelse(
# does string starts and ends with backticks
stringr::str_detect(variable_names, "^`.*`$"),
# if yes remove first and last character of string
stringr::str_sub(variable_names, 2, -2),
# otherwise, return original string
variable_names
)
# cleaning x, including interaction terms
for (v in variable_names) {
x <- stringr::str_replace_all(
x,
paste0("`", v, "`"),
v
)
}
names(x) <- saved_names
x
}
# copied from broom
.exponentiate <- function(data, col = "estimate") {
data <- data |>
dplyr::mutate(
dplyr::across(dplyr::all_of(col), exp)
)
if ("conf.low" %in% colnames(data)) {
data <- data |>
dplyr::mutate(
dplyr::across(dplyr::any_of(c("conf.low", "conf.high")), exp)
)
}
data
}
broom.helpers/R/tidy_remove_intercept.R 0000644 0001762 0000144 00000002260 14662130321 017756 0 ustar ligges users #' Remove intercept(s)
#'
#' Will remove terms where `var_type == "intercept"`.
#'
#' @details
#' If the `variable` column is not yet available in `x`,
#' [tidy_identify_variables()] will be automatically applied.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @export
#' @family tidy_helpers
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived))
#' glm(Survived ~ Class + Age + Sex, data = df, weights = df$n, family = binomial) |>
#' tidy_and_attach() |>
#' tidy_remove_intercept()
tidy_remove_intercept <- function(x, model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
if (!"var_type" %in% names(x)) {
x <- x |> tidy_identify_variables(model = model)
}
x |>
dplyr::filter(.data$var_type != "intercept") |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/tidy_add_coefficients_type.R 0000644 0001762 0000144 00000010506 14662130321 020720 0 ustar ligges users #' Add coefficients type and label as attributes
#'
#' Add the type of coefficients ("generic", "logistic", "poisson",
#' "relative_risk" or "prop_hazard") and the corresponding coefficient labels,
#' as attributes to `x` (respectively
#' named `coefficients_type` and `coefficients_label`).
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param exponentiate (`logical`)\cr
#' Whether or not to exponentiate the coefficient estimates. It should be
#' consistent with the original call to [broom::tidy()].
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @export
#' @family tidy_helpers
#' @examples
#' ex1 <- lm(hp ~ mpg + factor(cyl), mtcars) |>
#' tidy_and_attach() |>
#' tidy_add_coefficients_type()
#' attr(ex1, "coefficients_type")
#' attr(ex1, "coefficients_label")
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' ex2 <- glm(
#' Survived ~ Class + Age * Sex,
#' data = df,
#' weights = df$n,
#' family = binomial
#' ) |>
#' tidy_and_attach(exponentiate = TRUE) |>
#' tidy_add_coefficients_type()
#' attr(ex2, "coefficients_type")
#' attr(ex2, "coefficients_label")
tidy_add_coefficients_type <- function(
x, exponentiate = attr(x, "exponentiate"),
model = tidy_get_model(x)) {
if (is.null(exponentiate) || !is.logical(exponentiate)) {
cli::cli_abort("'exponentiate' is not provided. You need to pass it explicitely.")
}
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
.attributes$exponentiate <- exponentiate
# specific case for marginal effects / means / contrasts / prediction
# where coefficients_type is already define by the tidier
if (isTRUE(stringr::str_starts(.attributes$coefficients_type, "marginal"))) {
coefficients_type <- .attributes$coefficients_type
coefficients_label <- dplyr::case_when(
coefficients_type == "marginal_effects_average" ~
"Average Marginal Effects",
coefficients_type == "marginal_effects_at_mean" ~
"Marginal Effects at the Mean",
coefficients_type == "marginal_effects_at_marginalmeans" ~
"Marginal Effects at Marginal Means",
stringr::str_starts(coefficients_type, "marginal_effects") ~
"Marginal Effects",
coefficients_type == "marginal_contrasts_average" ~
"Average Marginal Contrasts",
coefficients_type == "marginal_contrasts_at_mean" ~
"Marginal Contrasts at the Mean",
coefficients_type == "marginal_contrasts_at_marginalmeans" ~
"Marginal Contrasts at Marginal Means",
stringr::str_starts(coefficients_type, "marginal_contrasts") ~
"Marginal Contrasts",
stringr::str_starts(coefficients_type, "marginal_means") ~
"Marginal Means",
coefficients_type == "marginal_predictions_average" ~
"Average Marginal Predictions",
coefficients_type == "marginal_predictions_at_mean" ~
"Marginal Predictions at the Mean",
coefficients_type == "marginal_predictions_at_marginalmeans" ~
"Marginal Predictions at Marginal Means",
stringr::str_starts(coefficients_type, "marginal_predictions") ~
"Marginal Predictions",
TRUE ~ "Marginal values"
)
} else {
coefficients_type <- model_get_coefficients_type(model)
if (exponentiate) {
coefficients_label <- dplyr::case_when(
coefficients_type == "logistic" ~ "OR",
coefficients_type == "poisson" ~ "IRR",
coefficients_type == "relative_risk" ~ "RR",
coefficients_type == "prop_hazard" ~ "HR",
TRUE ~ "exp(Beta)"
)
} else {
coefficients_label <- dplyr::case_when(
coefficients_type == "logistic" ~ "log(OR)",
coefficients_type == "poisson" ~ "log(IRR)",
coefficients_type == "relative_risk" ~ "log(RR)",
coefficients_type == "prop_hazard" ~ "log(HR)",
TRUE ~ "Beta"
)
}
}
attr(x, "coefficients_type") <- coefficients_type
attr(x, "coefficients_label") <- coefficients_label
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/model_list_higher_order_variables.R 0000644 0001762 0000144 00000003103 14662130321 022254 0 ustar ligges users #' List higher order variables of a model
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars) |>
#' model_list_higher_order_variables()
#'
#' mod <- glm(
#' response ~ stage * grade + trt:stage,
#' gtsummary::trial,
#' family = binomial
#' )
#' mod |> model_list_higher_order_variables()
#'
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq,
#' family = binomial
#' )
#' mod |> model_list_higher_order_variables()
model_list_higher_order_variables <- function(model) {
UseMethod("model_list_higher_order_variables")
}
#' @export
#' @rdname model_list_higher_order_variables
model_list_higher_order_variables.default <- function(model) {
variables <- model |>
model_list_variables(only_variable = TRUE)
# exclude response variable
response_variable <- model |> model_get_response_variable()
if (!is.null(response_variable)) {
variables <- variables[!variables %in% response_variable]
}
# exclude (weights)
variables <- variables[variables != "(weights)"]
terms <- strsplit(variables, ":")
# count the number of times a combination of terms appear
.count_combination <- function(i) {
lapply(
terms,
function(x) {
all(i %in% x)
}
) |>
unlist() |>
sum()
}
count <- lapply(terms, .count_combination) |> unlist()
# keep combinations appearing only once
variables[count == 1]
}
broom.helpers/R/tidy_add_pairwise_contrasts.R 0000644 0001762 0000144 00000010475 14762100776 021162 0 ustar ligges users #' Add pairwise contrasts for categorical variables
#'
#' Computes pairwise contrasts with [emmeans::emmeans()] and add them to the
#' results tibble. Works only with models supported by `emmeans`, see
#' `vignette("models", package = "emmeans")`.
#'
#' @note
#' If the `contrasts` column is not yet available in `x`,
#' [tidy_add_contrasts()] will be automatically applied.
#'
#' For multi-components models, such as zero-inflated Poisson or beta
#' regression, support of pairwise contrasts is still experimental.
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param variables include ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables for those pairwise contrasts should be added.
#' Default is [all_categorical()].
#' @param keep_model_terms (`logical`)\cr
#' Keep terms from the model?
#' @param pairwise_reverse (`logical`)\cr
#' Determines whether to use `"pairwise"` (if `TRUE`)
#' or `"revpairwise"` (if `FALSE`), see [emmeans::contrast()].
#' @param contrasts_adjust (`string`)\cr
#' Optional adjustment method when computing contrasts,
#' see [emmeans::contrast()] (if `NULL`, use `emmeans` default).
#' @param conf.level (`numeric`)\cr
#' Confidence level, by default use the value indicated
#' previously in [tidy_and_attach()].
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examplesIf .assert_package("emmeans", boolean = TRUE)
#' \donttest{
#' mod1 <- lm(Sepal.Length ~ Species, data = iris)
#' mod1 |>
#' tidy_and_attach() |>
#' tidy_add_pairwise_contrasts()
#'
#' mod1 |>
#' tidy_and_attach() |>
#' tidy_add_pairwise_contrasts(pairwise_reverse = FALSE)
#'
#' mod1 |>
#' tidy_and_attach() |>
#' tidy_add_pairwise_contrasts(keep_model_terms = TRUE)
#'
#' mod1 |>
#' tidy_and_attach() |>
#' tidy_add_pairwise_contrasts(contrasts_adjust = "none")
#'
#' if (.assert_package("gtsummary", boolean = TRUE)) {
#' mod2 <- glm(
#' response ~ age + trt + grade,
#' data = gtsummary::trial,
#' family = binomial
#' )
#' mod2 |>
#' tidy_and_attach(exponentiate = TRUE) |>
#' tidy_add_pairwise_contrasts()
#' }
#' }
tidy_add_pairwise_contrasts <- function(
x,
variables = all_categorical(),
keep_model_terms = FALSE,
pairwise_reverse = TRUE,
contrasts_adjust = NULL,
conf.level = attr(x, "conf.level"),
emmeans_args = list(),
model = tidy_get_model(x),
quiet = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if (is.null(conf.level) || !is.numeric(conf.level)) {
cli::cli_abort("{.arg conf.level} is not provided. You need to pass it explicitely.")
}
if (!"contrasts" %in% names(x)) {
x <- x |> tidy_add_contrasts(model = model)
}
.attributes <- .save_attributes(x)
if (isTRUE(stringr::str_starts(.attributes$coefficients_type, "marginal"))) {
cli::cli_abort("Pairwise contrasts are not compatible with marginal effects / contrasts / means / predictions.") # nolint
}
if (is.null(conf.level)) {
cli::cli_abort("Please specify {.arg conf.level}")
}
# obtain character vector of selected variables
cards::process_selectors(
data = scope_tidy(x),
variables = {{ variables }}
)
if (isTRUE(.attributes$exponentiate) && is.null(emmeans_args$type)) {
emmeans_args$type <- "response"
}
pc <- model_get_pairwise_contrasts(
model = model,
variables = variables,
pairwise_reverse = pairwise_reverse,
contrasts_adjust = contrasts_adjust,
conf.level = conf.level,
emmeans_args = emmeans_args
)
x <- dplyr::bind_rows(x, pc) |>
dplyr::mutate(variableF = forcats::fct_inorder(.data$variable)) |>
dplyr::arrange(.data$variableF) |>
tidyr::fill(all_of(c("var_class", "var_type", "var_nlevels"))) |>
dplyr::select(-all_of("variableF"))
if (!keep_model_terms) {
x <- x |>
dplyr::filter(
!(.data$variable %in% variables) | .data$contrasts_type == "pairwise"
)
}
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/model_get_weights.R 0000644 0001762 0000144 00000005047 15051120553 017051 0 ustar ligges users #' Get sampling weights used by a model
#'
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @note
#' For class `svrepglm` objects (GLM on a survey object with replicate weights),
#' it will return the original sampling weights of the data, not the replicate
#' weights.
#' @export
#' @family model_helpers
#' @examples
#' mod <- lm(Sepal.Length ~ Sepal.Width, iris)
#' mod |> model_get_weights()
#'
#' mod <- lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars, weights = mtcars$gear)
#' mod |> model_get_weights()
#'
#' mod <- glm(
#' response ~ stage * grade + trt,
#' gtsummary::trial,
#' family = binomial
#' )
#' mod |> model_get_weights()
#'
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq,
#' family = binomial
#' )
#' mod |> model_get_weights()
#'
#' d <- dplyr::as_tibble(Titanic) |>
#' dplyr::group_by(Class, Sex, Age) |>
#' dplyr::summarise(
#' n_survived = sum(n * (Survived == "Yes")),
#' n_dead = sum(n * (Survived == "No"))
#' )
#' mod <- glm(cbind(n_survived, n_dead) ~ Class * Age + Sex, data = d, family = binomial)
#' mod |> model_get_weights()
model_get_weights <- function(model) {
UseMethod("model_get_weights")
}
#' @export
#' @rdname model_get_weights
model_get_weights.default <- function(model) {
w <- tryCatch(
stats::weights(model),
error = function(e) {
NULL
}
)
if (is.null(w) || length(w) == 0) {
mf <- model |> model_get_model_frame()
if (!is.null(mf)) {
if ("(weights)" %in% names(mf)) {
w <- mf |> purrr::pluck("(weights)")
} else {
w <- rep_len(1L, mf |> nrow())
}
}
}
# matrix case => transform to vector
if (is.matrix(w)) w <- c(w)
w
}
#' @export
#' @rdname model_get_weights
model_get_weights.svyglm <- function(model) {
stats::weights(model$survey.design)
}
#' @export
#' @rdname model_get_weights
model_get_weights.svrepglm <- function(model) {
model$survey.design$pweights
}
#' @export
#' @rdname model_get_weights
model_get_weights.model_fit <- function(model) {
model_get_weights(model$fit)
}
#' @export
#' @rdname model_get_weights
model_get_weights.svy_vglm <- function(model) {
stats::weights(model$design)
}
#' @export
#' @rdname model_get_weights
model_get_weights.fixest <- function(model) {
if (!is.null(model$weights)) {
model$weights
} else {
rep(1, model$nobs)
}
}
broom.helpers/R/model_get_response.R 0000644 0001762 0000144 00000003706 14662130321 017236 0 ustar ligges users #' Get model response
#'
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars) |>
#' model_get_response()
#'
#' mod <- glm(
#' response ~ stage * grade + trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(stage = contr.sum, grade = contr.treatment(3, 2), trt = "contr.SAS")
#' )
#' mod |> model_get_response()
#'
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq,
#' family = binomial
#' )
#' mod |> model_get_response()
#'
#' d <- dplyr::as_tibble(Titanic) |>
#' dplyr::group_by(Class, Sex, Age) |>
#' dplyr::summarise(
#' n_survived = sum(n * (Survived == "Yes")),
#' n_dead = sum(n * (Survived == "No"))
#' )
#' mod <- glm(cbind(n_survived, n_dead) ~ Class * Age + Sex, data = d, family = binomial, y = FALSE)
#' mod |> model_get_response()
model_get_response <- function(model) {
UseMethod("model_get_response")
}
#' @export
#' @rdname model_get_response
model_get_response.default <- function(model) {
tryCatch(
model |>
model_get_model_frame() |>
stats::model.response(),
error = function(e) {
NULL
}
)
}
#' @export
#' @rdname model_get_response
model_get_response.glm <- function(model) {
y <- model |> purrr::pluck("y")
if (is.null(y)) {
y <- model |>
model_get_model_frame() |>
stats::model.response()
}
# model defined with cbind
if (is.matrix(y) && ncol(y) == 2) {
y <- y[, 1] / rowSums(y)
y[is.nan(y)] <- 0
}
y
}
#' @export
#' @rdname model_get_response
model_get_response.glmerMod <- model_get_response.glm
#' @export
#' @rdname model_get_response
model_get_response.model_fit <- function(model) {
model_get_response(model$fit)
}
broom.helpers/R/model_get_model_frame.R 0000644 0001762 0000144 00000005372 15051577134 017665 0 ustar ligges users #' Get the model frame of a model
#'
#' The structure of the object returned by [stats::model.frame()]
#' could slightly differ for certain types of models.
#' `model_get_model_frame()` will always return an object
#' with the same data structure or `NULL` if it is not possible
#' to compute model frame from `model`.
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @seealso [stats::model.frame()]
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_model_frame() |>
#' head()
model_get_model_frame <- function(model) {
if (!is.null(attr(model, "model_frame")))
return(attr(model, "model_frame"))
UseMethod("model_get_model_frame")
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.default <- function(model) {
tryCatch(
stats::model.frame(model),
error = function(e) {
NULL
}
)
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.coxph <- function(model) {
# variable labels not available, but accessible through model.frame.default()
# however, model.frame.default() does not return (id) and the correct number
# of lines
res <- tryCatch(
stats::model.frame(model),
error = function(e) {
NULL
}
)
if (!is.null(res)) {
res <- res |>
labelled::copy_labels_from(
stats::model.frame.default(model),
.strict = FALSE
)
}
res
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.svycoxph <- model_get_model_frame.default
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.survreg <- function(model) {
tryCatch(
stats::model.frame.default(model),
error = function(e) {
NULL # nocov
}
)
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.biglm <- function(model) {
stats::model.frame(
stats::formula(model),
data = stats::model.frame.default(model)
)
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.model_fit <- function(model) {
model_get_model_frame(model$fit)
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.fixest <- function(model) {
# observations to be removed should be removed after model.frame()
mf <- stats::model.frame.default(
model_get_terms(model),
data = eval(model$call$data, model$call_env),
na.action = NULL
)
if (!is.null(model$obs_selection$obsRemoved))
mf <- mf |> dplyr::slice(model$obs_selection$obsRemoved)
mf
}
#' @export
#' @rdname model_get_model_frame
model_get_model_frame.svy_vglm <- function(model) {
stats::model.frame.default(
model |> model_get_terms(),
data = model$design$variables
)
}
broom.helpers/R/tidy_plus_plus.R 0000644 0001762 0000144 00000024355 15002155536 016447 0 ustar ligges users #' Tidy a model and compute additional informations
#'
#' This function will apply sequentially:
#' * [tidy_and_attach()]
#' * [tidy_disambiguate_terms()]
#' * [tidy_identify_variables()]
#' * [tidy_add_contrasts()]
#' * [tidy_add_reference_rows()]
#' * [tidy_add_pairwise_contrasts()]
#' * [tidy_add_estimate_to_reference_rows()]
#' * [tidy_add_variable_labels()]
#' * [tidy_add_term_labels()]
#' * [tidy_add_header_rows()]
#' * [tidy_add_n()]
#' * [tidy_remove_intercept()]
#' * [tidy_select_variables()]
#' * [tidy_group_by()]
#' * [tidy_add_coefficients_type()]
#' * [tidy_detach_model()]
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model to be attached/tidied.
#' @param tidy_fun (`function`)\cr
#' Option to specify a custom tidier function.
#' @param conf.int (`logical`)\cr
#' Should confidence intervals be computed? (see [broom::tidy()])
#' @param conf.level (`numeric`)\cr
#' Level of confidence for confidence intervals (default: 95%).
#' @param exponentiate (`logical`)\cr
#' Whether or not to exponentiate the coefficient estimates.
#' This is typical for logistic, Poisson and Cox models,
#' but a bad idea if there is no log or logit link; defaults to `FALSE`.
#' @param model_matrix_attr (`logical`)\cr
#' Whether model frame and model matrix should be added as attributes of `model`
#' (respectively named `"model_frame"` and `"model_matrix"`) and passed through.
#' @param variable_labels ([`formula-list-selector`][gtsummary::syntax])\cr
#' A named list or a named vector of custom variable labels.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @param term_labels (`list` or `vector`)\cr
#' A named list or a named vector of custom term labels.
#' @param interaction_sep (`string`)\cr
#' Separator for interaction terms.
#' @param categorical_terms_pattern ([`glue pattern`][glue::glue()])\cr
#' A [glue pattern][glue::glue()] for labels of categorical terms with treatment
#' or sum contrasts (see [model_list_terms_levels()]).
#' @param relabel_poly Should terms generated with [stats::poly()] be relabeled?
#' @param disambiguate_terms (`logical`)\cr
#' Should terms be disambiguated with
#' [tidy_disambiguate_terms()]? (default `TRUE`)
#' @param disambiguate_sep (`string`)\cr
#' Separator for [tidy_disambiguate_terms()].
#' @param add_reference_rows (`logical`)\cr
#' Should reference rows be added?
#' @param no_reference_row ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables for those no reference row should be added,
#' when `add_reference_rows = TRUE`.
#' @param add_pairwise_contrasts (`logical`)\cr
#' Apply [tidy_add_pairwise_contrasts()]?
#' @param pairwise_variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables to add pairwise contrasts.
#' @param keep_model_terms (`logical`)\cr
#' Keep original model terms for variables where
#' pairwise contrasts are added? (default is `FALSE`)
#' @param pairwise_reverse (`logical`)\cr
#' Determines whether to use `"pairwise"` (if `TRUE`)
#' or `"revpairwise"` (if `FALSE`), see [emmeans::contrast()].
#' @param contrasts_adjust (`string`)\cr
#' Optional adjustment method when computing contrasts,
#' see [emmeans::contrast()] (if `NULL`, use `emmeans` default).
#' @param emmeans_args (`list`)\cr
#' List of additional parameter to pass to
#' [emmeans::emmeans()] when computing pairwise contrasts.
#' @param add_estimate_to_reference_rows (`logical`)\cr
#' Should an estimate value be added to reference rows?
#' @param add_header_rows (`logical`)\cr
#' Should header rows be added?
#' @param show_single_row ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables that should be displayed on a single row,
#' when `add_header_rows` is `TRUE`.
#' @param add_n (`logical`)\cr
#' Should the number of observations be added?
#' @param intercept (`logical`)\cr
#' Should the intercept(s) be included?
#' @inheritParams tidy_select_variables
#' @inheritParams tidy_group_by
#' @param keep_model (`logical`)\cr
#' Should the model be kept as an attribute of the final result?
#' @param tidy_post_fun (`function`)\cr
#' Custom function applied to the results at the end of
#' `tidy_plus_plus()` (see note)
#' @param quiet (`logical`)\cr
#' Whether `broom.helpers` should not return a message when requested output
#' cannot be generated. Default is `FALSE`.
#' @param strict (`logical`)\cr
#' Whether `broom.helpers` should return an error
#' when requested output cannot be generated. Default is `FALSE`.
#' @param ... other arguments passed to `tidy_fun()`
#' @note
#' `tidy_post_fun` is applied to the result at the end of `tidy_plus_plus()`
#' and receive only one argument (the result of `tidy_plus_plus()`). However,
#' if needed, the model is still attached to the tibble as an attribute, even
#' if `keep_model = FALSE`. Therefore, it is possible to use [tidy_get_model()]
#' within `tidy_fun` if, for any reason, you need to access the source model.
#' @family tidy_helpers
#' @examples
#' \donttest{
#' ex1 <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris) |>
#' tidy_plus_plus()
#' ex1
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(
#' Survived = factor(Survived, c("No", "Yes"))
#' ) |>
#' labelled::set_variable_labels(
#' Class = "Passenger's class",
#' Sex = "Gender"
#' )
#' ex2 <- glm(
#' Survived ~ Class + Age * Sex,
#' data = df, weights = df$n,
#' family = binomial
#' ) |>
#' tidy_plus_plus(
#' exponentiate = TRUE,
#' add_reference_rows = FALSE,
#' categorical_terms_pattern = "{level} / {reference_level}",
#' add_n = TRUE
#' )
#' ex2
#' }
#' @examplesIf require("gtsummary") && require("emmeans")
#' \donttest{
#' ex3 <-
#' glm(
#' response ~ poly(age, 3) + stage + grade * trt,
#' na.omit(gtsummary::trial),
#' family = binomial,
#' contrasts = list(
#' stage = contr.treatment(4, base = 3),
#' grade = contr.sum
#' )
#' ) |>
#' tidy_plus_plus(
#' exponentiate = TRUE,
#' variable_labels = c(age = "Age (in years)"),
#' add_header_rows = TRUE,
#' show_single_row = all_dichotomous(),
#' term_labels = c("poly(age, 3)3" = "Cubic age"),
#' keep_model = TRUE
#' )
#' ex3
#' }
#' @export
tidy_plus_plus <- function(model,
tidy_fun = tidy_with_broom_or_parameters,
conf.int = TRUE,
conf.level = .95,
exponentiate = FALSE,
model_matrix_attr = TRUE,
variable_labels = NULL,
instrumental_suffix = " (instrumental)",
term_labels = NULL,
interaction_sep = " * ",
categorical_terms_pattern = "{level}",
relabel_poly = FALSE,
disambiguate_terms = TRUE,
disambiguate_sep = ".",
add_reference_rows = TRUE,
no_reference_row = NULL,
add_pairwise_contrasts = FALSE,
pairwise_variables = all_categorical(),
keep_model_terms = FALSE,
pairwise_reverse = TRUE,
contrasts_adjust = NULL,
emmeans_args = list(),
add_estimate_to_reference_rows = TRUE,
add_header_rows = FALSE,
show_single_row = NULL,
add_n = TRUE,
intercept = FALSE,
include = everything(),
group_by = auto_group_by(),
group_labels = NULL,
keep_model = FALSE,
tidy_post_fun = NULL,
quiet = FALSE,
strict = FALSE,
...) {
res <- model |>
tidy_and_attach(
tidy_fun = tidy_fun,
conf.int = conf.int,
conf.level = conf.level,
exponentiate = exponentiate,
model_matrix_attr = model_matrix_attr,
...
)
if (disambiguate_terms) {
res <- res |>
tidy_disambiguate_terms(sep = disambiguate_sep, quiet = quiet)
}
res <- res |>
tidy_identify_variables(quiet = quiet) |>
tidy_add_contrasts()
if (add_reference_rows) {
res <- res |>
tidy_add_reference_rows(
no_reference_row = {{ no_reference_row }},
quiet = quiet
)
}
if (add_pairwise_contrasts) {
res <- res |>
tidy_add_pairwise_contrasts(
variables = {{ pairwise_variables }},
keep_model_terms = keep_model_terms,
pairwise_reverse = pairwise_reverse,
contrasts_adjust = contrasts_adjust,
emmeans_args = emmeans_args
)
}
if (add_reference_rows && add_estimate_to_reference_rows) {
res <- res |>
tidy_add_estimate_to_reference_rows(exponentiate = exponentiate, quiet = quiet)
}
res <- res |>
tidy_add_variable_labels(
labels = variable_labels,
interaction_sep = interaction_sep,
instrumental_suffix = instrumental_suffix
) |>
tidy_add_term_labels(
labels = term_labels,
interaction_sep = interaction_sep,
categorical_terms_pattern = categorical_terms_pattern,
relabel_poly = relabel_poly,
quiet = quiet
)
if (add_header_rows) {
res <- res |>
tidy_add_header_rows(
show_single_row = {{ show_single_row }},
strict = strict,
quiet = quiet
)
}
if (add_n) {
res <- res |> tidy_add_n()
}
if (!intercept) {
res <- res |> tidy_remove_intercept()
}
res <- res |>
tidy_select_variables(
include = {{ include }},
) |>
tidy_group_by(
group_by = {{ group_by }},
group_labels = group_labels
) |>
tidy_add_coefficients_type()
if (!is.null(tidy_post_fun))
res <- res |> tidy_post_fun()
if (!keep_model) {
res <- res |> tidy_detach_model()
}
res
}
broom.helpers/R/tidy_add_reference_rows.R 0000644 0001762 0000144 00000017405 14762101026 020234 0 ustar ligges users #' Add references rows for categorical variables
#'
#' For categorical variables with a treatment contrast
#' ([stats::contr.treatment()]), a SAS contrast ([stats::contr.SAS()])
#' a sum contrast ([stats::contr.sum()]), or successive differences contrast
#' ([MASS::contr.sdif()]) add a reference row.
#'
#' The added `reference_row` column will be equal to:
#'
#' * `TRUE` for a reference row;
#' * `FALSE` for a normal row of a variable with a reference row;
#' * `NA` for variables without a reference row.
#'
#' If the `contrasts` column is not yet available in `x`,
#' [tidy_add_contrasts()] will be automatically applied.
#'
#' `tidy_add_reference_rows()` will not populate the label
#' of the reference term. It is therefore better to apply
#' [tidy_add_term_labels()] after `tidy_add_reference_rows()`
#' rather than before. Similarly, it is better to apply
#' `tidy_add_reference_rows()` before [tidy_add_n()].
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param no_reference_row ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables for those no reference row should be added.
#' See also [all_categorical()] and [all_dichotomous()].
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examplesIf .assert_package("gtsummary", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#'
#' res <-
#' glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.SAS")
#' ) |>
#' tidy_and_attach()
#' res |> tidy_add_reference_rows()
#' res |> tidy_add_reference_rows(no_reference_row = all_dichotomous())
#' res |> tidy_add_reference_rows(no_reference_row = "Class")
#'
#' glm(
#' response ~ stage + grade * trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(
#' stage = contr.treatment(4, base = 3),
#' grade = contr.treatment(3, base = 2),
#' trt = contr.treatment(2, base = 2)
#' )
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_reference_rows()
#' }
tidy_add_reference_rows <- function(
x, no_reference_row = NULL,
model = tidy_get_model(x),
quiet = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
# adding reference rows is not meaningful for stats::aov
if (inherits(model, "aov")) {
return(x |> dplyr::mutate(reference_row = NA))
}
# checking cases where adding reference rows is not meaningful
if (isTRUE(.attributes$skip_add_reference_rows)) {
return(x |> dplyr::mutate(reference_row = NA))
}
if ("header_row" %in% names(x)) {
cli::cli_abort(paste(
"{.fn tidy_add_reference_rows} cannot be applied",
"after {.fn tidy_add_header_rows}."
))
}
if ("reference_row" %in% names(x)) {
if (!quiet) {
cli_alert_danger(paste(
"{.code tidy_add_reference_rows()} has already been applied.",
"x has been returned unchanged."
))
}
return(x)
}
if ("label" %in% names(x)) {
if (!quiet) {
cli_alert_info(paste0(
"tidy_add_reference_rows() has been applied after tidy_add_term_labels().\n",
"You should consider applying tidy_add_reference_rows() first."
))
}
}
if ("n_obs" %in% names(x)) {
if (!quiet) {
cli_alert_info(paste0(
"{.code tidy_add_reference_rows()} has been applied after {.code tidy_add_n()}.\n",
"You should consider applying {.code tidy_add_reference_rows()} first."
))
}
}
if (!"contrasts" %in% names(x)) {
x <- x |> tidy_add_contrasts(model = model)
}
# obtain character vector of selected variables
cards::process_selectors(
data = scope_tidy(x),
no_reference_row = {{ no_reference_row }}
)
terms_levels <- model_list_terms_levels(model)
if (!is.null(terms_levels)) {
terms_levels <- terms_levels |>
# keep only terms corresponding to variable in x
# (e.g. to exclude interaction only variables)
dplyr::filter(
.data$variable %in% unique(stats::na.omit(x$variable)) &
# and exclude variables in no_reference_row
!.data$variable %in% no_reference_row
)
}
if (is.null(terms_levels) || nrow(terms_levels) == 0) {
return(
x |>
dplyr::mutate(reference_row = NA) |>
tidy_attach_model(model)
)
}
terms_levels <- terms_levels |>
dplyr::group_by(.data$variable) |>
dplyr::mutate(rank = seq_len(dplyr::n()))
has_var_label <- "var_label" %in% names(x)
if (!has_var_label) {
x$var_label <- NA_character_
} # temporary populate it
has_instrumental <- "instrumental" %in% names(x)
if (!has_instrumental) {
x$instrumental <- NA
} # temporary populate it
x <- x |>
dplyr::mutate(
reference_row = dplyr::if_else(
.data$variable %in% unique(terms_levels$variable),
FALSE,
NA
),
rank = seq_len(dplyr::n()) # for sorting table at the end
)
group <- NULL
if ("component" %in% names(x)) {
group <- "component"
}
if ("y.level" %in% names(x)) {
group <- "y.level"
}
if (!is.null(group)) {
x$.group_by_var <- x[[group]]
} else {
x$.group_by_var <- ""
}
ref_rows <- terms_levels |>
dplyr::filter(.data$reference) |>
dplyr::mutate(reference_row = TRUE) |>
dplyr::select(
dplyr::all_of(
c("term", "variable", "label", "reference_row", "rank")
)
)
if (!"label" %in% names(x)) {
ref_rows <- ref_rows |> dplyr::select(-all_of("label"))
}
# populate effect column for mixed models
tmp <- x
if (!"effect" %in% names(x)) {
tmp$effect <- NA_character_
}
var_summary <- tmp |>
dplyr::group_by(.data$.group_by_var, .data$variable) |>
dplyr::summarise(
var_class = dplyr::first(.data$var_class),
var_type = dplyr::first(.data$var_type),
var_label = dplyr::first(.data$var_label),
instrumental = dplyr::first(.data$instrumental),
var_nlevels = dplyr::first(.data$var_nlevels),
effect = dplyr::first(.data$effect),
contrasts = dplyr::first(.data$contrasts),
contrasts_type = dplyr::first(.data$contrasts_type),
var_min_rank = min(.data$rank),
var_max_rank = min(.data$rank),
.groups = "drop_last"
)
ref_rows <- ref_rows |>
dplyr::left_join(
var_summary,
by = "variable"
) |>
dplyr::mutate(
rank = .data$var_min_rank - 1.25 + .data$rank,
# if last, reduce by .5 to avoid overlap with next variable
rank = dplyr::if_else(
.data$rank > .data$var_max_rank,
.data$rank - .5,
.data$rank
)
) |>
dplyr::select(-dplyr::all_of(c("var_min_rank", "var_max_rank")))
if (!"effect" %in% names(x)) {
ref_rows <- ref_rows |> dplyr::select(-dplyr::all_of("effect"))
}
x <- x |>
dplyr::bind_rows(ref_rows)
if (!is.null(group)) {
x[[group]] <- x$.group_by_var
}
x <- x |> dplyr::select(-dplyr::all_of(".group_by_var"))
if (!has_var_label) {
x <- x |> dplyr::select(-dplyr::all_of("var_label"))
}
if (!has_instrumental) {
x <- x |> dplyr::select(-dplyr::all_of("instrumental"))
}
x |>
dplyr::arrange(.data$rank) |>
dplyr::select(-dplyr::all_of("rank")) |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/broom.helpers-package.R 0000644 0001762 0000144 00000005243 15044413405 017531 0 ustar ligges users ## usethis namespace: start
#' @importFrom cli cli_alert_info cli_alert_info cli_alert_danger cli_code cli_ul
#' @importFrom rlang .data .env
#' @importFrom purrr %||%
## usethis namespace: end
NULL
# because `where` is not exported by tidyselect
# cf. https://github.com/r-lib/tidyselect/issues/201
utils::globalVariables(c("."))
# update named vectors, y values overriding x values if common name
.update_vector <- function(x, y) {
if (is.null(y)) {
return(x)
}
if (is.null(names(y)) || any(names(y) == "")) {
cli::cli_abort("All elements of y should be named.")
}
for (i in names(y)) {
if (utils::hasName(x, i)) {
x[i] <- y[i]
} else {
x <- c(x, y[i])
}
}
x
}
# return superscript character
.superscript_numbers <- function(x) {
if (!is.character(x)) {
x <- as.character(x)
}
x[x == "1"] <- "" # do not show when equal 1
pattern <- c(
"0" = "\u2070", "1" = "\u00b9", "2" = "\u00b2",
"3" = "\u00b3", "4" = "\u2074", "5" = "\u2075",
"6" = "\u2076", "7" = "\u2077", "8" = "\u2078",
"9" = "\u2079"
)
x |> stringr::str_replace_all(pattern)
}
# for consistent column order
.order_tidy_columns <- function(x) {
x |>
dplyr::select(
dplyr::any_of(
c(
"group_by", "y.level", "component", "tau", "term", "original_term",
"variable", "instrumental", "var_label", "var_class", "var_type",
"var_nlevels", "header_row", "contrasts", "contrasts_type",
"reference_row", "label", "n_obs", "n_ind", "n_event", "exposure"
)
),
dplyr::everything()
)
}
# attributes to be saved between tidy_* functions
.save_attributes <- function(x) {
.attributes <- attributes(x)
.attributes_names <- intersect(
names(.attributes),
c(
"exponentiate", "conf.level", "coefficients_type", "coefficients_label",
"variable_labels", "term_labels", "N_obs", "N_ind", "N_event", "Exposure",
"force_contr.treatment", "skip_add_reference_rows",
"find_missing_interaction_terms", "component"
)
)
.attributes[.attributes_names]
}
#' Sequence generation between min and max
#'
#' @param x (`numeric`)\cr
#' A numeric vector.
#' @param length.out (`integer`)\cr
#' Desired length of the sequence (a positive integer).
#' @details
#' `seq_range(x, length.out)` is a shortcut for
#' `seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = length.out)`
#' @return
#' a numeric vector
#' @export
#' @examples
#' seq_range(iris$Petal.Length)
seq_range <- function(x, length.out = 25) {
seq(min(x, na.rm = TRUE), max(x, na.rm = TRUE), length.out = length.out)
}
broom.helpers/R/model_compute_terms_contributions.R 0000644 0001762 0000144 00000011133 15051113355 022403 0 ustar ligges users #' Compute a matrix of terms contributions
#'
#' @description
#'
#' Used for [model_get_n()]. For each row and term, equal 1 if this row should
#' be taken into account in the estimate of the number of observations,
#' 0 otherwise.
#'
#' @details
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' \donttest{
#' mod <- lm(Sepal.Length ~ Sepal.Width, iris)
#' mod |> model_compute_terms_contributions()
#'
#' mod <- lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars)
#' mod |> model_compute_terms_contributions()
#'
#' mod <- glm(
#' response ~ stage * grade + trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(
#' stage = contr.sum,
#' grade = contr.treatment(3, 2),
#' trt = "contr.SAS"
#' )
#' )
#' mod |> model_compute_terms_contributions()
#'
#' mod <- glm(
#' response ~ stage * trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(stage = contr.poly)
#' )
#' mod |> model_compute_terms_contributions()
#'
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq, family = binomial
#' )
#' mod |> model_compute_terms_contributions()
#'
#' d <- dplyr::as_tibble(Titanic) |>
#' dplyr::group_by(Class, Sex, Age) |>
#' dplyr::summarise(
#' n_survived = sum(n * (Survived == "Yes")),
#' n_dead = sum(n * (Survived == "No"))
#' )
#' mod <- glm(cbind(n_survived, n_dead) ~ Class * Age + Sex, data = d, family = binomial)
#' mod |> model_compute_terms_contributions()
#' }
model_compute_terms_contributions <- function(model) {
UseMethod("model_compute_terms_contributions")
}
#' @export
#' @rdname model_compute_terms_contributions
model_compute_terms_contributions.default <- function(model) {
contr <- model |> model_get_contrasts()
# check poly contrasts
# we change the contrasts arguments to force positive values
if (!is.null(contr) && length(contr) > 0) {
list.contr.poly <- model |>
model_list_contrasts() |>
dplyr::filter(.data$contrasts == "contr.poly") |>
purrr::pluck("variable")
for (v in list.contr.poly) {
contr[[v]] <- contr.poly.abs
}
}
tcm <- tryCatch(
{
formula <- model_get_terms(model)
if (is.null(formula)) {
return(NULL)
} # stop
# continuous variables converted to 1 to force positive values
d <- model |> model_get_model_frame()
if (is.null(d)) {
return(NULL)
} # stop
d <- d |>
dplyr::mutate(
dplyr::across(
where(~ is.numeric(.x) & (
# check is.matrix for cbind variables
# but include polynomial terms
!is.matrix(.x) | inherits(.x, "poly")
)),
~ abs(.x) + 1 # force positive value
)
)
stats::model.matrix(formula, data = d, contrasts.arg = contr)
},
error = function(e) {
NULL # nocov
}
)
if (is.null(tcm)) {
return(NULL) # nocov
}
tcm <- .add_ref_terms_to_tcm(model, tcm)
# keep only positive terms
tcm <- tcm > 0
storage.mode(tcm) <- "integer"
tcm
}
contr.poly.abs <- function(...) {
stats::contr.poly(...) |> abs()
}
.add_ref_terms_to_tcm <- function(model, tcm) {
# adding reference terms
# for treatment and sum contrasts
tl <- model |>
model_list_terms_levels()
for (v in unique(tl$variable)) {
ct <- tl |>
dplyr::filter(.data$variable == v) |>
purrr::chuck("contrasts_type") |>
dplyr::first()
ref_term <- tl |>
dplyr::filter(.data$variable == v & .data$reference) |>
purrr::chuck("term")
nonref_terms <- tl |>
dplyr::filter(.data$variable == v & !.data$reference) |>
purrr::chuck("term")
if (ct == "treatment" && all(nonref_terms %in% colnames(tcm))) {
tcm <- cbind(
tcm,
matrix(
as.integer(
rowSums(tcm[, nonref_terms, drop = FALSE] == 0L) ==
length(nonref_terms)
),
ncol = 1,
dimnames = list(NULL, ref_term)
)
)
}
if (ct == "sum" && all(nonref_terms %in% colnames(tcm))) {
tcm <- cbind(
tcm,
matrix(
as.integer(
rowSums(tcm[, nonref_terms, drop = FALSE] == -1L) ==
length(nonref_terms)
),
ncol = 1,
dimnames = list(NULL, ref_term)
)
)
}
}
tcm
}
broom.helpers/R/tidy_group_by.R 0000644 0001762 0000144 00000006543 15002155536 016246 0 ustar ligges users #' Group results by selected columns
#'
#' Indicates that results should be grouped. By default
#' (`group_by = auto_group_by()`), results will be grouped according to the
#' `y.level` column (for multinomial models) or the `component` column
#' (multi-components models) if any.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param group_by ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' One or several variables to group by. Default is `auto_group_by()`.
#' Use `NULL` to force ungrouping.
#' @param group_labels (`string`)\cr
#' An optional named vector of custom term labels.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @return
#' The `x` tibble with, if relevant, an additional `group_by` column.
#' @export
#' @examplesIf require("nnet")
#' mod <- multinom(Species ~ Petal.Width + Petal.Length, data = iris)
#' mod |> tidy_and_attach() |> tidy_group_by()
#'
#' mod |>
#' tidy_and_attach() |>
#' tidy_group_by(group_labels = c(versicolor = "harlequin blueflag"))
#'
#' mod |> tidy_and_attach() |> tidy_group_by(group_by = NULL)
#'
#' mod |>
#' tidy_and_attach() |>
#' tidy_identify_variables() |>
#' tidy_group_by(group_by = variable)
#' @family tidy_helpers
tidy_group_by <- function(
x,
group_by = auto_group_by(),
group_labels = NULL,
model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
.attributes <- .save_attributes(x)
# obtain character vector of selected variables
group_vars <- x |> dplyr::select({{ group_by }}) |> colnames()
# compute groups
if (length(group_vars) > 0) {
x <- x |>
tidyr::unite(col = "group_by", dplyr::all_of(group_vars), remove = FALSE)
groups <- unique(x$group_by)
x$group_by <- factor(x$group_by, levels = groups)
x <- x |> dplyr::arrange(group_by)
# group labels
if (!is.null(group_labels)) {
if (is.null(names(group_labels)) || any(names(group_labels) == ""))
cli::cli_abort("All elements of {.arg group_labels} should be named.")
keep <- names(group_labels) %in% levels(x$group_by)
drop <- names(group_labels[!keep])
if (length(drop) > 0) {
cli::cli_alert_warning(c(
"Problem in {.arg group_labels}:\n",
"value{?s} {.strong {drop}} not found in the data and ignored."
))
}
group_labels <- group_labels[keep]
l <- levels(x$group_by)
names(l) <- l
l[names(group_labels)] <- group_labels
levels(x$group_by) <- l
}
}
if (length(group_vars) == 0 && "group_by" %in% names(x))
x <- x |> dplyr::select(-dplyr::all_of("group_by"))
# sometimes, group_by not relevant after tidy_select_variable
if ("group_by" %in% names(x) && all(x$group_by == ""))
x <- x |> dplyr::select(-dplyr::all_of("group_by"))
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
#' @rdname tidy_group_by
#' @export
auto_group_by <- function() {
vars <- tidyselect::peek_vars()
if ("group_by" %in% vars) # if already grouped, we keep it
return("group_by")
if ("y.level" %in% vars)
return("y.level")
if ("component" %in% vars)
return("component")
NULL
}
broom.helpers/R/tidy_add_header_rows.R 0000644 0001762 0000144 00000024447 14762100744 017540 0 ustar ligges users #' Add header rows variables with several terms
#'
#' For variables with several terms (usually categorical variables but
#' could also be the case of continuous variables with polynomial terms
#' or splines), `tidy_add_header_rows()` will add an additional row
#' per variable, where `label` will be equal to `var_label`.
#' These additional rows could be identified with `header_row` column.
#'
#' The `show_single_row` argument allows to specify a list
#' of dichotomous variables that should be displayed on a single row
#' instead of two rows.
#'
#' The added `header_row` column will be equal to:
#'
#' * `TRUE` for an header row;
#' * `FALSE` for a normal row of a variable with an header row;
#' * `NA` for variables without an header row.
#'
#' If the `label` column is not yet available in `x`,
#' [tidy_add_term_labels()] will be automatically applied.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param show_single_row ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Names of dichotomous variables that should be displayed on a single row.
#' See also [all_dichotomous()].
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examplesIf .assert_package("gtsummary", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#'
#' res <-
#' glm(
#' Survived ~ Class + Age + Sex,
#' data = df, weights = df$n, family = binomial,
#' contrasts = list(Age = contr.sum, Class = "contr.SAS")
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_variable_labels(labels = list(Class = "Custom label for Class")) |>
#' tidy_add_reference_rows()
#' res |> tidy_add_header_rows()
#' res |> tidy_add_header_rows(show_single_row = all_dichotomous())
#'
#' glm(
#' response ~ stage + grade * trt,
#' gtsummary::trial,
#' family = binomial,
#' contrasts = list(
#' stage = contr.treatment(4, base = 3),
#' grade = contr.treatment(3, base = 2),
#' trt = contr.treatment(2, base = 2)
#' )
#' ) |>
#' tidy_and_attach() |>
#' tidy_add_reference_rows() |>
#' tidy_add_header_rows()
#' }
tidy_add_header_rows <- function(x,
show_single_row = NULL,
model = tidy_get_model(x),
quiet = FALSE,
strict = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if ("header_row" %in% names(x)) {
if (!quiet) {
cli_alert_danger(paste(
"{.code tidy_add_header_rows()} has already been applied.",
"x has been returned unchanged."
))
}
return(x)
}
.attributes <- .save_attributes(x)
if (!"label" %in% names(x)) {
x <- x |> tidy_add_term_labels(model = model)
}
# management of show_single_row --------------
# if reference_rows have been defined, removal of reference row
variables_to_simplify <- NULL
# obtain character vector of selected variables
cards::process_selectors(
data = scope_tidy(x),
show_single_row = {{ show_single_row }}
)
has_reference_row <- "reference_row" %in% names(x)
if (!has_reference_row) {
x$reference_row <- FALSE
}
xx <- x
if ("y.level" %in% names(x)) {
xx <- xx |>
dplyr::filter(.data$y.level == x$y.level[1])
}
# checking if variables incorrectly requested for single row summary
if ("component" %in% colnames(xx)) {
bad_single_row <- xx |>
dplyr::filter(
!is.na(.data$variable),
is.na(.data$reference_row) | !.data$reference_row,
.data$variable %in% show_single_row
) |>
dplyr::group_by(.data$component, .data$variable) |>
dplyr::count() |>
dplyr::filter(.data$n > 1) |>
dplyr::pull(.data$variable)
} else {
bad_single_row <- xx |>
dplyr::filter(
!is.na(.data$variable),
is.na(.data$reference_row) | !.data$reference_row,
.data$variable %in% show_single_row
) |>
dplyr::group_by(.data$variable) |>
dplyr::count() |>
dplyr::filter(.data$n > 1) |>
dplyr::pull(.data$variable)
}
if (length(bad_single_row) > 0) {
if (!quiet) {
paste(
"Variable(s) {paste(shQuote(bad_single_row), collapse = \", \")} were",
"incorrectly requested to be printed on a single row."
) |>
cli_alert_danger()
}
if (strict) {
cli::cli_abort(
"Incorrect call with `show_single_row=`. Quitting execution.",
call = NULL
)
}
show_single_row <- setdiff(show_single_row, bad_single_row)
}
if (
length(show_single_row) > 0 &&
any(x$variable %in% show_single_row)
) {
if ("component" %in% colnames(xx)) {
variables_to_simplify <- xx |>
dplyr::filter(
.data$variable %in% show_single_row & !.data$reference_row
) |>
dplyr::count(.data$component, .data$variable) |>
dplyr::filter(.data$n == 1) |>
purrr::pluck("variable") |>
unique()
} else {
variables_to_simplify <- xx |>
dplyr::filter(
.data$variable %in% show_single_row & !.data$reference_row
) |>
dplyr::count(.data$variable) |>
dplyr::filter(.data$n == 1) |>
purrr::pluck("variable")
}
# removing reference rows of those variables
if (length(variables_to_simplify) > 0) {
x <- x |>
dplyr::filter(
is.na(.data$variable) |
!.data$variable %in% variables_to_simplify |
(.data$variable %in% variables_to_simplify & !.data$reference_row)
)
}
# for variables in show_single_row
# label should be equal to var_label
x <- x |>
dplyr::mutate(
label = dplyr::if_else(
.data$variable %in% show_single_row,
.data$var_label,
.data$label
)
)
}
if (!has_reference_row) {
x <- x |> dplyr::select(-dplyr::all_of("reference_row"))
}
# computing header rows ---------------
x <- x |>
dplyr::mutate(
rank = seq_len(dplyr::n()) # for sorting table at the end
)
if ("y.level" %in% names(x)) {
header_rows <- x |>
dplyr::filter(!is.na(.data$variable) & !.data$variable %in% show_single_row)
if (nrow(header_rows) > 0) {
header_rows <- header_rows |>
dplyr::mutate(term_cleaned = .clean_backticks(.data$term, .data$variable)) |>
dplyr::group_by(.data$variable, .data$y.level) |>
dplyr::summarise(
var_class = dplyr::first(.data$var_class),
var_type = dplyr::first(.data$var_type),
var_label = dplyr::first(.data$var_label),
var_nlevels = dplyr::first(.data$var_nlevels),
contrasts = dplyr::first(.data$contrasts),
contrasts_type = dplyr::first(.data$contrasts_type),
var_nrow = dplyr::n(),
var_test = sum(.data$term_cleaned != .data$variable),
rank = min(.data$rank) - .25,
.groups = "drop_last"
) |>
dplyr::filter(.data$var_nrow >= 2 | .data$var_test > 0) |>
dplyr::select(-dplyr::all_of(c("var_nrow", "var_test"))) |>
dplyr::mutate(
header_row = TRUE,
label = .data$var_label
)
}
} else if ("component" %in% names(x)) {
header_rows <- x |>
dplyr::filter(!is.na(.data$variable) & !.data$variable %in% show_single_row)
if (nrow(header_rows) > 0) {
header_rows <- header_rows |>
dplyr::mutate(term_cleaned = .clean_backticks(.data$term, .data$variable)) |>
dplyr::group_by(.data$variable, .data$component) |>
dplyr::summarise(
var_class = dplyr::first(.data$var_class),
var_type = dplyr::first(.data$var_type),
var_label = dplyr::first(.data$var_label),
var_nlevels = dplyr::first(.data$var_nlevels),
contrasts = dplyr::first(.data$contrasts),
contrasts_type = dplyr::first(.data$contrasts_type),
var_nrow = dplyr::n(),
var_test = sum(.data$term_cleaned != .data$variable),
rank = min(.data$rank) - .25,
.groups = "drop_last"
) |>
dplyr::filter(.data$var_nrow >= 2 | .data$var_test > 0) |>
dplyr::select(-dplyr::all_of(c("var_nrow", "var_test"))) |>
dplyr::mutate(
header_row = TRUE,
label = .data$var_label
)
}
} else {
header_rows <- x |>
dplyr::filter(
!is.na(.data$variable) &
!.data$variable %in% show_single_row &
!.data$var_type %in% c("ran_pars", "ran_vals")
)
if (nrow(header_rows) > 0) {
header_rows <- header_rows |>
dplyr::mutate(term_cleaned = .clean_backticks(.data$term, .data$variable)) |>
dplyr::group_by(.data$variable) |>
dplyr::summarise(
var_class = dplyr::first(.data$var_class),
var_type = dplyr::first(.data$var_type),
var_label = dplyr::first(.data$var_label),
var_nlevels = dplyr::first(.data$var_nlevels),
contrasts = dplyr::first(.data$contrasts),
contrasts_type = dplyr::first(.data$contrasts_type),
var_nrow = dplyr::n(),
# for dichotomous variables with no reference row
var_test = sum(.data$term_cleaned != .data$variable),
rank = min(.data$rank) - .25,
.groups = "drop_last"
) |>
dplyr::filter(.data$var_nrow >= 2 | .data$var_test > 0) |>
dplyr::select(-dplyr::all_of(c("var_nrow", "var_test"))) |>
dplyr::mutate(
header_row = TRUE,
label = .data$var_label
)
}
}
x <- x |>
dplyr::mutate(
header_row = dplyr::if_else(.data$variable %in% header_rows$variable, FALSE, NA)
) |>
dplyr::bind_rows(header_rows) |>
dplyr::arrange(.data$rank) |>
dplyr::select(-dplyr::all_of("rank"))
x |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/tidy_identify_variables.R 0000644 0001762 0000144 00000011620 14733566032 020262 0 ustar ligges users #' Identify the variable corresponding to each model coefficient
#'
#' `tidy_identify_variables()` will add to the tidy tibble
#' three additional columns: `variable`, `var_class`, `var_type` and `var_nlevels`.
#'
#' It will also identify interaction terms and intercept(s).
#'
#' `var_type` could be:
#'
#' * `"continuous"`,
#' * `"dichotomous"` (categorical variable with 2 levels),
#' * `"categorical"` (categorical variable with 3 levels or more),
#' * `"intercept"`
#' * `"interaction"`
#' * `"ran_pars` (random-effect parameters for mixed models)
#' * `"ran_vals"` (random-effect values for mixed models)
#' * `"unknown"` in the rare cases where `tidy_identify_variables()`
#' will fail to identify the list of variables
#'
#' For dichotomous and categorical variables, `var_nlevels` corresponds to the number
#' of original levels in the corresponding variables.
#'
#' For `fixest` models, a new column `instrumental` is added to indicate
#' instrumental variables.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @seealso [model_identify_variables()]
#' @family tidy_helpers
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' glm(
#' Survived ~ Class + Age * Sex,
#' data = df,
#' weights = df$n,
#' family = binomial
#' ) |>
#' tidy_and_attach() |>
#' tidy_identify_variables()
#'
#' lm(
#' Sepal.Length ~ poly(Sepal.Width, 2) + Species,
#' data = iris,
#' contrasts = list(Species = contr.sum)
#' ) |>
#' tidy_and_attach(conf.int = TRUE) |>
#' tidy_identify_variables()
tidy_identify_variables <- function(x, model = tidy_get_model(x),
quiet = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if ("header_row" %in% names(x)) {
cli::cli_abort(paste(
"{.fn tidy_identify_variables} cannot be applied",
"after {.fn tidy_add_header_rows}."
))
}
.attributes <- .save_attributes(x)
# specific case for fixest models to handle instrumental variables
if (inherits(model, "fixest")) {
x <- x |>
dplyr::mutate(
original_term = .data$term,
instrumental = .data$term |> stringr::str_starts("fit_"),
term = dplyr::if_else(
.data$term |> stringr::str_starts("fit_"),
.data$term |> stringr::str_sub(5),
.data$term
)
)
}
# specific case for marginal means / effects / predictions / contrasts
if (
isTRUE(
stringr::str_starts(.attributes$coefficients_type, "marginal") &&
"variable" %in% names(x)
)
) {
x <- x |>
dplyr::left_join(
model_list_variables(model, add_var_type = TRUE),
by = "variable"
) |>
tidy_attach_model(model = model, .attributes = .attributes)
return(x)
}
if ("variable" %in% names(x)) {
x <- x |> dplyr::select(
-any_of(c("variable", "var_class", "var_type", "var_nlevels"))
)
}
variables_list <- model_identify_variables(model)
if (nrow(variables_list) > 0) {
x <- x |>
dplyr::left_join(variables_list, by = "term")
# management of random parameters (mixed models)
if ("effect" %in% names(x)) {
x <- x |>
dplyr::mutate(
var_type = dplyr::if_else(
.data$effect %in% c("ran_pars", "ran_vals", "random"),
.data$effect,
.data$var_type
)
)
}
x |>
dplyr::mutate(
var_type = dplyr::if_else(
is.na(.data$var_type),
"intercept",
.data$var_type
),
variable = dplyr::if_else(
is.na(.data$variable),
.data$term,
.data$variable
)
) |>
tidy_attach_model(model = model, .attributes = .attributes)
} else {
if (!quiet) {
cli_alert_danger(paste0(
"Unable to identify the list of variables.\n\n",
"This is usually due to an error calling {.code stats::model.frame(x)}",
"or {.code stats::model.matrix(x)}.\n",
"It could be the case if that type of model does not implement these methods.\n",
"Rarely, this error may occur if the model object was created within\na ",
"functional programming framework (e.g. using {.code lappy()}, ",
"{.code purrr::map()}, etc.)."
))
}
x |>
dplyr::mutate(
variable = .data$term,
var_class = NA_integer_,
var_type = "unknown",
var_nlevels = NA_integer_
) |>
tidy_attach_model(model = model, .attributes = .attributes)
}
}
broom.helpers/R/model_list_contrasts.R 0000644 0001762 0000144 00000010431 14662130321 017605 0 ustar ligges users #' List contrasts used by a model
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @return
#' A tibble with three columns:
#' * `variable`: variable name
#' * `contrasts`: contrasts used
#' * `contrasts_type`: type of contrasts
#' ("treatment", "sum", "poly", "helmert", "sdiff, "other" or "no.contrast")
#' * `reference`: for variables with treatment, SAS
#' or sum contrasts, position of the reference level
#' @details
#' For models with no intercept, no contrasts will be applied to one of the
#' categorical variable. In such case, one dummy term will be returned for each
#' level of the categorical variable.
#' @export
#' @family model_helpers
#' @examples
#' glm(
#' am ~ mpg + factor(cyl),
#' data = mtcars,
#' family = binomial,
#' contrasts = list(`factor(cyl)` = contr.sum)
#' ) |>
#' model_list_contrasts()
model_list_contrasts <- function(model) {
UseMethod("model_list_contrasts")
}
#' @export
#' @rdname model_list_contrasts
model_list_contrasts.default <- function(model) {
model_contrasts <- model_get_contrasts(model)
if (length(model_contrasts) == 0) {
return(NULL)
}
contrasts_list <- tibble::tibble(
variable = names(model_contrasts),
contrasts = NA_character_,
reference = NA_integer_
)
xlevels <- model_get_xlevels(model)
model_variables <- model_identify_variables(model)
for (i in seq_len(nrow(contrasts_list))) {
n_levels <- length(xlevels[[contrasts_list$variable[i]]])
n_terms <- model_variables |>
dplyr::filter(.data$variable == contrasts_list$variable[i]) |>
nrow()
if (n_levels == n_terms) {
contrasts_list$contrasts[[i]] <- "no.contrast"
} else if (
is.character(model_contrasts[[i]]) &&
length(is.character(model_contrasts[[i]]) == 1)
) {
contrasts_list$contrasts[[i]] <- model_contrasts[[i]]
if (model_contrasts[[i]] == "contr.treatment") {
contrasts_list$reference[[i]] <- 1
}
if (model_contrasts[[i]] == "contr.SAS" || model_contrasts[[i]] == "contr.sum") {
contrasts_list$reference[[i]] <- n_levels
}
if (model_contrasts[[i]] == "contr.sdif") {
contrasts_list$reference[[i]] <- NA
}
} else if (all(model_contrasts[[i]] == stats::contr.treatment(n_levels))) {
contrasts_list$contrasts[[i]] <- "contr.treatment"
contrasts_list$reference[[i]] <- 1
} else if (all(model_contrasts[[i]] == stats::contr.sum(n_levels))) {
contrasts_list$contrasts[[i]] <- "contr.sum"
contrasts_list$reference[[i]] <- n_levels
} else if (all(model_contrasts[[i]] == stats::contr.helmert(n_levels))) {
contrasts_list$contrasts[[i]] <- "contr.helmert"
} else if (all(model_contrasts[[i]] == stats::contr.poly(n_levels))) {
contrasts_list$contrasts[[i]] <- "contr.poly"
} else if (all(model_contrasts[[i]] == stats::contr.SAS(n_levels))) {
contrasts_list$contrasts[[i]] <- "contr.SAS"
contrasts_list$reference[[i]] <- n_levels
} else if (
.assert_package("MASS", boolean = TRUE) &&
all(model_contrasts[[i]] == MASS::contr.sdif(n_levels))
) {
contrasts_list$contrasts[[i]] <- "contr.sdif"
contrasts_list$reference[[i]] <- NA
} else {
for (j in 2:n_levels) { # testing treatment coding width different value for base variable
if (all(model_contrasts[[i]] == stats::contr.treatment(n_levels, base = j))) {
contrasts_list$contrasts[[i]] <- paste0("contr.treatment(base=", j, ")")
contrasts_list$reference[[i]] <- j
}
}
}
# if still not found, just indicate custom contrast
if (is.na(contrasts_list$contrasts[[i]])) {
contrasts_list$contrasts[[i]] <- "custom"
}
}
contrasts_list |>
dplyr::mutate(
contrasts_type = dplyr::case_when(
.data$contrasts |> stringr::str_starts("contr.treatment") ~ "treatment",
.data$contrasts == "contr.SAS" ~ "treatment",
.data$contrasts == "contr.sum" ~ "sum",
.data$contrasts == "contr.helmert" ~ "helmert",
.data$contrasts == "contr.poly" ~ "poly",
.data$contrasts == "contr.sdif" ~ "sdif",
.data$contrasts == "no.contrast" ~ "no.contrast",
TRUE ~ "other"
)
)
}
broom.helpers/R/marginal_tidiers.R 0000644 0001762 0000144 00000123073 14762100504 016677 0 ustar ligges users #' Average Marginal Effects with `margins::margins()`
#'
#' `r lifecycle::badge("superseded")`
#'
#' The `margins` package is no longer under active development and may be
#' removed from CRAN sooner or later. It is advised to use the `marginaleffects`
#' package instead, offering more functionalities. You could have a look at the
#' [article](https://larmarange.github.io/broom.helpers/articles/marginal_tidiers.html)
#' dedicated to marginal estimates with `broom.helpers`. `tidy_avg_slopes()`
#' could be used as an alternative.
#'
#' Use `margins::margins()` to estimate average marginal effects (AME) and
#' return a tibble tidied in a way that it could be used by `broom.helpers`
#' functions. See `margins::margins()` for a list of supported models.
#' @details
#' By default, `margins::margins()` estimate average marginal effects (AME): an
#' effect is computed for each observed value in the original dataset before
#' being averaged.
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @note When applying `margins::margins()`, custom contrasts are ignored.
#' Treatment contrasts (`stats::contr.treatment()`) are applied to all
#' categorical variables. Interactions are also ignored.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `margins::margins()`.
#' @family marginal_tieders
#' @seealso `margins::margins()`
#' @export
#' @examplesIf .assert_package("margins", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_margins(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_margins)
#' }
tidy_margins <- function(x, conf.int = TRUE, conf.level = 0.95, ...) {
.assert_package("margins")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_margins}.") # nolint
}
res <- broom::tidy(
margins::margins(x, ...),
conf.int = conf.int,
conf.level = conf.level
)
attr(res, "coefficients_type") <- "marginal_effects_average"
attr(res, "force_contr.treatment") <- TRUE
res
}
#' Marginal Predictions at the mean with `effects::allEffects()`
#'
#' Use `effects::allEffects()` to estimate marginal predictions and
#' return a tibble tidied in a way that it could be used by `broom.helpers`
#' functions.
#' See `vignette("functions-supported-by-effects", package = "effects")` for
#' a list of supported models.
#' @details
#' By default, `effects::allEffects()` estimate marginal predictions at the mean
#' at the observed means for continuous variables and weighting modalities
#' of categorical variables according to their observed distribution in the
#' original dataset. Marginal predictions are therefore computed at
#' a sort of averaged situation / typical values for the other variables fixed
#' in the model.
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @note
#' If the model contains interactions, `effects::allEffects()` will return
#' marginal predictions for the different levels of the interactions.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `effects::allEffects()`.
#' @family marginal_tieders
#' @seealso `effects::allEffects()`
#' @export
#' @examplesIf .assert_package("effects", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_all_effects(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_all_effects)
#' }
tidy_all_effects <- function(x, conf.int = TRUE, conf.level = .95, ...) {
.assert_package("effects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_all_effects}.") # nolint
}
if (
inherits(x, "multinom") || inherits(x, "polr") ||
inherits(x, "clm") || inherits(x, "clmm")
) {
return(tidy_all_effects_effpoly(x, conf.int, conf.level, ...))
}
.clean <- function(x) {
# merge first columns if interaction
x <- tidyr::unite(x, "term", 1:(ncol(x) - 4), sep = ":")
names(x) <- c("term", "estimate", "std.error", "conf.low", "conf.high")
x$term <- as.character(x$term)
rownames(x) <- NULL
x
}
res <- x |>
effects::allEffects(se = conf.int, level = conf.level, ...) |>
as.data.frame() |>
purrr::map(.clean) |>
dplyr::bind_rows(.id = "variable") |>
dplyr::relocate("variable", "term")
attr(res, "coefficients_type") <- "marginal_predictions_at_mean"
attr(res, "skip_add_reference_rows") <- TRUE
attr(res, "find_missing_interaction_terms") <- TRUE
res
}
tidy_all_effects_effpoly <- function(x, conf.int = TRUE, conf.level = .95, ...) {
res <- x |>
effects::allEffects(se = conf.int, level = conf.level, ...) |>
purrr::map(effpoly_to_df) |>
dplyr::bind_rows(.id = "variable") |>
dplyr::relocate("y.level", "variable", "term")
attr(res, "coefficients_type") <- "marginal_predictions_at_mean"
attr(res, "skip_add_reference_rows") <- TRUE
attr(res, "find_missing_interaction_terms") <- TRUE
res
}
effpoly_to_df <- function(x) {
factors <- sapply(x$variables, function(x) x$is.factor)
factor.levels <- lapply(x$variables[factors], function(x) x$levels)
if (!length(factor.levels) == 0) {
factor.names <- names(factor.levels)
for (fac in factor.names) {
x$x[[fac]] <- factor(x$x[[fac]],
levels = factor.levels[[fac]],
exclude = NULL
)
}
}
result <- rep.int(list(x$x), length(x$y.levels))
names(result) <- x$y.levels
result <- result |> dplyr::bind_rows(.id = "y.level")
# merge columns if interaction
result <- result |> tidyr::unite("term", 2:ncol(result), sep = ":")
result$estimate <- as.vector(x$prob)
result$std.error <- as.vector(x$se.prob)
if (!is.null(x$confidence.level)) {
result$conf.low <- as.vector(x$lower.prob)
result$conf.high <- as.vector(x$upper.prob)
}
result
}
#' Marginal Predictions with `ggeffects::ggpredict()`
#'
#' Use `ggeffects::ggpredict()` to estimate marginal predictions
#' and return a tibble tidied in a way that it could be used by `broom.helpers`
#' functions.
#' See for a list of supported
#' models.
#' @details
#' By default, `ggeffects::ggpredict()` estimate marginal predictions at the
#' observed mean of continuous variables and at the first modality of categorical
#' variables (regardless of the type of contrasts used in the model).
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @note
#' By default, `ggeffects::ggpredict()` estimates marginal predictions for each
#' individual variable, regardless of eventual interactions.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to `ggeffects::ggpredict()`.
#' @family marginal_tieders
#' @seealso `ggeffects::ggpredict()`
#' @export
#' @examplesIf .assert_package("ggeffects", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_ggpredict(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_ggpredict)
#' }
tidy_ggpredict <- function(x, conf.int = TRUE, conf.level = .95, ...) {
.assert_package("ggeffects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_ggpredict}.") # nolint
}
if (isFALSE(conf.int)) conf.level <- NA
res <- x |>
ggeffects::ggpredict(ci_level = conf.level) |> # add ...
purrr::map(
~ .x |>
dplyr::as_tibble() |>
dplyr::mutate(x = as.character(.data$x))
) |>
dplyr::bind_rows() |>
dplyr::rename(
variable = "group",
term = "x",
estimate = "predicted"
) |>
dplyr::relocate("variable", "term")
# multinomial models
if ("response.level" %in% names(res)) {
res <- res |>
dplyr::rename(y.level = "response.level") |>
dplyr::relocate("y.level")
}
attr(res, "coefficients_type") <- "marginal_predictions"
attr(res, "skip_add_reference_rows") <- TRUE
res
}
#' Marginal Slopes / Effects with `marginaleffects::avg_slopes()`
#'
#' Use `marginaleffects::avg_slopes()` to estimate marginal slopes / effects and
#' return a tibble tidied in a way that it could be used by `broom.helpers`
#' functions. See `marginaleffects::avg_slopes()` for a list of supported
#' models.
#' @details
#' By default, `marginaleffects::avg_slopes()` estimate average marginal
#' effects (AME): an effect is computed for each observed value in the original
#' dataset before being averaged. Marginal Effects at the Mean (MEM) could be
#' computed by specifying `newdata = "mean"`. Other types of marginal effects
#' could be computed. Please refer to the documentation page of
#' `marginaleffects::avg_slopes()`.
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to
#' `marginaleffects::avg_slopes()`.
#' @family marginal_tieders
#' @seealso `marginaleffects::avg_slopes()`
#' @export
#' @examplesIf .assert_package("marginaleffects", boolean = TRUE)
#' \donttest{
#' # Average Marginal Effects (AME)
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_avg_slopes(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_avg_slopes)
#'
#' mod2 <- lm(Petal.Length ~ poly(Petal.Width, 2) + Species, data = iris)
#' tidy_avg_slopes(mod2)
#'
#' # Marginal Effects at the Mean (MEM)
#' tidy_avg_slopes(mod, newdata = "mean")
#' tidy_plus_plus(mod, tidy_fun = tidy_avg_slopes, newdata = "mean")
#' }
tidy_avg_slopes <- function(x, conf.int = TRUE, conf.level = 0.95, ...) {
.assert_package("marginaleffects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_avg_slopes}.") # nolint
}
dots$exponentiate <- NULL
dots$conf_level <- conf.level
dots$model <- x
res <- do.call(marginaleffects::avg_slopes, dots) |>
dplyr::rename(variable = "term")
if ("contrast" %in% names(res)) {
res <- res |> dplyr::rename(term = "contrast")
} else {
res <- res |> dplyr::mutate(term = .data$variable)
}
res <- res |>
dplyr::relocate("variable", "term")
# multinomial models
if ("group" %in% names(res)) {
res <- res |>
dplyr::rename(y.level = "group") |>
dplyr::relocate("y.level")
}
attr(res, "coefficients_type") <- dplyr::case_when(
is.null(dots$newdata) ~ "marginal_effects_average",
isTRUE(dots$newdata == "mean") ~ "marginal_effects_at_mean",
isTRUE(dots$newdata == "balanced") ~ "marginal_effects_at_marginalmeans",
TRUE ~ "marginal_effects"
)
attr(res, "skip_add_reference_rows") <- TRUE
res |> dplyr::as_tibble()
}
#' Marginal Contrasts with `marginaleffects::avg_comparisons()`
#'
#' Use `marginaleffects::avg_comparisons()` to estimate marginal contrasts and
#' return a tibble tidied in a way that it could be used by `broom.helpers`
#' functions. See `marginaleffects::avg_comparisons()` for a list of supported
#' models.
#' @details
#' By default, `marginaleffects::avg_comparisons()` estimate average marginal
#' contrasts: a contrast is computed for each observed value in the original
#' dataset (counterfactual approach) before being averaged.
#' Marginal Contrasts at the Mean could be computed by specifying
#' `newdata = "mean"`. The `variables` argument can be used to select the
#' contrasts to be computed. Please refer to the documentation page of
#' `marginaleffects::avg_comparisons()`.
#'
#' See also `tidy_marginal_contrasts()` for taking into account interactions.
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to
#' `marginaleffects::avg_comparisons()`.
#' @family marginal_tieders
#' @seealso `marginaleffects::avg_comparisons()`
#' @export
#' @examplesIf .assert_package("marginaleffects", boolean = TRUE)
#' \donttest{
#' # Average Marginal Contrasts
#'
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_avg_comparisons(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_avg_comparisons)
#'
#' mod2 <- lm(Petal.Length ~ poly(Petal.Width, 2) + Species, data = iris)
#' tidy_avg_comparisons(mod2)
#'
#' # Custumizing the type of contrasts
#' tidy_avg_comparisons(
#' mod2,
#' variables = list(Petal.Width = 2, Species = "pairwise")
#' )
#'
#' # Marginal Contrasts at the Mean
#' tidy_avg_comparisons(mod, newdata = "mean")
#' tidy_plus_plus(mod, tidy_fun = tidy_avg_comparisons, newdata = "mean")
#' }
tidy_avg_comparisons <- function(x, conf.int = TRUE, conf.level = 0.95, ...) {
.assert_package("marginaleffects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_avg_comparisons}.") # nolint
}
dots$exponentiate <- NULL
dots$conf_level <- conf.level
dots$model <- x
res <- do.call(marginaleffects::avg_comparisons, dots) |>
dplyr::rename(variable = "term")
if ("contrast" %in% names(res)) {
res <- res |> dplyr::rename(term = "contrast")
} else {
res <- res |> dplyr::mutate(term = .data$variable)
}
res <- res |>
dplyr::relocate("variable", "term")
# multinomial models
if ("group" %in% names(res)) {
res <- res |>
dplyr::rename(y.level = "group") |>
dplyr::relocate("y.level")
}
attr(res, "coefficients_type") <- dplyr::case_when(
is.null(dots$newdata) ~ "marginal_contrasts_average",
isTRUE(dots$newdata == "mean") ~ "marginal_contrasts_at_mean",
isTRUE(dots$newdata == "balanced") ~ "marginal_contrasts_at_marginalmeans",
TRUE ~ "marginal_contrasts"
)
attr(res, "skip_add_reference_rows") <- TRUE
res |> dplyr::as_tibble()
}
#' Marginal Means with deprecated `marginaleffects::marginal_means()`
#'
#' `r lifecycle::badge("deprecated")`
#' This function is deprecated. `marginal_means()` is not anymore exported
#' by `marginaleffects`. Use instead `tidy_marginal_predictions()` with
#' the option `newdata = "balanced"`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters.
#' @export
tidy_marginal_means <- function(x, conf.int = TRUE, conf.level = 0.95, ...) {
lifecycle::deprecate_stop(
when = "1.19.0",
what = "tidy_marginal_means()",
with = "tidy_marginal_predictions()",
details = "Specify `newdata = \"balanced\"`."
)
}
#' Marginal Predictions with `marginaleffects::avg_predictions()`
#'
#' Use `marginaleffects::avg_predictions()` to estimate marginal predictions for
#' each variable of a model and return a tibble tidied in a way that it could
#' be used by `broom.helpers` functions.
#' See `marginaleffects::avg_predictions()` for a list of supported models.
#' @details
#' Marginal predictions are obtained by calling, for each variable,
#' `marginaleffects::avg_predictions()` with the same variable being used for
#' the `variables` and the `by` argument.
#'
#' Considering a categorical variable named `cat`, `tidy_marginal_predictions()`
#' will call `avg_predictions(model, variables = list(cat = unique), by = "cat")`
#' to obtain average marginal predictions for this variable.
#'
#' Considering a continuous variable named `cont`, `tidy_marginal_predictions()`
#' will call `avg_predictions(model, variables = list(cont = "fivenum"), by = "cont")`
#' to obtain average marginal predictions for this variable at the minimum, the
#' first quartile, the median, the third quartile and the maximum of the observed
#' values of `cont`.
#'
#' By default, *average marginal predictions* are computed: predictions are made
#' using a counterfactual grid for each value of the variable of interest,
#' before averaging the results. *Marginal predictions at the mean* could be
#' obtained by indicating `newdata = "mean"`. Other assumptions are possible,
#' see the help file of `marginaleffects::avg_predictions()`.
#'
#' `tidy_marginal_predictions()` will compute marginal predictions for each
#' variable or combination of variables, before stacking the results in a unique
#' tibble. This is why `tidy_marginal_predictions()` has a `variables_list`
#' argument consisting of a list of specifications that will be passed
#' sequentially to the `variables` argument of `marginaleffects::avg_predictions()`.
#'
#' The helper function `variables_to_predict()` could be used to automatically
#' generate a suitable list to be used with `variables_list`. By default, all
#' unique values are retained for categorical variables and `fivenum` (i.e.
#' Tukey's five numbers, minimum, quartiles and maximum) for continuous variables.
#' When `interactions = FALSE`, `variables_to_predict()` will return a list of
#' all individual variables used in the model. If `interactions = FALSE`, it
#' will search for higher order combinations of variables (see
#' `model_list_higher_order_variables()`).
#'
#' `variables_list`'s default value, `"auto"`, calls
#' `variables_to_predict(interactions = TRUE)` while `"no_interaction"` is a
#' shortcut for `variables_to_predict(interactions = FALSE)`.
#'
#' You can also provide custom specifications (see examples).
#'
#' `plot_marginal_predictions()` works in a similar way and returns a list of
#' plots that could be combined with `patchwork::wrap_plots()` (see examples).
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param variables_list (`list` or `string`)\cr
#' A list whose elements will be sequentially passed to
#' `variables` in `marginaleffects::avg_predictions()` (see details below);
#' alternatively, it could also be the string `"auto"` (default) or
#' `"no_interaction"`.
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to
#' `marginaleffects::avg_predictions()`.
#' @family marginal_tieders
#' @seealso `marginaleffects::avg_predictions()`
#' @export
#' @examplesIf .assert_package("marginaleffects", boolean = TRUE)
#' # example code
#'
#' \donttest{
#' # Average Marginal Predictions
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_marginal_predictions(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_marginal_predictions)
#' if (require("patchwork")) {
#' plot_marginal_predictions(mod) |> patchwork::wrap_plots()
#' plot_marginal_predictions(mod) |>
#' patchwork::wrap_plots() &
#' ggplot2::scale_y_continuous(limits = c(0, 1), label = scales::percent)
#' }
#'
#' mod2 <- lm(Petal.Length ~ poly(Petal.Width, 2) + Species, data = iris)
#' tidy_marginal_predictions(mod2)
#' if (require("patchwork")) {
#' plot_marginal_predictions(mod2) |> patchwork::wrap_plots()
#' }
#' tidy_marginal_predictions(
#' mod2,
#' variables_list = variables_to_predict(mod2, continuous = "threenum")
#' )
#' tidy_marginal_predictions(
#' mod2,
#' variables_list = list(
#' list(Petal.Width = c(0, 1, 2, 3)),
#' list(Species = unique)
#' )
#' )
#' tidy_marginal_predictions(
#' mod2,
#' variables_list = list(list(Species = unique, Petal.Width = 1:3))
#' )
#'
#' # Model with interactions
#' mod3 <- glm(
#' Survived ~ Sex * Age + Class,
#' data = df, family = binomial
#' )
#' tidy_marginal_predictions(mod3)
#' tidy_marginal_predictions(mod3, "no_interaction")
#' if (require("patchwork")) {
#' plot_marginal_predictions(mod3) |>
#' patchwork::wrap_plots()
#' plot_marginal_predictions(mod3, "no_interaction") |>
#' patchwork::wrap_plots()
#' }
#' tidy_marginal_predictions(
#' mod3,
#' variables_list = list(
#' list(Class = unique, Sex = "Female"),
#' list(Age = unique)
#' )
#' )
#'
#' # Marginal Predictions at the Mean
#' tidy_marginal_predictions(mod, newdata = "mean")
#' if (require("patchwork")) {
#' plot_marginal_predictions(mod, newdata = "mean") |>
#' patchwork::wrap_plots()
#' }
#' }
tidy_marginal_predictions <- function(x, variables_list = "auto",
conf.int = TRUE, conf.level = 0.95, ...) {
.assert_package("marginaleffects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_marginal_predictions}.") # nolint
}
dots$exponentiate <- NULL
dots$conf_level <- conf.level
dots$model <- x
if (is.character(variables_list) && variables_list == "auto") {
variables_list <- variables_to_predict(x, interactions = TRUE)
}
if (is.character(variables_list) && variables_list == "no_interaction") {
variables_list <- variables_to_predict(x, interactions = FALSE)
}
if (!is.list(variables_list)) {
cli::cli_abort("{.arg variables_list} should be a list or \"auto\" or \"no_interaction\".")
}
res <- purrr::map_df(variables_list, .tidy_one_marginal_prediction, dots)
attr(res, "coefficients_type") <- dplyr::case_when(
is.null(dots$newdata) ~ "marginal_predictions_average",
isTRUE(dots$newdata == "mean") ~ "marginal_predictions_at_mean",
isTRUE(dots$newdata == "balanced") ~ "marginal_predictions_at_marginalmeans",
TRUE ~ "marginal_predictions"
)
attr(res, "skip_add_reference_rows") <- TRUE
res
}
.tidy_one_marginal_prediction <- function(variables, dots) {
dots$variables <- variables
dots$by <- names(variables)
if (
inherits(dots$model, "multinom") || inherits(dots$model, "polr") ||
inherits(dots$model, "clm") || inherits(dots$model, "clmm")
) {
dots$by <- c(dots$by, "group")
}
res <- do.call(marginaleffects::avg_predictions, dots) |>
dplyr::arrange(dplyr::pick(dplyr::any_of(c(names(variables)))), "group") |>
dplyr::mutate(variable = paste(names(variables), collapse = ":")) |>
tidyr::unite(col = "term", sep = " * ", dplyr::all_of(names(variables))) |>
dplyr::relocate("variable", "term")
if ("group" %in% names(res)) {
res <- res |>
dplyr::rename(y.level = "group") |>
dplyr::relocate("y.level")
}
res
}
#' @export
#' @param model (a model object, e.g. `glm`)\cr
#' A model.
#' @param interactions (`logical`)\cr
#' Should combinations of variables corresponding to
#' interactions be returned?
#' @param categorical ([`predictor values`][marginaleffects::predictions()])\cr
#' Default values for categorical variables.
#' @param continuous ([`predictor values`][marginaleffects::predictions()])\cr
#' Default values for continuous variables.
#' @rdname tidy_marginal_predictions
variables_to_predict <- function(model, interactions = TRUE,
categorical = unique,
continuous = stats::fivenum) {
variables <- model |>
model_list_variables(add_var_type = TRUE)
if (interactions) {
keep <- model_list_higher_order_variables(model)
} else {
keep <- variables[variables$var_type != "interaction", ]$variable
}
response_variable <- model |> model_get_response_variable()
if (!is.null(response_variable)) {
keep <- keep[keep != response_variable]
}
ret <- list(
categorical = categorical,
dichotomous = categorical,
continuous = continuous
)
variables <- variables |>
tibble::column_to_rownames("variable")
one_element <- function(v) {
v <- strsplit(v, ":") |> unlist()
one <- variables[v, "var_type"]
one <- ret[one]
names(one) <- v
one
}
lapply(keep, one_element)
}
#' @export
#' @rdname tidy_marginal_predictions
plot_marginal_predictions <- function(x, variables_list = "auto",
conf.level = 0.95, ...) {
.assert_package("marginaleffects")
.assert_package("ggplot2")
dots <- rlang::dots_list(...)
dots$conf_level <- conf.level
dots$model <- x
if (is.character(variables_list) && variables_list == "auto") {
variables_list <- variables_to_predict(x, interactions = TRUE) |>
purrr::map(rev)
}
if (is.character(variables_list) && variables_list == "no_interaction") {
variables_list <- variables_to_predict(x, interactions = FALSE) |>
purrr::map(rev)
}
if (!is.list(variables_list)) {
cli::cli_abort("{.arg variables_list} should be a list or \"auto\" or \"no_interaction\".")
}
purrr::map(variables_list, .plot_one_marginal_prediction, dots)
}
.plot_one_marginal_prediction <- function(variables, dots) {
if (length(variables) >= 4) {
cli::cli_abort(paste(
"Combination of 4 or more variables. {.fun plot_marginal_predictions} can",
"manage only combinations of 3 variables or less."
))
}
multinom <- inherits(dots$model, "multinom") | inherits(dots$model, "polr") |
inherits(dots$model, "clm") | inherits(dots$model, "clmm")
list_variables <- dots$model |> model_list_variables(add_var_type = TRUE)
x_variable <- names(variables[1])
x_type <- list_variables |>
dplyr::filter(.data$variable == x_variable) |>
dplyr::pull("var_type")
if (x_type == "dichotomous") x_type <- "categorical"
x_label <- list_variables |>
dplyr::filter(.data$variable == x_variable) |>
dplyr::pull("var_label")
if (is.character(variables[[1]]) && variables[[1]] == "fivenum") {
variables[[1]] <- broom.helpers::seq_range
}
dots$variables <- variables
dots$by <- names(variables)
if (multinom) {
dots$by <- c(dots$by, "group")
}
d <- do.call(marginaleffects::avg_predictions, dots)
mapping <- ggplot2::aes(
x = .data[[x_variable]],
y = .data[["estimate"]],
ymin = .data[["conf.low"]],
ymax = .data[["conf.high"]]
)
if (x_type == "continuous") {
mapping$group <- ggplot2::aes(group = 1L)$group
}
if (length(variables) >= 2) {
colour_variable <- names(variables[2])
d[[colour_variable]] <- factor(d[[colour_variable]])
colour_label <- list_variables |>
dplyr::filter(.data$variable == colour_variable) |>
dplyr::pull("var_label")
mapping$colour <- ggplot2::aes(colour = .data[[colour_variable]])$colour
if (x_type == "continuous") {
mapping$fill <- ggplot2::aes(fill = .data[[colour_variable]])$fill
mapping$group <- ggplot2::aes(group = .data[[colour_variable]])$group
}
}
if (x_type == "continuous") {
p <- ggplot2::ggplot(d, mapping = mapping) +
ggplot2::geom_ribbon(
mapping = ggplot2::aes(colour = NULL),
alpha = 0.1,
show.legend = FALSE
) +
ggplot2::geom_line()
} else {
p <- ggplot2::ggplot(d, mapping = mapping) +
ggplot2::geom_pointrange(position = ggplot2::position_dodge(.5))
}
if (length(variables) >= 2) {
p <- p +
ggplot2::labs(colour = colour_label, fill = colour_label)
}
if (length(variables) == 3 && !multinom) {
facet_variable <- names(variables[3])
p <- p +
ggplot2::facet_wrap(facet_variable)
}
if (multinom && length(variables) <= 2) {
p <- p +
ggplot2::facet_wrap("group")
}
if (multinom && length(variables) == 3) {
facet_variable <- c("group", names(variables[3]))
p <- p +
ggplot2::facet_wrap(facet_variable)
}
p +
ggplot2::xlab(x_label) +
ggplot2::ylab(NULL) +
ggplot2::theme_light() +
ggplot2::theme(legend.position = "bottom")
}
#' Marginal Contrasts with `marginaleffects::avg_comparisons()`
#'
#' Use `marginaleffects::avg_comparisons()` to estimate marginal contrasts for
#' each variable of a model and return a tibble tidied in a way that it could
#' be used by `broom.helpers` functions.
#' See `marginaleffects::avg_comparisons()` for a list of supported models.
#' @details
#' Marginal contrasts are obtained by calling, for each variable or combination
#' of variables, `marginaleffects::avg_comparisons()`.
#'
#' `tidy_marginal_contrasts()` will compute marginal contrasts for each
#' variable or combination of variables, before stacking the results in a unique
#' tibble. This is why `tidy_marginal_contrasts()` has a `variables_list`
#' argument consisting of a list of specifications that will be passed
#' sequentially to the `variables` and the `by` argument of
#' `marginaleffects::avg_comparisons()`.
#'
#' Considering a single categorical variable named `cat`, `tidy_marginal_contrasts()`
#' will call `avg_comparisons(model, variables = list(cat = "reference"))`
#' to obtain average marginal contrasts for this variable.
#'
#' Considering a single continuous variable named `cont`, `tidy_marginalcontrasts()`
#' will call `avg_comparisons(model, variables = list(cont = 1))`
#' to obtain average marginal contrasts for an increase of one unit.
#'
#' For a combination of variables, there are several possibilities. You could
#' compute "cross-contrasts" by providing simultaneously several variables
#' to `variables` and specifying `cross = TRUE` to
#' `marginaleffects::avg_comparisons()`. Alternatively, you could compute the
#' contrasts of a first variable specified to `variables` for the
#' different values of a second variable specified to `by`.
#'
#' The helper function `variables_to_contrast()` could be used to automatically
#' generate a suitable list to be used with `variables_list`. Each combination
#' of variables should be a list with two named elements: `"variables"` a list
#' of named elements passed to `variables` and `"by"` a list of named elements
#' used for creating a relevant `datagrid` and whose names are passed to `by`.
#'
#' `variables_list`'s default value, `"auto"`, calls
#' `variables_to_contrast(interactions = TRUE, cross = FALSE)` while
#' `"no_interaction"` is a shortcut for
#' `variables_to_contrast(interactions = FALSE)`. `"cross"` calls
#' `variables_to_contrast(interactions = TRUE, cross = TRUE)`
#'
#' You can also provide custom specifications (see examples).
#'
#' By default, *average marginal contrasts* are computed: contrasts are computed
#' using a counterfactual grid for each value of the variable of interest,
#' before averaging the results. *Marginal contrasts at the mean* could be
#' obtained by indicating `newdata = "mean"`. Other assumptions are possible,
#' see the help file of `marginaleffects::avg_comparisons()`.
#'
#' For more information, see `vignette("marginal_tidiers", "broom.helpers")`.
#' @param x (a model object, e.g. `glm`)\cr
#' A model to be tidied.
#' @param variables_list (`list` or `string`)\cr
#' A list whose elements will be sequentially passed to
#' `variables` in `marginaleffects::avg_comparisons()` (see details below);
#' alternatively, it could also be the string `"auto"` (default), `"cross"` or
#' `"no_interaction"`
#' @param conf.int (`logical`)\cr
#' Whether or not to include a confidence interval in the tidied output.
#' @param conf.level (`numeric`)\cr
#' The confidence level to use for the confidence interval (between `0` ans `1`).
#' @param ... Additional parameters passed to
#' `marginaleffects::avg_comparisons()`.
#' @family marginal_tieders
#' @seealso `marginaleffects::avg_comparisons()`, `tidy_avg_comparisons()`
#' @export
#' @examplesIf .assert_package("marginaleffects", boolean = TRUE)
#' \donttest{
#' # Average Marginal Contrasts
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' tidyr::uncount(n) |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' mod <- glm(
#' Survived ~ Class + Age + Sex,
#' data = df, family = binomial
#' )
#' tidy_marginal_contrasts(mod)
#' tidy_plus_plus(mod, tidy_fun = tidy_marginal_contrasts)
#'
#' mod2 <- lm(Petal.Length ~ poly(Petal.Width, 2) + Species, data = iris)
#' tidy_marginal_contrasts(mod2)
#' tidy_marginal_contrasts(
#' mod2,
#' variables_list = variables_to_predict(
#' mod2,
#' continuous = 3,
#' categorical = "pairwise"
#' )
#' )
#'
#' # Model with interactions
#' mod3 <- glm(
#' Survived ~ Sex * Age + Class,
#' data = df, family = binomial
#' )
#' tidy_marginal_contrasts(mod3)
#' tidy_marginal_contrasts(mod3, "no_interaction")
#' tidy_marginal_contrasts(mod3, "cross")
#' tidy_marginal_contrasts(
#' mod3,
#' variables_list = list(
#' list(variables = list(Class = "pairwise"), by = list(Sex = unique)),
#' list(variables = list(Age = "all")),
#' list(variables = list(Class = "sequential", Sex = "reference"))
#' )
#' )
#'
#' mod4 <- lm(Sepal.Length ~ Petal.Length * Petal.Width + Species, data = iris)
#' tidy_marginal_contrasts(mod4)
#' tidy_marginal_contrasts(
#' mod4,
#' variables_list = list(
#' list(
#' variables = list(Species = "sequential"),
#' by = list(Petal.Length = c(2, 5))
#' ),
#' list(
#' variables = list(Petal.Length = 2),
#' by = list(Species = unique, Petal.Width = 2:4)
#' )
#' )
#' )
#'
#' # Marginal Contrasts at the Mean
#' tidy_marginal_contrasts(mod, newdata = "mean")
#' tidy_marginal_contrasts(mod3, newdata = "mean")
#' }
tidy_marginal_contrasts <- function(x, variables_list = "auto",
conf.int = TRUE, conf.level = 0.95, ...) {
.assert_package("marginaleffects")
dots <- rlang::dots_list(...)
if (isTRUE(dots$exponentiate)) {
cli::cli_abort("{.arg exponentiate = TRUE} is not relevant for {.fun broom.helpers::tidy_marginal_contrasts}.") # nolint
}
dots$exponentiate <- NULL
dots$conf_level <- conf.level
dots$model <- x
if (is.character(variables_list) && variables_list == "auto") {
variables_list <- variables_to_contrast(
x,
interactions = TRUE,
cross = FALSE
)
}
if (is.character(variables_list) && variables_list == "no_interaction") {
variables_list <- variables_to_contrast(
x,
interactions = FALSE
)
}
if (is.character(variables_list) && variables_list == "cross") {
variables_list <- variables_to_contrast(
x,
interactions = TRUE,
cross = TRUE
)
}
if (!is.list(variables_list)) {
cli::cli_abort("{.arg variables_list} should be a list or \"auto\" or \"no_interaction\".")
}
res <- purrr::map_df(variables_list, .tidy_one_marginal_contrast, dots)
attr(res, "coefficients_type") <- dplyr::case_when(
is.null(dots$newdata) ~ "marginal_contrasts_average",
isTRUE(dots$newdata == "mean") ~ "marginal_contrasts_at_mean",
isTRUE(dots$newdata == "balanced") ~ "marginal_contrasts_at_marginalmeans",
TRUE ~ "marginal_contrasts"
)
attr(res, "skip_add_reference_rows") <- TRUE
res
}
.tidy_one_marginal_contrast <- function(variables, dots) {
# allowing passing directly variables names
if (length(variables) > 0 && !all(names(variables) %in% c("variables", "by"))) {
variables <- list(variables = variables)
}
dots$variables <- variables$variables
dots$cross <- TRUE
if (!is.null(variables$by)) {
dots$by <- names(variables$by)
}
if (!is.null(variables$by) && is.null(dots$newdata)) {
args <- variables$by
args$model <- dots$model
args$grid_type <- "counterfactual"
dots$newdata <- do.call(marginaleffects::datagrid, args)
}
if (!is.null(variables$by) && identical(dots$newdata, "mean")) {
args <- variables$by
args$model <- dots$model
dots$newdata <- do.call(marginaleffects::datagrid, args)
}
res <- do.call(marginaleffects::avg_comparisons, dots) |>
dplyr::select(-dplyr::any_of("term"))
if (is.null(variables$by)) {
res <- res |>
dplyr::mutate(
variable = paste(names(variables$variables), collapse = ":")
)
} else {
res <- res |>
dplyr::mutate(
variable = paste(
paste(names(variables$by), collapse = ":"),
paste(names(variables$variables), collapse = ":"),
sep = ":"
)
)
}
res <- res |>
tidyr::unite(
col = "term",
sep = " * ",
dplyr::all_of(names(variables$by)),
dplyr::starts_with("contrast")
) |>
dplyr::relocate("variable", "term")
if ("group" %in% names(res)) {
res <- res |>
dplyr::rename(y.level = "group") |>
dplyr::relocate("y.level")
}
res
}
#' @export
#' @param model (a model object, e.g. `glm`)\cr
#' A model.
#' @param interactions (`logical`)\cr
#' Should combinations of variables corresponding to interactions be returned?
#' @param cross (`logical`)\cr
#' If `interaction` is `TRUE`, should "cross-contrasts" be
#' computed? (if `FALSE`, only the last term of an interaction is passed to
#' `variable` and the other terms are passed to `by`)
#' @param var_categorical ([`predictor values`][marginaleffects::comparisons()])\cr
#' Default `variable` value for categorical variables.
#' @param var_continuous ([`predictor values`][marginaleffects::comparisons()])\cr
#' Default `variable` value for continuous variables.
#' @param by_categorical ([`predictor values`][marginaleffects::comparisons()])\cr
#' Default `by` value for categorical variables.
#' @param by_continuous ([`predictor values`][marginaleffects::comparisons()])\cr
#' Default `by` value for continuous variables.
#' @rdname tidy_marginal_contrasts
variables_to_contrast <- function(model,
interactions = TRUE,
cross = FALSE,
var_categorical = "reference",
var_continuous = 1,
by_categorical = unique,
by_continuous = stats::fivenum) {
variables <- model |>
model_list_variables(add_var_type = TRUE)
if (interactions) {
keep <- model_list_higher_order_variables(model)
} else {
keep <- variables[variables$var_type != "interaction", ]$variable
}
response_variable <- model |> model_get_response_variable()
if (!is.null(response_variable)) {
keep <- keep[keep != response_variable]
}
var_ret <- list(
categorical = var_categorical,
dichotomous = var_categorical,
continuous = var_continuous
)
by_ret <- list(
categorical = by_categorical,
dichotomous = by_categorical,
continuous = by_continuous
)
variables <- variables |>
tibble::column_to_rownames("variable")
one_element <- function(v) {
v <- strsplit(v, ":") |> unlist()
if (length(v) == 1 || isTRUE(cross)) {
one_variables <- variables[v, "var_type"]
one_variables <- var_ret[one_variables]
names(one_variables) <- v
one_by <- NULL
} else {
one_variables <- variables[utils::tail(v, 1), "var_type"]
one_variables <- var_ret[one_variables]
names(one_variables) <- utils::tail(v, 1)
one_by <- variables[utils::head(v, -1), "var_type"]
one_by <- by_ret[one_by]
names(one_by) <- utils::head(v, -1)
}
list(variables = one_variables, by = one_by)
}
lapply(keep, one_element)
}
broom.helpers/R/model_get_xlevels.R 0000644 0001762 0000144 00000004124 15002155536 017061 0 ustar ligges users #' Get xlevels used in the model
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_xlevels()
model_get_xlevels <- function(model) {
UseMethod("model_get_xlevels")
}
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.default <- function(model) {
xlevels <- tryCatch(
model |> purrr::chuck("xlevels"),
error = function(e) {
NULL # nocov
}
)
if (is.null(xlevels)) {
xlevels <- tryCatch(
stats::.getXlevels(
model |> model_get_terms(),
model |> model_get_model_frame()
),
error = function(e) {
NULL # nocov
}
)
}
xlevels |> .add_xlevels_for_logical_variables(model)
}
.add_xlevels_for_logical_variables <- function(xlevels, model) {
log_vars <- model |>
model_list_variables() |>
dplyr::filter(.data$var_class == "logical") |>
purrr::pluck("variable")
for (v in setdiff(log_vars, names(xlevels))) {
xlevels[[v]] <- c("FALSE", "TRUE")
}
xlevels
}
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.lmerMod <- function(model) {
stats::model.frame(model) |>
lapply(levels) |>
purrr::compact() |> # keep only not null
.add_xlevels_for_logical_variables(model)
}
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.glmerMod <- model_get_xlevels.lmerMod
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.felm <- model_get_xlevels.lmerMod
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.brmsfit <- model_get_xlevels.lmerMod
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.glmmTMB <- model_get_xlevels.lmerMod
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.plm <- model_get_xlevels.lmerMod
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.model_fit <- function(model) {
model_get_xlevels(model$fit)
}
#' @export
#' @rdname model_get_xlevels
model_get_xlevels.svy_vglm <- function(model) {
model_get_xlevels(model$fit)
}
broom.helpers/R/model_list_variables.R 0000644 0001762 0000144 00000020410 15002155536 017537 0 ustar ligges users #' List all the variables used in a model
#'
#' Including variables used only in an interaction.
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @param labels (`list` or `string`)\cr
#' An optional named list or named vector of
#' custom variable labels.
#' @param only_variable (`logical`)\cr
#' If `TRUE`, will return only "variable" column.
#' @param add_var_type (`logical`)\cr
#' If `TRUE`, add `var_nlevels` and `var_type` columns.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @return
#' A tibble with three columns:
#' * `variable`: the corresponding variable
#' * `var_class`: class of the variable (cf. [stats::.MFclass()])
#' * `label_attr`: variable label defined in the original data frame
#' with the label attribute (cf. [labelled::var_label()])
#' * `var_label`: a variable label (by priority, `labels` if defined,
#' `label_attr` if available, otherwise `variable`)
#'
#' If `add_var_type = TRUE`:
#' * `var_type`: `"continuous"`, `"dichotomous"` (categorical variable with 2 levels),
#' `"categorical"` (categorical variable with 3 or more levels), `"intercept"`
#' or `"interaction"`
#' * `var_nlevels`: number of original levels for categorical variables
#'
#' @export
#' @family model_helpers
#' @examplesIf .assert_package("gtsummary", boolean = TRUE)
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes")))
#' glm(
#' Survived ~ Class + Age:Sex,
#' data = df, weights = df$n,
#' family = binomial
#' ) |>
#' model_list_variables()
#'
#' lm(
#' Sepal.Length ~ poly(Sepal.Width, 2) + Species,
#' data = iris,
#' contrasts = list(Species = contr.sum)
#' ) |>
#' model_list_variables()
#'
#' glm(
#' response ~ poly(age, 3) + stage + grade * trt,
#' na.omit(gtsummary::trial),
#' family = binomial,
#' ) |>
#' model_list_variables()
#' }
model_list_variables <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
UseMethod("model_list_variables")
}
#' @rdname model_list_variables
#' @export
model_list_variables.default <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
model_frame <- model_get_model_frame(model)
model_terms <- model_get_terms(model)
if (!is.null(model_terms) && inherits(model_terms, "terms")) {
variable_names <- attr(model_terms, "term.labels")
dataClasses <- purrr::map(model_frame, .MFclass2) |> unlist()
if (is.null(dataClasses)) {
dataClasses <- attr(model_terms, "dataClasses")
}
} else {
dataClasses <- model_frame |>
lapply(.MFclass2) |>
unlist()
variable_names <- names(dataClasses)
}
if (is.null(variable_names)) {
return(NULL)
}
# update the list with all elements of dataClasses
variable_names <- names(dataClasses) |>
c(variable_names) |>
.clean_backticks() |>
unique()
res <- tibble::tibble(
variable = variable_names
) |>
.add_var_class(dataClasses) |>
.add_label_attr(model) |>
# specific case of polynomial terms defined with poly()
dplyr::mutate(
variable = stringr::str_replace(.data$variable, "^poly\\(([^,]*),(.*)\\)$", "\\1")
) |>
.compute_var_label(labels)
if (only_variable) {
return(res$variable)
}
# specific case for instrumental variables
if (inherits(model, "fixest") && !is.null(instrumental_suffix)) {
iv <- all.vars(model$iv_endo_fml)
res <- res |>
dplyr::mutate(
var_label = dplyr::if_else(
.data$variable %in% iv,
paste0(.data$var_label, instrumental_suffix),
.data$var_label
)
)
}
if (add_var_type) {
return(.add_var_type(res, model))
}
res
}
#' @rdname model_list_variables
#' @export
model_list_variables.lavaan <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
res <- tibble::tibble(
variable = .clean_backticks(unique(model@ParTable$lhs))
) |>
dplyr::left_join(
tibble::tibble(
variable = .clean_backticks(model@Data@ov$name),
var_class = model@Data@ov$type
),
by = "variable"
) |>
dplyr::mutate(
var_class = dplyr::if_else(
.data$var_class == "ordered",
"factor",
.data$var_class
)
) |>
.add_label_attr(model) |>
.compute_var_label(labels)
if (only_variable) {
return(res$variable)
}
if (add_var_type) {
return(.add_var_type(res, model))
}
res
}
#' @rdname model_list_variables
#' @export
model_list_variables.logitr <- function(model,
labels = NULL,
only_variable = FALSE,
add_var_type = FALSE,
instrumental_suffix = " (instrumental)") {
res <- model_list_variables.default(model, labels, FALSE)
if (!is.null(model$data$scalePar)) {
label_scalePar <- labels |> purrr::pluck("scalePar")
res <- res |>
dplyr::add_row(
variable = "scalePar",
var_class = "numeric",
label_attr = ifelse(
is.null(label_scalePar),
NA,
label_scalePar
),
var_label = ifelse(
is.null(label_scalePar),
"scalePar",
label_scalePar
)
)
}
if (only_variable) {
return(res$variable)
}
if (add_var_type) {
return(.add_var_type(res, model))
}
res
}
## model_list_variables() helpers --------------------------
.add_var_class <- function(x, dataClasses) {
x |>
dplyr::left_join(
tibble::tibble(
variable = names(dataClasses),
var_class = dataClasses
),
by = "variable"
)
}
.add_label_attr <- function(x, model) {
labels <- unlist(labelled::var_label(model_get_model_frame(model)))
if (length(labels) > 0) {
x |>
dplyr::left_join(
dplyr::tibble(
variable = names(labels),
label_attr = labels
),
by = "variable"
)
} else {
x |>
dplyr::mutate(label_attr = NA)
}
}
# stats::.MFclass do not distinct integer and numeric
.MFclass2 <- function(x) {
if (is.logical(x)) {
return("logical")
}
if (is.ordered(x)) {
return("ordered")
}
if (is.factor(x)) {
return("factor")
}
if (is.character(x)) {
return("character")
}
if (is.matrix(x) && is.numeric(x)) {
return(paste0("nmatrix.", ncol(x)))
}
if (is.integer(x)) {
return("integer")
}
if (is.numeric(x)) {
return("numeric")
}
"other"
}
.compute_var_label <- function(x, labels = NULL) {
if (is.list(labels)) {
labels <- unlist(labels)
}
if (is.null(labels)) {
x$var_custom_label <- NA_character_
} else {
x <- x |>
dplyr::left_join(
dplyr::tibble(
variable = names(labels),
var_custom_label = labels
),
by = "variable"
)
}
x |>
dplyr::mutate(
label_attr = as.character(.data$label_attr),
var_label = dplyr::case_when(
!is.na(.data$var_custom_label) ~ .data$var_custom_label,
!is.na(.data$label_attr) ~ .data$label_attr,
TRUE ~ .data$variable
)
) |>
dplyr::select(-dplyr::all_of("var_custom_label"))
}
.add_var_type <- function(x, model) {
x <- x |>
dplyr::left_join(
model_get_nlevels(model),
by = "variable"
)
x |> .compute_var_type()
}
broom.helpers/R/model_get_model_matrix.R 0000644 0001762 0000144 00000010342 15055305404 020061 0 ustar ligges users #' Get the model matrix of a model
#'
#' The structure of the object returned by [stats::model.matrix()]
#' could slightly differ for certain types of models.
#' `model_get_model_matrix()` will always return an object
#' with the same structure as [stats::model.matrix.default()].
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @param ... Additional arguments passed to [stats::model.matrix()].
#' @export
#' @family model_helpers
#' @seealso [stats::model.matrix()]
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_model_matrix() |>
#' head()
model_get_model_matrix <- function(model, ...) {
if (!is.null(attr(model, "model_matrix")))
return(attr(model, "model_matrix"))
UseMethod("model_get_model_matrix")
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.default <- function(model, ...) {
tryCatch(
stats::model.matrix(model, ...),
error = function(e) {
tryCatch( # test second approach
stats::model.matrix(stats::terms(model), model$model, ...),
error = function(e) {
NULL
}
)
}
)
}
#' @export
#' @rdname model_get_model_matrix
# For multinom models, names of the model matrix are not
# consistent with the terms names when contrasts other
# than treatment are used, resulting in an issue for
# the identification of variables
model_get_model_matrix.multinom <- function(model, ...) {
mm <- stats::model.matrix(model, ...)
co <- stats::coef(model)
if (is.matrix(co)) {
colnames(mm) <- colnames(co)
} else {
colnames(mm) <- names(co)
}
mm
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.clm <- function(model, ...) {
stats::model.matrix(model, ...)[[1]]
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.brmsfit <- function(model, ...) {
model |>
brms::standata() |>
purrr::pluck("X")
}
#' @export
#' @rdname model_get_model_matrix
#' @details
#' For models fitted with `glmmTMB::glmmTMB()`, it will return a model matrix
#' taking into account all components ("cond", "zi" and "disp"). For a more
#' restricted model matrix, please refer to `glmmTMB::model.matrix.glmmTMB()`.
model_get_model_matrix.glmmTMB <- function(model, ...) {
# load lme4 if available
.assert_package("lme4", fn = "broom.helpers::model_get_model_matrix.glmmTMB()")
stats::model.matrix(
lme4::nobars(model$modelInfo$allForm$combForm),
stats::model.frame(model, ...),
contrasts.arg = model$modelInfo$contrasts
)
}
#' @export
#' @rdname model_get_model_matrix
#' @details
#' For [plm::plm()] models, constant columns are not removed.
model_get_model_matrix.plm <- function(model, ...) {
stats::model.matrix(model, cstcovar.rm = "none", ...)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.biglm <- function(model, ...) {
stats::model.matrix(
model,
data = stats::model.frame.default(model)
)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.model_fit <- function(model, ...) {
model_get_model_matrix(model$fit, ...)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.LORgee <- function(model, ...) {
stats::model.matrix.default(
model,
data = stats::model.frame(model)
)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.betareg <- function(model, ...) {
stats::model.matrix.default(
model |> model_get_terms(),
data = model |> model_get_model_frame()
)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.cch <- function(model, ...) {
stats::model.matrix.default(
model$call$formula |> stats::formula(),
data = model |> model_get_model_frame()
)
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.vglm <- function(model, ...) {
stats::model.matrix(model, ..., type = "lm")
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.vgam <- function(model, ...) {
stats::model.matrix(model, ..., type = "lm")
}
#' @export
#' @rdname model_get_model_matrix
model_get_model_matrix.svy_vglm <- function(model, ...) {
model_get_model_matrix(model$fit, ...)
}
broom.helpers/R/tidy_add_term_labels.R 0000644 0001762 0000144 00000022024 15002155536 017511 0 ustar ligges users #' Add term labels
#'
#' Will add term labels in a `label` column, based on:
#' 1. labels provided in `labels` argument if provided;
#' 2. factor levels for categorical variables coded with
#' treatment, SAS or sum contrasts (the label could be
#' customized with `categorical_terms_pattern` argument);
#' 3. variable labels when there is only one term per variable;
#' 4. term name otherwise.
#'
#' @details
#' If the `variable_label` column is not yet available in `x`,
#' [tidy_add_variable_labels()] will be automatically applied.
#' If the `contrasts` column is not yet available in `x`,
#' [tidy_add_contrasts()] will be automatically applied.
#'
#' It is possible to pass a custom label for any term in `labels`,
#' including interaction terms.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param labels (`list` or `string`)\cr
#' An optional named list or named vector of custom term labels.
#' @param interaction_sep (`string`)\cr
#' Separator for interaction terms.
#' @param categorical_terms_pattern ([`glue pattern`][glue::glue()])\cr
#' A [glue pattern][glue::glue()] for labels of categorical terms with treatment
#' or sum contrasts (see examples and [model_list_terms_levels()]).
#' @param relabel_poly Should terms generated with [stats::poly()] be relabeled?
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examples
#' \donttest{
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes"))) |>
#' labelled::set_variable_labels(
#' Class = "Passenger's class",
#' Sex = "Sex"
#' )
#'
#' mod <-
#' glm(Survived ~ Class * Age * Sex, data = df, weights = df$n, family = binomial)
#' mod |>
#' tidy_and_attach() |>
#' tidy_add_term_labels()
#' mod |>
#' tidy_and_attach() |>
#' tidy_add_term_labels(
#' interaction_sep = " x ",
#' categorical_terms_pattern = "{level} / {reference_level}"
#' )
#' }
tidy_add_term_labels <- function(x,
labels = NULL,
interaction_sep = " * ",
categorical_terms_pattern = "{level}",
relabel_poly = FALSE,
model = tidy_get_model(x),
quiet = FALSE,
strict = FALSE) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if ("header_row" %in% names(x)) {
cli::cli_abort("{.fn tidy_add_term_labels} cannot be applied after {.fn tidy_add_header_rows}.")
}
.attributes <- .save_attributes(x)
if ("label" %in% names(x)) {
x <- x |> dplyr::select(-dplyr::all_of("label"))
}
if (is.list(labels)) {
labels <- unlist(labels)
}
if (!"var_label" %in% names(x)) {
x <- x |> tidy_add_variable_labels(model = model)
}
if (!"contrasts" %in% names(x)) {
x <- x |> tidy_add_contrasts(model = model)
}
# specific case for nnet::multinom
# keeping only one level for computing term_labels
if ("y.level" %in% names(x)) {
xx <- x |>
dplyr::distinct(.data$term, .keep_all = TRUE)
} else {
xx <- x
}
# start with term names
term_labels <- unique(stats::na.omit(xx$term))
names(term_labels) <- term_labels
# add categorical terms levels
sdif_term_level <- "diff"
if (.attributes$exponentiate) sdif_term_level <- "ratio"
terms_levels <- model |> model_list_terms_levels(
label_pattern = categorical_terms_pattern,
variable_labels = .attributes$variable_labels,
sdif_term_level = sdif_term_level
)
if (!is.null(terms_levels)) {
additional_term_labels <- terms_levels$label
names(additional_term_labels) <- terms_levels$term
term_labels <- term_labels |>
.update_vector(additional_term_labels)
# also consider "variablelevel" notation
# when not already used (e.g. for sum contrasts)
terms_levels2 <- terms_levels |>
dplyr::mutate(term2 = paste0(.data$variable, .data$level)) |>
dplyr::filter(.data$term2 != .data$term)
if (nrow(terms_levels2) > 0) {
additional_term_labels <- terms_levels2$label
names(additional_term_labels) <- terms_levels2$term2
term_labels <- term_labels |>
.update_vector(additional_term_labels)
}
# also consider "variablelevel_rank" notation
# for no intercept model (because type of interaction unknown)
terms_levels3 <- terms_levels |>
dplyr::mutate(term3 = paste0(.data$variable, .data$level_rank)) |>
dplyr::filter(.data$term3 != .data$term & .data$contrasts_type == "no.contrast")
if (nrow(terms_levels3) > 0) {
additional_term_labels <- terms_levels3$label
names(additional_term_labels) <- terms_levels3$term3
term_labels <- term_labels |>
.update_vector(additional_term_labels)
}
}
# add variable labels
# first variable list (for interaction only terms)
# then current variable labels in x
variables_list <- model_list_variables(model)
if (!is.null(variables_list)) {
variables_list <- variables_list |>
dplyr::mutate(
label = dplyr::if_else(
is.na(.data$label_attr),
.data$variable,
as.character(.data$label_attr)
),
)
additional_term_labels <- variables_list$label
names(additional_term_labels) <- variables_list$variable
term_labels <- term_labels |>
.update_vector(additional_term_labels)
# add version with backtips for variables with non standard names
names(additional_term_labels) <- paste0(
"`", names(additional_term_labels), "`"
)
term_labels <- term_labels |>
.update_vector(additional_term_labels)
}
x_var_labels <- xx |>
dplyr::mutate(
variable = dplyr::if_else(
is.na(.data$variable), # for intercept
.data$term,
.data$variable
)
) |>
dplyr::group_by(.data$variable) |>
dplyr::summarise(
var_label = dplyr::first(.data$var_label),
.groups = "drop_last"
)
additional_term_labels <- x_var_labels$var_label
names(additional_term_labels) <- x_var_labels$variable
term_labels <- term_labels |>
.update_vector(additional_term_labels)
# add version with backtips for variables with non standard names
names(additional_term_labels) <- paste0(
"`", names(additional_term_labels), "`"
)
term_labels <- term_labels |>
.update_vector(additional_term_labels)
# check if all elements of labels are in x
# show a message otherwise
not_found <- setdiff(names(labels), names(term_labels))
if (length(not_found) > 0 && !quiet) {
cli_alert_danger("{.code {not_found}} terms have not been found in {.code x}.")
}
if (length(not_found) > 0 && strict) {
cli::cli_abort("Incorrect call with `labels=`. Quitting execution.", call = NULL)
}
# labels for polynomial terms
if (relabel_poly) {
poly_terms <- xx |>
dplyr::filter(
.data$term |> stringr::str_starts("poly\\(")
) |>
dplyr::mutate(
degree = .data$term |> stringr::str_replace("poly\\(.+\\)([0-9]+)", "\\1"),
label = paste0(.data$var_label, .superscript_numbers(.data$degree))
)
poly_labels <- poly_terms$label
names(poly_labels) <- poly_terms$term
term_labels <- term_labels |>
.update_vector(poly_labels)
}
# labels argument
term_labels <- term_labels |>
.update_vector(labels)
# save custom labels
.attributes$term_labels <- labels
# management of interaction terms
interaction_terms <- xx$term[!is.na(xx$var_type) & xx$var_type == "interaction"]
# do not treat those specified in labels
interaction_terms <- setdiff(interaction_terms, names(labels))
names(interaction_terms) <- interaction_terms
interaction_terms <-
interaction_terms |>
strsplit(":")
# in some cases (e.g. marginal predictions)
# interaction terms are not prefixed by variable names
# => need to identify them from interaction_terms directly
if (isTRUE(.attributes$find_missing_interaction_terms)) {
it <- unname(unlist(interaction_terms))
missing_terms <- setdiff(it[it != ""], names(term_labels))
if (length(missing_terms) > 0) {
names(missing_terms) <- missing_terms
term_labels <- term_labels |>
.update_vector(missing_terms)
}
}
interaction_terms <- interaction_terms |>
lapply(function(x) {
paste(term_labels[x], collapse = interaction_sep)
}) |>
unlist()
term_labels <- term_labels |>
.update_vector(interaction_terms)
x |>
dplyr::left_join(
tibble::tibble(
term = names(term_labels),
label = term_labels
),
by = "term"
) |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/data.R 0000644 0001762 0000144 00000000452 14357760763 014311 0 ustar ligges users #' Listing of Supported Models
#'
#' @format A data frame with one row per supported model
#' \describe{
#' \item{model}{Model}
#' \item{notes}{Notes}
#' }
#'
#' @section Supported models:
#'
#' ```{r, echo = FALSE}
#' knitr::kable(supported_models)
#' ```
"supported_models"
broom.helpers/R/tidy_select_variables.R 0000644 0001762 0000144 00000006032 14662130321 017714 0 ustar ligges users #' Select variables to keep/drop
#'
#' Will remove unselected variables from the results.
#' To remove the intercept, use [tidy_remove_intercept()].
#'
#' @details
#' If the `variable` column is not yet available in `x`,
#' [tidy_identify_variables()] will be automatically applied.
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param include ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables to include. Default is `everything()`.
#' See also [all_continuous()], [all_categorical()], [all_dichotomous()]
#' and [all_interaction()].
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @return
#' The `x` tibble limited to the included variables (and eventually the intercept),
#' sorted according to the `include` parameter.
#' @export
#' @family tidy_helpers
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived))
#' res <-
#' glm(Survived ~ Class + Age * Sex, data = df, weights = df$n, family = binomial) |>
#' tidy_and_attach() |>
#' tidy_identify_variables()
#'
#' res
#' res |> tidy_select_variables()
#' res |> tidy_select_variables(include = "Class")
#' res |> tidy_select_variables(include = -c("Age", "Sex"))
#' res |> tidy_select_variables(include = starts_with("A"))
#' res |> tidy_select_variables(include = all_categorical())
#' res |> tidy_select_variables(include = all_dichotomous())
#' res |> tidy_select_variables(include = all_interaction())
#' res |> tidy_select_variables(
#' include = c("Age", all_categorical(dichotomous = FALSE), all_interaction())
#' )
tidy_select_variables <- function(
x, include = everything(), model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if (!"variable" %in% names(x)) {
x <- x |> tidy_identify_variables(model = model)
}
.attributes <- .save_attributes(x)
# obtain character vector of selected variables
cards::process_selectors(
data = scope_tidy(x),
include = {{ include }}
)
# order result, intercept first then by the order of include
if ("y.level" %in% names(x)) {
x$group_order <- factor(x$y.level) |> forcats::fct_inorder()
} else if ("component" %in% names(x)) {
x$group_order <- factor(x$component) |> forcats::fct_inorder()
} else {
x$group_order <- 1
}
x |>
dplyr::filter(
.data$var_type == "intercept" |
.data$variable %in% include
) |>
dplyr::mutate(
log_intercept = .data$var_type == "intercept",
fct_variable = factor(.data$variable, levels = include)
) |>
dplyr::arrange(
.data$group_order,
dplyr::desc(.data$log_intercept),
.data$fct_variable
) |>
dplyr::select(
-dplyr::any_of(c("group_order", "log_intercept", "fct_variable"))
) |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/R/scope_tidy.R 0000644 0001762 0000144 00000006553 14760074335 015541 0 ustar ligges users #' Scoping a tidy tibble allowing to tidy select
#'
#' This function uses the information from a model tidy tibble to generate
#' a data frame exposing the different variables of the model,
#' data frame that could be used for tidy selection. In addition, columns
#' `"var_type"`, `"var_class"` and `"contrasts_type"` are scoped and their
#' values are added as attributes to the data frame.
#' For example, if `var_type='continuous'` for variable `"age"`, then the
#' attribute `attr(.$age, 'gtsummary.var_type') <- 'continuous'` is set.
#' That attribute is then used in a selector like `all_continuous()`.
#' Note: attributes are prefixed with `"gtsummary."` to be compatible with
#' selectors provided by `{gtsummary}`.
#'
#' @param x (`data.frame`)\cr
#' A tidy tibble, with a `"variable"` column, as returned by
#' [`tidy_identify_variables()`].
#' @param data (`data.frame`)\cr
#' An optional data frame the attributes will be added to.
#' @return A data frame.
#' @export
#' @examples
#' mod <- lm(Sepal.Length ~ Sepal.Width * Species, data = iris)
#' tt <- mod |> tidy_and_attach() |> tidy_add_contrasts()
#'
#' scope_tidy(tt) |> str()
#' scope_tidy(tt, data = model_get_model_frame(mod)) |> str()
#'
#' scope_tidy(tt) |> dplyr::select(dplyr::starts_with("Se")) |> names()
#' scope_tidy(tt) |> dplyr::select(where(is.factor)) |> names()
#' scope_tidy(tt) |> dplyr::select(all_continuous()) |> names()
#' scope_tidy(tt) |> dplyr::select(all_contrasts()) |> names()
#' scope_tidy(tt) |> dplyr::select(all_interaction()) |> names()
#' scope_tidy(tt) |> dplyr::select(all_intercepts()) |> names()
scope_tidy <- function(x, data = NULL) {
if (!"variable" %in% names(x)) {
cli::cli_abort(
"The {.code .$x} data frame does not have the required {.val variable} column."
)
}
# if data not passed, use x to construct one
if (rlang::is_empty(data)) {
data <- dplyr::tibble(
!!!rlang::rep_named(
unique(as.character(x$variable)),
logical(0L)
)
)
# if var_class available in x, convert columns
if ("var_class" %in% names(x)) {
df_class <- x[c("variable", "var_class")] |>
unique() |>
tidyr::drop_na()
for (i in seq_len(nrow(df_class))) {
f <- switch(
df_class$var_class[i],
"character" = as.character,
"factor" = as.factor,
"ordered" = as.ordered,
"integer" = as.integer,
"numeric" = as.numeric,
"complex" = as.complex,
"Date" = as.Date,
"POSIXlt" = as.POSIXlt,
"POSIXct" = as.POSIXct,
"difftime" = as.difftime,
as.logical
)
data[[df_class$variable[i]]] <- f(NA)
}
}
}
# only keeping rows that have corresponding column names in data
x <- x |> dplyr::filter(.data$variable %in% names(data))
# if x passed, add columns as attr to data
base_attr_cols <- c("var_type", "var_class", "contrasts_type")
attr_cols <- x |>
dplyr::select(any_of(base_attr_cols)) |>
names()
# add attributes
for (v in attr_cols) {
df_attr <- x[c("variable", v)] |>
unique() |>
tidyr::drop_na()
for (i in seq_len(nrow(df_attr))) {
attr(data[[df_attr$variable[i]]], paste0("gtsummary.", v)) <- df_attr[[v]][i]
}
}
# return data frame with attributes
data
}
broom.helpers/R/reexport.R 0000644 0001762 0000144 00000001330 14662130321 015220 0 ustar ligges users #' @importFrom dplyr vars
#' @export
dplyr::vars
#' @importFrom dplyr starts_with
#' @export
dplyr::starts_with
#' @importFrom dplyr ends_with
#' @export
dplyr::ends_with
#' @importFrom dplyr contains
#' @export
dplyr::contains
#' @importFrom dplyr matches
#' @export
dplyr::matches
#' @importFrom dplyr num_range
#' @export
dplyr::num_range
#' @importFrom dplyr all_of
#' @export
dplyr::all_of
#' @importFrom dplyr any_of
#' @export
dplyr::any_of
#' @importFrom dplyr everything
#' @export
dplyr::everything
#' @importFrom dplyr last_col
#' @export
dplyr::last_col
#' @importFrom dplyr one_of
#' @export
dplyr::one_of
#' @importFrom dplyr where
#' @export
dplyr::where
broom.helpers/R/model_get_response_variable.R 0000644 0001762 0000144 00000002036 14760117573 021113 0 ustar ligges users #' Get the name of the response variable
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl) + disp:hp, mtcars) |>
#' model_get_response_variable()
#'
#' mod <- glm(
#' response ~ stage * grade + trt,
#' gtsummary::trial,
#' family = binomial
#' )
#' mod |> model_get_response_variable()
#'
#' mod <- glm(
#' Survived ~ Class * Age + Sex,
#' data = Titanic |> as.data.frame(),
#' weights = Freq,
#' family = binomial
#' )
#' mod |> model_get_response_variable()
model_get_response_variable <- function(model) {
UseMethod("model_get_response_variable")
}
#' @export
#' @rdname model_get_response_variable
model_get_response_variable.default <- function(model) {
model_frame <- model |> model_get_model_frame()
model_terms <- model |> model_get_terms()
if (!is.null(model_terms) && inherits(model_terms, "terms")) {
names(model_frame)[attr(model_terms, "response")]
} else {
NULL
}
}
broom.helpers/R/model_get_pairwise_contrasts.R 0000644 0001762 0000144 00000007621 14762100563 021331 0 ustar ligges users #' Get pairwise comparison of the levels of a categorical variable
#'
#' It is computed with [emmeans::emmeans()].
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @param variables ([`tidy-select`][dplyr::dplyr_tidy_select])\cr
#' Variables to add pairwise contrasts.
#' @param pairwise_reverse (`logical`)\cr
#' Determines whether to use `"pairwise"` (if `TRUE`)
#' or `"revpairwise"` (if `FALSE`), see [emmeans::contrast()].
#' @param contrasts_adjust optional adjustment method when computing contrasts,
#' see [emmeans::contrast()] (if `NULL`, use `emmeans` default)
#' @param conf.level (`numeric`)\cr
#' Level of confidence for confidence intervals (default: 95%).
#' @param emmeans_args (`logical`)\cr
#' List of additional parameter to pass to
#' [emmeans::emmeans()] when computing pairwise contrasts.
#' @details
#' For `pscl::zeroinfl()` and `pscl::hurdle()` models, pairwise contrasts are
#' computed separately for each component, using `mode = "count"` and
#' `mode = "zero"` (see documentation of `emmeans`) and a component column
#' is added to the results.
#' @family model_helpers
#' @export
#' @examplesIf .assert_package("emmeans", boolean = TRUE)
#' \donttest{
#' mod <- lm(Sepal.Length ~ Species, data = iris)
#' mod |> model_get_pairwise_contrasts(variables = "Species")
#' mod |>
#' model_get_pairwise_contrasts(
#' variables = "Species",
#' contrasts_adjust = "none"
#' )
#' }
model_get_pairwise_contrasts <- function(
model,
variables,
pairwise_reverse = TRUE,
contrasts_adjust = NULL,
conf.level = .95,
emmeans_args = list()) {
UseMethod("model_get_pairwise_contrasts")
}
#' @export
model_get_pairwise_contrasts.default <- function(
model,
variables,
pairwise_reverse = TRUE,
contrasts_adjust = NULL,
conf.level = .95,
emmeans_args = list()) {
purrr::map_df(
variables,
.get_pairwise_contrasts_one_var,
model = model,
pairwise_reverse = pairwise_reverse,
contrasts_adjust = contrasts_adjust,
conf.level = conf.level,
emmeans_args = emmeans_args
)
}
.get_pairwise_contrasts_one_var <- function(
model,
variable,
pairwise_reverse = TRUE,
contrasts_adjust = NULL,
conf.level = .95,
emmeans_args = list()) {
.assert_package(
"emmeans",
fn = "broom.helpers::model_get_pairwise_contrasts()"
)
emmeans_args$object <- model
emmeans_args$specs <- variable
e <- do.call(emmeans::emmeans, emmeans_args)
if (is.null(contrasts_adjust)) {
e <- e |>
graphics::pairs(reverse = pairwise_reverse)
} else {
e <- e |>
graphics::pairs(reverse = pairwise_reverse, adjust = contrasts_adjust)
}
r <- e |>
dplyr::as_tibble()
if (!is.numeric(r[[2]])) { # if by
r <- r |>
tidyr::unite("term", 1:2, sep = " | ")
}
r <- r[, c(1:3, ncol(r) - 1, ncol(r))]
colnames(r) <- c(
"term", "estimate", "std.error",
"statistic", "p.value"
)
ci <- stats::confint(e, level = conf.level) |>
dplyr::as_tibble()
if (!is.numeric(ci[[2]])) { # if by
ci <- ci |>
tidyr::unite("term", 1:2, sep = " | ")
}
ci <- ci[, c(1, ncol(ci) - 1, ncol(ci))]
colnames(ci) <- c("term", "conf.low", "conf.high")
r <- dplyr::left_join(r, ci, by = "term")
r$variable <- variable
r$contrasts <- ifelse(pairwise_reverse, "pairwise", "revpairwise")
r$contrasts_type <- "pairwise"
r |> dplyr::relocate(dplyr::all_of("variable"))
}
#' @export
model_get_pairwise_contrasts.zeroinfl <- function(model, ...) {
cli::cli_abort(c(
"Pairwise contrasts are not supported for multi-components model.",
"Use directly {.fn emmeans::emmeans}."
))
}
#' @export
model_get_pairwise_contrasts.hurdle <- model_get_pairwise_contrasts.zeroinfl
#' @export
model_get_pairwise_contrasts.betareg <- model_get_pairwise_contrasts.zeroinfl
broom.helpers/R/model_get_nlevels.R 0000644 0001762 0000144 00000001671 15002155536 017053 0 ustar ligges users #' Get the number of levels for each factor used in `xlevels`
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @return a tibble with two columns: `"variable"` and `"var_nlevels"`
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_nlevels()
model_get_nlevels <- function(model) {
UseMethod("model_get_nlevels")
}
#' @export
#' @rdname model_get_nlevels
model_get_nlevels.default <- function(model) {
nlevels <- model_get_xlevels(model) |> lapply(length)
if (length(nlevels) == 0) {
return(
dplyr::tibble(variable = NA_character_, var_nlevels = NA_integer_) |>
dplyr::filter(FALSE) # empty tibble
)
}
dplyr::tibble(
variable = names(nlevels),
var_nlevels = unlist(nlevels)
)
}
#' @export
#' @rdname model_get_nlevels
model_get_nlevels.svy_vglm <- function(model) {
model_get_nlevels(model$fit)
}
broom.helpers/R/model_get_offset.R 0000644 0001762 0000144 00000001253 14662130321 016661 0 ustar ligges users #' Get model offset
#'
#' This function does not cover `lavaan` models (`NULL` is returned).
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' mod <- glm(
#' response ~ trt + offset(log(ttdeath)),
#' gtsummary::trial,
#' family = poisson
#' )
#' mod |> model_get_offset()
model_get_offset <- function(model) {
UseMethod("model_get_offset")
}
#' @export
#' @rdname model_get_offset
model_get_offset.default <- function(model) {
tryCatch(
model |>
model_get_model_frame() |>
stats::model.offset(),
error = function(e) {
NULL # nocov
}
)
}
broom.helpers/R/model_get_model.R 0000644 0001762 0000144 00000001313 15003727101 016466 0 ustar ligges users #' Get the model from model objects
#'
#' Most model objects are proper R model objects. There are, however, some
#' model objects that store the proper object internally (e.g. mice models).
#' This function extracts that model object in those cases.
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_model()
model_get_model <- function(model) {
UseMethod("model_get_model")
}
#' @export
#' @rdname model_get_model
model_get_model.default <- function(model) model
#' @export
#' @rdname model_get_model
model_get_model.mira <- function(model) model$analyses[[1]]
broom.helpers/R/model_get_assign.R 0000644 0001762 0000144 00000004013 15055266765 016677 0 ustar ligges users #' Get the assign attribute of model matrix of a model
#'
#' Return the assign attribute attached to the object returned by
#' [stats::model.matrix()].
#'
#' @param model (a model object, e.g. `glm`)\cr
#' A model object.
#' @export
#' @family model_helpers
#' @seealso [stats::model.matrix()]
#' @examples
#' lm(hp ~ mpg + factor(cyl), mtcars) |>
#' model_get_assign()
model_get_assign <- function(model) {
UseMethod("model_get_assign")
}
#' @export
#' @rdname model_get_assign
model_get_assign.default <- function(model) {
model_matrix <- model_get_model_matrix(model)
get_assign <- purrr::attr_getter("assign")
assign <- model_matrix |> get_assign()
if (is.null(assign)) {
# an alternative generic way to compute assign
# (e.g. for felm models)
model_matrix <- tryCatch(
stats::model.matrix(stats::terms(model), stats::model.frame(model)),
error = function(e) {
NULL # nocov
}
)
assign <- model_matrix |> get_assign()
}
if (!is.atomic(assign)) {
return(NULL)
} # nocov
attr(assign, "model_matrix") <- model_matrix
assign
}
#' @export
#' @rdname model_get_assign
model_get_assign.vglm <- function(model) {
model_matrix <- model_get_model_matrix(model)
get_assign <- purrr::attr_getter("orig.assign.lm")
assign <- model_matrix |> get_assign()
attr(assign, "model_matrix") <- model_matrix
assign
}
#' @export
#' @rdname model_get_assign
model_get_assign.svy_vglm <- function(model) {
model_get_assign(model$fit)
}
#' @export
#' @rdname model_get_assign
model_get_assign.model_fit <- function(model) {
model_get_assign(model$fit)
}
#' @export
#' @rdname model_get_assign
model_get_assign.fixest <- function(model) {
model_matrix <- stats::model.matrix.default(
model_get_terms(model),
data = eval(model$call$data, model$call_env)
)
get_assign <- purrr::attr_getter("assign")
assign <- model_matrix |> get_assign()
attr(assign, "model_matrix") <- model_matrix
assign
}
broom.helpers/R/assert_package.R 0000644 0001762 0000144 00000013356 14760117573 016354 0 ustar ligges users #' Check a package installation status or minimum required version
#'
#' The function `.assert_package()` checks whether a package is installed and
#' returns an error or `FALSE` if not available. If a package search is provided,
#' the function will check whether a minimum version of a package is required.
#' The function `.get_package_dependencies()` returns a tibble with all
#' dependencies of a specific package. Finally, `.get_min_version_required()`
#' will return, if any, the minimum version of `pkg` required by `pkg_search`,
#' `NULL` if no minimum version required.
#'
#' @param pkg (`string`)\cr
#' Name of the required package.
#' @param fn (`string`)\cr
#' Name of the calling function from the user perspective. Used to write
#' informative error messages.
#' @param pkg_search (`string`)\cr
#' Name of the package the function will search for a minimum
#' required version from.
#' @param boolean (`logical`)\cr
#' Whether to return a `TRUE`/`FALSE`, rather
#' than error when package/package version not available. Default is `FALSE`,
#' which will return an error if `pkg` is not installed.
#' @param remove_duplicates (`logical`)\cr
#' If several versions of a package are installed,
#' should only the first one be returned?
#' @param lib.loc (`string`)\cr
#' Location of `R` library trees to search through, see
#' `utils::installed.packages()`.
#' @details
#' `get_all_packages_dependencies()` could be used to get the list of
#' dependencies of all installed packages.
#'
#' @return logical or error for `.assert_package()`, `NULL` or character with
#' the minimum version required for `.get_min_version_required()`, a tibble for
#' `.get_package_dependencies()`.
#'
#' @name assert_package
#' @examples
#' \donttest{
#' .assert_package("broom", boolean = TRUE)
#' .get_package_dependencies()
#' .get_min_version_required("brms")
#' }
NULL
#' @rdname assert_package
#' @export
.assert_package <- function(pkg, fn = NULL, pkg_search = "broom.helpers", boolean = FALSE) {
# check if min version is required -------------------------------------------
version <- .get_min_version_required(pkg, pkg_search)
compare <- purrr::attr_getter("compare")(version)
# check installation TRUE/FALSE ----------------------------------------------
if (isTRUE(boolean)) {
return(rlang::is_installed(pkg = pkg, version = version, compare = compare))
}
# prompt user to install package ---------------------------------------------
rlang::check_installed(
pkg = pkg,
version = version,
compare = compare,
reason = switch(!is.null(fn),
stringr::str_glue("for `{fn}`")
)
)
invisible()
}
#' @rdname assert_package
#' @export
.get_package_dependencies <- function(pkg_search = "broom.helpers") {
if (is.null(pkg_search)) {
return(NULL)
}
description <- utils::packageDescription(pkg_search)
if (identical(description, NA)) {
return(NULL)
}
description |>
unclass() |>
tibble::as_tibble() |>
dplyr::select(dplyr::any_of(
c(
"Package", "Version", "Imports", "Depends",
"Suggests", "Enhances", "LinkingTo"
)
)) |>
dplyr::rename(
pkg_search = "Package",
pkg_search_version = "Version"
) |>
tidyr::pivot_longer(
-dplyr::all_of(c("pkg_search", "pkg_search_version")),
values_to = "pkg",
names_to = "dependency_type",
) |>
tidyr::separate_rows("pkg", sep = ",") |>
dplyr::mutate(pkg = stringr::str_squish(.data$pkg)) |>
dplyr::filter(!is.na(.data$pkg)) |>
tidyr::separate(
.data$pkg,
into = c("pkg", "version"),
sep = " ", extra = "merge", fill = "right"
) |>
dplyr::mutate(
compare = .data$version |> stringr::str_extract(pattern = "[>=<]+"),
version = .data$version |> stringr::str_remove_all(pattern = "[\\(\\) >=<]")
)
}
#' @rdname assert_package
#' @export
.get_all_packages_dependencies <- function(
pkg_search = NULL,
remove_duplicates = FALSE,
lib.loc = NULL) {
deps <-
utils::installed.packages(lib.loc = lib.loc) |>
tibble::as_tibble() |>
dplyr::select(dplyr::all_of(
c("Package", "Version", "LibPath", "Imports", "Depends", "Suggests", "Enhances", "LinkingTo")
)) |>
dplyr::rename(
pkg_search = "Package",
pkg_search_version = "Version",
lib_path = "LibPath"
)
if (!is.null(pkg_search)) {
deps <- deps |> dplyr::filter(.data$pkg_search %in% .env$pkg_search)
}
if (remove_duplicates) {
deps <- deps |> dplyr::distinct("pkg_search", .keep_all = TRUE)
}
deps |>
tidyr::pivot_longer(
-dplyr::all_of(c("pkg_search", "pkg_search_version", "lib_path")),
values_to = "pkg",
names_to = "dependency_type",
) |>
tidyr::separate_rows("pkg", sep = ",") |>
dplyr::mutate(pkg = stringr::str_squish(.data$pkg)) |>
dplyr::filter(!is.na(.data$pkg)) |>
tidyr::separate(
.data$pkg,
into = c("pkg", "version"),
sep = " ", extra = "merge", fill = "right"
) |>
dplyr::mutate(
compare = .data$version |> stringr::str_extract(pattern = "[>=<]+"),
version = .data$version |> stringr::str_remove_all(pattern = "[\\(\\) >=<]")
)
}
#' @rdname assert_package
#' @export
.get_min_version_required <- function(pkg, pkg_search = "broom.helpers") {
if (is.null(pkg_search)) {
return(NULL)
}
res <- .get_package_dependencies(pkg_search) |>
dplyr::filter(.data$pkg == .env$pkg & !is.na(.data$version))
if (nrow(res) == 0) {
return(NULL)
}
version <- res |> purrr::pluck("version")
attr(version, "compare") <- res |> purrr::pluck("compare")
names(version) <- res |> purrr::pluck("dependency_type")
version
}
broom.helpers/R/tidy_add_variable_labels.R 0000644 0001762 0000144 00000010402 14733566032 020333 0 ustar ligges users #' Add variable labels
#'
#' Will add variable labels in a `var_label` column, based on:
#' 1. labels provided in `labels` argument if provided;
#' 2. variable labels defined in the original data frame with
#' the `label` attribute (cf. [labelled::var_label()]);
#' 3. variable name otherwise.
#'
#' @details
#' If the `variable` column is not yet available in `x`,
#' [tidy_identify_variables()] will be automatically applied.
#'
#' It is possible to pass a custom label for an interaction
#' term in `labels` (see examples).
#' @param x (`data.frame`)\cr
#' A tidy tibble as produced by `tidy_*()` functions.
#' @param labels ([`formula-list-selector`][gtsummary::syntax])\cr
#' An optional named list or a named vector of custom variable labels.
#' @param instrumental_suffix (`string`)\cr
#' Suffix added to variable labels for instrumental variables (`fixest` models).
#' `NULL` to add nothing.
#' @param model (a model object, e.g. `glm`)\cr
#' The corresponding model, if not attached to `x`.
#' @inheritParams tidy_plus_plus
#' @export
#' @family tidy_helpers
#' @examples
#' df <- Titanic |>
#' dplyr::as_tibble() |>
#' dplyr::mutate(Survived = factor(Survived, c("No", "Yes"))) |>
#' labelled::set_variable_labels(
#' Class = "Passenger's class",
#' Sex = "Sex"
#' )
#'
#' glm(Survived ~ Class * Age * Sex, data = df, weights = df$n, family = binomial) |>
#' tidy_and_attach() |>
#' tidy_add_variable_labels(
#' labels = list(
#' "(Intercept)" ~ "Custom intercept",
#' Sex ~ "Gender",
#' "Class:Age" ~ "Custom label"
#' )
#' )
tidy_add_variable_labels <- function(x,
labels = NULL,
interaction_sep = " * ",
instrumental_suffix = " (instrumental)",
model = tidy_get_model(x)) {
if (is.null(model)) {
cli::cli_abort(c(
"{.arg model} is not provided.",
"You need to pass it or to use {.fn tidy_and_attach}."
))
}
if ("header_row" %in% names(x)) {
cli::cli_abort(paste(
"{.fn tidy_add_variable_labels} cannot be applied",
"after {.fn tidy_add_header_rows}."
))
}
.attributes <- .save_attributes(x)
if ("var_label" %in% names(x)) {
x <- x |> dplyr::select(-dplyr::all_of("var_label"))
}
if (!"variable" %in% names(x) || !"var_type" %in% names(x)) {
x <- x |> tidy_identify_variables(model = model)
}
if (is.atomic(labels)) labels <- as.list(labels) # vectors allowed
cards::process_formula_selectors(
data = scope_tidy(x),
labels = labels
)
labels <- unlist(labels)
# start with the list of terms
var_labels <- unique(x$term)
names(var_labels) <- var_labels
# add the list of variables from x
additional_labels <- x$variable[!is.na(x$variable)] |> unique()
names(additional_labels) <- additional_labels
var_labels <- var_labels |>
.update_vector(additional_labels)
# add the list of variables from model_list_variables
variable_list <- model_list_variables(
model,
labels = labels,
instrumental_suffix = instrumental_suffix
)
additional_labels <- variable_list$var_label
names(additional_labels) <- variable_list$variable
var_labels <- var_labels |>
.update_vector(additional_labels)
var_labels <- var_labels |>
.update_vector(labels)
# save custom labels
.attributes$variable_labels <- labels
# management of interaction terms
interaction_terms <- x$variable[!is.na(x$var_type) & x$var_type == "interaction"]
# do not treat those specified in labels
interaction_terms <- setdiff(interaction_terms, names(labels))
names(interaction_terms) <- interaction_terms
# compute labels for interaction terms
interaction_terms <- interaction_terms |>
strsplit(":") |>
lapply(function(x) {
paste(var_labels[x], collapse = interaction_sep)
}) |>
unlist()
var_labels <- var_labels |> .update_vector(interaction_terms)
x |>
dplyr::left_join(
tibble::tibble(
variable = names(var_labels),
var_label = var_labels
),
by = "variable"
) |>
tidy_attach_model(model = model, .attributes = .attributes)
}
broom.helpers/vignettes/ 0000755 0001762 0000144 00000000000 15062475005 015045 5 ustar ligges users broom.helpers/vignettes/broom-helpers.Rmd 0000644 0001762 0000144 00000042171 15024565236 020300 0 ustar ligges users ---
title: "Getting Started with broom.helpers"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Getting Started with broom.helpers}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
rows.print = 25
)
# one of the functions below needs emmeans, so dont evaluate code check in vignette
# on old R versions where emmeans is not available
if (!rlang::is_installed("emmeans")) {
knitr::opts_chunk$set(eval = FALSE)
}
```
The `broom.helpers` package offers a suite of functions that make easy to interact, add information, and manipulate tibbles created with `broom::tidy()` (and friends).
The suite includes functions to group regression model terms by variable, insert reference and header rows for categorical variables, add variable labels, and more.
As a motivating example, let's summarize a logistic regression model with a forest plot and in a table.
To begin, let's load our packages.
```{r setup, warning=FALSE, message=FALSE}
library(broom.helpers)
library(gtsummary)
library(ggplot2)
library(dplyr)
# paged_table() was introduced only in rmarkdwon v1.2
print_table <- function(tab) {
if (packageVersion("rmarkdown") >= "1.2") {
rmarkdown::paged_table(tab)
} else {
knitr::kable(tab)
}
}
```
Our model predicts tumor response using chemotherapy treatment and tumor grade.
The data set we're utilizing has already labelled the columns using the [labelled package](https://larmarange.github.io/labelled/).
The column labels will be carried through to our figure and table.
```{r}
model_logit <- glm(response ~ trt + grade, trial, family = binomial)
broom::tidy(model_logit)
```
## Forest Plot
To create the figure, we'll need to add some information to the tidy tibble, i.e. we'll need to group the terms that belong to the same variable, add the reference row, etc.
Parsing this information can be difficult, but the `broom.helper` package has made it simple.
```{r}
tidy_forest <-
model_logit |>
# perform initial tidying of the model
tidy_and_attach(exponentiate = TRUE, conf.int = TRUE) |>
# adding in the reference row for categorical variables
tidy_add_reference_rows() |>
# adding a reference value to appear in plot
tidy_add_estimate_to_reference_rows() |>
# adding the variable labels
tidy_add_term_labels() |>
# removing intercept estimate from model
tidy_remove_intercept()
tidy_forest
```
**Note:** we used `tidy_and_attach()` instead of `broom::tidy()`. `broom.helpers` functions needs a copy of the original model. To avoid passing the model at each step, the easier way is to attach the model as an attribute of the tibble with `tidy_attach_model()`. `tidy_and_attach()` is simply a shortcut of `model |> broom::tidy() |> tidy_and_attach(model)`.
We now have a tibble with every piece of information we need to create our forest plot using `ggplot2`.
```{r, warning=FALSE}
tidy_forest |>
mutate(
plot_label = paste(var_label, label, sep = ":") |>
forcats::fct_inorder() |>
forcats::fct_rev()
) |>
ggplot(aes(x = plot_label, y = estimate, ymin = conf.low, ymax = conf.high, color = variable)) +
geom_hline(yintercept = 1, linetype = 2) +
geom_pointrange() +
coord_flip() +
theme(legend.position = "none") +
labs(
y = "Odds Ratio",
x = " ",
title = "Forest Plot using broom.helpers"
)
```
**Note::** for more advanced and nicely formatted plots of model coefficients, look at `ggstats::ggcoef_model()` and its [dedicated vignette](https://larmarange.github.io/ggstats/articles/ggcoef_model.html). `ggstats::ggcoef_model()` internally uses `broom.helpers`.
## Table Summary
In addition to aiding in figure creation, the broom.helpers package can help summarize a model in a table.
In the example below, we add header and reference rows, and utilize existing variable labels.
Let's change the labels shown in our summary table as well.
```{r}
tidy_table <-
model_logit |>
# perform initial tidying of the model
tidy_and_attach(exponentiate = TRUE, conf.int = TRUE) |>
# adding in the reference row for categorical variables
tidy_add_reference_rows() |>
# adding the variable labels
tidy_add_term_labels() |>
# add header row
tidy_add_header_rows() |>
# removing intercept estimate from model
tidy_remove_intercept()
# print summary table
options(knitr.kable.NA = "")
tidy_table |>
# format model estimates
select(label, estimate, conf.low, conf.high, p.value) |>
mutate(across(all_of(c("estimate", "conf.low", "conf.high")), style_ratio)) |>
mutate(across(p.value, style_pvalue)) |>
print_table()
```
**Note::** for more advanced and nicely formatted tables of model coefficients, look at `gtsummary::tbl_regression()` and its [dedicated vignette](https://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html). `gtsummary::tbl_regression()` internally uses `broom.helpers`.
## All-in-one function
There is also a handy wrapper, called `tidy_plus_plus()`, for the most commonly used `tidy_*()` functions, and they can be executed with a single line of code:
```{r}
model_logit |>
tidy_plus_plus(exponentiate = TRUE)
```
```{r}
model_logit |>
tidy_plus_plus(exponentiate = TRUE) |>
print_table()
```
See the documentation of `tidy_plus_plus()` for the full list of available options.
## Advanced examples
`broom.helpers` can also handle different contrasts for categorical variables and the use of polynomial terms for continuous variables.
### Polynomial terms
When polynomial terms of a continuous variable are defined with `stats::poly()`, `broom.helpers` will be able to identify the corresponding variable, and add header rows.
```{r}
model_poly <- glm(response ~ poly(age, 3) + ttdeath, na.omit(trial), family = binomial)
model_poly |>
tidy_plus_plus(
exponentiate = TRUE,
add_header_rows = TRUE,
variable_labels = c(age = "Age in years")
) |>
select(term, variable, label, header_row, estimate) |>
print_table()
```
You also have an option to relabel polynomial terms. Be aware that, by default, `stats::poly()` generates orthogonal polynomials. Relabels are more appropriate with raw polynomials.
```{r}
model_poly2 <- glm(response ~ poly(age, 3, raw = TRUE) + ttdeath, na.omit(trial), family = binomial)
model_poly2 |>
tidy_plus_plus(
exponentiate = TRUE,
add_header_rows = TRUE,
variable_labels = c(age = "Age in years"),
relabel_poly = TRUE
) |>
select(term, variable, label, header_row, estimate) |>
print_table()
```
### Different type of contrasts
By default, categorical variables are coded with a treatment contrasts (see `stats::contr.treatment()`). With such contrasts, model coefficients correspond to the effect of a modality compared with the reference modality (by default, the first one). `tidy_add_reference_rows()` allows to add a row for this reference modality and `tidy_add_estimate_to_reference_rows()` will populate the estimate value of these references rows by 0 (or 1 if `exponentiate = TRUE`). `tidy_add_term_labels()` is able to retrieve the label of the factor level associated with a specific model term.
```{r}
model_1 <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial
)
model_1 |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_reference_rows() |>
tidy_add_estimate_to_reference_rows(exponentiate = TRUE) |>
tidy_add_term_labels() |>
print_table()
```
Using `stats::contr.treatment()`, it is possible to defined alternative reference rows. It will be properly managed by `broom.helpers`.
```{r}
model_2 <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.treatment(4, base = 3),
grade = contr.treatment(3, base = 2),
trt = contr.treatment(2, base = 2)
)
)
model_2 |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_reference_rows() |>
tidy_add_estimate_to_reference_rows(exponentiate = TRUE) |>
tidy_add_term_labels() |>
print_table()
```
You can also use sum contrasts (cf. `stats::contr.sum()`). In that case, each model coefficient corresponds to the difference of that modality with the grand mean. A variable with 4 modalities will be coded with 3 terms. However, a value could be computed (using `emmeans::emmeans()`) for the last modality, corresponding to the difference of that modality with the grand mean and equal to sum of all other coefficients multiplied by -1. `broom.helpers` will identify categorical variables coded with sum contrasts and could retrieve an estimate value for the reference term.
```{r}
model_3 <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.sum,
grade = contr.sum,
trt = contr.sum
)
)
model_3 |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_reference_rows() |>
tidy_add_estimate_to_reference_rows(exponentiate = TRUE) |>
tidy_add_term_labels() |>
print_table()
```
Other types of contrasts exist, like Helmert (`contr.helmert()`) or polynomial (`contr.poly()`). They are more complex as a modality will be coded with a combination of terms. Therefore, for such contrasts, it will not be possible to associate a specific model term with a level of the original factor. `broom.helpers` will not add a reference term in such case.
```{r}
model_4 <- glm(
response ~ stage + grade * trt,
gtsummary::trial,
family = binomial,
contrasts = list(
stage = contr.poly,
grade = contr.helmert,
trt = contr.poly
)
)
model_4 |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_reference_rows() |>
tidy_add_estimate_to_reference_rows(exponentiate = TRUE) |>
tidy_add_term_labels() |>
print_table()
```
### Pairwise contrasts of categorical variable
Pairwise contrasts of categorical variables could be computed with `tidy_add_pairwise_contrasts()`.
```{r}
model_logit <- glm(response ~ age + trt + grade, trial, family = binomial)
model_logit |>
tidy_and_attach() |>
tidy_add_pairwise_contrasts() |>
print_table()
model_logit |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_pairwise_contrasts() |>
print_table()
model_logit |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_pairwise_contrasts(pairwise_reverse = FALSE) |>
print_table()
model_logit |>
tidy_and_attach(exponentiate = TRUE) |>
tidy_add_pairwise_contrasts(keep_model_terms = TRUE) |>
print_table()
```
## Column Details
Below is a summary of the additional columns that may be added by a `broom.helpers` function.
The table includes the column name, the function that adds the column, and a short description of the information in the column.
```{r, echo=FALSE}
# nolint start
tibble::tribble(
~Column, ~Function, ~Description,
"original_term", "`tidy_disambiguate_terms()`, `tidy_multgee()`, `tidy_zeroinfl()` or `tidy_identify_variables()`", "Original term before disambiguation. This columns is added only when disambiguation is needed (i.e. for mixed models). Also used for \"multgee\", \"zeroinfl\" and \"hurdle\" models. For instrumental variables in \"fixest\" models, the \"fit_\" prefix is removed, and the original terms is stored in this column.",
"variable", "`tidy_identify_variables()`", "String of variable names from the model. For categorical variables and polynomial terms defined with `stats::poly()`, terms belonging to the variable are identified.",
"var_class", "`tidy_identify_variables()`", "Class of the variable.",
"var_type", "`tidy_identify_variables()`", "One of \"intercept\", \"continuous\", \"dichotomous\", \"categorical\", \"interaction\", \"ran_pars\" or \"ran_vals\"",
"var_nlevels", "`tidy_identify_variables()`", "Number of original levels for categorical variables",
"contrasts", "`tidy_add_contrasts()`", "Contrasts used for categorical variables.
Require \"variable\" column. If needed, will automatically apply `tidy_identify_variables()`.",
"contrasts_type", "`tidy_add_contrasts()`", "Type of contrasts (\"treatment\", \"sum\", \"poly\", \"helmert\", \"sdif\", \"other\" or \"no.contrast\"). \"pairwise\ is used for pairwise contrasts computed with `tidy_add_pairwise_contrasts()`.",
"reference_row", "`tidy_add_reference_rows()`", "Logical indicating if a row is a reference row for categorical variables using a treatment or a sum contrast. Is equal to `NA` for variables who do not have a reference row.
Require \"contrasts\" column. If needed, will automatically apply `tidy_add_contrasts()`.
`tidy_add_reference_rows()` will not populate the label of the reference term. It is therefore better to apply `tidy_add_term_labels()` after `tidy_add_reference_rows()` rather than before.",
"var_label", "`tidy_add_variable_labels()`", "String of variable labels from the model. Columns labelled with the `labelled` package are retained. It is possible to pass a custom label for an interaction term with the `labels` argument.
Require \"variable\" column. If needed, will automatically apply `tidy_identify_variables()`.",
"label", "`tidy_add_term_labels()`", "String of term labels based on (1) labels provided in `labels` argument if provided; (2) factor levels for categorical variables coded with treatment, SAS or sum contrasts; (3) variable labels when there is only one term per variable; and (4) term name otherwise.
Require \"variable_label\" column. If needed, will automatically apply `tidy_add_variable_labels()`.
Require \"contrasts\" column. If needed, will automatically apply `tidy_add_contrasts()`.",
"header_row", "`tidy_add_header_rows()`", "Logical indicating if a row is a header row for variables with several terms. Is equal to `NA` for variables who do not have an header row.Require \"label\" column. If needed, will automatically apply `tidy_add_term_labels()`.
It is better to apply `tidy_add_header_rows()` after other `tidy_*` functions",
"n_obs", "`tidy_add_n()`", "Number of observations",
"n_ind", "`tidy_add_n()`", "Number of individuals (for Cox models)",
"n_event", "`tidy_add_n()`", "Number of events (for binomial and multinomial logistic models, Poisson and Cox models)",
"exposure", "`tidy_add_n()`", "Exposure time (for Poisson and Cox models)",
"instrumental", "`tidy_identify_variables()`", "For \"fixest\" models, indicate if a variable was instrumental.",
"group_by", "`tidy_group_by()`", "Grouping variable (particularly for multinomial or multi-components models).",
) |>
dplyr::arrange(Column, .locale = "en") |>
gt::gt() |>
gt::fmt_markdown(columns = everything()) |>
gt::tab_options(
column_labels.font.weight = "bold"
) |>
gt::opt_row_striping() |>
gt::tab_style("vertical-align:top; font-size: 12px;", gt::cells_body())
# nolint end
```
Note: `tidy_add_estimate_to_reference_rows()` does not create an additional column; rather, it populates the 'estimate' column for reference rows.
## Additional attributes
Below is a list of additional attributes that `broom.helpers` may attached to the results. The table includes the attribute name, the function that adds the attribute, and a short description.
```{r, echo=FALSE}
tibble::tribble(
~Attribute, ~Function, ~Description,
"exponentiate", "`tidy_and_attach()`", "Indicates if estimates were exponentiated",
"conf.level", "`tidy_and_attach()`", "Level of confidence used for confidence intervals",
"coefficients_type", "`tidy_add_coefficients_type()`", "Type of coefficients",
"coefficients_label", "`tidy_add_coefficients_type()`", "Coefficients label",
"variable_labels", "`tidy_add_variable_labels()`",
"Custom variable labels passed to `tidy_add_variable_labels()`",
"term_labels", "`tidy_add_term_labels()`",
"Custom term labels passed to `tidy_add_term_labels()`",
"N_obs", "`tidy_add_n()`", "Total number of observations",
"N_event", "`tidy_add_n()`", "Total number of events",
"N_ind", "`tidy_add_n()`", "Total number of individuals (for Cox models)",
"Exposure", "`tidy_add_n()`", "Total of exposure time",
"component", "`tidy_zeroinfl()`", "`component` argument passed to `tidy_zeroinfl()`"
) |>
dplyr::arrange(Attribute, .locale = "en") |>
gt::gt() |>
gt::fmt_markdown(columns = everything()) |>
gt::tab_options(column_labels.font.weight = "bold") |>
gt::opt_row_striping() |>
gt::tab_style("vertical-align:top; font-size: 12px;", gt::cells_body())
```
## Supported models
```{r, echo=FALSE}
supported_models |>
dplyr::rename_with(stringr::str_to_title) |>
gt::gt() |>
gt::fmt_markdown(columns = everything()) |>
gt::tab_options(column_labels.font.weight = "bold") |>
gt::opt_row_striping() |>
gt::tab_style("vertical-align:top; font-size: 12px;", gt::cells_body())
```
Note: this list of models has been tested. `broom.helpers` may or may not work properly or partially with other
types of models. Do not hesitate to provide feedback on [GitHub](https://github.com/larmarange/broom.helpers/issues).
broom.helpers/data/ 0000755 0001762 0000144 00000000000 15044413405 013742 5 ustar ligges users broom.helpers/data/supported_models.rda 0000644 0001762 0000144 00000002372 15044413405 020026 0 ustar ligges users BZh91AY&SYБŠõÿÆÿ7ÿÁLglW?¯Ÿ@ÿÿÿþ@ € Ðå$ÌÀÓRL%ÍO'ªiäCG¨4hѦš
4ЀڛSCLÀÐiÓF†2#CLš4 Ä ‚D)’i='ªiè£ÔÓ&™#M4È2OSdšÐp4d4Ñ¡„††ÐÓ&