ggstats/0000755000176200001440000000000015122161450011723 5ustar liggesusersggstats/tests/0000755000176200001440000000000014737222737013106 5ustar liggesusersggstats/tests/testthat/0000755000176200001440000000000015122161450014725 5ustar liggesusersggstats/tests/testthat/test-gglikert.R0000644000176200001440000001645615122045054017652 0ustar liggesuserstest_that("gglikert()", { skip_on_cran() skip_if_not_installed("labelled") skip_if_not_installed("ggplot2") skip_if_not_installed("dplyr") likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- dplyr::tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> dplyr::mutate(dplyr::across( dplyr::everything(), ~ factor(.x, levels = likert_levels) )) likert_levels_dk <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", "Don't know" ) df_dk <- dplyr::tibble( q1 = sample(likert_levels_dk, 150, replace = TRUE), q2 = sample(likert_levels_dk, 150, replace = TRUE, prob = 6:1), q3 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q4 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q5 = sample(c(likert_levels_dk, NA), 150, replace = TRUE), q6 = sample( likert_levels_dk, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0, 1) ) ) |> dplyr::mutate(dplyr::across( dplyr::everything(), ~ factor(.x, levels = likert_levels_dk) )) expect_doppelganger( "gglikert() mod simple", gglikert(df) ) expect_no_error( d <- gglikert_data(df) ) expect_equal(levels(d$.answer), likert_levels) expect_doppelganger( "gglikert() include and width", gglikert(df, include = q1:q3, width = .5) ) expect_doppelganger( "gglikert() variable_labels", gglikert(df, variable_labels = c(q2 = "second question")) ) expect_doppelganger( "gglikert() sort prop asc", gglikert(df, sort = "asc") ) expect_doppelganger( "gglikert() sort prop desc", gglikert(df, sort = "desc") ) expect_doppelganger( "gglikert() sort prop_lower asc", gglikert(df, sort = "asc", sort_method = "prop_lower") ) expect_doppelganger( "gglikert() sort prop_lower desc", gglikert(df, sort = "desc", sort_method = "prop_lower") ) expect_doppelganger( "gglikert() sort mean asc", gglikert(df, sort = "asc", sort_method = "mean") ) expect_doppelganger( "gglikert() sort mean desc", gglikert(df, sort = "desc", sort_method = "mean") ) expect_doppelganger( "gglikert() sort median asc", gglikert(df, sort = "asc", sort_method = "median") ) expect_doppelganger( "gglikert() sort median desc", gglikert(df, sort = "desc", sort_method = "median") ) expect_doppelganger( "gglikert() sort prop asc include_center", gglikert(df, sort = "asc", sort_prop_include_center = TRUE) ) expect_doppelganger( "gglikert() exclude_fill_values", gglikert(df, exclude_fill_values = "Neither agree nor disagree") ) expect_doppelganger( "gglikert() add_labels", gglikert(df, add_labels = FALSE) ) expect_doppelganger( "gglikert() customize labels", gglikert(df, labels_size = 5, labels_hide_below = .3, labels_accuracy = .1) ) expect_doppelganger( "gglikert() add_totals", gglikert(df, add_totals = FALSE) ) expect_doppelganger( "gglikert() customize totals", gglikert( df, totals_size = 5, totals_fontface = "italic", totals_include_center = TRUE, totals_hjust = 0 ) ) expect_doppelganger( "gglikert() colors", gglikert(df, labels_color = "red", totals_color = "blue") ) expect_doppelganger( "gglikert() reverse", gglikert(df, y_reverse = TRUE, reverse_likert = TRUE) ) expect_doppelganger( "gglikert() variable labels and y_label_wrap", df |> labelled::set_variable_labels( q1 = "first question", q2 = "second question", q3 = "third question with a very very very veru very very long label" ) |> gglikert( variable_labels = c( q2 = "question 2", q4 = "another question with a long long long long long long label" ), y_label_wrap = 20 ) ) expect_doppelganger( "gglikert() cutoff 0", gglikert(df, cutoff = 0) ) expect_doppelganger( "gglikert() cutoff 1", gglikert(df, cutoff = 1) ) expect_doppelganger( "gglikert() cutoff 1 symmetric", gglikert(df, cutoff = 1, symmetric = TRUE) ) expect_doppelganger( "gglikert() cutoff 1.5", gglikert(df, cutoff = 1.5) ) expect_doppelganger( "gglikert() cutoff 5", gglikert(df, cutoff = 5) ) expect_doppelganger( "gglikert_stacked()", gglikert_stacked(df) ) expect_doppelganger( "gglikert_stacked() add_median_line", gglikert_stacked(df, add_median_line = TRUE) ) expect_doppelganger( "gglikert_stacked() labels_color red", gglikert_stacked(df, labels_color = "red") ) expect_doppelganger( "gglikert_stacked() labels_color auto", gglikert_stacked(df, labels_color = "auto") ) expect_doppelganger( "gglikert_stacked() labels_color black", gglikert_stacked(df, labels_color = "black") ) df_group <- df df_group$group1 <- sample(c("A", "B"), 150, replace = TRUE) df_group$group2 <- sample(c("a", "b", "c"), 150, replace = TRUE) expect_doppelganger( "gglikert() facet_cols", gglikert(df_group, q1:q6, facet_cols = vars(group1)) ) expect_doppelganger( "gglikert() facet_rows", gglikert(df_group, q1:q2, facet_rows = vars(group1, group2)) ) expect_doppelganger( "gglikert() facet_rows and facet_cols", gglikert( df_group, q3:q6, facet_cols = vars(group1), facet_rows = vars(group2) ) ) expect_doppelganger( "gglikert() facet_rows with group on y", gglikert(df_group, q1:q6, y = "group1", facet_rows = vars(.question)) ) f <- function(d) { d$.question <- forcats::fct_relevel(d$.question, "q5", "q2") d } expect_doppelganger( "gglikert() with data_fun", gglikert(df_group, q1:q6, data_fun = f) ) expect_error(gglikert(df_group, data_fun = "text")) # testing compatibility with survey object skip_if_not_installed("survey") set.seed(42) df$w <- .5 + stats::runif(n = nrow(df)) ds <- survey::svydesign(ids = ~ 1, weights = ~ w, data = df) expect_no_error( df |> gglikert_data(include = q1:q4, weights = w) ) expect_no_error( ds |> gglikert_data(include = q1:q4) ) expect_error( ds |> gglikert_data(include = q1:q4, weights = w) ) expect_doppelganger( "gglikert() survey", gglikert(ds, q1:q4) ) expect_doppelganger( "gglikert_stacked() survey", gglikert_stacked(ds, q1:q4) ) }) test_that("hex_bw()", { expect_equal(hex_bw("#FFFFFF"), "#000000") expect_equal(hex_bw("#BBBBBB"), "#000000") expect_equal(hex_bw("#000000"), "#ffffff") expect_equal(hex_bw("#444444"), "#ffffff") }) ggstats/tests/testthat/test-ggcascade.R0000644000176200001440000000224415122045054017731 0ustar liggesuserstest_that("ggcascade() works", { skip_on_cran() library(ggplot2) p <- ggplot2::diamonds |> ggcascade( all = TRUE, big = carat > .5, "big & ideal" = carat > .5 & cut == "Ideal" ) expect_doppelganger( "ggcascade diamonds", p ) p <- ggplot2::mpg |> ggcascade( all = TRUE, recent = year > 2000, "recent & economic" = year > 2000 & displ < 3, .by = cyl, .ncol = 3, .arrows = FALSE ) expect_doppelganger( "ggcascade mpg by, no arrow and ncol", p ) p <- ggplot2::mpg |> ggcascade( all = TRUE, recent = year > 2000, "recent & economic" = year > 2000 & displ < 3, .by = pick(cyl, drv), .add_n = FALSE, .text_size = 2 ) expect_doppelganger( "ggcascade mpg py pick, no n, text_size", p ) d <- as.data.frame(Titanic) p <- d |> ggcascade( all = TRUE, female = Sex == "Female", "female & survived" = Sex == "Female" & Survived == "Yes", .weights = Freq, .by = Class ) expect_doppelganger( "ggcascade titanic weights", p ) }) ggstats/tests/testthat/test-ggcoef_model.R0000644000176200001440000003075215122045054020447 0ustar liggesuserstest_that("ggcoef_model()", { skip_on_cran() skip_if_not_installed("broom.helpers") skip_if_not_installed("reshape") data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) expect_doppelganger( "ggcoef_model() mod simple", ggcoef_model(mod_simple) ) expect_doppelganger( "ggcoef_model() mod simple no guide", ggcoef_model(mod_simple, shape_guide = FALSE, colour_guide = FALSE) ) expect_error( mod_simple |> ggcoef_dodged() ) expect_error( mod_simple |> ggcoef_faceted() ) # custom variable labels # you can use to define variable labels before computing model if (requireNamespace("labelled")) { tips_labelled <- tips |> labelled::set_variable_labels( day = "Day of the week", time = "Lunch or Dinner", total_bill = "Bill's total" ) mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) expect_doppelganger( "ggcoef_model() mod labelled", ggcoef_model(mod_labelled) ) } expect_doppelganger( "ggcoef_model() mod simple with variable labels", ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ) ) ) # if labels are too long, you can use 'facet_labeller' to wrap them expect_doppelganger( "ggcoef_model() mod simple facet_labeller", ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ), facet_labeller = ggplot2::label_wrap_gen(10) ) ) # do not display variable facets but add colour guide expect_doppelganger( "ggcoef_model() mod simple no variable facets", ggcoef_model( mod_simple, facet_row = NULL, colour_guide = TRUE ) ) # a logistic regression example d_titanic <- as.data.frame(Titanic) d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) mod_titanic <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial ) expect_doppelganger( "ggcoef_model() logistic regression", ggcoef_model(mod_titanic, exponentiate = TRUE) ) # display intercept expect_doppelganger( "ggcoef_model() logistic regression with intercept", ggcoef_model(mod_titanic, exponentiate = TRUE, intercept = TRUE) ) # display only a subset of terms expect_doppelganger( "ggcoef_model() logistic regression subset", ggcoef_model(mod_titanic, exponentiate = TRUE, include = c("Age", "Class")) ) # do not change points' shape based on significance expect_doppelganger( "ggcoef_model() logistic regression no significance", ggcoef_model(mod_titanic, exponentiate = TRUE, significance = NULL) ) # a black and white version expect_doppelganger( "ggcoef_model() logistic regression black and white", ggcoef_model( mod_titanic, exponentiate = TRUE, colour = NULL, stripped_rows = FALSE ) ) # show dichotomous terms on one row expect_doppelganger( "ggcoef_model() logistic regression no reference row", ggcoef_model( mod_titanic, exponentiate = TRUE, no_reference_row = broom.helpers::all_dichotomous(), categorical_terms_pattern = "{ifelse(dichotomous, paste0(level, ' / ', reference_level), level)}", show_p_values = FALSE ) ) # works also with with polynomial terms mod_poly <- lm( tip ~ poly(total_bill, 3) + day, data = tips, ) expect_doppelganger( "ggcoef_model() polynomial terms", ggcoef_model(mod_poly) ) # or with different type of contrasts # for sum contrasts, the value of the reference term is computed if (requireNamespace("emmeans")) { mod2 <- lm( tip ~ day + time + sex, data = tips, contrasts = list(time = contr.sum, day = contr.treatment(4, base = 3)) ) expect_doppelganger( "ggcoef_model() different types of contrasts", ggcoef_model(mod2) ) } }) test_that("ggcoef_compare()", { skip_if_not_installed("broom.helpers") skip_on_cran() # Use ggcoef_compare() for comparing several models on the same plot mod1 <- lm(Fertility ~ ., data = swiss) mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) expect_doppelganger( "ggcoef_compare() dodged", ggcoef_compare(models) ) expect_doppelganger( "ggcoef_compare() faceted", ggcoef_compare(models, type = "faceted") ) expect_doppelganger( "ggcoef_compare() table", ggcoef_compare(models, type = "table") ) d <- as.data.frame(Titanic) m1 <- glm(Survived ~ Sex + Age, family = binomial, data = d, weights = Freq) m2 <- glm( Survived ~ Sex + Age + Class, family = binomial, data = d, weights = Freq ) models <- list("Model 1" = m1, "Model 2" = m2) expect_doppelganger( "ggcoef_compare() titanic dodged", ggcoef_compare(models) ) expect_doppelganger( "ggcoef_compare() titanic faceted", ggcoef_compare(models, type = "faceted") ) expect_doppelganger( "ggcoef_compare() titanic table", ggcoef_compare(models, type = "table") ) rd <- ggcoef_compare(models, return_data = TRUE) expect_equal( levels(rd$label), c("Male", "Female", "Child", "Adult", "1st", "2nd", "3rd", "Crew") ) expect_no_error( ggcoef_compare(models, add_reference_rows = FALSE) ) }) test_that("ggcoef_*() and multinom models", { skip_if_not_installed("broom.helpers") skip_if_not_installed("nnet") skip_on_cran() library(nnet) hec <- as.data.frame(HairEyeColor) mod <- multinom( Hair ~ Eye + Sex, data = hec, weights = hec$Freq ) expect_doppelganger( "ggcoef_model() multinom", ggcoef_model(mod, exponentiate = TRUE) ) expect_doppelganger( "ggcoef_table() multinom", ggcoef_table(mod, exponentiate = TRUE) ) expect_doppelganger( "ggcoef_dodged() multinom", ggcoef_dodged(mod, exponentiate = TRUE) ) expect_doppelganger( "ggcoef_faceted() multinom", ggcoef_faceted(mod, exponentiate = TRUE) ) expect_doppelganger( "ggcoef_faceted() with custom group_labels", ggcoef_faceted( mod, group_labels = c("Brown" = "Brown\n(ref: Black)"), exponentiate = TRUE ) ) }) test_that("ggcoef_model() works with tieders not returning p-values", { skip_if_not_installed("broom.helpers") skip_on_cran() mod <- lm(Sepal.Width ~ Species, iris) my_tidier <- function(x, ...) { x |> broom::tidy(...) |> dplyr::select(-dplyr::all_of("p.value")) } expect_doppelganger( "ggcoef_model() no p values", ggcoef_model(mod, tidy_fun = my_tidier) ) }) test_that("ggcoef_compare() complete NA respecting variables order", { skip_if_not_installed("broom.helpers") m1 <- lm(Fertility ~ Education + Catholic, data = swiss) m2 <- lm(Fertility ~ Education + Catholic + Agriculture, data = swiss) m3 <- lm( Fertility ~ Education + Catholic + Agriculture + Infant.Mortality, data = swiss ) res <- ggcoef_compare(models = list(m1, m2, m3), return_data = TRUE) expect_equal( res$variable[1:4], structure(1:4, .Label = c( "Education", "Catholic", "Agriculture", "Infant.Mortality" ), class = "factor") ) }) test_that("ggcoef_compare() does not produce an error with an include", { skip_if_not_installed("survival") skip_if_not_installed("broom.helpers") skip_on_cran() m1 <- survival::coxph( survival::Surv(time, status) ~ prior + age, data = survival::veteran ) m2 <- survival::coxph( survival::Surv(time, status) ~ prior + celltype, data = survival::veteran ) models <- list("Model 1" = m1, "Model 2" = m2) expect_doppelganger( "ggcoef_compare() with include", ggcoef_compare(models, include = broom.helpers::starts_with("p")) ) }) test_that("ggcoef_model() works with pairwise contratst", { skip_if_not_installed("broom.helpers") skip_if_not_installed("emmeans") mod <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris) expect_no_error( ggcoef_model(mod, add_pairwise_contrasts = TRUE) ) expect_no_error( ggcoef_model( mod, add_pairwise_contrasts = TRUE, pairwise_variables = dplyr::starts_with("Sp"), keep_model_terms = TRUE ) ) mod2 <- lm(Sepal.Length ~ Species, data = iris) expect_no_error( ggcoef_compare(list(mod, mod2), add_pairwise_contrasts = TRUE) ) }) test_that("tidy_args is supported", { mod <- lm(Sepal.Length ~ Sepal.Width, data = iris) custom <- function(x, force = 1, ...) { broom::tidy(x, ...) |> dplyr::mutate(estimate = force) } res <- ggcoef_model( mod, tidy_fun = custom, tidy_args = list(force = 3), return_data = TRUE ) expect_equal(res$estimate, 3) }) test_that("ggcoef_table()", { skip_on_cran() skip_if_not_installed("broom.helpers") skip_if_not_installed("reshape") data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) expect_doppelganger( "ggcoef_table() mod simple", ggcoef_table(mod_simple) ) expect_doppelganger( "ggcoef_table() table_stat", ggcoef_table(mod_simple, table_stat = c("p.value", "ci")) ) expect_doppelganger( "ggcoef_table() table_header", ggcoef_table(mod_simple, table_header = c("A", "B", "C")) ) expect_error( ggcoef_table(mod_simple, table_header = c("A", "B", "C", "D")) ) expect_doppelganger( "ggcoef_table() table_text_size", ggcoef_table(mod_simple, table_text_size = 5) ) expect_doppelganger( "ggcoef_table() table_stat_label ", ggcoef_table( mod_simple, table_stat_label = list( estimate = scales::label_percent(.1) ) ) ) expect_doppelganger( "ggcoef_table() ci_pattern", ggcoef_table(mod_simple, ci_pattern = "{conf.low} to {conf.high}") ) expect_doppelganger( "ggcoef_table() table_widths", ggcoef_table(mod_simple, table_widths = c(1, 2)) ) expect_doppelganger( "ggcoef_table() stripped_rows", ggcoef_table(mod_simple, stripped_rows = FALSE) ) expect_doppelganger( "ggcoef_table() show_p_values & signif_stars", ggcoef_table(mod_simple, show_p_values = TRUE, signif_stars = TRUE) ) expect_doppelganger( "ggcoef_table() show_p_values only", ggcoef_table(mod_simple, show_p_values = TRUE, signif_stars = FALSE) ) expect_doppelganger( "ggcoef_table() signif_stars only", ggcoef_table(mod_simple, show_p_values = FALSE, signif_stars = TRUE) ) expect_doppelganger( "ggcoef_table() customized statistics", ggcoef_table( mod_simple, table_stat = c("label", "estimate", "std.error", "ci"), ci_pattern = "{conf.low} to {conf.high}", table_stat_label = list( estimate = scales::label_number(accuracy = .01), conf.low = scales::label_number(accuracy = .1), conf.high = scales::label_number(accuracy = .1), std.error = scales::label_number(accuracy = .001), label = toupper ), table_header = c("Term", "Coef.", "SE", "CI"), table_widths = c(2, 3) ) ) }) test_that("ggcoef_*() and multicomponents models", { skip_on_cran() skip_if_not_installed("broom.helpers") skip_if_not_installed("pscl") library(pscl) data("bioChemists", package = "pscl") mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) expect_doppelganger( "ggcoef_model() zeroinfl", ggcoef_model(mod) ) expect_doppelganger( "ggcoef_table() zeroinfl", ggcoef_table(mod) ) expect_doppelganger( "ggcoef_dodged() zeroinfl", ggcoef_dodged(mod) ) expect_doppelganger( "ggcoef_faceted() zeroinfl", ggcoef_faceted(mod) ) expect_doppelganger( "ggcoef_faceted() zeroinfl with custom group_labels", ggcoef_faceted( mod, group_labels = c( conditional = "Count", zero_inflated = "Zero-inflated" ) ) ) }) ggstats/tests/testthat/test-position_likert.R0000644000176200001440000000561415122045054021252 0ustar liggesuserstest_that("position_likert()", { skip_on_cran() library(ggplot2) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + scale_x_continuous(label = label_percent_abs()) + scale_fill_brewer(palette = "PiYG") + xlab("proportion") expect_doppelganger( "position_likert() base", p ) expect_doppelganger( "position_likert() facet", p + facet_grid(~ price > 2500) ) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "diverging") + scale_x_continuous(label = label_number_abs()) + scale_fill_brewer(palette = "PiYG") expect_doppelganger( "position_diverging() base", p ) expect_doppelganger( "position_diverging() facet", p + facet_grid(~ price > 2500) ) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(reverse = TRUE)) expect_doppelganger( "position_likert() reverse", p ) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_diverging(reverse = TRUE)) expect_doppelganger( "position_diverging() reverse", p ) custom_label <- function(x) { p <- scales::percent(x, accuracy = 1) p[x < .075] <- "" p } p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + geom_text( aes(by = clarity, label = custom_label(after_stat(prop))), stat = "prop", position = position_likert(vjust = .5) ) + scale_x_continuous(label = label_percent_abs()) + scale_fill_brewer(palette = "PiYG", direction = -1) + xlab("proportion") expect_doppelganger( "position_likert() vjust", p ) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(exclude_fill_values = "Very Good")) + scale_x_continuous(label = label_percent_abs()) + scale_fill_brewer(palette = "PiYG") + xlab("proportion") expect_doppelganger( "position_likert() exclude_fill_values", p ) }) test_that("geom_diverging() & associates", { library(ggplot2) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_diverging() + geom_diverging_text(aes(color = after_scale(hex_bw(.data$fill)))) expect_doppelganger( "geom_diverging and geom_diverging_text", p ) p <- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text(aes(color = after_scale(hex_bw(.data$fill)))) expect_doppelganger( "geom_likert and geom_likert_text", p ) d <- Titanic |> as.data.frame() p <- ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_pyramid() + geom_pyramid_text() expect_doppelganger( "geom_pyramid and geom_pyramid_text", p ) }) ggstats/tests/testthat/test_ggsurvey.R0000644000176200001440000000126715122045054017771 0ustar liggesuserstest_that("ggsurvey works correctly", { skip_on_cran() skip_if_not_installed("survey") skip_if_not_installed("ggplot2") library(ggplot2) data(api, package = "survey") dstrat <- survey::svydesign( id = ~1, strata = ~stype, weights = ~pw, data = apistrat, fpc = ~fpc ) expect_doppelganger( "ggsurvey() dstrat", ggsurvey(dstrat) + aes(x = cnum, y = dnum) + geom_count() ) d <- as.data.frame(Titanic) dw <- survey::svydesign(ids = ~1, weights = ~Freq, data = d) expect_doppelganger( "ggsurvey() titanic", ggsurvey(dw) + aes(x = Class, fill = Survived) + geom_bar(position = "fill") ) }) ggstats/tests/testthat/test-geom_connector.R0000644000176200001440000000454615122045054021040 0ustar liggesuserstest_that("geom_connector() and geom_bar_connector()() works", { skip_on_cran() library(ggplot2) p <- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5) + geom_bar_connector(width = .5, linewidth = .25) + theme_minimal() + theme(legend.position = "bottom") expect_doppelganger( "geom_bar_connector", p ) p <- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5) + geom_bar_connector( width = .5, continuous = TRUE, colour = "red", linetype = "dotted", add_baseline = FALSE, ) + theme(legend.position = "bottom") expect_doppelganger( "geom_bar_connector continuous and no baseline", p ) p <- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5, position = "fill") + geom_bar_connector(width = .5, position = "fill") + theme(legend.position = "bottom") expect_doppelganger( "geom_bar_connector position fill", p ) p <- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5, position = "diverging") + geom_bar_connector(width = .5, position = "diverging", linewidth = .25) + theme(legend.position = "bottom") expect_doppelganger( "geom_bar_connector position diverging", p ) p <- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector() + geom_point() expect_doppelganger( "geom_connector", p ) p <- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(continuous = TRUE) + geom_point() expect_doppelganger( "geom_connector continuous", p ) p <- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = 0) + geom_point() expect_doppelganger( "geom_connector zero width", p ) p <- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = Inf) + geom_point() expect_doppelganger( "geom_connector infinite width", p ) p <- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = Inf, continuous = TRUE) + geom_point() expect_doppelganger( "geom_connector infinite width and continuous", p ) }) ggstats/tests/testthat/test-stat_weighted_mean.R0000644000176200001440000000336215122045054021665 0ustar liggesuserstest_that("stat_weighted_mean()", { skip_on_cran() skip_if_not_installed("reshape") library(ggplot2) data(tips, package = "reshape") expect_doppelganger( "stat_weighted_mean() point", ggplot(tips) + aes(x = day, y = total_bill) + geom_point() ) expect_doppelganger( "stat_weighted_mean() geom-default", ggplot(tips) + aes(x = day, y = total_bill) + stat_weighted_mean() ) expect_doppelganger( "stat_weighted_mean() geom-line", ggplot(tips) + aes(x = day, y = total_bill, group = 1) + stat_weighted_mean(geom = "line") ) expect_doppelganger( "stat_weighted_mean() geom-line-grouped", ggplot(tips) + aes(x = day, y = total_bill, colour = sex, group = sex) + stat_weighted_mean(geom = "line") ) expect_doppelganger( "stat_weighted_mean() geom-bar-dodge", ggplot(tips) + aes(x = day, y = total_bill, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") ) # computing a proportion on the fly expect_doppelganger( "stat_weighted_mean() geom-bar-dodge-percent", ggplot(tips) + aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + scale_y_continuous(labels = scales::percent) ) # taking into account some weights d <- as.data.frame(Titanic) expect_doppelganger( "stat_weighted_mean() titanic", ggplot(d) + aes( x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex ) + geom_bar(stat = "weighted_mean", position = "dodge") + scale_y_continuous(labels = scales::percent) + labs(y = "Survived") ) }) ggstats/tests/testthat/test-stat_cross.R0000644000176200001440000000425415122045054020217 0ustar liggesuserstest_that("stat_cross()", { skip_on_cran() library(ggplot2) d <- as.data.frame(Titanic) # plot number of observations p <- ggplot(d) + aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + stat_cross() + scale_size_area(max_size = 20) expect_doppelganger("stat_cross() n obs", p) # custom shape and fill colour based on chi-squared residuals p <- ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) expect_doppelganger("stat_cross() shape-22", p) # custom shape and fill colour based phi coefficients p <- ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(phi) ) + stat_cross(shape = 22) + scale_fill_steps2(show.limits = TRUE) + scale_size_area(max_size = 20) expect_doppelganger("stat_cross() phi coefficients", p) # plotting the number of observations as a table p <- ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = after_stat(observed) ) + geom_text(stat = "cross") expect_doppelganger("stat_cross() table", p) # Row proportions with standardized residuals p <- ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = scales::percent(after_stat(row.prop)), size = NULL, fill = after_stat(std.resid) ) + stat_cross(shape = 22, size = 30) + geom_text(stat = "cross") + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + facet_grid(Sex ~ .) + labs(fill = "Standardized residuals") expect_doppelganger("stat_cross() residuals", p) }) test_that("phi coefficients", { res <- Titanic |> as.data.frame() |> xtabs(Freq ~ Sex + Class, data = _) |> chisq.test() |> augment_chisq_add_phi() |> dplyr::mutate(.phi = round(.data$.phi, digits = 3)) expect_equal( res$.phi, c(-0.236, 0.236, -0.149, 0.149, -0.107, 0.107, 0.375, -0.375) ) }) ggstats/tests/testthat/test-geom_stripped.R0000644000176200001440000000062615122045054020673 0ustar liggesuserstest_that("geom_stripped_cols() and geom_stripped_rows() works", { skip_on_cran() library(ggplot2) p <- ggplot(iris) + aes(x = Species, y = Petal.Length) + geom_count() expect_doppelganger( "stripped rows and cols", p + geom_stripped_rows( odd = "blue", even = "yellow", alpha = .1, nudge_y = .5 ) + geom_stripped_cols() ) }) ggstats/tests/testthat/test-stat_prop.R0000644000176200001440000001105515122045054020043 0ustar liggesuserstest_that("stat_prop()", { skip_on_cran() library(ggplot2) d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) expect_doppelganger( "stat_prop() titanic", p ) expect_doppelganger( "stat_prop() direct call", ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + stat_prop(geom = "bar") ) expect_doppelganger( "stat_prop() titanic-facet", p + facet_grid(~Sex) ) expect_doppelganger( "stat_prop() titanic-dodge", ggplot(d) + aes(x = Class, fill = Survived, weight = Freq) + geom_bar(position = "dodge") + geom_text( aes(by = Survived), stat = "prop", position = position_dodge(0.9), vjust = "bottom" ) ) expect_doppelganger( "stat_prop() titanic-dodge (not specifying by)", ggplot(d) + aes(x = Class, fill = Survived, weight = Freq) + geom_bar(position = "dodge") + geom_text( stat = "prop", position = position_dodge(0.9), vjust = "bottom" ) ) expect_doppelganger( "stat_prop() titanic-stack", ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = 1) + geom_bar() + geom_text( aes(label = scales::percent(after_stat(prop), accuracy = 1)), stat = "prop", position = position_stack(.5) ) ) }) test_that("stat_prop() works with an y aesthetic", { library(ggplot2) skip_on_cran() d <- as.data.frame(Titanic) p <- ggplot(d) + aes(y = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) expect_doppelganger("stat_prop() y-aes", p) }) test_that("stat_prop() works with a character by", { library(ggplot2) skip_on_cran() d <- as.data.frame(Titanic) p <- ggplot(d) + aes( y = Class, fill = Survived, weight = Freq, by = as.character(Class), label = scales::percent(after_stat(prop)) ) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) expect_doppelganger("stat_prop() by-character", p) }) test_that("stat_prop() works with default_by", { library(ggplot2) skip_on_cran() d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, y = after_stat(prop)) + geom_bar(stat = "prop") expect_doppelganger("stat_prop() default_by none", p) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, y = after_stat(prop)) + geom_bar(stat = "prop", default_by = "fill") expect_doppelganger("stat_prop() default_by fill", p) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, y = after_stat(prop)) + geom_bar(stat = "prop", default_by = "x") expect_doppelganger("stat_prop() default_by x", p) p <- ggplot(d) + aes(y = Class, fill = Survived, weight = Freq, x = after_stat(prop)) + geom_bar(stat = "prop", default_by = "x") expect_doppelganger("stat_prop() default_by x horizontal", p) }) test_that("stat_prop() complete argument", { skip_on_cran() library(ggplot2) df <- dplyr::tibble( year = c(rep(2020, 60), rep(2021, 80), rep(2022, 60)), grp = c( rep("a", 20), rep("b", 10), rep("c", 30), rep("a", 45), rep("c", 35), rep("a", 20), rep("b", 30), rep("c", 10) ) |> factor(levels = c("a", "b", "c")) ) p <- ggplot(df) + aes(x = year, color = grp) + geom_line( stat = "prop", complete = "color" ) expect_doppelganger("stat_prop() complete color", p) p <- ggplot(df) + aes(x = year, colour = grp) + geom_line( stat = "prop", complete = "colour" ) expect_doppelganger("stat_prop() complete colour", p) p <- ggplot(df) + aes(x = year, group = grp) + geom_line( stat = "prop", complete = "group" ) expect_doppelganger("stat_prop() complete group", p) }) test_that("geom_prop_bar() & geom_prop_text() & geom_prop_connector()", { library(ggplot2) d <- as.data.frame(Titanic) p <- ggplot(d) + aes(y = Class, fill = Survived, weight = Freq) + geom_prop_bar() + geom_prop_text() + geom_prop_connector() expect_doppelganger( "geom_prop_bar() & geom_prop_text() & geom_prop_connector()", p ) }) ggstats/tests/testthat/test-pal_extender.R0000644000176200001440000000041314625277577020526 0ustar liggesuserstest_that("pal_extender() works", { skip_if_not_installed("scales") pal <- scales::pal_brewer(palette = "PiYG") pal_e <- pal_extender(pal = pal) expect_equal(pal(5), pal_e(5)) expect_false(any(is.na(pal_e(20)))) expect_length(pal_e(20), 20L) }) ggstats/tests/testthat/test-utilities.R0000644000176200001440000000067614657111214020056 0ustar liggesuserstest_that("signif_stars() works", { x <- c(0.5, 0.1, 0.05, 0.01, 0.001) expect_equal( signif_stars(x), c("", ".", "*", "**", "***") ) expect_equal( signif_stars(x, one = .15, point = NULL), c("", "*", "*", "**", "***") ) }) test_that("symmetric_limits() works", { expect_equal( symmetric_limits(c(-1, 5)), c(-5, 5) ) expect_equal( symmetric_limits(c(-8, 5)), c(-8, 8) ) }) ggstats/tests/testthat.R0000644000176200001440000000062614737222737015075 0ustar liggesusers# This file is part of the standard setup for testthat. # It is recommended that you do not modify it. # # Where should you do additional test configuration? # Learn more about the roles of various files in: # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview # * https://testthat.r-lib.org/articles/special-files.html library(testthat) library(ggstats) test_check("ggstats") ggstats/MD50000644000176200001440000001441615122161450012241 0ustar liggesuserse7da5dfad347861e44d1fc6c0ce1681d *DESCRIPTION bbfab1862d14689d96f9bef5ffb3eec5 *NAMESPACE aaf7ab6f7491673e96e01fc3943dd212 *NEWS.md 8a9186441794d7b116ff80ce38e57b72 *R/deprecated.R 7c0d0ed45122080bce7fb89b9c74610e *R/geom_connector.R f552d61a1736115672ba3881227f531f *R/geom_diverging.R 79a6d70006904f6bd1ce00b774551ae9 *R/geom_stripped_rows.R f81bb0e370788b3c0405e8c5c5b7a9d3 *R/ggcascade.R 4f160ba210d54e9070a7c6b2a6ce6bdb *R/ggcoef_model.R 55ed80c5f59b683c90542739d7d2e756 *R/gglikert.R 77ce8808eff5c4b342bb53e0c6fa1be9 *R/ggstats-package.R 953e73724fb8b0057a745c342d9d7b22 *R/ggsurvey.R c66c83a015221ed897311c2c6ab6111f *R/hex_bw.R c98e4b527646b20b3eb12f971ea38ea6 *R/label_number_abs.R 53777b19a7546712a32a9b128b739f4c *R/pal_extender.R 88c2ad8cb862d496d4d191ff4122ef4a *R/position_likert.R 3b1fdb66cd57067a20e46728f164c55c *R/round_any.R 365376ccbadc09cf99d6282994843ef7 *R/scale_fill_likert.R da97f039976af617e0731565c2ab13f3 *R/signif_stars.R 4c6668b24d12e94b24249afae6eaee11 *R/stat_cross.R cb2fb8c45adc2e1cfc20549804b1825d *R/stat_prop.R 3e3e00b232d7898b21429aa8e1388bff *R/stat_weighted_mean.R 2812c01ed28845bb99c972976e5301a4 *R/symmetric_limits.R 082115d40f7c8326ef91f064082e8615 *R/vdiffr-helper.R 01701b0044b651fffb567d537444bd64 *R/weighted_quantile.R 30b51ffb810a3156f834e004b11b5950 *R/weighted_sum.R 3b26a859425385cc75608e783d318ae9 *README.md c63a2086ccae0726e84ea5b30a1b647d *build/vignette.rds 94e28aa5b1932b50c67bf7ed0714aaf4 *inst/WORDLIST e5daf6523421ecb3d0a175ca52c6046e *inst/doc/geom_diverging.R 573285925a93fd66410800c4136d1a56 *inst/doc/geom_diverging.Rmd a5c735af2be167a2d843cfe9db967737 *inst/doc/geom_diverging.html 0e34aea158f6a922bb8e157d60865408 *inst/doc/ggcoef_model.R 4a6aa66c4c8e3773242d3085cd000cca *inst/doc/ggcoef_model.Rmd b62c13e436b3dbde088cc4d90373f632 *inst/doc/ggcoef_model.html 811f81af02614832973666fa2a5939a1 *inst/doc/gglikert.R 4c258131d3a1cbab6792e71ea6b80d06 *inst/doc/gglikert.Rmd 5c5bec56315549097b72abe20d8fb662 *inst/doc/gglikert.html 1b8081a7ad40a9dd366a906798cc7300 *inst/doc/stat_cross.R bceaf8ba6563ed5198d7e8cd325ec352 *inst/doc/stat_cross.Rmd d81dbf6fc7b6b54875faebea27859e43 *inst/doc/stat_cross.html 55d63c39d237804640b959599a466805 *inst/doc/stat_prop.R b5dd71e6e2bce8814cca82cab4b89768 *inst/doc/stat_prop.Rmd c4b431b6850c9125a8e2f4920b10162b *inst/doc/stat_prop.html 06e26e6b115620dae874f56b5ff93f16 *inst/doc/stat_weighted_mean.R 8b364b651c16d66c845af52161f585b4 *inst/doc/stat_weighted_mean.Rmd 25f4d7e585d2686249edc8d285f46bdf *inst/doc/stat_weighted_mean.html 613bdbc4f6d83370ef17432b0635bdb6 *man/augment_chisq_add_phi.Rd a9d7a7ee43e51eb3372041f59ee61114 *man/figures/README-unnamed-chunk-10-1.png a851a47fc2e8fd2d8c24a450416fea42 *man/figures/README-unnamed-chunk-11-1.png d0a4333762e42b0dad1bf8d685bc1dab *man/figures/README-unnamed-chunk-12-1.png 03a5e871242f767ba810099e7eb77129 *man/figures/README-unnamed-chunk-4-1.png 5df2a4bcf3eb0c15407cf553057abfcf *man/figures/README-unnamed-chunk-4-2.png dc2c0c70d6e5d530bf37a20a1bd12fc1 *man/figures/README-unnamed-chunk-5-1.png 1f4d510518fde159e4842543cfa9af6b *man/figures/README-unnamed-chunk-6-1.png 91aafc0b9e155083e9571ad20b60069d *man/figures/README-unnamed-chunk-7-1.png ede7edf7937a5f04052ea0c2514a891d *man/figures/README-unnamed-chunk-8-1.png 3fa90de59c83d7cd641856c6c85c943e *man/figures/README-unnamed-chunk-9-1.png cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 643fa7fb9f8e84a47eb2f3e8d043bfc2 *man/geom_connector.Rd 2d24110dd382ca7e6c2d008397d448aa *man/geom_diverging.Rd a4da99d720baad092466ce249379e206 *man/geom_prop_bar.Rd b603c591b8843c1cc6586630e8779fa4 *man/geom_stripped_rows.Rd 848bbadf3889ef0c44377462b8943b98 *man/ggcascade.Rd 9d903454124800b083789735f23ddc5c *man/ggcoef_model.Rd e9a7f91b3102651dac4736391c9b2779 *man/ggcoef_multicomponents.Rd 078e9c61f04739326994f66457f97385 *man/gglikert.Rd 50314b608b6fe16a9bfdc8c272728f37 *man/ggstats-package.Rd 5466fed55c9dd78cbfdf2bb0be067236 *man/ggsurvey.Rd 0d5b0853293e29eedd5918c79939814c *man/hex_bw.Rd a20fbb6f509575a4b439a0450276857f *man/label_number_abs.Rd 0531b55b43d14c617c0674168b40f357 *man/pal_extender.Rd d34275c502c52dc83c1955e444feb8e8 *man/position_likert.Rd 87de156ae0d0bcc84a9fdd9d45db32e9 *man/round_any.Rd 2e2298b6ad8b5d90a8bdb449786c192f *man/scale_fill_likert.Rd ed4752eb6f36bd29490e9b14dc376a21 *man/signif_stars.Rd 8603105241431f69feef0b333fb0615c *man/stat_cross.Rd 09ec3e329e8a0e4b69934b3eabd90315 *man/stat_prop.Rd b208fe4f8cc954aa6169de20ac2ebb79 *man/stat_weighted_mean.Rd f88cb1037595dee61eb600697abef8ed *man/symmetric_limits.Rd ca70a231c2ab7cd2bfb3ef3ebc63fe4f *man/weighted.median.Rd fe3c22bb256c150ec50eb9fb3ed843a4 *man/weighted.sum.Rd 914243e8386fc3cc9e02d38298231578 *tests/testthat.R afc13ceb061d8c24d427f28fd4d18c29 *tests/testthat/test-geom_connector.R c8934023b24911c991e94922353f4eb4 *tests/testthat/test-geom_stripped.R bcd9107a262cdc35c098b226693246bd *tests/testthat/test-ggcascade.R fc4e56bd17ff62587c03ea95b2efd0d6 *tests/testthat/test-ggcoef_model.R 26031e9093fcf34aeaa25b163f423620 *tests/testthat/test-gglikert.R 3812959b4e98027d513ca7c847644084 *tests/testthat/test-pal_extender.R 43e80b5e24b21a9b7c6a0527bfa6b7be *tests/testthat/test-position_likert.R 383f792813610156d70e663e6617a724 *tests/testthat/test-stat_cross.R 34da266d3f4d60a86298c54953e6af58 *tests/testthat/test-stat_prop.R f8a7684c46ed1db2c95ddc56abb1f044 *tests/testthat/test-stat_weighted_mean.R 06f67c29001808c7d4694200ef0745d7 *tests/testthat/test-utilities.R 27f452d6de483eb39ccb14d2067ecd35 *tests/testthat/test_ggsurvey.R 573285925a93fd66410800c4136d1a56 *vignettes/geom_diverging.Rmd 4a6aa66c4c8e3773242d3085cd000cca *vignettes/ggcoef_model.Rmd 4c258131d3a1cbab6792e71ea6b80d06 *vignettes/gglikert.Rmd bceaf8ba6563ed5198d7e8cd325ec352 *vignettes/stat_cross.Rmd b5dd71e6e2bce8814cca82cab4b89768 *vignettes/stat_prop.Rmd 8b364b651c16d66c845af52161f585b4 *vignettes/stat_weighted_mean.Rmd ggstats/R/0000755000176200001440000000000015122045054012125 5ustar liggesusersggstats/R/vdiffr-helper.R0000644000176200001440000000053715122045054015012 0ustar liggesusersif ( requireNamespace("vdiffr", quietly = TRUE) && utils::packageVersion("testthat") >= "3.0.3" && !identical(Sys.getenv("VDIFFR_RUN_TESTS"), "false") ) { expect_doppelganger <- vdiffr::expect_doppelganger } else { # Otherwise, assign a dummy function expect_doppelganger <- function(...) testthat::skip("vdiffr not run.") } ggstats/R/geom_connector.R0000644000176200001440000001566715122034143015265 0ustar liggesusers#' Connect bars / points #' #' `geom_connector()` is a variation of [ggplot2::geom_step()]. #' Its variant `geom_bar_connector()` is particularly adapted to #' connect bars. #' #' @inheritParams ggplot2::geom_step #' @param width Bar width (see examples). #' @param continuous Should connect segments be continuous? #' @param add_baseline Add connectors at baseline? #' @export #' @examples #' library(ggplot2) #' #' # geom_bar_connector() ----------- #' #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_bar(width = .5) + #' geom_bar_connector(width = .5, linewidth = .25) + #' theme_minimal() + #' theme(legend.position = "bottom") #' #' \donttest{ #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_bar(width = .5) + #' geom_bar_connector( #' width = .5, #' continuous = TRUE, #' colour = "red", #' linetype = "dotted", #' add_baseline = FALSE, #' ) + #' theme(legend.position = "bottom") #' #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_bar(width = .5, position = "fill") + #' geom_bar_connector(width = .5, position = "fill") + #' theme(legend.position = "bottom") #' #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_bar(width = .5, position = "diverging") + #' geom_bar_connector(width = .5, position = "diverging", linewidth = .25) + #' theme(legend.position = "bottom") #' #' # geom_connector() ----------- #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector() + #' geom_point() #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector(continuous = TRUE) + #' geom_point() #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector(continuous = TRUE, width = .3) + #' geom_point() #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector(width = 0) + #' geom_point() #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector(width = Inf) + #' geom_point() #' #' ggplot(mtcars) + #' aes(x = wt, y = mpg, colour = factor(cyl)) + #' geom_connector(width = Inf, continuous = TRUE) + #' geom_point() #' } geom_connector <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity", width = 0.1, continuous = FALSE, na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, ...) { layer( data = data, mapping = mapping, stat = stat, geom = GeomConnector, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = rlang::list2( width = width, continuous = continuous, orientation = orientation, na.rm = na.rm, ... ) ) } #' @rdname geom_connector #' @export geom_bar_connector <- function(mapping = NULL, data = NULL, stat = "prop", position = "stack", width = 0.9, continuous = FALSE, add_baseline = TRUE, na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, ...) { params <- rlang::list2( width = width, continuous = continuous, orientation = orientation, add_baseline = add_baseline, na.rm = na.rm, ... ) if (is.character(stat) && stat == "prop" && !"complete" %in% names(params)) params$complete <- "fill" layer( data = data, mapping = mapping, stat = stat, geom = GeomConnector, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = params ) } #' @rdname geom_connector #' @format NULL #' @usage NULL #' @export GeomConnector <- ggproto( "GeomConnector", ggplot2::GeomPath, setup_params = function(data, params) { params$flipped_aes <- has_flipped_aes(data, params, ambiguous = TRUE) params }, extra_params = c("na.rm", "width", "orientation", "continuous", "add_baseline"), draw_panel = function(data, panel_params, coord, lineend = "butt", linejoin = "round", linemitre = 10, arrow = NULL, width = 0.1, continuous = FALSE, add_baseline = FALSE, flipped_aes = FALSE) { if (!is.numeric(width)) cli::cli_abort( "{.arg width} should be a positive number.", call. = FALSE ) if (width < 0) cli::cli_abort( "{.arg width} should be positive.", call. = FALSE ) data <- flip_data(data, flipped_aes) if (add_baseline) { d0 <- data |> dplyr::filter(.data$group == min(.data$group)) if ("ymin" %in% names(d0) && d0$ymin[1] < 0) { d0$y <- d0$ymin } else { d0$y <- 0 } d0$group <- d0$group - 1 data <- dplyr::bind_rows(d0, data) } data <- data |> by( data$group, connect_points, width = width, continuous = continuous ) |> unclass() |> as.list() |> undim() |> dplyr::bind_rows() data <- flip_data(data, flipped_aes) GeomPath$draw_panel( data, panel_params, coord, lineend = lineend, linejoin = linejoin, linemitre = linemitre, arrow = arrow ) } ) #' Calculate connections for `geom_connector()` #' Used by `GeomConnector()` #' #' @noRd connect_points <- function(data, width = 0.9, continuous = FALSE) { data <- as.data.frame(data)[order(data$x), ] n <- nrow(data) if (n <= 1) { # Need at least one observation return(data[0, , drop = FALSE]) } gaps <- data$x[-1] - data$x[-n] nudge <- pmin(gaps / 2, width / 2) data[["..rank.."]] <- seq_along(data$x) d1 <- data d1[["..order.."]] <- 0 if (!continuous) d1$y <- NA d2 <- data[-1, ] d2[["..order.."]] <- -1 d2$x <- d2$x - nudge d3 <- data[-n, ] d3[["..order.."]] <- 1 d3$x <- d3$x + nudge dplyr::bind_rows(d1, d2, d3) |> dplyr::arrange(.data[["..rank.."]], .data[["..order.."]]) |> dplyr::select(-dplyr::all_of(c("..rank..", "..order.."))) } undim <- function(x) { dim <- dim(x) if (is.null(dim)) { return(x) } dim(x) <- NULL if (length(dim) == 1L && !is.null(rownames(x))) { # Preserve names of 1D arrays names(x) <- rownames(x) } x } ggstats/R/label_number_abs.R0000644000176200001440000000223714527332004015532 0ustar liggesusers#' Label absolute values #' #' @param ... arguments passed to [scales::label_number()] or #' [scales::label_percent()] #' @param hide_below if provided, values below `hide_below` will be masked #' (i.e. an empty string `""` will be returned) #' @returns A "labelling" function, , i.e. a function that takes a vector and #' returns a character vector of same length giving a label for each input #' value. #' @seealso [scales::label_number()], [scales::label_percent()] #' @export #' @examples #' x <- c(-0.2, -.05, 0, .07, .25, .66) #' #' scales::label_number()(x) #' label_number_abs()(x) #' #' scales::label_percent()(x) #' label_percent_abs()(x) #' label_percent_abs(hide_below = .1)(x) label_number_abs <- function(..., hide_below = NULL) { function(x) { res <- scales::label_number(...)(abs(x)) if (!is.null(hide_below)) { res[abs(x) < hide_below] <- "" } res } } #' @rdname label_number_abs #' @export label_percent_abs <- function(..., hide_below = NULL) { function(x) { res <- scales::label_percent(...)(abs(x)) if (!is.null(hide_below)) { res[abs(x) < hide_below] <- "" } res } } ggstats/R/geom_stripped_rows.R0000644000176200001440000001314514674033502016175 0ustar liggesusers#' Alternating Background Color #' #' Add alternating background color along the y-axis. The geom takes default #' aesthetics `odd` and `even` that receive color codes. #' #' @inheritParams ggplot2::layer #' @inheritParams ggplot2::geom_rect #' @param xfrom,xto limitation of the strips along the x-axis #' @param width width of the strips #' @param nudge_x,nudge_y horizontal or vertical adjustment to nudge strips by #' @export #' @return A `ggplot2` plot with the added geometry. #' @examplesIf requireNamespace("reshape") #' data(tips, package = "reshape") #' #' library(ggplot2) #' p <- ggplot(tips) + #' aes(x = time, y = day) + #' geom_count() + #' theme_light() #' #' p #' p + geom_stripped_rows() #' p + geom_stripped_cols() #' p + geom_stripped_rows() + geom_stripped_cols() #' #' \donttest{ #' p <- ggplot(tips) + #' aes(x = total_bill, y = day) + #' geom_count() + #' theme_light() #' p #' p + geom_stripped_rows() #' p + geom_stripped_rows() + scale_y_discrete(expand = expansion(0, 0.5)) #' p + geom_stripped_rows(xfrom = 10, xto = 35) #' p + geom_stripped_rows(odd = "blue", even = "yellow") #' p + geom_stripped_rows(odd = "blue", even = "yellow", alpha = .1) #' p + geom_stripped_rows(odd = "#00FF0022", even = "#FF000022") #' #' p + geom_stripped_cols() #' p + geom_stripped_cols(width = 10) #' p + geom_stripped_cols(width = 10, nudge_x = 5) #' } geom_stripped_rows <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity", ..., show.legend = NA, inherit.aes = TRUE, xfrom = -Inf, xto = Inf, width = 1, nudge_y = 0) { ggplot2::layer( data = data, mapping = mapping, stat = stat, geom = GeomStrippedRows, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list( xfrom = xfrom, xto = xto, width = width, nudge_y = nudge_y, ... ) ) } GeomStrippedRows <- ggplot2::ggproto("GeomStrippedRows", ggplot2::Geom, required_aes = c("y"), default_aes = ggplot2::aes( odd = "#11111111", even = "#00000000", alpha = NA, colour = NA, linetype = "solid", linewidth = .5 ), draw_key = ggplot2::draw_key_rect, draw_panel = function(data, panel_params, coord, xfrom, xto, width = 1, nudge_y = 0) { ggplot2::GeomRect$draw_panel( data |> dplyr::mutate( y = round_any(.data$y, width), ymin = .data$y - width / 2 + nudge_y, ymax = .data$y + width / 2 + nudge_y, xmin = xfrom, xmax = xto ) |> dplyr::select(dplyr::all_of(c( "xmin", "xmax", "ymin", "ymax", "odd", "even", "alpha", "colour", "linetype", "linewidth" ))) |> dplyr::distinct(.data$ymin, .keep_all = TRUE) |> dplyr::arrange(.data$ymin) |> dplyr::mutate( .n = dplyr::row_number(), fill = dplyr::if_else( .data$.n %% 2L == 1L, true = .data$odd, false = .data$even ) ) |> dplyr::select(-dplyr::all_of(c(".n", "odd", "even"))), panel_params, coord ) } ) #' @rdname geom_stripped_rows #' @param yfrom,yto limitation of the strips along the y-axis #' @export geom_stripped_cols <- function(mapping = NULL, data = NULL, stat = "identity", position = "identity", ..., show.legend = NA, inherit.aes = TRUE, yfrom = -Inf, yto = Inf, width = 1, nudge_x = 0) { ggplot2::layer( data = data, mapping = mapping, stat = stat, geom = GeomStrippedCols, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list( yfrom = yfrom, yto = yto, width = width, nudge_x = nudge_x, ... ) ) } GeomStrippedCols <- ggplot2::ggproto("GeomStrippedCols", ggplot2::Geom, required_aes = c("y"), default_aes = ggplot2::aes( odd = "#11111111", even = "#00000000", alpha = NA, colour = NA, linetype = "solid", linewidth = .5 ), draw_key = ggplot2::draw_key_rect, draw_panel = function(data, panel_params, coord, yfrom, yto, width = 1, nudge_x = 0) { ggplot2::GeomRect$draw_panel( data |> dplyr::mutate( x = round_any(.data$x, width), xmin = .data$x - width / 2 + nudge_x, xmax = .data$x + width / 2 + nudge_x, ymin = yfrom, ymax = yto ) |> dplyr::select(dplyr::all_of(c( "xmin", "xmax", "ymin", "ymax", "odd", "even", "alpha", "colour", "linetype", "linewidth" ))) |> dplyr::distinct(.data$xmin, .keep_all = TRUE) |> dplyr::arrange(.data$xmin) |> dplyr::mutate( .n = dplyr::row_number(), fill = dplyr::if_else( .data$.n %% 2L == 1L, true = .data$odd, false = .data$even ) ) |> dplyr::select(-dplyr::all_of(c(".n", "odd", "even"))), panel_params, coord ) } ) ggstats/R/round_any.R0000644000176200001440000000140614600506637014260 0ustar liggesusers#' Round to multiple of any number. # #' @param x numeric or date-time (POSIXct) vector to round #' @param accuracy number to round to; for POSIXct objects, a number of seconds #' @param f rounding function: \code{\link{floor}}, \code{\link{ceiling}} or #' \code{\link{round}} #' @source adapted from `plyr` #' @export #' @examples #' round_any(1.865, accuracy = .25) round_any <- function(x, accuracy, f = round) { UseMethod("round_any") } #' @export round_any.numeric <- function(x, accuracy, f = round) { f(x / accuracy) * accuracy } #' @export round_any.POSIXct <- function(x, accuracy, f = round) { tz <- format(x[1], "%Z") xr <- round_any(as.numeric(x), accuracy, f) as.POSIXct(xr, origin = "1970-01-01 00:00.00 UTC", tz = tz) } ggstats/R/position_likert.R0000644000176200001440000002332414674033502015500 0ustar liggesusers#' Stack objects on top of each another and center them around 0 #' #' `position_diverging()` stacks bars on top of each other and #' center them around zero (the same number of categories are displayed on #' each side). #' `position_likert()` uses proportions instead of counts. This type of #' presentation is commonly used to display Likert-type scales. #' #' #' It is recommended to use `position_likert()` with `stat_prop()` #' and its `complete` argument (see examples). #' #' @param vjust Vertical adjustment for geoms that have a position #' (like points or lines), not a dimension (like bars or areas). Set to #' `0` to align with the bottom, `0.5` for the middle, #' and `1` (the default) for the top. #' @param reverse If `TRUE`, will reverse the default stacking order. #' This is useful if you're rotating both the plot and legend. #' @param exclude_fill_values Vector of values from the variable associated with #' the `fill` aesthetic that should not be displayed (but still taken into #' account for computing proportions) #' @param cutoff number of categories to be displayed negatively (i.e. on the #' left of the x axis or the bottom of the y axis), could be a decimal value: #' `2` to display negatively the two first categories, `2.5` to display #' negatively the two first categories and half of the third, `2.2` to display #' negatively the two first categories and a fifth of the third (see examples). #' By default (`NULL`), it will be equal to the number of categories divided #' by 2, i.e. it will be centered. #' @seealso See [ggplot2::position_stack()] and [ggplot2::position_fill()] #' @export #' @examples #' library(ggplot2) #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "fill") + #' scale_x_continuous(label = scales::label_percent()) + #' xlab("proportion") #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert") + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert() + #' xlab("proportion") #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "stack") + #' scale_fill_likert(pal = scales::brewer_pal(palette = "PiYG")) #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "diverging") + #' scale_x_continuous(label = label_number_abs()) + #' scale_fill_likert() #' #' \donttest{ #' # Reverse order ------------------------------------------------------------- #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = position_likert(reverse = TRUE)) + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert() + #' xlab("proportion") #' #' # Custom center ------------------------------------------------------------- #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = position_likert(cutoff = 1)) + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert(cutoff = 1) + #' xlab("proportion") #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = position_likert(cutoff = 3.75)) + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert(cutoff = 3.75) + #' xlab("proportion") #' #' # Missing items ------------------------------------------------------------- #' # example with a level not being observed for a specific value of y #' d <- diamonds #' d <- d[!(d$cut == "Premium" & d$clarity == "I1"), ] #' d <- d[!(d$cut %in% c("Fair", "Good") & d$clarity == "SI2"), ] #' #' # by default, the two lowest bar are not properly centered #' ggplot(d) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert") + #' scale_fill_likert() #' #' # use stat_prop() with `complete = "fill"` to fix it #' ggplot(d) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert", stat = "prop", complete = "fill") + #' scale_fill_likert() #' #' # Add labels ---------------------------------------------------------------- #' #' custom_label <- function(x) { #' p <- scales::percent(x, accuracy = 1) #' p[x < .075] <- "" #' p #' } #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert") + #' geom_text( #' aes(by = clarity, label = custom_label(after_stat(prop))), #' stat = "prop", #' position = position_likert(vjust = .5) #' ) + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert() + #' xlab("proportion") #' #' # Do not display specific fill values --------------------------------------- #' # (but taken into account to compute proportions) #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = position_likert(exclude_fill_values = "Very Good")) + #' scale_x_continuous(label = label_percent_abs()) + #' scale_fill_likert() + #' xlab("proportion") #' } position_likert <- function(vjust = 1, reverse = FALSE, exclude_fill_values = NULL, cutoff = NULL) { ggplot2::ggproto( NULL, PositionLikert, vjust = vjust, reverse = reverse, exclude_fill_values = exclude_fill_values, cutoff = cutoff ) } #' @export #' @rdname position_likert position_diverging <- function(vjust = 1, reverse = FALSE, exclude_fill_values = NULL, cutoff = NULL) { ggplot2::ggproto( NULL, PositionDiverging, vjust = vjust, reverse = reverse, exclude_fill_values = exclude_fill_values, cutoff = cutoff ) } #' @rdname position_likert #' @format NULL #' @usage NULL #' @export PositionLikert <- ggplot2::ggproto("PositionLikert", Position, type = NULL, vjust = 1, fill = TRUE, exclude_fill_values = NULL, cutoff = NULL, reverse = FALSE, setup_params = function(self, data) { flipped_aes <- ggplot2::has_flipped_aes(data) data <- ggplot2::flip_data(data, flipped_aes) list( var = self$var %||% likert_var(data), fill = self$fill, vjust = self$vjust, reverse = self$reverse, exclude_fill_values = self$exclude_fill_values, cutoff = self$cutoff, flipped_aes = flipped_aes ) }, setup_data = function(self, data, params) { data <- ggplot2::flip_data(data, params$flipped_aes) if (is.null(params$var)) { return(data) } if (!"ymin" %in% names(data)) data$ymin <- 0 data$ymax <- switch(params$var, y = data$y, ymax = as.numeric(ifelse(data$ymax == 0, data$ymin, data$ymax)) ) data <- ggplot2::remove_missing( data, vars = c("x", "xmin", "xmax", "y"), name = "position_likert" ) ggplot2::flip_data(data, params$flipped_aes) }, compute_panel = function(data, params, scales) { data <- ggplot2::flip_data(data, params$flipped_aes) if (is.null(params$var)) { return(data) } negative <- data$ymax < 0 negative[is.na(negative)] <- FALSE if (any(negative)) { cli::cli_abort("{.fn position_liker} does not work with negative values") } data <- data |> tidyr::nest(.by = "x", .key = "d") |> dplyr::mutate( d = purrr::map( .data$d, function(x) { pos_likert( x, vjust = params$vjust, fill = params$fill, reverse = params$reverse, exclude_fill_values = params$exclude_fill_values, cutoff = params$cutoff ) } ) ) |> tidyr::unnest(cols = "d") ggplot2::flip_data(data, params$flipped_aes) } ) pos_likert <- function(df, vjust = 1, fill = FALSE, reverse = FALSE, exclude_fill_values = NULL, cutoff = NULL) { if (reverse) { df <- df[nrow(df):1, ] # nolint } if (fill) { df$y <- df$y / sum(abs(df$y), na.rm = TRUE) } # Values to be excluded after computation of proportions if (!is.null(exclude_fill_values) && "fill" %in% names(df)) { exclude <- df$fill %in% exclude_fill_values df <- df[!exclude, ] } n <- nrow(df) + 1 y <- ifelse(is.na(df$y), 0, df$y) heights <- c(0, cumsum(y)) df$ymin <- pmin(heights[-n], heights[-1]) df$ymax <- pmax(heights[-n], heights[-1]) df$y <- (1 - vjust) * df$ymin + vjust * df$ymax # Now, we have to center the results if (is.null(cutoff)) cutoff <- nrow(df) / 2 if (cutoff < 0) cli::cli_abort("{.arg cutoff} cannot be negative.") if (cutoff > nrow(df)) cli::cli_abort( "{.arg cutoff} cannot be higher than the number of categories." ) if (cutoff == nrow(df)) { y_adjust <- df$ymax[cutoff] } else if (cutoff < 1) { y_adjust <- cutoff * df$ymax[1] } else { y_adjust <- df$ymax[cutoff %/% 1] + cutoff %% 1 * (df$ymax[cutoff %/% 1 + 1] - df$ymax[cutoff %/% 1]) } df$y <- df$y - y_adjust df$ymin <- df$ymin - y_adjust df$ymax <- df$ymax - y_adjust df } #' @rdname position_likert #' @format NULL #' @usage NULL #' @export PositionDiverging <- ggproto("PositionDiverging", PositionLikert, fill = FALSE ) likert_var <- function(data) { if (!is.null(data$ymax)) { "ymax" } else if (!is.null(data$y)) { "y" } else { cli::cli_warn(c( "Stacking requires either the {.field ymin} {.emph and} {.field ymin}", "or the {.field y} aesthetics", "i" = "Maybe you want {.code position = \"identity\"}?" )) NULL } } ggstats/R/deprecated.R0000644000176200001440000003050215122045301014343 0ustar liggesusers#' Deprecated functions #' #' `r lifecycle::badge("deprecated")` #' #' @inheritParams ggcoef_model #' @param component_col name of the component column #' @param component_label an optional named vector for labeling components #' @export ggcoef_multicomponents <- function( model, type = c("dodged", "faceted", "table"), component_col = "component", component_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_witdhs = c(3, 2), ...) { type <- match.arg(type) lifecycle::deprecate_stop( when = "0.9.0", what = "ggcoef_multicomponents()", with = type |> switch( "dodged" = "ggcoef_dodged()", "faceted" = "ggcoef_faceted()", "table" = "ggcoef_table()" ) ) if (return_data && type == "table") type <- "faceted" if (type %in% c("dodged", "faceted")) { res <- ggcoef_multi_d_f( model = model, type = type, component_col = component_col, component_label = component_label, tidy_fun = tidy_fun, tidy_args = tidy_args, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels, return_data = return_data, ... ) } else { res <- ggcoef_multi_t( model = model, type = type, component_col = component_col, component_label = component_label, tidy_fun = tidy_fun, tidy_args = tidy_args, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels, ... ) } res } # dodged & faceted version ggcoef_multi_d_f <- function( model, type = c("dodged", "faceted"), component_col = "component", component_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, ...) { component_label_arg <- attr(model, "component_label_arg") if (is.null(component_label_arg)) component_label_arg <- "component_label" data <- ggcoef_data( model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels ) if (!component_col %in% names(data)) data[[component_col]] <- " " data[[component_col]] <- .in_order(data[[component_col]]) if (!is.null(component_label)) { if ( is.null(names(component_label)) || any(names(component_label) == "") ) { cli::cli_abort( "All elements of {.arg {component_label_arg}} should be named." ) } keep <- names(component_label) %in% levels(data[[component_col]]) drop <- component_label[!keep] if (length(drop) > 0) { cli::cli_alert_warning(c( "Error in {.arg {component_label_arg}}:\n", "value{?s} {.strong {drop}} not found in the data and ignored." )) } component_label <- component_label[keep] missing_levels <- setdiff( levels(.in_order(data[[component_col]])), names(component_label) ) names(missing_levels) <- missing_levels data[[component_col]] <- factor( data[[component_col]], levels = c(names(component_label), missing_levels), labels = c(component_label, missing_levels) ) } if (return_data) { return(data) } type <- match.arg(type) args <- list(...) args$data <- data args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (type == "dodged") { if (!"dodged " %in% names(args)) { args$dodged <- TRUE } if (!"colour" %in% names(args)) { args$colour <- component_col } if (!"errorbar_coloured" %in% names(args)) { args$errorbar_coloured <- TRUE } } else { if (!"facet_col" %in% names(args)) { args$facet_col <- component_col } if (!"colour" %in% names(args) && !all(is.na(data$var_label))) { args$colour <- "var_label" if (!"colour_guide" %in% names(args)) { args$colour_guide <- FALSE } } } do.call(ggcoef_plot, args) } # table version ggcoef_multi_t <- function( model, type = c("table"), component_col = "component", component_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_witdhs = c(3, 2), ...) { type <- match.arg(type) component_label_arg <- attr(model, "component_label_arg") if (is.null(component_label_arg)) component_label_arg <- "component_label" data <- ggcoef_data( model = model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels ) if (!component_col %in% names(data)) data[[component_col]] <- " " data[[component_col]] <- .in_order(data[[component_col]]) if (!is.null(component_label)) { if ( is.null(names(component_label)) || any(names(component_label) == "") ) { cli::cli_abort( "All elements of {.arg {component_label_arg}} should be named." ) } keep <- names(component_label) %in% levels(data[[component_col]]) drop <- component_label[!keep] if (length(drop) > 0) { cli::cli_alert_warning(c( "Error in {.arg {component_label_arg}}:\n", "value{?s} {.strong {drop}} not found in the data and ignored." )) } component_label <- component_label[keep] missing_levels <- setdiff( levels(.in_order(data[[component_col]])), names(component_label) ) names(missing_levels) <- missing_levels data[[component_col]] <- factor( data[[component_col]], levels = c(names(component_label), missing_levels), labels = c(component_label, missing_levels) ) } res <- levels(data[[component_col]]) |> purrr::map( ~ ggcoef_table( data = dplyr::filter(data, .data[[component_col]] == .x), plot_title = .x, model = model, tidy_fun = tidy_fun, tidy_args = tidy_args, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels, show_p_values = FALSE, signif_stars = FALSE, table_stat = table_stat, table_header = table_header, table_text_size = table_text_size, table_stat_label = table_stat_label, ci_pattern = ci_pattern, table_witdhs = table_witdhs ) ) patchwork::wrap_plots(res, ncol = 1) } #' @rdname ggcoef_multicomponents #' @param y.level_label an optional named vector for labeling `y.level` #' (see examples) #' @export ggcoef_multinom <- function( model, type = c("dodged", "faceted", "table"), y.level_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_witdhs = c(3, 2), ...) { type <- match.arg(type) lifecycle::deprecate_stop( when = "0.9.0", what = "ggcoef_multicomponents()", with = type |> switch( "dodged" = "ggcoef_dodged()", "faceted" = "ggcoef_faceted()", "table" = "ggcoef_table()" ) ) attr(model, "component_label_arg") <- "y.level_label" ggcoef_multicomponents( model = model, type = type, component_col = "y.level", component_label = y.level_label, tidy_fun = tidy_fun, tidy_args = tidy_args, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels, return_data = return_data, table_stat = table_stat, table_header = table_header, table_text_size = table_text_size, table_stat_label = table_stat_label, ci_pattern = ci_pattern, table_witdhs = table_witdhs, ... ) } ggstats/R/stat_cross.R0000644000176200001440000001574314674033502014454 0ustar liggesusers#' Compute cross-tabulation statistics #' #' Computes statistics of a 2-dimensional matrix using [broom::augment.htest]. #' #' @inheritParams ggplot2::stat_identity #' @param geom Override the default connection with #' [ggplot2::geom_point()]. #' @param na.rm If `TRUE`, the default, missing values are #' removed with a warning. #' If `TRUE`, missing values are silently removed. #' @param keep.zero.cells If `TRUE`, cells with no observations are kept. #' @section Aesthetics: #' `stat_cross()` requires the **x** and the **y** aesthetics. #' @section Computed variables: #' \describe{ #' \item{observed}{number of observations in x,y} #' \item{prop}{proportion of total} #' \item{row.prop}{row proportion} #' \item{col.prop}{column proportion} #' \item{expected}{expected count under the null hypothesis} #' \item{resid}{Pearson's residual} #' \item{std.resid}{standardized residual} #' \item{row.observed}{total number of observations within row} #' \item{col.observed}{total number of observations within column} #' \item{total.observed}{total number of observations within the table} #' \item{phi}{phi coefficients, see [augment_chisq_add_phi()]} #' } #' #' @export #' @return A `ggplot2` plot with the added statistic. #' @seealso `vignette("stat_cross")` #' @examples #' library(ggplot2) #' d <- as.data.frame(Titanic) #' #' # plot number of observations #' ggplot(d) + #' aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + #' stat_cross() + #' scale_size_area(max_size = 20) #' #' # custom shape and fill colour based on chi-squared residuals #' ggplot(d) + #' aes( #' x = Class, y = Survived, weight = Freq, #' size = after_stat(observed), fill = after_stat(std.resid) #' ) + #' stat_cross(shape = 22) + #' scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + #' scale_size_area(max_size = 20) #' #' \donttest{ #' # custom shape and fill colour based on phi coeffients #' ggplot(d) + #' aes( #' x = Class, y = Survived, weight = Freq, #' size = after_stat(observed), fill = after_stat(phi) #' ) + #' stat_cross(shape = 22) + #' scale_fill_steps2(show.limits = TRUE) + #' scale_size_area(max_size = 20) #' #' #' # plotting the number of observations as a table #' ggplot(d) + #' aes( #' x = Class, y = Survived, weight = Freq, label = after_stat(observed) #' ) + #' geom_text(stat = "cross") #' #' # Row proportions with standardized residuals #' ggplot(d) + #' aes( #' x = Class, y = Survived, weight = Freq, #' label = scales::percent(after_stat(row.prop)), #' size = NULL, fill = after_stat(std.resid) #' ) + #' stat_cross(shape = 22, size = 30) + #' geom_text(stat = "cross") + #' scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + #' facet_grid(Sex ~ .) + #' labs(fill = "Standardized residuals") + #' theme_minimal() #' } stat_cross <- function(mapping = NULL, data = NULL, geom = "point", position = "identity", ..., na.rm = TRUE, show.legend = NA, inherit.aes = TRUE, keep.zero.cells = FALSE) { params <- list( na.rm = na.rm, keep.zero.cells = keep.zero.cells, ... ) layer( data = data, mapping = mapping, stat = StatCross, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = params ) } #' @rdname stat_cross #' @format NULL #' @usage NULL #' @export StatCross <- ggplot2::ggproto( "StatCross", ggplot2::Stat, required_aes = c("x", "y"), default_aes = ggplot2::aes(weight = 1), setup_params = function(data, params) { params }, extra_params = c("na.rm"), compute_panel = function(self, data, scales, keep.zero.cells = FALSE) { if (is.null(data$weight)) { data$weight <- rep(1, nrow(data)) } # compute cross statistics panel <- augment_chisq_add_phi( chisq.test(xtabs(weight ~ y + x, data = data)) ) panel_names <- names(panel) for (to_name in c( "observed", "prop", "row.prop", "col.prop", "expected", "resid", "std.resid", "row.observed", "col.observed", "total.observed", "phi" )) { from_name <- paste0(".", to_name) panel_names[which(panel_names == from_name)] <- to_name } names(panel) <- panel_names # to handle the fact that ggplot2 could transform factors into integers # before computation of the statistic if (is.numeric(data$x)) panel$x <- as.numeric(panel$x) if (is.numeric(data$y)) panel$y <- as.numeric(panel$y) # keeping first value of other aesthetics in data panel <- merge( panel, dplyr::select(data, -dplyr::all_of("PANEL")), by = c("x", "y"), all.x = TRUE ) panel <- panel |> dplyr::distinct(.data$x, .data$y, .keep_all = TRUE) if (!keep.zero.cells) { panel <- panel[panel$observed != 0, ] } panel } ) # Compute phi coefficients # see psych::phi() and GDAtools::phi.table() .compute_phi <- function(.prop, .row.observed, .col.observed, .total.observed) { rp <- .row.observed / .total.observed cp <- .col.observed / .total.observed (.prop - rp * cp) / sqrt(rp * (1 - rp) * cp * (1 - cp)) } #' Augment a chi-squared test and compute phi coefficients #' @details #' Phi coefficients are a measurement of the degree of association #' between two binary variables. #' #' - A value between -1.0 to -0.7 indicates a strong negative association. #' - A value between -0.7 to -0.3 indicates a weak negative association. #' - A value between -0.3 to +0.3 indicates a little or no association. #' - A value between +0.3 to +0.7 indicates a weak positive association. #' - A value between +0.7 to +1.0 indicates a strong positive association. #' @export #' @param x a chi-squared test as returned by [stats::chisq.test()] #' @return A `tibble`. #' @seealso [stat_cross()], `GDAtools::phi.table()` or `psych::phi()` #' @examples #' tab <- xtabs(Freq ~ Sex + Class, data = as.data.frame(Titanic)) #' augment_chisq_add_phi(chisq.test(tab)) augment_chisq_add_phi <- function(x) { if (!inherits(x, "htest") && names(x$statistic) != "X-squared") { cli::cli_abort(paste( "{.arg x} should be the result of a chi-squared test", "(see {.fn stats::chisq.test})." )) } broom::augment(x) |> dplyr::group_by(dplyr::across(1)) |> dplyr::mutate(.row.observed = sum(.data$.observed)) |> dplyr::group_by(dplyr::across(2)) |> dplyr::mutate(.col.observed = sum(.data$.observed)) |> dplyr::ungroup() |> dplyr::mutate( .total.observed = sum(.data$.observed), .phi = .compute_phi( .data$.prop, .data$.row.observed, .data$.col.observed, .data$.total.observed ) ) } ggstats/R/stat_weighted_mean.R0000644000176200001440000000676215031230123016106 0ustar liggesusers#' Compute weighted y mean #' #' This statistic will compute the mean of **y** aesthetic for #' each unique value of **x**, taking into account **weight** #' aesthetic if provided. #' #' @section Computed variables: #' \describe{ #' \item{y}{weighted y (numerator / denominator)} #' \item{numerator}{numerator} #' \item{denominator}{denominator} #' } #' #' @inheritParams ggplot2::stat_bin #' @param geom Override the default connection with [ggplot2::geom_point()]. #' @seealso `vignette("stat_weighted_mean")` #' @export #' @return A `ggplot2` plot with the added statistic. #' @examplesIf requireNamespace("reshape") #' library(ggplot2) #' #' data(tips, package = "reshape") #' #' ggplot(tips) + #' aes(x = day, y = total_bill) + #' geom_point() #' #' ggplot(tips) + #' aes(x = day, y = total_bill) + #' stat_weighted_mean() #' #' \donttest{ #' ggplot(tips) + #' aes(x = day, y = total_bill, group = 1) + #' stat_weighted_mean(geom = "line") #' #' ggplot(tips) + #' aes(x = day, y = total_bill, colour = sex, group = sex) + #' stat_weighted_mean(geom = "line") #' #' ggplot(tips) + #' aes(x = day, y = total_bill, fill = sex) + #' stat_weighted_mean(geom = "bar", position = "dodge") #' #' # computing a proportion on the fly #' if (requireNamespace("scales")) { #' ggplot(tips) + #' aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + #' stat_weighted_mean(geom = "bar", position = "dodge") + #' scale_y_continuous(labels = scales::percent) #' } #' } #' @examplesIf requireNamespace("scales") #' library(ggplot2) #' #' # taking into account some weights #' d <- as.data.frame(Titanic) #' ggplot(d) + #' aes( #' x = Class, y = as.integer(Survived == "Yes"), #' weight = Freq, fill = Sex #' ) + #' geom_bar(stat = "weighted_mean", position = "dodge") + #' scale_y_continuous(labels = scales::percent) + #' labs(y = "Survived") stat_weighted_mean <- function(mapping = NULL, data = NULL, geom = "point", position = "identity", ..., na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE) { layer( data = data, mapping = mapping, stat = StatWeightedMean, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list( na.rm = na.rm, orientation = orientation, ... ) ) } #' @rdname stat_weighted_mean #' @format NULL #' @usage NULL #' @export StatWeightedMean <- ggplot2::ggproto( "StatSummary", ggplot2::Stat, required_aes = c("x", "y"), extra_params = c("na.rm", "orientation"), setup_params = function(data, params) { params$flipped_aes <- ggplot2::has_flipped_aes(data, params) params }, compute_panel = function(data, scales, na.rm = FALSE, flipped_aes = FALSE) { data <- ggplot2::flip_data(data, flipped_aes) if (is.null(data$weight)) { data$weight <- rep(1, nrow(data)) } summarised <- aggregate( cbind(numerator = y * weight, denominator = weight) ~ ., data, FUN = sum, na.rm = TRUE ) summarised$y <- summarised$numerator / summarised$denominator summarised$flipped_aes <- flipped_aes ggplot2::flip_data(summarised, flipped_aes) } ) ggstats/R/weighted_sum.R0000644000176200001440000000055314674033502014745 0ustar liggesusers#' Weighted Sum #' #' @param x a numeric vector of values #' @param w a numeric vector of weights #' @param na.rm a logical indicating whether to ignore `NA` values #' @returns A numeric vector. #' @export #' @examples #' x <- 1:20 #' w <- runif(20) #' weighted.sum(x, w) weighted.sum <- function(x, w, na.rm = TRUE) { sum(x * w, na.rm = na.rm) } ggstats/R/geom_diverging.R0000644000176200001440000001213514702251241015237 0ustar liggesusers#' Geometries for diverging bar plots #' #' These geometries are variations of [ggplot2::geom_bar()] and #' [ggplot2::geom_text()] but provides different set of default values. #' #' - `geom_diverging()` is designed for stacked diverging bar plots, using #' [position_diverging()]. #' - `geom_likert()` is designed for Likert-type items. Using #' [position_likert()] (each bar sums to 100%). #' - `geom_pyramid()` is similar to `geom_diverging()` but uses #' proportions of the total instead of counts. #' #' To add labels on the bar plots, simply use `geom_diverging_text()`, #' `geom_likert_text()`, or `geom_pyramid_text()`. #' #' All these geometries relies on [stat_prop()]. #' #' @param mapping Optional set of aesthetic mappings. #' @param data The data to be displayed in this layers. #' @param position A position adjustment to use on the data for this layer. #' @param ... Other arguments passed on to [`ggplot2::geom_bar()`] #' @param complete An aesthetic for those unobserved values should be completed, #' see [`stat_prop()`]. #' @param default_by Name of an aesthetic determining denominators by default, #' see [`stat_prop()`]. #' @export #' @examples #' library(ggplot2) #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_diverging() #' #' ggplot(diamonds) + #' aes(x = clarity, fill = cut) + #' geom_diverging(position = position_diverging(cutoff = 4)) #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_likert() + #' geom_likert_text() #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_likert() + #' geom_likert_text( #' aes( #' label = label_percent_abs(accuracy = 1, hide_below = .10)( #' after_stat(prop) #' ), #' colour = after_scale(hex_bw(.data$fill)) #' ) #' ) #' #' d <- Titanic |> as.data.frame() #' #' ggplot(d) + #' aes(y = Class, fill = Sex, weight = Freq) + #' geom_diverging() + #' geom_diverging_text() #' #' ggplot(d) + #' aes(y = Class, fill = Sex, weight = Freq) + #' geom_pyramid() + #' geom_pyramid_text() geom_diverging <- function(mapping = NULL, data = NULL, position = "diverging", ..., complete = "fill", default_by = "total") { ggplot2::geom_bar( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropCount, ... ) } #' @rdname geom_diverging #' @export geom_likert <- function(mapping = NULL, data = NULL, position = "likert", ..., complete = "fill", default_by = "x") { ggplot2::geom_bar( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, ... ) } #' @rdname geom_diverging #' @export geom_pyramid <- function(mapping = NULL, data = NULL, position = "diverging", ..., complete = NULL, default_by = "total") { ggplot2::geom_bar( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, ... ) } #' @rdname geom_diverging #' @export geom_diverging_text <- function(mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_diverging(0.5), ..., complete = "fill", default_by = "total") { ggplot2::geom_text( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropCount, ... ) } #' @rdname geom_diverging #' @export geom_likert_text <- function(mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_likert(0.5), ..., complete = "fill", default_by = "x") { ggplot2::geom_text( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, ... ) } #' @rdname geom_diverging #' @export geom_pyramid_text <- function(mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_diverging(0.5), ..., complete = NULL, default_by = "total") { ggplot2::geom_text( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, ... ) } ggstats/R/pal_extender.R0000644000176200001440000000427714625277577014764 0ustar liggesusers#' Extend a discrete colour palette #' #' If the palette returns less colours than requested, the list of colours #' will be expanded using [scales::pal_gradient_n()]. To be used with a #' sequential or diverging palette. Not relevant for qualitative palettes. #' #' @param pal A palette function, such as returned by [scales::brewer_pal], #' taking a number of colours as entry and returning a list of colours. #' @return A palette function. #' @export #' @examples #' pal <- scales::pal_brewer(palette = "PiYG") #' scales::show_col(pal(16)) #' scales::show_col(pal_extender(pal)(16)) pal_extender <- function(pal = scales::brewer_pal(palette = "BrBG")) { function(n) { cols <- suppressWarnings( stats::na.omit(pal(n)) ) if (length(cols) <= n) { cols <- scales::pal_gradient_n(cols)(seq(0, 1, length.out = n)) } cols } } #' @rdname pal_extender #' @param name The name of the scale. Used as the axis or legend title. #' If `waiver()`, the default, the name of the scale is taken from the first #' mapping used for that aesthetic. If `NULL`, the legend title will be omitted. #' @param ... Other arguments passed on to `discrete_scale()` to control name, #' limits, breaks, labels and so forth. #' @param aesthetics Character string or vector of character strings listing #' the name(s) of the aesthetic(s) that this scale works with. This can be #' useful, for example, to apply colour settings to the colour and fill #' aesthetics at the same time, via `aesthetics = c("colour", "fill")`. #' @export scale_fill_extended <- function(name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), aesthetics = "fill") { ggplot2::discrete_scale( aesthetics, name = name, palette = pal_extender(pal = pal), ... ) } #' @rdname pal_extender #' @export scale_colour_extended <- function(name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), aesthetics = "colour") { ggplot2::discrete_scale( aesthetics, name = name, palette = pal_extender(pal = pal), ... ) } ggstats/R/weighted_quantile.R0000644000176200001440000000701515122034143015752 0ustar liggesusers#' Weighted Median and Quantiles #' #' Compute the median or quantiles a set of numbers which have weights #' associated with them. #' #' @param x a numeric vector of values #' @param w a numeric vector of weights #' @param probs probabilities for which the quantiles should be computed, a #' numeric vector of values between 0 and 1 #' @param na.rm a logical indicating whether to ignore `NA` values #' @param type Integer specifying the rule for calculating the median or #' quantile, corresponding to the rules available for `stats:quantile()`. #' The only valid choices are type=1, 2 or 4. See Details. #' @details #' The `i`th observation `x[i]` is treated as having a weight proportional to #' `w[i]`. #' #' The weighted median is a value `m` such that the total weight of data less #' than or equal to `m` is equal to half the total weight. More generally, the #' weighted quantile with probability `p` is a value `q` such that the total #' weight of data less than or equal to `q` is equal to `p` times the total #' weight. #' #' If there is no such value, then #' #' - if `type = 1`, the next largest value is returned (this is the #' right-continuous inverse of the left-continuous cumulative distribution #' function); #' - if `type = 2`, the average of the two surrounding values is returned #' (the average of the right-continuous and left-continuous inverses); #' - if `type = 4`, linear interpolation is performed. #' #' Note that the default rule for `weighted.median()` is `type = 2`, consistent #' with the traditional definition of the median, while the default for #' `weighted.quantile()` is `type = 4`. #' @source These functions are adapted from their homonyms developed by Adrian #' Baddeley in the `spatstat` package. #' @returns A numeric vector. #' @export #' @examples #' x <- 1:20 #' w <- runif(20) #' weighted.median(x, w) #' weighted.quantile(x, w) weighted.median <- function(x, w, na.rm = TRUE, type = 2) { unname(weighted.quantile(x, probs = 0.5, w = w, na.rm = na.rm, type = type )) } #' @export #' @rdname weighted.median weighted.quantile <- function(x, w, probs = seq(0, 1, 0.25), na.rm = TRUE, type = 4) { x <- as.numeric(as.vector(x)) w <- as.numeric(as.vector(w)) if (length(x) == 0) { cli::cli_abort("No data given") } stopifnot(length(x) == length(w)) if (is.na(m <- match(type, c(1, 2, 4)))) { cli::cli_abort("Argument 'type' must equal 1, 2 or 4", call. = FALSE) } type <- c(1, 2, 4)[m] if (anyNA(x) || anyNA(w)) { ok <- !(is.na(x) | is.na(w)) x <- x[ok] w <- w[ok] } if (length(x) == 0) { cli::cli_abort("At least one non-NA value is required") } stopifnot(all(w >= 0)) if (all(w == 0)) { cli::cli_abort("All weights are zero", call. = FALSE) } oo <- order(x) x <- x[oo] w <- w[oo] Fx <- cumsum(w) / sum(w) if (length(x) > 1) { out <- switch(as.character(type), `1` = stats::approx(Fx, x, xout = probs, ties = "ordered", rule = 2, method = "constant", f = 1 ), `2` = stats::approx(Fx, x, xout = probs, ties = "ordered", rule = 2, method = "constant", f = 1 / 2 ), `4` = stats::approx(Fx, x, xout = probs, ties = "ordered", rule = 2, method = "linear" ) ) result <- out$y } else { result <- rep.int(x, length(probs)) } names(result) <- paste0( format(100 * probs, trim = TRUE), "%" ) result } ggstats/R/scale_fill_likert.R0000644000176200001440000000557714657065717015761 0ustar liggesusers#' Colour scale for Likert-type plots #' #' This scale is similar to other diverging discrete colour scales, but allows #' to change the "center" of the scale using `cutoff` argument, as used by #' [position_likert()]. #' #' @param name The name of the scale. Used as the axis or legend title. #' If `waiver()`, the default, the name of the scale is taken from the first #' mapping used for that aesthetic. If `NULL`, the legend title will be omitted. #' @param ... Other arguments passed on to `discrete_scale()` to control name, #' limits, breaks, labels and so forth. #' @param pal A palette function taking a number of colours as entry and #' returning a list of colours (see examples), ideally a diverging palette #' @param cutoff Number of categories displayed negatively (see #' [position_likert()]) and therefore changing the center of the colour scale #' (see examples). #' @param aesthetics Character string or vector of character strings listing #' the name(s) of the aesthetic(s) that this scale works with. This can be #' useful, for example, to apply colour settings to the colour and fill #' aesthetics at the same time, via `aesthetics = c("colour", "fill")`. #' @examples #' library(ggplot2) #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert") + #' scale_x_continuous(label = label_percent_abs()) + #' xlab("proportion") #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = "likert") + #' scale_x_continuous(label = label_percent_abs()) + #' xlab("proportion") + #' scale_fill_likert() #' #' ggplot(diamonds) + #' aes(y = clarity, fill = cut) + #' geom_bar(position = position_likert(cutoff = 1)) + #' scale_x_continuous(label = label_percent_abs()) + #' xlab("proportion") + #' scale_fill_likert(cutoff = 1) #' @export scale_fill_likert <- function(name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), cutoff = NULL, aesthetics = "fill") { ggplot2::discrete_scale( aesthetics, name = name, palette = likert_pal(pal = pal, cutoff = cutoff), ... ) } #' @rdname scale_fill_likert #' @export likert_pal <- function(pal = scales::brewer_pal(palette = "BrBG"), cutoff = NULL) { function(n) { if (is.null(cutoff)) cutoff <- n / 2 if (cutoff < 0) cli::cli_abort("{.arg cutoff} should be positive.") if (cutoff > n) cli::cli_abort( "{.arg cutoff} higher than the number of requested colours." ) left <- floor(cutoff) center <- cutoff %% 1 > 0 right <- n - ceiling(cutoff) nc <- 2 * max(left, right) + center # needed colors cols <- pal_extender(pal = pal)(nc) if (left <= right) { cols[(nc - n + 1):nc] } else { cols[1:n] } } } ggstats/R/ggsurvey.R0000644000176200001440000000334314415524646014143 0ustar liggesusers#' Easy ggplot2 with survey objects #' #' A function to facilitate `ggplot2` graphs using a survey object. #' It will initiate a ggplot and map survey weights to the #' corresponding aesthetic. #' #' Graphs will be correct as long as only weights are required #' to compute the graph. However, statistic or geometry requiring #' correct variance computation (like [ggplot2::geom_smooth()]) will #' be statistically incorrect. #' #' @param design A survey design object, usually created with #' [survey::svydesign()] #' @param mapping Default list of aesthetic mappings to use for plot, #' to be created with [ggplot2::aes()]. #' @param ... Other arguments passed on to methods. Not currently used. #' @importFrom stats weights #' @return A `ggplot2` plot. #' @export #' @examplesIf requireNamespace("survey") #' data(api, package = "survey") #' dstrat <- survey::svydesign( #' id = ~1, strata = ~stype, #' weights = ~pw, data = apistrat, #' fpc = ~fpc #' ) #' ggsurvey(dstrat) + #' ggplot2::aes(x = cnum, y = dnum) + #' ggplot2::geom_count() #' #' d <- as.data.frame(Titanic) #' dw <- survey::svydesign(ids = ~1, weights = ~Freq, data = d) #' ggsurvey(dw) + #' ggplot2::aes(x = Class, fill = Survived) + #' ggplot2::geom_bar(position = "fill") ggsurvey <- function(design = NULL, mapping = NULL, ...) { if (!inherits(design, "survey.design")) { cli::cli_abort("{.var design} should be a {.cls survey.design} object.") } rlang::check_installed("survey") data <- design$variables data$.weights <- weights(design) if (is.null(mapping)) { mapping <- ggplot2::aes() } mapping$weight <- ggplot2::aes(weight = .data[[".weights"]])$weight ggplot2::ggplot(data, mapping, ...) } ggstats/R/ggcoef_model.R0000644000176200001440000011750415122045337014676 0ustar liggesusers#' Plot model coefficients #' #' `ggcoef_model()`, `ggcoef_table()`, `ggcoef_dodged()`, #' `ggcoef_faceted()` and `ggcoef_compare()` #' use [broom.helpers::tidy_plus_plus()] #' to obtain a `tibble` of the model coefficients, #' apply additional data transformation and then pass the #' produced `tibble` to `ggcoef_plot()` to generate the plot. #' #' For more control, you can use the argument `return_data = TRUE` to #' get the produced `tibble`, apply any transformation of your own and #' then pass your customized `tibble` to `ggcoef_plot()`. #' @inheritParams broom.helpers::tidy_plus_plus #' @param tidy_args Additional arguments passed to #' [broom.helpers::tidy_plus_plus()] and to `tidy_fun` #' @param model a regression model object #' @param conf.level the confidence level to use for the confidence #' interval if `conf.int = TRUE`; must be strictly greater than 0 #' and less than 1; defaults to 0.95, which corresponds to a 95 #' percent confidence interval #' @param show_p_values if `TRUE`, add p-value to labels #' @param signif_stars if `TRUE`, add significant stars to labels #' @param significance level (between 0 and 1) below which a #' coefficient is consider to be significantly different from 0 #' (or 1 if `exponentiate = TRUE`), `NULL` for not highlighting #' such coefficients #' @param significance_labels optional vector with custom labels #' for significance variable #' @param return_data if `TRUE`, will return the data.frame used #' for plotting instead of the plot #' @param ... parameters passed to [ggcoef_plot()] #' @return A `ggplot2` plot or a `tibble` if `return_data = TRUE`. #' @export #' @examples #' mod <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris) #' ggcoef_model(mod) #' #' ggcoef_table(mod) #' #' #' \donttest{ #' ggcoef_table(mod, table_stat = c("estimate", "ci")) #' #' ggcoef_table( #' mod, #' table_stat_label = list( #' estimate = scales::label_number(.001) #' ) #' ) #' #' ggcoef_table(mod, table_text_size = 5, table_widths = c(1, 1)) #' #' # a logistic regression example #' d_titanic <- as.data.frame(Titanic) #' d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) #' mod_titanic <- glm( #' Survived ~ Sex * Age + Class, #' weights = Freq, #' data = d_titanic, #' family = binomial #' ) #' #' # use 'exponentiate = TRUE' to get the Odds Ratio #' ggcoef_model(mod_titanic, exponentiate = TRUE) #' #' ggcoef_table(mod_titanic, exponentiate = TRUE) #' #' # display intercepts #' ggcoef_model(mod_titanic, exponentiate = TRUE, intercept = TRUE) #' #' # customize terms labels #' ggcoef_model( #' mod_titanic, #' exponentiate = TRUE, #' show_p_values = FALSE, #' signif_stars = FALSE, #' add_reference_rows = FALSE, #' categorical_terms_pattern = "{level} (ref: {reference_level})", #' interaction_sep = " x ", #' y_labeller = scales::label_wrap(15) #' ) #' #' # display only a subset of terms #' ggcoef_model(mod_titanic, exponentiate = TRUE, include = c("Age", "Class")) #' #' # do not change points' shape based on significance #' ggcoef_model(mod_titanic, exponentiate = TRUE, significance = NULL) #' #' # a black and white version #' ggcoef_model( #' mod_titanic, #' exponentiate = TRUE, #' colour = NULL, stripped_rows = FALSE #' ) #' #' # show dichotomous terms on one row #' ggcoef_model( #' mod_titanic, #' exponentiate = TRUE, #' no_reference_row = broom.helpers::all_dichotomous(), #' categorical_terms_pattern = #' "{ifelse(dichotomous, paste0(level, ' / ', reference_level), level)}", #' show_p_values = FALSE #' ) #' } #' @examplesIf requireNamespace("reshape") #' #' \donttest{ #' data(tips, package = "reshape") #' mod_simple <- lm(tip ~ day + time + total_bill, data = tips) #' ggcoef_model(mod_simple) #' #' # custom variable labels #' # you can use the labelled package to define variable labels #' # before computing model #' if (requireNamespace("labelled")) { #' tips_labelled <- tips |> #' labelled::set_variable_labels( #' day = "Day of the week", #' time = "Lunch or Dinner", #' total_bill = "Bill's total" #' ) #' mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) #' ggcoef_model(mod_labelled) #' } #' #' # you can provide custom variable labels with 'variable_labels' #' ggcoef_model( #' mod_simple, #' variable_labels = c( #' day = "Week day", #' time = "Time (lunch or dinner ?)", #' total_bill = "Total of the bill" #' ) #' ) #' # if labels are too long, you can use 'facet_labeller' to wrap them #' ggcoef_model( #' mod_simple, #' variable_labels = c( #' day = "Week day", #' time = "Time (lunch or dinner ?)", #' total_bill = "Total of the bill" #' ), #' facet_labeller = ggplot2::label_wrap_gen(10) #' ) #' #' # do not display variable facets but add colour guide #' ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE) #' #' # works also with with polynomial terms #' mod_poly <- lm( #' tip ~ poly(total_bill, 3) + day, #' data = tips, #' ) #' ggcoef_model(mod_poly) #' #' # or with different type of contrasts #' # for sum contrasts, the value of the reference term is computed #' if (requireNamespace("emmeans")) { #' mod2 <- lm( #' tip ~ day + time + sex, #' data = tips, #' contrasts = list(time = contr.sum, day = contr.treatment(4, base = 3)) #' ) #' ggcoef_model(mod2) #' } #' } #' @examplesIf requireNamespace("nnet") && requireNamespace("gtsummary") #' #' \donttest{ #' # multinomial model #' mod <- nnet::multinom(grade ~ stage + trt + age, data = gtsummary::trial) #' ggcoef_model(mod, exponentiate = TRUE) #' ggcoef_table(mod, group_labels = c(II = "Stage 2 vs. 1")) #' ggcoef_dodged(mod, exponentiate = TRUE) #' ggcoef_faceted(mod, exponentiate = TRUE) #' } #' @examplesIf requireNamespace("pscl") #' \donttest{ #' library(pscl) #' data("bioChemists", package = "pscl") #' mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) #' ggcoef_model(mod) #' ggcoef_table(mod) #' ggcoef_dodged(mod) #' ggcoef_faceted( #' mod, #' group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated") #' ) #' #' mod2 <- zeroinfl(art ~ fem + mar | 1, data = bioChemists) #' ggcoef_table(mod2) #' ggcoef_table(mod2, intercept = TRUE) #' } ggcoef_model <- function( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, show_p_values = TRUE, signif_stars = TRUE, return_data = FALSE, ...) { args <- list(...) # undocumented feature, we can pass directly `data` if (is.null(args$data)) { data <- ggcoef_data( model = model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, group_by = {{ group_by }}, group_labels = group_labels, add_pairwise_contrasts = add_pairwise_contrasts, pairwise_variables = {{ pairwise_variables }}, keep_model_terms = keep_model_terms, pairwise_reverse = pairwise_reverse, emmeans_args = emmeans_args, significance = significance, significance_labels = significance_labels ) } else { data <- args$data } if (show_p_values && signif_stars) { data$add_to_label <- paste0(data$p_value_label, data$signif_stars) } if (show_p_values && !signif_stars) { data$add_to_label <- data$p_value_label } if (!show_p_values && signif_stars) { data$add_to_label <- data$signif_stars } if (show_p_values || signif_stars) { data$label <- forcats::fct_inorder( factor( paste0( data$label, ifelse( data$add_to_label == "", "", paste0(" (", data$add_to_label, ")") ) ) ) ) data$label_light <- forcats::fct_inorder( factor( paste0( data$label_light, ifelse( data$add_to_label == "", "", paste0(" (", data$add_to_label, ")") ) ) ) ) } if (return_data) { return(data) } args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (!"colour" %in% names(args) && !all(is.na(data$var_label))) { args$colour <- "var_label" if (!"colour_guide" %in% names(args)) { args$colour_guide <- FALSE } } if ("group_by" %in% names(data)) { d <- data |> tidyr::nest(.by = dplyr::all_of("group_by")) purrr::map2( d$data, d$group_by, ~ .call_ggcoef_plot(args, .x, plot_title = as.character(.y)) ) |> patchwork::wrap_plots(ncol = 1) } else { .call_ggcoef_plot(args, data) } } #' @describeIn ggcoef_model a variation of [ggcoef_model()] adding a table #' with estimates, confidence intervals and p-values #' @param table_stat statistics to display in the table, use any column name #' returned by the tidier or `"ci"` for confidence intervals formatted #' according to `ci_pattern` #' @param table_header optional custom headers for the table #' @param table_text_size text size for the table #' @param table_stat_label optional named list of labeller functions for the #' displayed statistic (see examples) #' @param ci_pattern glue pattern for confidence intervals in the table #' @param table_widths relative widths of the forest plot and the coefficients #' table #' @param table_witdhs `r lifecycle::badge("deprecated")`\cr #' use `table_widths` instead #' @importFrom lifecycle deprecated #' @export ggcoef_table <- function( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, show_p_values = FALSE, signif_stars = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_widths = c(3, 2), table_witdhs = deprecated(), ...) { if (lifecycle::is_present(table_witdhs)) { lifecycle::deprecate_stop( "0.10.0", "ggcoef_table(table_witdhs)", "ggcoef_table(table_widths)" ) } args <- list(...) # undocumented feature, we can pass directly `data` if (is.null(args$data)) { data <- ggcoef_data( model = model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, group_by = {{ group_by }}, group_labels = group_labels, add_pairwise_contrasts = add_pairwise_contrasts, pairwise_variables = {{ pairwise_variables }}, keep_model_terms = keep_model_terms, pairwise_reverse = pairwise_reverse, emmeans_args = emmeans_args, significance = significance, significance_labels = significance_labels ) } else { data <- args$data } if ("group_by" %in% colnames(data)) { x_limits <- args$x_limits if (is.null(x_limits)) { if (all(c("conf.low", "conf.high") %in% names(data))) x_limits <- range( data$estimate, data$conf.low, data$conf.high, na.rm = TRUE ) else x_limits <- range(data$estimate, na.rm = TRUE) } d <- data |> tidyr::nest(.by = dplyr::all_of("group_by")) res <- purrr::map2( d$data, d$group_by, ~ ggcoef_table( data = .x, plot_title = .y, model = model, tidy_fun = tidy_fun, tidy_args = tidy_args, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, significance = significance, significance_labels = significance_labels, show_p_values = show_p_values, signif_stars = signif_stars, table_stat = table_stat, table_header = table_header, table_text_size = table_text_size, table_stat_label = table_stat_label, ci_pattern = ci_pattern, table_widths = table_widths, x_limits = x_limits ) ) |> patchwork::wrap_plots(ncol = 1) return(res) } if (show_p_values && signif_stars) { data$add_to_label <- paste0(data$p_value_label, data$signif_stars) } if (show_p_values && !signif_stars) { data$add_to_label <- data$p_value_label } if (!show_p_values && signif_stars) { data$add_to_label <- data$signif_stars } if (show_p_values || signif_stars) { data$label <- forcats::fct_inorder( factor( paste0( data$label, ifelse( data$add_to_label == "", "", paste0(" (", data$add_to_label, ")") ) ) ) ) data$label_light <- forcats::fct_inorder( factor( paste0( data$label_light, ifelse( data$add_to_label == "", "", paste0(" (", data$add_to_label, ")") ) ) ) ) } args$data <- data args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (!"colour" %in% names(args) && !all(is.na(data$var_label))) { args$colour <- "var_label" if (!"colour_guide" %in% names(args)) { args$colour_guide <- FALSE } } if (!"y" %in% names(args)) args$y <- "label" if (!"facet_row" %in% names(args)) args$facet_row <- "var_label" if (!"stripped_rows" %in% names(args)) args$stripped_rows <- TRUE if (!"strips_odd" %in% names(args)) args$strips_odd <- "#11111111" if (!"strips_even" %in% names(args)) args$strips_even <- "#00000000" coef_plot <- do.call(ggcoef_plot, args) if (args$stripped_rows) { if (!"term" %in% names(data)) { data$term <- data[[args$y]] } data <- data |> dplyr::mutate(.fill = dplyr::if_else( as.integer(.in_order(.data$term)) %% 2L == 1, args$strips_even, args$strips_odd )) } # building the coefficient table tbl_data <- data if (!"estimate" %in% names(table_stat_label)) { table_stat_label$estimate <- scales::label_number(accuracy = .1) } if (!"conf.low" %in% names(table_stat_label)) { table_stat_label$conf.low <- scales::label_number(accuracy = .1) } if (!"conf.high" %in% names(table_stat_label)) { table_stat_label$conf.high <- scales::label_number(accuracy = .1) } if (!"p.value" %in% names(table_stat_label)) { table_stat_label$p.value <- scales::label_pvalue(add_p = FALSE) } for (v in names(table_stat_label)) { tbl_data[[v]] <- table_stat_label[[v]](tbl_data[[v]]) tbl_data[[v]][is.na(tbl_data[[v]])] <- "" } tbl_data$ci <- stringr::str_glue_data(tbl_data, ci_pattern) tbl_data$ci[is.na(data$conf.low) & is.na(data$conf.high)] <- " " tbl_data <- tbl_data |> tidyr::pivot_longer( dplyr::any_of(table_stat), names_to = "stat", values_to = "value", values_transform = as.character ) tbl_data$stat <- factor(tbl_data$stat, levels = table_stat) if (!is.null(table_header) && length(table_header) != length(table_stat)) { cli::cli_abort("{.arg table_header} should have the same length as {.arg table_stat}.") # nolint } if (is.null(table_header)) { table_header <- table_stat if ("estimate" %in% table_header) { table_header[table_header == "estimate"] <- attr(data, "coefficients_label") } if ("ci" %in% table_header) { table_header[table_header == "ci"] <- paste(scales::percent(conf.level), "CI") } if ("p.value" %in% table_header) { table_header[table_header == "p.value"] <- "p" } } table_plot <- ggplot2::ggplot(tbl_data) + ggplot2::aes( x = .data[["stat"]], y = .data[[args$y]], label = .data[["value"]] ) + ggplot2::geom_text(hjust = .5, vjust = .5, size = table_text_size) + ggplot2::scale_x_discrete(position = "top", labels = table_header) + ggplot2::scale_y_discrete( limits = rev, expand = ggplot2::expansion(mult = 0, add = .5) ) + ggplot2::facet_grid( rows = args$facet_row, scales = "free_y", space = "free_y", switch = "y" ) + ggplot2::theme_light() + ggplot2::theme( axis.text.x = ggplot2::element_text(face = "bold", hjust = .5), axis.text.y = ggplot2::element_blank(), axis.title = ggplot2::element_blank(), strip.text = ggplot2::element_blank(), panel.grid = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank() ) if (args$stripped_rows) { table_plot <- table_plot + geom_stripped_rows( mapping = ggplot2::aes( odd = .data[[".fill"]], even = .data[[".fill"]], colour = NULL, linetype = NULL ) ) } # join the plots patchwork::wrap_plots(coef_plot, table_plot, nrow = 1, widths = table_widths) } #' @describeIn ggcoef_model a dodged variation of [ggcoef_model()] for #' multi groups models #' @export ggcoef_dodged <- function( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, ...) { data <- ggcoef_data( model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, group_by = {{ group_by }}, group_labels = group_labels, significance = significance, significance_labels = significance_labels ) if (return_data) { return(data) } if (!"group_by" %in% colnames(data)) cli::cli_abort("No groups detected. Define groups with {.arg group_by}.") args <- list(...) args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (!"dodged " %in% names(args)) { args$dodged <- TRUE } if (!"colour" %in% names(args)) { args$colour <- "group_by" } if (!"errorbar_coloured" %in% names(args)) { args$errorbar_coloured <- TRUE } .call_ggcoef_plot(args, data) } #' @describeIn ggcoef_model a faceted variation of [ggcoef_model()] for #' multi groups models #' @export ggcoef_faceted <- function( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, ...) { data <- ggcoef_data( model, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, include = {{ include }}, group_by = {{ group_by }}, group_labels = group_labels, significance = significance, significance_labels = significance_labels ) if (return_data) { return(data) } if (!"group_by" %in% colnames(data)) cli::cli_abort("No groups detected. Define groups with {.arg group_by}.") args <- list(...) args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (!"facet_col" %in% names(args)) { args$facet_col <- "group_by" } if (!"colour" %in% names(args) && !all(is.na(data$var_label))) { args$colour <- "var_label" if (!"colour_guide" %in% names(args)) { args$colour_guide <- FALSE } } .call_ggcoef_plot(args, data) } #' @describeIn ggcoef_model designed for displaying several models on the same #' plot. #' @export #' @param models named list of models #' @param type a dodged plot, a faceted plot or multiple table plots? #' @note #' `ggcoef_compare(type = "table")` is not compatible with multi-components #' models. #' @examples #' \donttest{ #' # Use ggcoef_compare() for comparing several models on the same plot #' mod1 <- lm(Fertility ~ ., data = swiss) #' mod2 <- step(mod1, trace = 0) #' mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) #' models <- list( #' "Full model" = mod1, #' "Simplified model" = mod2, #' "With interaction" = mod3 #' ) #' #' ggcoef_compare(models) #' ggcoef_compare(models, type = "faceted") #' ggcoef_compare(models, type = "table") #' #' # you can reverse the vertical position of the point by using a negative #' # value for dodged_width (but it will produce some warnings) #' ggcoef_compare(models, dodged_width = -.9) #' } ggcoef_compare <- function( models, type = c("dodged", "faceted", "table"), tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_widths = c(3, 2), return_data = FALSE, ...) { data <- lapply( X = models, FUN = ggcoef_data, tidy_fun = tidy_fun, tidy_args = {{ tidy_args }}, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, intercept = intercept, add_pairwise_contrasts = add_pairwise_contrasts, pairwise_variables = {{ pairwise_variables }}, keep_model_terms = keep_model_terms, pairwise_reverse = pairwise_reverse, emmeans_args = emmeans_args, significance = significance, significance_labels = significance_labels ) data <- dplyr::bind_rows(data, .id = "model") coefficients_label <- attr(data, "coefficients_label") data$model <- .in_order(data$model) data$term <- .in_order(data$term) data$var_label <- .in_order(data$var_label) data$variable <- .in_order(data$variable) data$label <- .in_order(data$label) # include should be applied after lapply data <- data |> broom.helpers::tidy_select_variables( include = {{ include }}, model = models[[1]] # just need to pass 1 model for the function to work ) |> broom.helpers::tidy_detach_model() # Add NA values for unobserved combinations # (i.e. for a term present in one model but not in another) data <- data |> tidyr::complete( .data$model, tidyr::nesting( !!sym("var_label"), !!sym("variable"), !!sym("var_class"), !!sym("var_type"), !!sym("contrasts"), !!sym("label"), !!sym("label_light"), !!sym("term") ) ) |> # order lost after nesting dplyr::arrange(.data$model, .data$variable, .data$term) attr(data, "coefficients_label") <- coefficients_label if (return_data) { return(data) } type <- match.arg(type) if (type == "table") { data$group_by <- data$model data <- data[!is.na(data$estimate), ] return( ggcoef_table( data = data, ..., exponentiate = exponentiate, table_stat = table_stat, table_header = table_header, table_text_size = table_text_size, table_stat_label = table_stat_label, ci_pattern = ci_pattern, table_widths = table_widths ) ) } args <- list(...) args$data <- data args$exponentiate <- exponentiate if (!"y" %in% names(args) && !"facet_row" %in% names(args)) { args$y <- "label_light" } if (type == "dodged") { if (!"dodged " %in% names(args)) { args$dodged <- TRUE } if (!"colour" %in% names(args)) { args$colour <- "model" } if (!"errorbar_coloured" %in% names(args)) { args$errorbar_coloured <- TRUE } } else { if (!"facet_col" %in% names(args)) { args$facet_col <- "model" } if (!"colour" %in% names(args) && !all(is.na(data$var_label))) { args$colour <- "var_label" if (!"colour_guide" %in% names(args)) { args$colour_guide <- FALSE } } } do.call(ggcoef_plot, args) } # not exporting ggcoef_data ggcoef_data <- function( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = .95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = conf.level, significance_labels = NULL) { rlang::check_installed("broom.helpers") if (length(significance) == 0) { significance <- NULL } data <- rlang::inject(broom.helpers::tidy_plus_plus( model = model, tidy_fun = tidy_fun, conf.int = conf.int, conf.level = conf.level, exponentiate = exponentiate, variable_labels = variable_labels, term_labels = term_labels, interaction_sep = interaction_sep, categorical_terms_pattern = categorical_terms_pattern, add_reference_rows = add_reference_rows, no_reference_row = {{ no_reference_row }}, add_pairwise_contrasts = add_pairwise_contrasts, pairwise_variables = {{ pairwise_variables }}, keep_model_terms = keep_model_terms, pairwise_reverse = pairwise_reverse, emmeans_args = emmeans_args, add_estimate_to_reference_rows = TRUE, add_header_rows = FALSE, intercept = intercept, include = {{ include }}, group_by = {{ group_by }}, group_labels = group_labels, keep_model = FALSE, !!!tidy_args )) if (!"p.value" %in% names(data)) { data$p.value <- NA_real_ significance <- NULL } if (!is.null(significance)) { if (is.null(significance_labels)) { significance_labels <- paste(c("p <=", "p >"), significance) } data$significance <- factor( !is.na(data$p.value) & data$p.value <= significance, levels = c(TRUE, FALSE), labels = significance_labels ) } data$signif_stars <- signif_stars(data$p.value, point = NULL) data$p_value_label <- ifelse( is.na(data$p.value), "", scales::pvalue(data$p.value, add_p = TRUE) ) # keep only rows with estimate data <- data[!is.na(data$estimate), ] data$term <- .in_order(data$term) data$var_label <- .in_order(data$var_label) data$variable <- .in_order(data$variable) data$label <- .in_order(data$label) data$label_light <- dplyr::if_else( as.character(data$label) == as.character(data$var_label) & ((!grepl("^nmatrix", data$var_class)) | is.na(data$var_class)), "", as.character(data$label) ) |> .in_order() data } #' @describeIn ggcoef_model plot a tidy `tibble` of coefficients #' @param data a data frame containing data to be plotted, #' typically the output of `ggcoef_model()`, `ggcoef_compare()` #' or `ggcoef_multinom()` with the option `return_data = TRUE` #' @param x,y variables mapped to x and y axis #' @param exponentiate if `TRUE` a logarithmic scale will #' be used for x-axis #' @param y_labeller optional function to be applied on y labels (see examples) #' @param point_size size of the points #' @param point_stroke thickness of the points #' @param point_fill fill colour for the points #' @param colour optional variable name to be mapped to #' colour aesthetic #' @param colour_guide should colour guide be displayed #' in the legend? #' @param colour_lab label of the colour aesthetic in the legend #' @param colour_labels labels argument passed to #' [ggplot2::scale_colour_discrete()] and #' [ggplot2::discrete_scale()] #' @param shape optional variable name to be mapped to the #' shape aesthetic #' @param shape_values values of the different shapes to use in #' [ggplot2::scale_shape_manual()] #' @param shape_guide should shape guide be displayed in the legend? #' @param shape_lab label of the shape aesthetic in the legend #' @param errorbar should error bars be plotted? #' @param errorbar_height height of error bars #' @param errorbar_coloured should error bars be colored as the points? #' @param stripped_rows should stripped rows be displayed in the background? #' @param strips_odd color of the odd rows #' @param strips_even color of the even rows #' @param vline should a vertical line be drawn at 0 (or 1 if #' `exponentiate = TRUE`)? #' @param vline_colour colour of vertical line #' @param dodged should points be dodged (according to the colour aesthetic)? #' @param dodged_width width value for [ggplot2::position_dodge()] #' @param facet_row variable name to be used for row facets #' @param facet_col optional variable name to be used for column facets #' @param facet_labeller labeller function to be used for labeling facets; #' if labels are too long, you can use [ggplot2::label_wrap_gen()] (see #' examples), more information in the documentation of [ggplot2::facet_grid()] #' @param plot_title an optional plot title #' @param x_limits optional limits for the x axis #' @seealso `vignette("ggcoef_model")` #' @export ggcoef_plot <- function( data, x = "estimate", y = "label", exponentiate = FALSE, y_labeller = NULL, point_size = 2, point_stroke = 2, point_fill = "white", colour = NULL, colour_guide = TRUE, colour_lab = "", colour_labels = ggplot2::waiver(), shape = "significance", shape_values = c(16, 21), shape_guide = TRUE, shape_lab = "", errorbar = TRUE, errorbar_height = .1, errorbar_coloured = FALSE, stripped_rows = TRUE, strips_odd = "#11111111", strips_even = "#00000000", vline = TRUE, vline_colour = "grey50", dodged = FALSE, dodged_width = .8, facet_row = "var_label", facet_col = NULL, facet_labeller = "label_value", plot_title = NULL, x_limits = NULL) { if (!is.null(facet_row)) { data[[facet_row]] <- .in_order(data[[facet_row]]) } data[[y]] <- forcats::fct_rev(.in_order(data[[y]])) if (!"term" %in% names(data)) { data$term <- data[[y]] } data$term <- forcats::fct_rev(.in_order(data$term)) if (stripped_rows) { data <- data |> dplyr::mutate(.fill = dplyr::if_else( as.integer(.in_order(.data$term)) %% 2L == 1, strips_even, strips_odd )) } # mapping mapping <- ggplot2::aes(x = .data[[x]], y = .data$term) errorbar <- errorbar & all(c("conf.low", "conf.high") %in% names(data)) if (errorbar) { mapping$xmin <- ggplot2::aes(xmin = .data[["conf.low"]])$xmin mapping$xmax <- ggplot2::aes(xmax = .data[["conf.high"]])$xmax } if (!is.null(shape) && shape %in% names(data)) { mapping$shape <- ggplot2::aes(shape = .data[[shape]])$shape } if (!is.null(colour) && colour %in% names(data)) { mapping$colour <- ggplot2::aes(colour = .data[[colour]])$colour mapping$group <- ggplot2::aes(group = .data[[colour]])$group } # position if (dodged) { position <- ggplot2::position_dodge(dodged_width) } else { position <- ggplot2::position_identity() } # plot p <- ggplot2::ggplot(data = data, mapping = mapping) if (stripped_rows) { p <- p + geom_stripped_rows( mapping = ggplot2::aes( odd = .data[[".fill"]], even = .data[[".fill"]], colour = NULL, linetype = NULL ) ) } if (vline) { p <- p + ggplot2::geom_vline( xintercept = ifelse(exponentiate, 1, 0), colour = vline_colour ) } if (errorbar) { if (!is.null(colour) && errorbar_coloured) { p <- p + ggplot2::geom_errorbar( na.rm = TRUE, width = errorbar_height, position = position, orientation = "y" ) } else { p <- p + ggplot2::geom_errorbar( mapping = ggplot2::aes(colour = NULL), na.rm = TRUE, width = errorbar_height, colour = "black", position = position, orientation = "y" ) } } if (!is.null(facet_col) && is.character(facet_col)) { facet_col <- ggplot2::vars(!!sym(facet_col)) } if (!is.null(facet_row) && is.character(facet_row)) { facet_row <- ggplot2::vars(!!sym(facet_row)) } # used later for reapplying appropriate labels l <- data[[y]] names(l) <- data$term p <- p + ggplot2::geom_point( size = point_size, stroke = point_stroke, fill = point_fill, position = position, na.rm = TRUE ) + ggplot2::facet_grid( rows = facet_row, cols = facet_col, labeller = facet_labeller, scales = "free_y", space = "free_y", switch = "y" ) + ggplot2::ylab("") + ggplot2::scale_y_discrete( expand = ggplot2::expansion(mult = 0, add = .5), labels = .find_label(l, y_labeller) ) + ggplot2::theme_light() + ggplot2::theme( legend.position = "bottom", legend.box = "vertical", strip.placement = "outside", strip.text.y.left = ggplot2::element_text( face = "bold", angle = 0, colour = "black", hjust = 0, vjust = 1 ), strip.text.x = ggplot2::element_text(face = "bold", colour = "black"), strip.background = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), panel.grid.major.y = ggplot2::element_blank(), panel.grid.major.x = ggplot2::element_line(linetype = "dashed"), axis.title.x = ggplot2::element_text(face = "bold"), axis.ticks.y = ggplot2::element_blank() ) if (!is.null(colour) && colour %in% names(data)) { if (colour_guide) { colour_guide <- ggplot2::guide_legend() } else { colour_guide <- "none" } p <- p + ggplot2::scale_colour_discrete( guide = colour_guide, labels = colour_labels ) + ggplot2::labs(colour = colour_lab) } if (!is.null(shape) && shape %in% names(data)) { if (shape_guide) { shape_guide <- ggplot2::guide_legend() } else { shape_guide <- "none" } p <- p + ggplot2::scale_shape_manual( values = shape_values, drop = FALSE, guide = shape_guide, na.translate = FALSE ) + ggplot2::labs(shape = shape_lab) } if (exponentiate) { p <- p + ggplot2::scale_x_log10(limits = x_limits) } else { p <- p + ggplot2::scale_x_continuous(limits = x_limits) } if (!is.null(attr(data, "coefficients_label"))) { p <- p + ggplot2::xlab(attr(data, "coefficients_label")) } if (!is.null(plot_title)) { p <- p + ggplot2::ggtitle(plot_title) + ggplot2::theme( plot.title = ggplot2::element_text(face = "bold"), plot.title.position = "plot" ) } p } .call_ggcoef_plot <- function(args, data, plot_title = NULL) { args$data <- data if (!is.null(plot_title)) args$plot_title <- plot_title do.call(ggcoef_plot, args) } .in_order <- function(x) { # droping unobserved value if needed forcats::fct_inorder(as.character(x)) } .find_label <- function(l, y_labeller = NULL) { function(y) { if (is.null(y_labeller)) y_labeller <- function(x) {x} # nolint y_labeller(as.character(l[y])) } } ggstats/R/stat_prop.R0000644000176200001440000002476615062213140014276 0ustar liggesusers#' Compute proportions according to custom denominator #' #' `stat_prop()` is a variation of [ggplot2::stat_count()] allowing to #' compute custom proportions according to the **by** aesthetic defining #' the denominator (i.e. all proportions for a same value of **by** will #' sum to 1). If the **by** aesthetic is not specified, denominators will be #' determined according to the `default_by` argument. #' #' @inheritParams ggplot2::stat_count #' @param geom Override the default connection with [ggplot2::geom_bar()]. #' @param complete Name (character) of an aesthetic for those statistics should #' be completed for unobserved values (see example). #' @param default_by If the **by** aesthetic is not available, name of another #' aesthetic that will be used to determine the denominators (e.g. `"fill"`), #' or `NULL` or `"total"` to compute proportions of the total. To be noted, #' `default_by = "x"` works both for vertical and horizontal bars. #' @section Aesthetics: #' `stat_prop()` understands the following aesthetics #' (required aesthetics are in bold): #' #' - **x *or* y** #' - by #' - weight #' @section Computed variables: #' \describe{ #' \item{`after_stat(count)`}{number of points in bin} #' \item{`after_stat(denominator)`}{denominator for the proportions} #' \item{`after_stat(prop)`}{computed proportion, i.e. #' `after_stat(count)`/`after_stat(denominator)`} #' } #' @seealso `vignette("stat_prop")`, [ggplot2::stat_count()]. For an alternative #' approach, see #' . #' #' @import ggplot2 #' @return A `ggplot2` plot with the added statistic. #' @export #' @examples #' library(ggplot2) #' d <- as.data.frame(Titanic) #' #' p <- ggplot(d) + #' aes(x = Class, fill = Survived, weight = Freq, by = Class) + #' geom_bar(position = "fill") + #' geom_text(stat = "prop", position = position_fill(.5)) #' p #' p + facet_grid(~Sex) #' #' ggplot(d) + #' aes(x = Class, fill = Survived, weight = Freq) + #' geom_bar(position = "dodge") + #' geom_text( #' aes(by = Survived), #' stat = "prop", #' position = position_dodge(0.9), vjust = "bottom" #' ) #' \donttest{ #' if (requireNamespace("scales")) { #' ggplot(d) + #' aes(x = Class, fill = Survived, weight = Freq, by = 1) + #' geom_bar() + #' geom_text( #' aes(label = scales::percent(after_stat(prop), accuracy = 1)), #' stat = "prop", #' position = position_stack(.5) #' ) #' } #' #' # displaying unobserved levels with complete #' d <- diamonds |> #' dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |> #' dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |> #' dplyr::filter(!(cut == "Premium" & clarity == "IF")) #' p <- ggplot(d) + #' aes(x = clarity, fill = cut, by = clarity) + #' geom_bar(position = "fill") #' p + geom_text(stat = "prop", position = position_fill(.5)) #' p + geom_text(stat = "prop", position = position_fill(.5), complete = "fill") #' } stat_prop <- function(mapping = NULL, data = NULL, geom = "bar", position = "fill", ..., na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, complete = NULL, default_by = "total") { params <- list( na.rm = na.rm, orientation = orientation, complete = complete, default_by = default_by, ... ) if (!is.null(params$y)) { cli::cli_abort( "{.fn stat_prop} must not be used with a {.arg y} aesthetic.", call. = FALSE ) } layer( data = data, mapping = mapping, stat = StatProp, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = params ) } #' @rdname stat_prop #' @format NULL #' @usage NULL #' @export StatProp <- ggplot2::ggproto("StatProp", ggplot2::Stat, required_aes = c("x|y"), default_aes = ggplot2::aes( x = after_stat(count), y = after_stat(count), weight = 1, label = scales::percent(after_stat(prop), accuracy = .1), by = 1 ), setup_params = function(data, params) { params$flipped_aes <- ggplot2::has_flipped_aes( data, params, main_is_orthogonal = FALSE ) has_x <- !(is.null(data$x) && is.null(params$x)) has_y <- !(is.null(data$y) && is.null(params$y)) if (!has_x && !has_y) { cli::cli_abort( "{.fn stat_prop} requires an {.arg x} or {.arg y} aesthetic.", call. = FALSE ) } if (has_x && has_y) { cli::cli_abort( "{.fn stat_prop} can only have an {.arg x} or an {.arg y} aesthetic.", call. = FALSE ) } if (is.null(params$width)) { x <- if (params$flipped_aes) "y" else "x" params$width <- resolution(data[[x]], discrete = TRUE) * 0.9 } params }, extra_params = c("na.rm"), compute_panel = function(self, data, scales, width = NULL, flipped_aes = FALSE, complete = NULL, default_by = "total") { data <- ggplot2::flip_data(data, flipped_aes) data$weight <- data$weight %||% rep(1, nrow(data)) if (default_by == "y") default_by <- "x" if ( is.null(data[["by"]]) && !is.null(default_by) && !is.null(data[[default_by]]) ) { data$by <- data[[default_by]] } data$by <- data$by %||% rep(1, nrow(data)) width <- width %||% (ggplot2::resolution(data$x) * 0.9) if (is.character(data$by)) data$by <- factor(data$by) # sum weights for each combination of by and aesthetics # the use of . allows to consider all aesthetics defined in data panel <- stats::aggregate(weight ~ ., data = data, sum, na.rm = TRUE) names(panel)[which(names(panel) == "weight")] <- "count" panel$count[is.na(panel$count)] <- 0 if (!is.null(complete) && complete == "color") complete <- "colour" if (!is.null(complete) && complete %in% names(panel)) { if (complete != "group") panel <- panel |> dplyr::select(-dplyr::all_of("group")) cols <- names(panel) cols <- cols[!cols %in% c("count", complete)] panel <- panel |> tidyr::complete( tidyr::nesting(!!!syms(cols)), .data[[complete]], fill = list(count = 0) ) |> dplyr::mutate( group = .data[[complete]] |> factor() |> as.integer() ) } # compute proportions by by sum_abs <- function(x) { sum(abs(x)) } panel$denominator <- ave(panel$count, panel$by, FUN = sum_abs) panel$prop <- panel$count / panel$denominator panel$width <- width panel$flipped_aes <- flipped_aes ggplot2::flip_data(panel, flipped_aes) } ) #' Convenient geometries for proportion bar plots #' #' `geom_prop_bar()`, `geom_prop_text()` and `geom_prop_connector()` are #' variations of [ggplot2::geom_bar()], [ggplot2::geom_text()] and #' [geom_bar_connector()] using [stat_prop()], with custom default aesthetics: #' `after_stat(prop)` for **x** or **y**, and #' `scales::percent(after_stat(prop))` for **label**. #' #' @inheritParams stat_prop #' @param width Bar width (`0.9` by default). #' @param ... Additional parameters passed to [ggplot2::geom_bar()], #' [ggplot2::geom_text()] or [geom_bar_connector()]. #' @export #' @seealso [geom_bar_connector()] #' @examples #' library(ggplot2) #' d <- as.data.frame(Titanic) #' ggplot(d) + #' aes(x = Class, fill = Survived, weight = Freq) + #' geom_prop_bar() + #' geom_prop_text() + #' geom_prop_connector() #' #' ggplot(d) + #' aes(y = Class, fill = Survived, weight = Freq) + #' geom_prop_bar(width = .5) + #' geom_prop_text() + #' geom_prop_connector(width = .5, linetype = "dotted") #' #' ggplot(d) + #' aes( #' x = Class, #' fill = Survived, #' weight = Freq, #' y = after_stat(count), #' label = after_stat(count) #' ) + #' geom_prop_bar() + #' geom_prop_text() + #' geom_prop_connector() geom_prop_bar <- function(mapping = NULL, data = NULL, position = "stack", ..., width = 0.9, complete = NULL, default_by = "x") { ggplot2::geom_bar( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, width = width, ... ) } #' @rdname geom_prop_bar #' @importFrom scales percent #' @export geom_prop_text <- function(mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = ggplot2::position_stack(0.5), ..., complete = NULL, default_by = "x") { ggplot2::geom_text( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, ... ) } #' @rdname geom_prop_bar #' @export geom_prop_connector <- function(mapping = NULL, data = NULL, position = "stack", ..., width = 0.9, complete = "fill", default_by = "x") { geom_bar_connector( mapping = mapping, data = data, position = position, complete = complete, default_by = default_by, stat = StatPropProp, width = width, ... ) } # all defaults to after_stat(prop) #' @importFrom utils modifyList StatPropProp <- ggplot2::ggproto( "StatPropProp", StatProp, default_aes = utils::modifyList( StatProp$default_aes, ggplot2::aes( x = after_stat(prop), y = after_stat(prop), label = scales::percent(after_stat(prop), accuracy = .1) ) ) ) # all defaults to after_stat(count) StatPropCount <- ggplot2::ggproto( "StatPropCount", StatProp, default_aes = utils::modifyList( StatProp$default_aes, ggplot2::aes( x = after_stat(count), y = after_stat(count), label = after_stat(count) ) ) ) ggstats/R/ggstats-package.R0000644000176200001440000000145215031235710015316 0ustar liggesusers#' @keywords internal "_PACKAGE" # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start #' @importFrom lifecycle deprecate_soft deprecate_warn deprecate_stop #' @importFrom lifecycle deprecated #' @importFrom dplyr .data sym #' @importFrom ggplot2 after_stat after_scale ## usethis namespace: end NULL utils::globalVariables(c("prop", "count")) # \lifecycle{experimental} # \lifecycle{maturing} # \lifecycle{stable} # \lifecycle{superseded} # \lifecycle{questioning} # \lifecycle{soft-deprecated} # \lifecycle{deprecated} # \lifecycle{defunct} # \lifecycle{archived} # from ggplot2 (but not exported by ggplot2) `%||%` <- function(a, b) { if (!is.null(a)) { a } else { b } } ggstats/R/ggcascade.R0000644000176200001440000001451614674034015014166 0ustar liggesusers#' Cascade plot #' #' `r lifecycle::badge("experimental")` #' #' @param .data A data frame, or data frame extension (e.g. a tibble). For #' `plot_cascade()`, the variable displayed on the x-axis should be named #' `"x"` and the number of observations should be named `"n"`, like the #' tibble returned by `compute_cascade()`. #' @param ... <[`data-masking`][rlang::args_data_masking]> Name-value pairs of #' conditions defining the different statuses to be plotted (see examples). #' @param .weights <[`tidy-select`][dplyr::dplyr_tidy_select]> Optional weights. #' Should select only one variable. #' @param .by <[`tidy-select`][dplyr::dplyr_tidy_select]> A variable or a set #' of variables to group by the computation of the cascade, and to generate #' facets. To select several variables, use [dplyr::pick()] (see examples). #' @param .nrow,.ncol Number of rows and columns, for faceted plots. #' @param .add_n Display the number of observations? #' @param .text_size Size of the labels, passed to [ggplot2::geom_text()]. #' @param .arrows Display arrows between statuses? #' @details #' `ggcascade()` calls `compute_cascade()` to generate a data set passed #' to `plot_cascade()`. Use `compute_cascade()` and `plot_cascade()` for #' more controls. #' @return A `ggplot2` plot or a `tibble`. #' @examples #' ggplot2::diamonds |> #' ggcascade( #' all = TRUE, #' big = carat > .5, #' "big & ideal" = carat > .5 & cut == "Ideal" #' ) #' #' ggplot2::mpg |> #' ggcascade( #' all = TRUE, #' recent = year > 2000, #' "recent & economic" = year > 2000 & displ < 3, #' .by = cyl, #' .ncol = 3, #' .arrows = FALSE, #' .text_size = 3 #' ) #' #' ggplot2::mpg |> #' ggcascade( #' all = TRUE, #' recent = year > 2000, #' "recent & economic" = year > 2000 & displ < 3, #' .by = pick(cyl, drv), #' .add_n = FALSE, #' .text_size = 2 #' ) #' @export ggcascade <- function(.data, ..., .weights = NULL, .by = NULL, .nrow = NULL, .ncol = NULL, .add_n = TRUE, .text_size = 4, .arrows = TRUE) { .data |> compute_cascade(..., .weights = {{ .weights }}, .by = {{ .by }}) |> plot_cascade( .by = {{ .by }}, .nrow = .nrow, .ncol = .ncol, .add_n = .add_n, .text_size = .text_size, .arrows = .arrows ) } #' @rdname ggcascade #' @export compute_cascade <- function(.data, ..., .weights = NULL, .by = NULL) { w <- .data |> dplyr::select({{ .weights }}) if (ncol(w) > 1) cli::cli_abort("{.arg .weights} should select only one column.") if (ncol(w) == 0) { w <- 1 } else { w <- w[[1]] } dots <- rlang::enquos(...) .data |> dplyr::mutate(.w = w) |> dplyr::mutate(!!! dots) |> dplyr::group_by({{ .by }}) |> dplyr::summarise( dplyr::across( dplyr::all_of(names(dots)), \(x) { weighted.sum(x, .data$.w) } ), .groups = "keep" ) |> tidyr::pivot_longer( dplyr::all_of(names(dots)), names_to = "x", values_to = "n" ) |> dplyr::mutate( x = factor(.data$x, levels = names(dots), ordered = TRUE) ) |> dplyr::arrange(.data$x, .by_group = TRUE) } #' @rdname ggcascade #' @export plot_cascade <- function(.data, .by = NULL, .nrow = NULL, .ncol = NULL, .add_n = TRUE, .text_size = 4, .arrows = TRUE) { .data <- .data |> dplyr::group_by({{ .by }}) |> dplyr::mutate( prop = .data$n / max(.data$n), label = scales::percent(.data$prop, accuracy = .1), y_label = dplyr::if_else(.data$prop < .1 & .add_n, .1, .data$prop), xend = dplyr::lead(.data$x), yend = dplyr::lead(.data$prop) / 2, prop_step = dplyr::lead(.data$n) / .data$n, label_step = scales::percent( .data$prop_step, accuracy = .1, prefix = "\u00d7" ) ) p <- ggplot2::ggplot(.data) + ggplot2::aes( x = .data$x, y = .data$prop, fill = .data$x ) + ggplot2::geom_bar( stat = "identity", width = .5, colour = "black", linewidth = .25 ) + ggplot2::geom_text( mapping = ggplot2::aes( y = .data$y_label, label = .data$label ), vjust = 0, nudge_y = .02, size = .text_size ) + ggplot2::scale_y_continuous( breaks = 0:5 / 5, labels = scales::percent ) + ggplot2::xlab("") + ggplot2::ylab("") + ggplot2::theme_minimal() + ggplot2::theme( legend.position = "none", panel.grid.major.x = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(face = "bold") ) if (.add_n) { p <- p + ggplot2::layer( geom = "text", stat = "identity", position = position_nudge(y = .02), mapping = ggplot2::aes( y = 0, label = paste0("n=", .data$n), prop = .data$prop, color = after_scale(hex_bw_threshold(.data$fill, .data$prop, .02)) ), check.aes = FALSE, params = list( vjust = 0, size = .text_size ) ) } if (.arrows) { p <- p + ggplot2::geom_segment( mapping = ggplot2::aes( x = as.integer(.data$x) + .3, xend = as.integer(.data$xend) - .3, y = .data$yend, yend = .data$yend ), na.rm = TRUE, arrow = ggplot2::arrow( type = "closed", length = unit(0.25, "cm") ) ) + ggplot2::geom_text( aes( x = as.integer(.data$x) + .5, y = .data$yend, label = .data$label_step ), vjust = 0, nudge_y = .04, na.rm = TRUE, size = .text_size ) } .by_vars <- dplyr::group_vars(.data) if (length(.by_vars) > 0) { p <- p + ggplot2::facet_wrap( facets = .by_vars, nrow = .nrow, ncol = .ncol ) } p } ggstats/R/signif_stars.R0000644000176200001440000000207414357760261014763 0ustar liggesusers#' Significance Stars #' #' Calculate significance stars #' #' @param x numeric values that will be compared to the `point`, #' `one`, `two`, and `three` values #' @param three threshold below which to display three stars #' @param two threshold below which to display two stars #' @param one threshold below which to display one star #' @param point threshold below which to display one point #' (`NULL` to deactivate) #' @return Character vector containing the appropriate number of #' stars for each `x` value. #' @author Joseph Larmarange #' @export #' @examples #' x <- c(0.5, 0.1, 0.05, 0.01, 0.001) #' signif_stars(x) #' signif_stars(x, one = .15, point = NULL) signif_stars <- function(x, three = 0.001, two = 0.01, one = 0.05, point = 0.1) { res <- rep_len("", length.out = length(x)) if (!is.null(point)) { res[x <= point] <- "." } if (!is.null(one)) { res[x <= one] <- "*" } if (!is.null(two)) { res[x <= two] <- "**" } if (!is.null(three)) { res[x <= three] <- "***" } res } ggstats/R/gglikert.R0000644000176200001440000006364315122045054014074 0ustar liggesusers#' Plotting Likert-type items #' #' Combines several factor variables using the same list of ordered levels #' (e.g. Likert-type scales) into a unique data frame and generates a centered #' bar plot. #' #' You could use `gglikert_data()` to just produce the dataset to be plotted. #' #' If variable labels have been defined (see [labelled::var_label()]), they will #' be considered. You can also pass custom variables labels with the #' `variable_labels` argument. #' #' @param data a data frame, a data frame extension (e.g. a tibble), #' or a survey design object #' @param include variables to include, accepts [tidy-select][dplyr::select] #' syntax #' @param weights optional variable name of a weighting variable, #' accepts [tidy-select][dplyr::select] syntax #' @param y name of the variable to be plotted on `y` axis (relevant when #' `.question` is mapped to "facets, see examples), #' accepts [tidy-select][dplyr::select] syntax #' @param variable_labels a named list or a named vector of custom variable #' labels #' @param sort should the factor defined by `factor_to_sort` be sorted according #' to the answers (see `sort_method`)? One of "none" (default), "ascending" or #' "descending" #' @param sort_method method used to sort the variables: `"prop"` sort according #' to the proportion of answers higher than the centered level, `"prop_lower"` #' according to the proportion lower than the centered level, `"mean"` #' considers answer as a score and sort according to the mean score, `"median"` #' used the median and the majority judgment rule for tie-breaking. #' @param sort_prop_include_center when sorting with `"prop"` and if the number #' of levels is uneven, should half of the central level be taken into account #' to compute the proportion? #' @param factor_to_sort name of the factor column to sort if `sort` is not #' equal to `"none"`; by default the list of questions passed to `include`; #' should be one factor column of the tibble returned by `gglikert_data()`; #' accepts [tidy-select][dplyr::select] syntax #' @param exclude_fill_values Vector of values that should not be displayed #' (but still taken into account for computing proportions), #' see [position_likert()] #' @param cutoff number of categories to be displayed negatively (i.e. on the #' left of the x axis or the bottom of the y axis), could be a decimal value: #' `2` to display negatively the two first categories, `2.5` to display #' negatively the two first categories and half of the third, `2.2` to display #' negatively the two first categories and a fifth of the third (see examples). #' By default (`NULL`), it will be equal to the number of categories divided #' by 2, i.e. it will be centered. #' @param data_fun for advanced usage, custom function to be applied to the #' generated dataset at the end of `gglikert_data()` #' @param add_labels should percentage labels be added to the plot? #' @param labels_size size of the percentage labels #' @param labels_color color of the percentage labels (`"auto"` to use #' `hex_bw()` to determine a font color based on background color) #' @param labels_accuracy accuracy of the percentages, see #' [scales::label_percent()] #' @param labels_hide_below if provided, values below will be masked, see #' [label_percent_abs()] #' @param add_totals should the total proportions of negative and positive #' answers be added to plot? **This option is not compatible with facets!** #' @param totals_size size of the total proportions #' @param totals_color color of the total proportions #' @param totals_accuracy accuracy of the total proportions, see #' [scales::label_percent()] #' @param totals_fontface font face of the total proportions #' @param totals_include_center if the number of levels is uneven, should half #' of the center level be added to the total proportions? #' @param totals_hjust horizontal adjustment of totals labels on the x axis #' @param y_reverse should the y axis be reversed? #' @param y_label_wrap number of characters per line for y axis labels, see #' [scales::label_wrap()] #' @param reverse_likert if `TRUE`, will reverse the default stacking order, #' see [position_likert()] #' @param width bar width, see [ggplot2::geom_bar()] #' @param facet_rows,facet_cols A set of variables or expressions quoted by #' [ggplot2::vars()] and defining faceting groups on the rows or columns #' dimension (see examples) #' @param facet_label_wrap number of characters per line for facet labels, see #' [ggplot2::label_wrap_gen()] #' @param symmetric should the x-axis be symmetric? #' @return A `ggplot2` plot or a `tibble`. #' @seealso `vignette("gglikert")`, [position_likert()], [stat_prop()] #' @export #' @examples #' library(ggplot2) #' library(dplyr) #' #' likert_levels <- c( #' "Strongly disagree", #' "Disagree", #' "Neither agree nor disagree", #' "Agree", #' "Strongly agree" #' ) #' set.seed(42) #' df <- #' tibble( #' q1 = sample(likert_levels, 150, replace = TRUE), #' q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), #' q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), #' q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), #' q5 = sample(c(likert_levels, NA), 150, replace = TRUE), #' q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) #' ) |> #' mutate(across(everything(), ~ factor(.x, levels = likert_levels))) #' #' gglikert(df) #' #' gglikert(df, include = q1:3) + #' scale_fill_likert(pal = scales::brewer_pal(palette = "PRGn")) #' #' gglikert(df, sort = "ascending") #' #' \donttest{ #' gglikert(df, sort = "ascending", sort_prop_include_center = TRUE) #' #' gglikert(df, sort = "ascending", sort_method = "mean") #' #' gglikert(df, reverse_likert = TRUE) #' #' gglikert(df, add_totals = FALSE, add_labels = FALSE) #' #' gglikert( #' df, #' totals_include_center = TRUE, #' totals_hjust = .25, #' totals_size = 4.5, #' totals_fontface = "italic", #' totals_accuracy = .01, #' labels_accuracy = 1, #' labels_size = 2.5, #' labels_hide_below = .25 #' ) #' #' gglikert(df, exclude_fill_values = "Neither agree nor disagree") #' #' if (require("labelled")) { #' df |> #' set_variable_labels( #' q1 = "First question", #' q2 = "Second question" #' ) |> #' gglikert( #' variable_labels = c( #' q4 = "a custom label", #' q6 = "a very very very very very very very very very very long label" #' ), #' y_label_wrap = 25 #' ) #' } #' #' # Facets #' df_group <- df #' df_group$group <- sample(c("A", "B"), 150, replace = TRUE) #' #' gglikert(df_group, q1:q6, facet_rows = vars(group)) #' #' gglikert(df_group, q1:q6, facet_cols = vars(group)) #' #' gglikert(df_group, q1:q6, y = "group", facet_rows = vars(.question)) #' #' # Custom function to be applied on data #' f <- function(d) { #' d$.question <- forcats::fct_relevel(d$.question, "q5", "q2") #' d #' } #' gglikert(df, include = q1:q6, data_fun = f) #' #' # Custom center #' gglikert(df, cutoff = 2) #' #' gglikert(df, cutoff = 1) #' #' gglikert(df, cutoff = 1, symmetric = TRUE) #' #' } gglikert <- function(data, include = dplyr::everything(), weights = NULL, y = ".question", variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c("prop", "prop_lower", "mean", "median"), sort_prop_include_center = totals_include_center, factor_to_sort = ".question", exclude_fill_values = NULL, cutoff = NULL, data_fun = NULL, add_labels = TRUE, labels_size = 3.5, labels_color = "auto", labels_accuracy = 1, labels_hide_below = .05, add_totals = TRUE, totals_size = labels_size, totals_color = "black", totals_accuracy = labels_accuracy, totals_fontface = "bold", totals_include_center = FALSE, totals_hjust = .1, y_reverse = TRUE, y_label_wrap = 50, reverse_likert = FALSE, width = .9, facet_rows = NULL, facet_cols = NULL, facet_label_wrap = 50, symmetric = FALSE) { data <- gglikert_data( data, {{ include }}, weights = {{ weights }}, variable_labels = variable_labels, sort = sort, sort_method = sort_method, sort_prop_include_center = sort_prop_include_center, factor_to_sort = {{ factor_to_sort }}, exclude_fill_values = exclude_fill_values, cutoff = cutoff, data_fun = data_fun ) y <- data |> dplyr::select({{ y }}) |> colnames() if (length(y) != 1) cli::cli_abort("{.arg y} should select only one column.") if (!is.factor(data[[y]])) { data[[y]] <- factor(data[[y]]) } if (y_reverse) { data[[y]] <- data[[y]] |> forcats::fct_rev() } p <- ggplot(data) + aes( y = .data[[y]], fill = .data[[".answer"]], by = .data[[y]], weight = .data[[".weights"]] ) + geom_bar( position = position_likert( reverse = reverse_likert, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), stat = StatProp, complete = "fill", width = width ) if (add_labels && labels_color == "auto") { p <- p + geom_text( mapping = aes( label = label_percent_abs( hide_below = labels_hide_below, accuracy = labels_accuracy )(after_stat(prop)), color = after_scale(hex_bw(.data$fill)) ), stat = StatProp, complete = "fill", position = position_likert( vjust = .5, reverse = reverse_likert, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), size = labels_size ) } if (add_labels && labels_color != "auto") { p <- p + geom_text( mapping = aes( label = label_percent_abs( hide_below = labels_hide_below, accuracy = labels_accuracy )(after_stat(prop)) ), stat = StatProp, complete = "fill", position = position_likert( vjust = .5, reverse = reverse_likert, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), size = labels_size, color = labels_color ) } if (add_totals) { d <- data if (reverse_likert) d$.answer <- forcats::fct_rev(d$.answer) dtot <- d |> dplyr::group_by(.data[[y]], !!!facet_rows, !!!facet_cols) |> dplyr::summarise( prop_lower = .prop_lower( .data$.answer, .data$.weights, include_center = TRUE, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), prop_higher = .prop_higher( .data$.answer, .data$.weights, include_center = TRUE, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), label_lower = .prop_lower( .data$.answer, .data$.weights, include_center = totals_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff ), label_higher = .prop_higher( .data$.answer, .data$.weights, include_center = totals_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff ) ) |> dplyr::ungroup() |> dplyr::mutate( label_lower = label_percent_abs(accuracy = totals_accuracy)(.data$label_lower), label_higher = label_percent_abs(accuracy = totals_accuracy)(.data$label_higher), x_lower = dplyr::if_else( symmetric, -1 * max(.data$prop_lower, .data$prop_higher) - totals_hjust, -1 * max(.data$prop_lower) - totals_hjust ), x_higher = dplyr::if_else( symmetric, max(.data$prop_higher, .data$prop_lower) + totals_hjust, max(.data$prop_higher) + totals_hjust ) ) |> dplyr::group_by(!!!facet_rows, !!!facet_cols) dtot <- dplyr::bind_rows( dtot |> dplyr::select( dplyr::all_of(c(y, x = "x_lower", label = "label_lower")), dplyr::group_cols() ), dtot |> dplyr::select( dplyr::all_of(c(y, x = "x_higher", label = "label_higher")), dplyr::group_cols() ) ) p <- p + geom_text( mapping = aes( y = .data[[y]], x = .data[["x"]], label = .data[["label"]], fill = NULL, by = NULL, weight = NULL ), data = dtot, size = totals_size, color = totals_color, fontface = totals_fontface ) } if (symmetric) { p <- p + scale_x_continuous( labels = label_percent_abs(), limits = symmetric_limits ) } else { p <- p + scale_x_continuous(labels = label_percent_abs()) } p <- p + labs(x = NULL, y = NULL, fill = NULL) + scale_y_discrete(labels = scales::label_wrap(y_label_wrap)) + theme_light() + theme( legend.position = "bottom", panel.grid.major.y = element_blank() ) + scale_fill_likert(cutoff = cutoff) + guides(fill = guide_legend(reverse = reverse_likert)) p + facet_grid( rows = facet_rows, cols = facet_cols, labeller = ggplot2::label_wrap_gen(facet_label_wrap) ) } #' @rdname gglikert #' @export gglikert_data <- function(data, include = dplyr::everything(), weights = NULL, variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c( "prop", "prop_lower", "mean", "median" ), sort_prop_include_center = TRUE, factor_to_sort = ".question", exclude_fill_values = NULL, cutoff = NULL, data_fun = NULL) { rlang::check_installed("labelled") if (inherits(data, "survey.design")) { survey_weights <- stats::weights(data) data <- data$variables weights <- enquo(weights) if (!rlang::quo_is_null(weights)) cli::cli_abort(paste( "{.arg data} is a survey object:", "you can't pass a {.arg weights} argument.", "Survey weights from the survey object will automatically be retrieved." )) data$survey_weights <- survey_weights weights <- dplyr::all_of("survey_weights") } sort <- match.arg(sort) sort_method <- match.arg(sort_method) variables <- data |> dplyr::select({{ include }}) |> colnames() weights_var <- data |> dplyr::select({{ weights }}) |> colnames() if (length(weights_var) > 1) cli::cli_abort("{.arg weights} should select only one column.") if (length(weights_var) == 0) { data$.weights <- 1 } else { data$.weights <- data[[weights_var]] } if (!is.numeric(data$.weights)) { cli::cli_abort("{.arg weights} should correspond to a numerical variable.") } if (is.list(variable_labels)) { variable_labels <- unlist(variable_labels) } data_labels <- data |> labelled::var_label(unlist = TRUE, null_action = "fill") if (!is.null(variable_labels)) { data_labels[names(variable_labels)] <- variable_labels } data_labels <- data_labels[variables] data <- data |> dplyr::mutate( dplyr::across(dplyr::all_of(variables), .fns = labelled::to_factor) ) data <- data |> dplyr::mutate( dplyr::bind_cols(forcats::fct_unify(data[, variables])) ) |> tidyr::pivot_longer( cols = dplyr::all_of(variables), names_to = ".question", values_to = ".answer" ) data$.question <- data_labels[data$.question] |> forcats::fct_inorder() factor_to_sort <- data |> dplyr::select({{ factor_to_sort }}) |> colnames() if (length(factor_to_sort) != 1) cli::cli_abort("{.arg factor_to_sort} should select only one column.") if (sort == "ascending" && sort_method == "prop") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .prop_higher, include_center = sort_prop_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff, .na_rm = FALSE, .desc = FALSE ) } if (sort == "descending" && sort_method == "prop") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .prop_higher, include_center = sort_prop_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff, .na_rm = FALSE, .desc = TRUE ) } if (sort == "ascending" && sort_method == "prop_lower") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .prop_lower, include_center = sort_prop_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff, .na_rm = FALSE, .desc = FALSE ) } if (sort == "descending" && sort_method == "prop_lower") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .prop_lower, include_center = sort_prop_include_center, exclude_fill_values = exclude_fill_values, cutoff = cutoff, .na_rm = FALSE, .desc = TRUE ) } if (sort == "ascending" && sort_method == "mean") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .sort_mean, exclude_fill_values = exclude_fill_values, .na_rm = FALSE, .desc = FALSE ) } if (sort == "descending" && sort_method == "mean") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .sort_mean, exclude_fill_values = exclude_fill_values, .na_rm = FALSE, .desc = TRUE ) } if (sort == "ascending" && sort_method == "median") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .sort_median, exclude_fill_values = exclude_fill_values, .na_rm = FALSE, .desc = FALSE ) } if (sort == "descending" && sort_method == "median") { data[[factor_to_sort]] <- data[[factor_to_sort]] |> forcats::fct_reorder2( data$.answer, data$.weights, .fun = .sort_median, exclude_fill_values = exclude_fill_values, .na_rm = FALSE, .desc = TRUE ) } if (!is.null(data_fun)) { if (!is.function(data_fun)) cli::cli_abort("{arg data_fun} should be a function.") data <- data_fun(data) } data } # Compute the proportion being higher than the center # Option to include the centre (if yes, only half taken into account) .prop_higher <- function(x, w, include_center = TRUE, exclude_fill_values = NULL, cutoff = NULL) { N <- sum(as.integer(!is.na(x)) * w) if (!is.factor(x)) x <- factor(x) if (!is.null(exclude_fill_values)) { l <- levels(x) l <- l[!l %in% exclude_fill_values] x <- factor(x, levels = l) } if (is.null(cutoff)) cutoff <- length(levels(x)) / 2 x <- as.numeric(x) m <- ceiling(cutoff) sum( w * as.integer(x >= cutoff + 1), include_center * w * (x == m) * (m - cutoff), na.rm = TRUE ) / N } # Compute the proportion being higher than the center # Option to include the centre (if yes, only half taken into account) .prop_lower <- function(x, w, include_center = TRUE, exclude_fill_values = NULL, cutoff = NULL) { N <- sum(as.integer(!is.na(x)) * w) if (!is.factor(x)) x <- factor(x) if (!is.null(exclude_fill_values)) { l <- levels(x) l <- l[!l %in% exclude_fill_values] x <- factor(x, levels = l) } if (is.null(cutoff)) cutoff <- length(levels(x)) / 2 x <- as.numeric(x) m <- ceiling(cutoff) sum( w * as.integer(x <= cutoff), include_center * w * (x == m) * (cutoff %% 1), na.rm = TRUE ) / N } #' @importFrom stats weighted.mean .sort_mean <- function(x, w, exclude_fill_values = NULL) { if (!is.factor(x)) x <- factor(x) if (!is.null(exclude_fill_values)) { l <- levels(x) l <- l[!l %in% exclude_fill_values] x <- factor(x, levels = l) } x <- as.integer(x) stats::weighted.mean(x, w, na.rm = TRUE) } .sort_median <- function(x, w, exclude_fill_values = NULL) { if (!is.factor(x)) x <- factor(x) if (!is.null(exclude_fill_values)) { l <- levels(x) l <- l[!l %in% exclude_fill_values] x <- factor(x, levels = l) } x <- as.integer(x) med <- weighted.median(x, w, na.rm = TRUE) med + stats::weighted.mean(x > med, w, na.rm = TRUE) - stats::weighted.mean(x < med, w, na.rm = TRUE) } #' @rdname gglikert #' @param add_median_line add a vertical line at 50%? #' @param reverse_fill if `TRUE`, will reverse the default stacking order, #' see [ggplot2::position_fill()] #' @export #' @examples #' gglikert_stacked(df, q1:q6) #' #' gglikert_stacked(df, q1:q6, add_median_line = TRUE, sort = "asc") #' #' \donttest{ #' gglikert_stacked(df_group, q1:q6, y = "group", add_median_line = TRUE) + #' facet_grid(rows = vars(.question)) #' } gglikert_stacked <- function(data, include = dplyr::everything(), weights = NULL, y = ".question", variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c( "prop", "prop_lower", "mean", "median" ), sort_prop_include_center = FALSE, factor_to_sort = ".question", data_fun = NULL, add_labels = TRUE, labels_size = 3.5, labels_color = "auto", labels_accuracy = 1, labels_hide_below = .05, add_median_line = FALSE, y_reverse = TRUE, y_label_wrap = 50, reverse_fill = TRUE, width = .9) { data <- gglikert_data( data, {{ include }}, weights = {{ weights }}, variable_labels = variable_labels, sort = sort, sort_method = sort_method, sort_prop_include_center = sort_prop_include_center, factor_to_sort = {{ factor_to_sort }}, exclude_fill_values = NULL, data_fun = data_fun ) y <- data |> dplyr::select({{ y }}) |> colnames() if (length(y) != 1) cli::cli_abort("{.arg y} should select only one column.") if (!is.factor(data[[y]])) { data[[y]] <- factor(data[[y]]) } if (y_reverse) { data[[y]] <- data[[y]] |> forcats::fct_rev() } p <- ggplot(data) + aes( y = .data[[y]], fill = .data[[".answer"]], by = .data[[y]], weight = .data[[".weights"]] ) + geom_bar( position = position_fill(reverse = reverse_fill), stat = StatProp, complete = "fill", width = width ) if (add_labels && labels_color == "auto") { p <- p + geom_text( mapping = aes( label = label_percent_abs( hide_below = labels_hide_below, accuracy = labels_accuracy )(after_stat(prop)), color = after_scale(hex_bw(.data$fill)) ), stat = StatProp, complete = "fill", position = position_fill( vjust = .5, reverse = reverse_fill ), size = labels_size ) } if (add_labels && labels_color != "auto") { p <- p + geom_text( mapping = aes( label = label_percent_abs( hide_below = labels_hide_below, accuracy = labels_accuracy )(after_stat(prop)) ), stat = StatProp, complete = "fill", position = position_fill( vjust = .5, reverse = reverse_fill ), size = labels_size, color = labels_color ) } if (add_median_line) { p <- p + ggplot2::geom_vline(xintercept = .5) } p <- p + labs(x = NULL, y = NULL, fill = NULL) + scale_x_continuous(labels = label_percent_abs()) + scale_y_discrete(labels = scales::label_wrap(y_label_wrap)) + theme_light() + theme( legend.position = "bottom", panel.grid.major.y = element_blank() ) + scale_fill_extended() + guides(fill = guide_legend(reverse = !reverse_fill)) p } ggstats/R/hex_bw.R0000644000176200001440000000474014702251241013531 0ustar liggesusers#' Identify a suitable font color (black or white) given a background HEX color #' #' You could use `auto_contrast` as a shortcut of #' `aes(colour = after_scale(hex_bw(.data$fill)))`. You should use `!!!` to #' inject it within [ggplot2::aes()] (see examples). #' #' @param hex_code Background color in hex-format. #' @return Either black or white, in hex-format #' @source Adapted from `saros` for `hex_code()` and from #' #' for `auto_contrast`. #' @export #' @examples #' hex_bw("#0dadfd") #' #' library(ggplot2) #' ggplot(diamonds) + #' aes(x = cut, fill = color, label = after_stat(count)) + #' geom_bar() + #' geom_text( #' mapping = aes(color = after_scale(hex_bw(.data$fill))), #' position = position_stack(.5), #' stat = "count", #' size = 2 #' ) #' #' ggplot(diamonds) + #' aes(x = cut, fill = color, label = after_stat(count)) + #' geom_bar() + #' geom_text( #' mapping = auto_contrast, #' position = position_stack(.5), #' stat = "count", #' size = 2 #' ) #' #' ggplot(diamonds) + #' aes(x = cut, fill = color, label = after_stat(count), !!!auto_contrast) + #' geom_bar() + #' geom_text( #' mapping = auto_contrast, #' position = position_stack(.5), #' stat = "count", #' size = 2 #' ) hex_bw <- function(hex_code) { rgb_conv <- lapply( grDevices::col2rgb(hex_code), FUN = function(.x) { ifelse( .x / 255 <= 0.04045, .x * 12.92 / 255, ((.x / 255 + 0.055) / 1.055)^2.4 ) } ) |> unlist() |> matrix(ncol = length(hex_code), byrow = FALSE) |> sweep(MARGIN = 1, STATS = c(0.2126, 0.7152, 0.0722), FUN = `*`) |> apply(MARGIN = 2, FUN = sum) bw <- ifelse( rgb_conv > 0.2, # 0.179 in the original code "#000000", "#ffffff" ) bw[is.na(hex_code)] <- "#ffffff" bw } #' @rdname hex_bw #' @description #' `hex_bw_threshold()` is a variation of `hex_bw()`. For `values` below #' `threshold`, black (`"#000000"`) will always be returned, regardless of #' `hex_code`. #' @export #' @param values Values to be compared. #' @param threshold Threshold. hex_bw_threshold <- function(hex_code, values, threshold) { x <- hex_bw(hex_code) x[values < threshold] <- "#000000" x } #' @rdname hex_bw #' @export auto_contrast <- ggplot2::aes(colour = after_scale(hex_bw(.data$fill))) ggstats/R/symmetric_limits.R0000644000176200001440000000201714657111214015651 0ustar liggesusers#' Symmetric limits #' #' Expand scale limits to make them symmetric around zero. #' Can be passed as argument to parameter `limits` of continuous scales from #' packages `{ggplot2}` or `{scales}`. Can be also used to obtain an enclosing #' symmetric range for numeric vectors. #' #' @param x a vector of numeric values, possibly a range, from which to compute #' enclosing range #' @return A numeric vector of length two with the new limits, which are always #' such that the absolute value of upper and lower limits is the same. #' @source Adapted from the homonym function in `{ggpmisc}` #' @export #' @examples #' library(ggplot2) #' #' ggplot(iris) + #' aes(x = Sepal.Length - 5, y = Sepal.Width - 3, colour = Species) + #' geom_vline(xintercept = 0) + #' geom_hline(yintercept = 0) + #' geom_point() #' #' last_plot() + #' scale_x_continuous(limits = symmetric_limits) + #' scale_y_continuous(limits = symmetric_limits) symmetric_limits <- function(x) { max <- max(abs(x)) c(-max, max) } ggstats/vignettes/0000755000176200001440000000000015122116420013730 5ustar liggesusersggstats/vignettes/ggcoef_model.Rmd0000644000176200001440000002263215122046567017031 0ustar liggesusers--- title: "Plot model coefficients with `ggcoef_model()`" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plot model coefficients with `ggcoef_model()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) ``` ```{r include=FALSE} if ( !broom.helpers::.assert_package("emmeans", boolean = TRUE) ) { knitr::opts_chunk$set(eval = FALSE) } ``` The purpose of `ggcoef_model()` is to quickly plot the coefficients of a model. It is an updated and improved version of `GGally::ggcoef()` based on `broom.helpers::tidy_plus_plus()`. For displaying a nicely formatted table of the same models, look at `gtsummary::tbl_regression()`. ## Quick coefficients plot To work automatically, this function requires the `{broom.helpers}`. Simply call `ggcoef_model()` with a model object. It could be the result of `stats::lm`, `stats::glm` or any other model covered by `{broom.helpers}`. ```{r ggcoef-reg} data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) ggcoef_model(mod_simple) ``` In the case of a logistic regression (or any other model for which coefficients are usually exponentiated), simply indicated `exponentiate = TRUE`. Note that a logarithmic scale will be used for the x-axis. ```{r ggcoef-titanic} d_titanic <- as.data.frame(Titanic) d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) mod_titanic <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial ) ggcoef_model(mod_titanic, exponentiate = TRUE) ``` ## Customizing the plot ### Variable labels You can use the `{labelled}` package to define variable labels. They will be automatically used by `ggcoef_model()`. Note that variable labels should be defined before computing the model. ```{r} library(labelled) tips_labelled <- tips |> set_variable_labels( day = "Day of the week", time = "Lunch or Dinner", total_bill = "Bill's total" ) mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) ggcoef_model(mod_labelled) ``` You can also define custom variable labels directly by passing a named vector to the `variable_labels` option. ```{r} ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ) ) ``` If variable labels are to long, you can pass `ggplot2::label_wrap_gen()` or any other labeller function to `facet_labeller.` ```{r} ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ), facet_labeller = ggplot2::label_wrap_gen(10) ) ``` Use `facet_row = NULL` to hide variable names. ```{r} ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE) ``` ### Term labels Several options allows you to customize term labels. ```{r} ggcoef_model(mod_titanic, exponentiate = TRUE) ggcoef_model( mod_titanic, exponentiate = TRUE, show_p_values = FALSE, signif_stars = FALSE, add_reference_rows = FALSE, categorical_terms_pattern = "{level} (ref: {reference_level})", interaction_sep = " x " ) + ggplot2::scale_y_discrete(labels = scales::label_wrap(15)) ``` By default, for categorical variables using treatment and sum contrasts, reference rows will be added and displayed on the graph. ```{r} mod_titanic2 <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial, contrasts = list(Sex = contr.sum, Class = contr.treatment(4, base = 3)) ) ggcoef_model(mod_titanic2, exponentiate = TRUE) ``` Continuous variables with polynomial terms defined with `stats::poly()` are also properly managed. ```{r} mod_poly <- lm(Sepal.Length ~ poly(Petal.Width, 3) + Petal.Length, data = iris) ggcoef_model(mod_poly) ``` Use `no_reference_row` to indicate which variables should not have a reference row added. ```{r} ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = "Sex" ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_dichotomous() ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_categorical(), categorical_terms_pattern = "{level}/{reference_level}" ) ``` ### Elements to display Use `intercept = TRUE` to display intercepts. ```{r} ggcoef_model(mod_simple, intercept = TRUE) ``` You can remove confidence intervals with `conf.int = FALSE`. ```{r} ggcoef_model(mod_simple, conf.int = FALSE) ``` By default, significant terms (i.e. with a p-value below 5%) are highlighted using two types of dots. You can control the level of significance with `significance` or remove it with `significance = NULL`. ```{r} ggcoef_model(mod_simple, significance = NULL) ``` By default, dots are colored by variable. You can deactivate this behavior with `colour = NULL`. ```{r} ggcoef_model(mod_simple, colour = NULL) ``` You can display only a subset of terms with **include**. ```{r} ggcoef_model(mod_simple, include = c("time", "total_bill")) ``` It is possible to use `tidyselect` helpers. ```{r} ggcoef_model(mod_simple, include = dplyr::starts_with("t")) ``` You can remove stripped rows with `stripped_rows = FALSE`. ```{r} ggcoef_model(mod_simple, stripped_rows = FALSE) ``` Do not hesitate to consult the help file of `ggcoef_model()` to see all available options. ### ggplot2 elements The plot returned by `ggcoef_model()` is a classic `ggplot2` plot. You can therefore apply `ggplot2` functions to it. ```{r} ggcoef_model(mod_simple) + ggplot2::xlab("Coefficients") + ggplot2::ggtitle("Custom title") + ggplot2::scale_color_brewer(palette = "Set1") + ggplot2::theme(legend.position = "right") ``` ## Forest plot with a coefficient table `ggcoef_table()` is a variant of `ggcoef_model()` displaying a coefficient table on the right of the forest plot. ```{r} ggcoef_table(mod_simple) ggcoef_table(mod_titanic, exponentiate = TRUE) ``` You can easily customize the columns to be displayed. ```{r} ggcoef_table( mod_simple, table_stat = c("label", "estimate", "std.error", "ci"), ci_pattern = "{conf.low} to {conf.high}", table_stat_label = list( estimate = scales::label_number(accuracy = .001), conf.low = scales::label_number(accuracy = .01), conf.high = scales::label_number(accuracy = .01), std.error = scales::label_number(accuracy = .001), label = toupper ), table_header = c("Term", "Coef.", "SE", "CI"), table_widths = c(2, 3) ) ``` ## Multinomial models For multinomial models, simply use `ggcoef_model()` or `ggcoef_table()`. Additional visualizations are available using `ggcoef_dodged()` or `ggcoef_faceted()`. ```{r} library(nnet) hec <- as.data.frame(HairEyeColor) mod <- multinom( Hair ~ Eye + Sex, data = hec, weights = hec$Freq ) ``` ```{r, fig.height=9, fig.width=6} mod |> ggcoef_model(exponentiate = TRUE) mod |> ggcoef_table(exponentiate = TRUE) ``` ```{r, fig.height=4, fig.width=6} mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted(exponentiate = TRUE) ``` You can use `group_labels` to customize the label of each level. ```{r} mod |> ggcoef_faceted( group_labels = c("Brown" = "Brown\n(ref: Black)"), exponentiate = TRUE ) ``` ## Multi-components models Multi-components models such as zero-inflated Poisson or beta regression generate a set of terms for each of their components. simply use `ggcoef_model()` or `ggcoef_table()`. Additional visualizations are available using `ggcoef_dodged()` or `ggcoef_faceted()`. ```{r} library(pscl) data("bioChemists", package = "pscl") mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) ``` ```{r, fig.height=9, fig.width=6} mod |> ggcoef_model() mod |> ggcoef_table() ``` ```{r, fig.height=4, fig.width=6} mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted( exponentiate = TRUE, group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated") ) ``` ## Comparing several models You can easily compare several models with `ggcoef_compare()`. To be noted, `ggcoef_compare()` is not compatible with multinomial or multi-components models. ```{r} mod1 <- lm(Fertility ~ ., data = swiss) mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) ggcoef_compare(models) ggcoef_compare(models, type = "faceted") ``` ```{r fig.height=10, fig.width=8} ggcoef_compare(models, type = "table") ``` ## Advanced users Advanced users could use their own dataset and pass it to `ggcoef_plot()`. Such dataset could be produced by `ggcoef_model()`, `ggcoef_dodged()`, `ggcoef_faceted()` or `ggcoef_compare()` with the option `return_data = TRUE` or by using `broom::tidy()` or `broom.helpers::tidy_plus_plus()`. ## Supported models ```{r, echo=FALSE} broom.helpers::supported_models |> knitr::kable() ``` Note: this list of models has been tested. `{broom.helpers}`, and therefore `ggcoef_model()`, may or may not work properly or partially with other types of models. ggstats/vignettes/stat_weighted_mean.Rmd0000644000176200001440000000526514357760262020261 0ustar liggesusers--- title: "Compute weighted mean with `stat_weighted_mean()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute weighted mean with `stat_weighted_mean()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` `stat_weighted_mean()` computes mean value of **y** (taking into account any **weight** aesthetic if provided) for each value of **x**. More precisely, it will return a new data frame with one line per unique value of **x** with the following new variables: - **y**: mean value of the original **y** (i.e. **numerator**/**denominator**) - **numerator** - **denominator** Let's take an example. The following plot shows all tips received according to the day of the week. ```{r} data(tips, package = "reshape") ggplot(tips) + aes(x = day, y = tip) + geom_point() ``` To plot their mean value per day, simply use `stat_weighted_mean()`. ```{r} ggplot(tips) + aes(x = day, y = tip) + stat_weighted_mean() ``` We can specify the geometry we want using `geom` argument. Note that for lines, we need to specify the **group** aesthetic as well. ```{r} ggplot(tips) + aes(x = day, y = tip, group = 1) + stat_weighted_mean(geom = "line") ``` An alternative is to specify the statistic in `ggplot2::geom_line()`. ```{r} ggplot(tips) + aes(x = day, y = tip, group = 1) + geom_line(stat = "weighted_mean") ``` Of course, it could be use with other geometries. Here a bar plot. ```{r} p <- ggplot(tips) + aes(x = day, y = tip, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + ylab("mean tip") p ``` It is very easy to add facets. In that case, computation will be done separately for each facet. ```{r} p + facet_grid(rows = vars(smoker)) ``` `stat_weighted_mean()` could be also used for computing proportions as a proportion is technically a mean of binary values (0 or 1). ```{r} ggplot(tips) + aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + scale_y_continuous(labels = scales::percent) + ylab("proportion of smoker") ``` Finally, you can use the **weight** aesthetic to indicate weights to take into account for computing means / proportions. ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex) + geom_bar(stat = "weighted_mean", position = "dodge") + scale_y_continuous(labels = scales::percent) + labs(y = "Proportion who survived") ``` ggstats/vignettes/gglikert.Rmd0000644000176200001440000002037314674033502016223 0ustar liggesusers--- title: "Plot Likert-type items with `gglikert()`" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plot Likert-type items with `gglikert()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(dplyr) library(ggplot2) ``` The purpose of `gglikert()` is to generate a centered bar plot comparing the answers of several questions sharing a common Likert-type scale. ## Generating an example dataset ```{r} likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) likert_levels_dk <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", "Don't know" ) df_dk <- tibble( q1 = sample(likert_levels_dk, 150, replace = TRUE), q2 = sample(likert_levels_dk, 150, replace = TRUE, prob = 6:1), q3 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q4 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q5 = sample(c(likert_levels_dk, NA), 150, replace = TRUE), q6 = sample( likert_levels_dk, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0, 1) ) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels_dk))) ``` ## Quick plot Simply call `gglikert()`. ```{r} gglikert(df) ``` The list of variables to plot (all by default) could by specify with `include`. This argument accepts tidy-select syntax. ```{r} gglikert(df, include = q1:q3) ``` ## Customizing the plot The generated plot is a standard `ggplot2` object. You can therefore use `ggplot2` functions to custom many aspects. ```{r} gglikert(df) + ggtitle("A Likert-type items plot", subtitle = "generated with gglikert()") + scale_fill_brewer(palette = "RdYlBu") ``` ### Sorting the questions You can sort the plot with `sort`. ```{r} gglikert(df, sort = "ascending") ``` By default, the plot is sorted based on the proportion being higher than the center level, i.e. in this case the proportion of answers equal to "Agree" or "Strongly Agree". Alternatively, the questions could be transformed into a score and sorted accorded to their mean. ```{r} gglikert(df, sort = "ascending", sort_method = "mean") ``` ### Sorting the answers You can reverse the order of the answers with `reverse_likert`. ```{r} gglikert(df, reverse_likert = TRUE) ``` ### Proportion labels Proportion labels could be removed with `add_labels = FALSE`. ```{r} gglikert(df, add_labels = FALSE) ``` or customized. ```{r} gglikert( df, labels_size = 3, labels_accuracy = .1, labels_hide_below = .2, labels_color = "white" ) ``` ### Totals on each side By default, totals are added on each side of the plot. In case of an uneven number of answer levels, the central level is not taken into account for computing totals. With `totals_include_center = TRUE`, half of the proportion of the central level will be added on each side. ```{r} gglikert( df, totals_include_center = TRUE, sort = "descending", sort_prop_include_center = TRUE ) ``` Totals could be customized. ```{r} gglikert( df, totals_size = 4, totals_color = "blue", totals_fontface = "italic", totals_hjust = .20 ) ``` Or removed. ```{r} gglikert(df, add_totals = FALSE) ``` ## Variable labels If you are using variable labels (see `labelled::set_variable_labels()`), they will be taken automatically into account by `gglikert()`. ```{r} if (require(labelled)) { df <- df |> set_variable_labels( q1 = "first question", q2 = "second question", q3 = "this is the third question with a quite long variable label" ) } gglikert(df) ``` You can also provide custom variable labels with `variable_labels`. ```{r} gglikert( df, variable_labels = c( q1 = "alternative label for the first question", q6 = "another custom label" ) ) ``` You can control how variable labels are wrapped with `y_label_wrap`. ```{r} gglikert(df, y_label_wrap = 20) gglikert(df, y_label_wrap = 200) ``` ## Custom center By default, Likert plots will be centered, i.e. displaying the same number of categories on each side on the graph. When the number of categories is odd, half of the "central" category is displayed negatively and half positively. It is possible to control where to center the graph, using the `cutoff` argument, representing the number of categories to be displayed negatively: `2` to display the two first categories negatively and the others positively; `2.25` to display the two first categories and a quarter of the third negatively. ```{r} gglikert(df, cutoff = 0) gglikert(df, cutoff = 1) gglikert(df, cutoff = 1.25) gglikert(df, cutoff = 1.75) gglikert(df, cutoff = 2) gglikert(df, cutoff = NULL) gglikert(df, cutoff = 4) gglikert(df, cutoff = 5) ``` ## Symmetric x-axis Simply specify `symmetric = TRUE`. ```{r} gglikert(df, cutoff = 1) gglikert(df, cutoff = 1, symmetric = TRUE) ``` ## Removing certain values Sometimes, the dataset could contain certain values that you should not be displayed. ```{r} gglikert(df_dk) ``` A first option could be to convert the don't knows into `NA`. In such case, the proportions will be computed on non missing. ```{r} df_dk |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) |> gglikert() ``` Or, you could use `exclude_fill_values` to not display specific values, but still counting them in the denominator for computing proportions. ```{r} df_dk |> gglikert(exclude_fill_values = "Don't know") ``` ## Facets To define facets, use `facet_rows` and/or `facet_cols`. ```{r message=FALSE} df_group <- df df_group$group1 <- sample(c("A", "B"), 150, replace = TRUE) df_group$group2 <- sample(c("a", "b", "c"), 150, replace = TRUE) gglikert(df_group, q1:q6, facet_cols = vars(group1), labels_size = 3 ) gglikert(df_group, q1:q2, facet_rows = vars(group1, group2), labels_size = 3 ) gglikert(df_group, q3:q6, facet_cols = vars(group1), facet_rows = vars(group2), labels_size = 3 ) + scale_x_continuous( labels = label_percent_abs(), expand = expansion(0, .2) ) ``` To compare answers by subgroup, you can alternatively map `.question` to facets, and define a grouping variable for `y`. ```{r} gglikert(df_group, q1:q4, y = "group1", facet_rows = vars(.question), labels_size = 3, facet_label_wrap = 15 ) ``` ## Stacked plot For a more classical stacked bar plot, you can use `gglikert_stacked()`. ```{r} gglikert_stacked(df) gglikert_stacked( df, sort = "asc", add_median_line = TRUE, add_labels = FALSE ) gglikert_stacked( df_group, include = q1:q4, y = "group2" ) + facet_grid( rows = vars(.question), labeller = label_wrap_gen(15) ) ``` ## Long format dataset Internally, `gglikert()` is calling `gglikert_data()` to generate a long format dataset combining all questions into two columns, `.question` and `.answer`. ```{r} gglikert_data(df) |> head() ``` Such dataset could be useful for other types of plot, for example for a classic stacked bar plot. ```{r} ggplot(gglikert_data(df)) + aes(y = .question, fill = .answer) + geom_bar(position = "fill") ``` ## Weighted data `gglikert()`, `gglikert_stacked()` and `gglikert_data()` accepts a `weights` argument, allowing to specify statistical weights. ```{r} df$sampling_weights <- runif(nrow(df)) gglikert(df, q1:q4, weights = sampling_weights) ``` ## See also The function `position_likert()` used to center bars. ggstats/vignettes/stat_cross.Rmd0000644000176200001440000000601514357760262016604 0ustar liggesusers--- title: "Compute cross-tabulation statistics with `stat_cross()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute cross-tabulation statistics with `stat_cross()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` This statistic is intended to be used with two discrete variables mapped to **x** and **y** aesthetics. It will compute several statistics of a cross-tabulated table using `broom::tidy.test()` and `stats::chisq.test()`. More precisely, the computed variables are: - **observed**: number of observations in x,y - **prop**: proportion of total - **row.prop**: row proportion - **col.prop**: column proportion - **expected**: expected count under the null hypothesis - **resid**: Pearson's residual - **std.resid**: standardized residual - **row.observed**: total number of observations within row - **col.observed**: total number of observations within column - **total.observed**: total number of observations within the table - **phi**: phi coefficients, see `augment_chisq_add_phi()` By default, `stat_cross()` is using `ggplot2::geom_points()`. If you want to plot the number of observations, you need to map `after_stat(observed)` to an aesthetic (here **size**): ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + stat_cross() + scale_size_area(max_size = 20) ``` Note that the **weight** aesthetic is taken into account by `stat_cross()`. We can go further using a custom shape and filling points with standardized residual to identify visually cells who are over- or underrepresented. ```{r fig.height=6, fig.width=6} ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) ``` We can easily recreate a cross-tabulated table. ```{r} ggplot(d) + aes(x = Class, y = Survived, weight = Freq) + geom_tile(fill = "white", colour = "black") + geom_text(stat = "cross", mapping = aes(label = after_stat(observed))) + theme_minimal() ``` Even more complicated, we want to produce a table showing column proportions and where cells are filled with standardized residuals. Note that `stat_cross()` could be used with facets. In that case, computation is done separately in each facet. ```{r} ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = scales::percent(after_stat(col.prop), accuracy = .1), fill = after_stat(std.resid) ) + stat_cross(shape = 22, size = 30) + geom_text(stat = "cross") + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + facet_grid(rows = vars(Sex)) + labs(fill = "Standardized residuals") + theme_minimal() ``` ggstats/vignettes/stat_prop.Rmd0000644000176200001440000001242214674033502016422 0ustar liggesusers--- title: "Compute custom proportions with `stat_prop()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute custom proportions with `stat_prop()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` `stat_prop()` is a variation of `ggplot2::stat_count()` allowing to compute custom proportions according to the **by** aesthetic defining the denominator (i.e. all proportions for a same value of **by** will sum to 1). The **by** aesthetic should be a factor. Therefore, `stat_prop()` requires the **by** aesthetic and this **by** aesthetic should be a factor. ## Adding labels on a percent stacked bar plot When using `position = "fill"` with `geom_bar()`, you can produce a percent stacked bar plot. However, the proportions corresponding to the **y** axis are not directly accessible using only `ggplot2`. With `stat_prop()`, you can easily add them on the plot. In the following example, we indicated `stat = "prop"` to `ggplot2::geom_text()` to use `stat_prop()`, we defined the **by** aesthetic (here we want to compute the proportions separately for each value of **x**), and we also used `ggplot2::position_fill()` when calling `ggplot2::geom_text()`. ```{r} d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) p ``` Note that `stat_prop()` has properly taken into account the **weight** aesthetic. `stat_prop()` is also compatible with faceting. In that case, proportions are computed separately in each facet. ```{r} p + facet_grid(cols = vars(Sex)) ``` ## Displaying proportions of the total If you want to display proportions of the total, simply map the **by** aesthetic to `1`. Here an example using a stacked bar chart. ```{r} ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = 1) + geom_bar() + geom_text( aes(label = scales::percent(after_stat(prop), accuracy = 1)), stat = "prop", position = position_stack(.5) ) ``` ## A dodged bar plot to compare two distributions A dodged bar plot could be used to compare two distributions. ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_bar(position = "dodge") ``` On the previous graph, it is difficult to see if first class is over- or under-represented among women, due to the fact they were much more men on the boat. `stat_prop()` could be used to adjust the graph by displaying instead the proportion within each category (i.e. here the proportion by sex). ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) ``` The same example with labels: ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) + geom_text( mapping = aes( label = scales::percent(after_stat(prop), accuracy = .1), y = after_stat(0.01) ), vjust = "bottom", position = position_dodge(.9), stat = "prop" ) ``` ## Displaying unobserved levels With the `complete` argument, it is possible to indicate an aesthetic for those statistics should be completed for unobserved values. ```{r} d <- diamonds |> dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |> dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |> dplyr::filter(!(cut == "Premium" & clarity == "IF")) p <- ggplot(d) + aes(x = clarity, fill = cut, by = clarity) + geom_bar(position = "fill") p + geom_text( stat = "prop", position = position_fill(.5) ) ``` Adding `complete = "fill"` will generate "0.0%" labels where relevant. ```{r} p + geom_text( stat = "prop", position = position_fill(.5), complete = "fill" ) ``` ## Using `geom_prop_bar()` and `geom_prop_text()` The dedicated geometries `geom_prop_bar()` and `geom_prop_text()` could be used for quick and easy proportional bar plots. They use by default `stat_prop()` with relevant default values. For example, proportions are computed by **x** or **y** if the `by` aesthetic is not specified. It allows to generate a quick proportional bar plot. ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_prop_bar() + geom_prop_text() ``` You can specify a `by` aesthetic. For example, to reproduce the comparison of the two distributions presented earlier. ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_prop_bar(position = "dodge") + geom_prop_text( position = position_dodge(width = .9), vjust = - 0.5 ) + scale_y_continuous(labels = scales::percent) ``` You can also display counts instead of proportions. ```{r} ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_prop_bar(height = "count") + geom_prop_text( height = "count", labels = "count", labeller = scales::number ) ``` ggstats/vignettes/geom_diverging.Rmd0000644000176200001440000001272414755045350017405 0ustar liggesusers--- title: "Geometries for diverging bar plots" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Geometries for diverging bar plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(dplyr) library(ggplot2) library(patchwork) ``` *Note :* if you are looking for an all-in-one function to display Likert-type items, please refer to `gglikert()` and `vignette("gglikert")`. ## New positions Diverging bar plots could be achieved using `position_diverging()` or `position_likert()`. `position_diverging()` stacks bars on top of each other and centers them around zero (the same number of categories are displayed on each side). ```{r} base <- ggplot(diamonds) + aes(y = clarity, fill = cut) + theme(legend.position = "none") p_stack <- base + geom_bar(position = "stack") + ggtitle("position_stack()") p_diverging <- base + geom_bar(position = "diverging") + ggtitle("position_diverging()") p_stack + p_diverging ``` `position_likert()` is similar but uses proportions instead of counts. ```{r} p_fill <- base + geom_bar(position = "fill") + ggtitle("position_fill()") p_likert <- base + geom_bar(position = "likert") + ggtitle("position_likert()") p_fill + p_likert ``` By default, the same number of categories is displayed on each side, i.e. if you have 4 categories, 2 will be displayed negatively and 2 positively. If you have an odd number of categories, half of the central category will be displayed negatively and half positively. The center could be changed with the `cutoff` argument, representing the number of categories to be displayed negatively: `2` to display negatively the two first categories, `2.5` to display negatively the two first categories and half of the third, `2.2` to display negatively the two first categories and a fifth of the third. ```{r} p_1 <- base + geom_bar(position = position_diverging(cutoff = 1)) + ggtitle("cutoff = 1") p_2 <- base + geom_bar(position = position_diverging(cutoff = 2)) + ggtitle("cutoff = 2") p_null <- base + geom_bar(position = position_diverging(cutoff = NULL)) + ggtitle("cutoff = NULL") p_3.75 <- base + geom_bar(position = position_diverging(cutoff = 3.75)) + ggtitle("cutoff = 3.75") p_5 <- base + geom_bar(position = position_diverging(cutoff = 5)) + ggtitle("cutoff = 5") wrap_plots(p_1, p_2, p_null, p_3.75, p_5) ``` ## New scales For a diverging bar plot, it is recommended to use a diverging palette, as provided in the Brewer palettes. Sometimes, the number of available colors is insufficient in the palette. In that case, you could use `pal_extender()` or `scale_fill_extended()`. However, if you use a custom `cutoff`, it is also important to change the center of the palette as well. Therefore, for diverging bar plots, we recommend to use `scale_fill_likert()`. ```{r} wrap_plots( p_1 + scale_fill_likert(cutoff = 1), p_null + scale_fill_likert(), p_3.75 + scale_fill_likert(cutoff = 3.75) ) ``` ## Improving axes You may also want have centered axes. That could be easily achieved with `symmetric_limits()`. You could also use `label_number_abs()` or `label_percent_abs()` to display absolute numbers. ```{r} wrap_plots( p_3.75, p_3.75 + scale_x_continuous( limits = symmetric_limits, labels = label_number_abs() ) ) ``` ## New geometries To facilitate the creation of diverging bar plots, you could use variants of `geom_bar()` and `geom_text()`. ### geom_diverging() & geom_diverging_text() Let's consider the following plot: ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "diverging") + geom_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ), stat = "count", position = position_diverging(.5) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ``` The same could be achieved quicker with `geom_diverging()` and `geom_diverging_text()`. ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_diverging() + geom_diverging_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ``` ### geom_likert() & geom_likert_text() `geom_likert()` and `geom_likert_text()` works similarly. `geom_likert_text()` takes advantages of `stat_prop()` for computing the proportions to be displayed (see `vignette("stat_prop")`). ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text() + scale_fill_likert() + scale_x_continuous( labels = label_percent_abs() ) ``` ### geom_pyramid() & geom_pyramid_text() Finally, `geom_pyramid()` and `geom_pyramid_text()` are variations adapted to display an age-sex pyramid. It uses proportions of the total. ```{r} d <- Titanic |> as.data.frame() ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_pyramid() + geom_pyramid_text() + scale_x_continuous( labels = label_percent_abs(), limits = symmetric_limits ) ``` ggstats/NAMESPACE0000644000176200001440000000347715031235710013155 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(round_any,POSIXct) S3method(round_any,numeric) export(GeomConnector) export(PositionDiverging) export(PositionLikert) export(StatCross) export(StatProp) export(StatWeightedMean) export(augment_chisq_add_phi) export(auto_contrast) export(compute_cascade) export(geom_bar_connector) export(geom_connector) export(geom_diverging) export(geom_diverging_text) export(geom_likert) export(geom_likert_text) export(geom_prop_bar) export(geom_prop_connector) export(geom_prop_text) export(geom_pyramid) export(geom_pyramid_text) export(geom_stripped_cols) export(geom_stripped_rows) export(ggcascade) export(ggcoef_compare) export(ggcoef_dodged) export(ggcoef_faceted) export(ggcoef_model) export(ggcoef_multicomponents) export(ggcoef_multinom) export(ggcoef_plot) export(ggcoef_table) export(gglikert) export(gglikert_data) export(gglikert_stacked) export(ggsurvey) export(hex_bw) export(hex_bw_threshold) export(label_number_abs) export(label_percent_abs) export(likert_pal) export(pal_extender) export(plot_cascade) export(position_diverging) export(position_likert) export(round_any) export(scale_colour_extended) export(scale_fill_extended) export(scale_fill_likert) export(signif_stars) export(stat_cross) export(stat_prop) export(stat_weighted_mean) export(symmetric_limits) export(weighted.median) export(weighted.quantile) export(weighted.sum) import(ggplot2) importFrom(dplyr,.data) importFrom(dplyr,sym) importFrom(ggplot2,after_scale) importFrom(ggplot2,after_stat) importFrom(lifecycle,deprecate_soft) importFrom(lifecycle,deprecate_stop) importFrom(lifecycle,deprecate_warn) importFrom(lifecycle,deprecated) importFrom(scales,percent) importFrom(stats,weighted.mean) importFrom(stats,weights) importFrom(utils,modifyList) ggstats/NEWS.md0000644000176200001440000001373115122116026013025 0ustar liggesusers# ggstats 0.12.0 **Improvements** * `gglikert()`, `gglikert_stacked()` and `gglikert_data()` now accepts survey objects (#110) * compatibility with `vctrs` version 0.7.0 added (#109) # ggstats 0.11.0 **Improvements** * new `type = "table"` for `ggcoef_compare()` (#102) * new argument `x_limits` for `ggcoef_plot()` * limits of x-axis are now harmonized between sub-plots returned by `ggcoef_table()` (#102) * `ggcoef_plot()` has been updated to account for the deprecation of `ggplot2::geom_errorbarh()` in version 4.0.0 of `ggplot2` (#104) # ggstats 0.10.0 **Improvements** * `gglikert()`: legend order is reversed when `reverse_likert = TRUE` (#95) * `gglikert_stacked()`: legend order is reversed when `reverse_fill = TRUE` (#95) **Renamed argument** * the `table_witdhs` argument of `ggcoef_table()` is deprecated. Please use the `table_widths` argument instead (#99) # ggstats 0.9.0 **Improvements** * `ggccoef_model()` and `ggcoef_table()` are now compatible with multinomial and multi-component models, following `broom.helpers` 1.20.0: both functions gained new arguments `group_by` and `group_labels` (#93) * new functions `ggcoef_dodged()` and `ggcoef_faceted()` (#93) * `ggcoef_plot()` got a new argument `y_labeller` (#86) **Deprecated functions** * `ggcoef_multinom()` and `ggcoef_multicomponents()` are now soft-deprecated and may be removed in a future release. Use instead `ggcoef_model()`, `ggcoef_table()`, `ggcoef_dodged()` or `ggcoef_faceted()` (#93) **Bug fix** * fix terms order in `ggcoef_model()` for specific cases when two modalities of two different variables have the same name (#86) * bug fix in `stat_prop()` when `complete = "color"` or `complete = "group"`(#89) * bug fix in `gglikert()` when `reverse_likert = TRUE` and `add_totals = TRUE` (#91) # ggstats 0.8.0 **Breaking changes** * `geom_diverging()`, `geom_likert()`, `geom_pyramid()`, `geom_diverging_text()`, `geom_likert_text()`, and `geom_pyramid_text()` have been redesigned (#73) * `stat_prop()`: arguments `height`, `labels` and `labeller` have been removed (#73) **Improvements** * new geoms `geom_connector()`, `geom_bar_connector()` and `geom_prop_connector()` (#81) * new shortcut `auto_contrast` (#75) # ggstats 0.7.0 **Minor breaking change** * `position_likert_count()` has been renamed as `position_diverging()` (#69) * R minimum version 4.2.0 is now required. **Improvements** * new experimental plot: `ggcascade()` for "cascade" plots (#71) * new scale `scale_fill_likert()` (#64) * new geometries: `geom_prop_bar()` and `geom_prop_text()` (#69) * new geometries: `geom_diverging()`, `geom_likert()`, `geom_pyramid()` and `geom_diverging_text()`, `geom_likert_text()`, `geom_pyramid_text()` (#69) * new helper `symmetric_limits()` to make a scale symmetric (#66) * new helper `pal_extender()` and corresponding `scale_fill_extender()` and `scale_colour_extender()` * new helper `weighted.sum()` for weighted sums (#71) * new sorting option `"prop_lower"` for `gglikert()` (#62) * new argument `symmetric` for `gglikert()` (#66) * new arguments `default_by`, `height`, `labels` and `labeller` for `stat_prop()` (#69) * new returned statistics for `stat_prop()`: `after_stat(denominator)`, `after_stat(height)` and `after_stat(labels)` # ggstats 0.6.0 **Improvements** * new function `hex_bw()` to identify a suitable font color given a background color (#57) * new default value `"auto"` for `labels_color` argument in `gglikert()` and `gglikert_stacked()` (using `hex_bw()`) (#57) * new argument `data_fun` for `gglikert()`, `gglikert_data()` and `gglikert_stacked()` (#60) # ggstats 0.5.1 **Bug fixes** * fix in `ggcoef_model()` and other similar functions: Unicode character removed in significance labels (#49) # ggstats 0.5.0 **Improvements** * new options `labels_color` and `totals_color` in `gglikert()` and `gglikert_stacked()` (#43) **Bug fixes** * fix in `ggcoef_multicomponents()` when `type = "table"` and `exponentiate = TRUE` * fix in `gglikert()`: the function could be called directly with `ggstats::gglikert()` without requiring the full package to be loaded (#47) # ggstats 0.4.0 **New features** * new function `ggcoef_table()` displaying a coefficient table at the right of the forest plot (#32) * new function `ggcoef_multicomponents()` for multi-components models such as zero-inflated Poisson or beta regressions (#38) * new type `"table"` for `ggcoef_multinom()` **Improvements** * `gglikert()` now aligns total proportions when faceting (#28) * new `weights` argument for `gglikert()`, `gglikert_stacked()` and `gglikert_data()` (#29) * new `y` argument for `gglikert()` and `gglikert_stacked()` (#31) * new `facet_label_wrap` argument for `gglikert()` (#31) **New helpers** * `weighted.median()` and `weighted.quantile()` functions # ggstats 0.3.0 **New features** * New functions `gglikert()`, `gglikert_stacked()` and `gglikert_data()` (#25) * New positions `position_likert()` and `position_likert_count()` (#25) * New `complete` argument for `stat_prop()` (#25) **Bug fixes** * Bug fix in `ggcoef_compare()` to preserve the order of model terms and to avoid an error with `add_reference_rows = FALSE` (#23) # ggstats 0.2.1 * Bug fix in `geom_stripped_rows()` and `geom_stripped_cols()` (#20) # ggstats 0.2.0 * Support for pairwise contrasts (#14) * New argument `tidy_args` in `ggcoef_*()` to pass additional arguments to `broom.helpers::tidy_plus_plus()` and to `tidy_fun` (#17) * Now requires `ggplot2` version 3.4.0 or more (#15) * Following change in `geom_rect()`, the `size` aesthetic is now deprecated in `geom_stripped_cols()` and `geom_stripped_rows()`: please use the `linewidth` aesthetic instead (#15) # ggstats 0.1.1 * Examples relying on Internet resources have been removed (#11) # ggstats 0.1.0 * First version, based on dev version of GGally * Fix in `ggcoef_multinom()` to display y levels not listed in `y.level_label` * `stat_cross()` now returns phi coefficients (see also `augment_chisq_add_phi()`) (#6) ggstats/inst/0000755000176200001440000000000015122116413012677 5ustar liggesusersggstats/inst/doc/0000755000176200001440000000000015122116413013444 5ustar liggesusersggstats/inst/doc/ggcoef_model.Rmd0000644000176200001440000002263215122046567016543 0ustar liggesusers--- title: "Plot model coefficients with `ggcoef_model()`" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plot model coefficients with `ggcoef_model()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) ``` ```{r include=FALSE} if ( !broom.helpers::.assert_package("emmeans", boolean = TRUE) ) { knitr::opts_chunk$set(eval = FALSE) } ``` The purpose of `ggcoef_model()` is to quickly plot the coefficients of a model. It is an updated and improved version of `GGally::ggcoef()` based on `broom.helpers::tidy_plus_plus()`. For displaying a nicely formatted table of the same models, look at `gtsummary::tbl_regression()`. ## Quick coefficients plot To work automatically, this function requires the `{broom.helpers}`. Simply call `ggcoef_model()` with a model object. It could be the result of `stats::lm`, `stats::glm` or any other model covered by `{broom.helpers}`. ```{r ggcoef-reg} data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) ggcoef_model(mod_simple) ``` In the case of a logistic regression (or any other model for which coefficients are usually exponentiated), simply indicated `exponentiate = TRUE`. Note that a logarithmic scale will be used for the x-axis. ```{r ggcoef-titanic} d_titanic <- as.data.frame(Titanic) d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) mod_titanic <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial ) ggcoef_model(mod_titanic, exponentiate = TRUE) ``` ## Customizing the plot ### Variable labels You can use the `{labelled}` package to define variable labels. They will be automatically used by `ggcoef_model()`. Note that variable labels should be defined before computing the model. ```{r} library(labelled) tips_labelled <- tips |> set_variable_labels( day = "Day of the week", time = "Lunch or Dinner", total_bill = "Bill's total" ) mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) ggcoef_model(mod_labelled) ``` You can also define custom variable labels directly by passing a named vector to the `variable_labels` option. ```{r} ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ) ) ``` If variable labels are to long, you can pass `ggplot2::label_wrap_gen()` or any other labeller function to `facet_labeller.` ```{r} ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ), facet_labeller = ggplot2::label_wrap_gen(10) ) ``` Use `facet_row = NULL` to hide variable names. ```{r} ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE) ``` ### Term labels Several options allows you to customize term labels. ```{r} ggcoef_model(mod_titanic, exponentiate = TRUE) ggcoef_model( mod_titanic, exponentiate = TRUE, show_p_values = FALSE, signif_stars = FALSE, add_reference_rows = FALSE, categorical_terms_pattern = "{level} (ref: {reference_level})", interaction_sep = " x " ) + ggplot2::scale_y_discrete(labels = scales::label_wrap(15)) ``` By default, for categorical variables using treatment and sum contrasts, reference rows will be added and displayed on the graph. ```{r} mod_titanic2 <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial, contrasts = list(Sex = contr.sum, Class = contr.treatment(4, base = 3)) ) ggcoef_model(mod_titanic2, exponentiate = TRUE) ``` Continuous variables with polynomial terms defined with `stats::poly()` are also properly managed. ```{r} mod_poly <- lm(Sepal.Length ~ poly(Petal.Width, 3) + Petal.Length, data = iris) ggcoef_model(mod_poly) ``` Use `no_reference_row` to indicate which variables should not have a reference row added. ```{r} ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = "Sex" ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_dichotomous() ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_categorical(), categorical_terms_pattern = "{level}/{reference_level}" ) ``` ### Elements to display Use `intercept = TRUE` to display intercepts. ```{r} ggcoef_model(mod_simple, intercept = TRUE) ``` You can remove confidence intervals with `conf.int = FALSE`. ```{r} ggcoef_model(mod_simple, conf.int = FALSE) ``` By default, significant terms (i.e. with a p-value below 5%) are highlighted using two types of dots. You can control the level of significance with `significance` or remove it with `significance = NULL`. ```{r} ggcoef_model(mod_simple, significance = NULL) ``` By default, dots are colored by variable. You can deactivate this behavior with `colour = NULL`. ```{r} ggcoef_model(mod_simple, colour = NULL) ``` You can display only a subset of terms with **include**. ```{r} ggcoef_model(mod_simple, include = c("time", "total_bill")) ``` It is possible to use `tidyselect` helpers. ```{r} ggcoef_model(mod_simple, include = dplyr::starts_with("t")) ``` You can remove stripped rows with `stripped_rows = FALSE`. ```{r} ggcoef_model(mod_simple, stripped_rows = FALSE) ``` Do not hesitate to consult the help file of `ggcoef_model()` to see all available options. ### ggplot2 elements The plot returned by `ggcoef_model()` is a classic `ggplot2` plot. You can therefore apply `ggplot2` functions to it. ```{r} ggcoef_model(mod_simple) + ggplot2::xlab("Coefficients") + ggplot2::ggtitle("Custom title") + ggplot2::scale_color_brewer(palette = "Set1") + ggplot2::theme(legend.position = "right") ``` ## Forest plot with a coefficient table `ggcoef_table()` is a variant of `ggcoef_model()` displaying a coefficient table on the right of the forest plot. ```{r} ggcoef_table(mod_simple) ggcoef_table(mod_titanic, exponentiate = TRUE) ``` You can easily customize the columns to be displayed. ```{r} ggcoef_table( mod_simple, table_stat = c("label", "estimate", "std.error", "ci"), ci_pattern = "{conf.low} to {conf.high}", table_stat_label = list( estimate = scales::label_number(accuracy = .001), conf.low = scales::label_number(accuracy = .01), conf.high = scales::label_number(accuracy = .01), std.error = scales::label_number(accuracy = .001), label = toupper ), table_header = c("Term", "Coef.", "SE", "CI"), table_widths = c(2, 3) ) ``` ## Multinomial models For multinomial models, simply use `ggcoef_model()` or `ggcoef_table()`. Additional visualizations are available using `ggcoef_dodged()` or `ggcoef_faceted()`. ```{r} library(nnet) hec <- as.data.frame(HairEyeColor) mod <- multinom( Hair ~ Eye + Sex, data = hec, weights = hec$Freq ) ``` ```{r, fig.height=9, fig.width=6} mod |> ggcoef_model(exponentiate = TRUE) mod |> ggcoef_table(exponentiate = TRUE) ``` ```{r, fig.height=4, fig.width=6} mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted(exponentiate = TRUE) ``` You can use `group_labels` to customize the label of each level. ```{r} mod |> ggcoef_faceted( group_labels = c("Brown" = "Brown\n(ref: Black)"), exponentiate = TRUE ) ``` ## Multi-components models Multi-components models such as zero-inflated Poisson or beta regression generate a set of terms for each of their components. simply use `ggcoef_model()` or `ggcoef_table()`. Additional visualizations are available using `ggcoef_dodged()` or `ggcoef_faceted()`. ```{r} library(pscl) data("bioChemists", package = "pscl") mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) ``` ```{r, fig.height=9, fig.width=6} mod |> ggcoef_model() mod |> ggcoef_table() ``` ```{r, fig.height=4, fig.width=6} mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted( exponentiate = TRUE, group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated") ) ``` ## Comparing several models You can easily compare several models with `ggcoef_compare()`. To be noted, `ggcoef_compare()` is not compatible with multinomial or multi-components models. ```{r} mod1 <- lm(Fertility ~ ., data = swiss) mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) ggcoef_compare(models) ggcoef_compare(models, type = "faceted") ``` ```{r fig.height=10, fig.width=8} ggcoef_compare(models, type = "table") ``` ## Advanced users Advanced users could use their own dataset and pass it to `ggcoef_plot()`. Such dataset could be produced by `ggcoef_model()`, `ggcoef_dodged()`, `ggcoef_faceted()` or `ggcoef_compare()` with the option `return_data = TRUE` or by using `broom::tidy()` or `broom.helpers::tidy_plus_plus()`. ## Supported models ```{r, echo=FALSE} broom.helpers::supported_models |> knitr::kable() ``` Note: this list of models has been tested. `{broom.helpers}`, and therefore `ggcoef_model()`, may or may not work properly or partially with other types of models. ggstats/inst/doc/stat_weighted_mean.Rmd0000644000176200001440000000526514357760262017773 0ustar liggesusers--- title: "Compute weighted mean with `stat_weighted_mean()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute weighted mean with `stat_weighted_mean()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` `stat_weighted_mean()` computes mean value of **y** (taking into account any **weight** aesthetic if provided) for each value of **x**. More precisely, it will return a new data frame with one line per unique value of **x** with the following new variables: - **y**: mean value of the original **y** (i.e. **numerator**/**denominator**) - **numerator** - **denominator** Let's take an example. The following plot shows all tips received according to the day of the week. ```{r} data(tips, package = "reshape") ggplot(tips) + aes(x = day, y = tip) + geom_point() ``` To plot their mean value per day, simply use `stat_weighted_mean()`. ```{r} ggplot(tips) + aes(x = day, y = tip) + stat_weighted_mean() ``` We can specify the geometry we want using `geom` argument. Note that for lines, we need to specify the **group** aesthetic as well. ```{r} ggplot(tips) + aes(x = day, y = tip, group = 1) + stat_weighted_mean(geom = "line") ``` An alternative is to specify the statistic in `ggplot2::geom_line()`. ```{r} ggplot(tips) + aes(x = day, y = tip, group = 1) + geom_line(stat = "weighted_mean") ``` Of course, it could be use with other geometries. Here a bar plot. ```{r} p <- ggplot(tips) + aes(x = day, y = tip, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + ylab("mean tip") p ``` It is very easy to add facets. In that case, computation will be done separately for each facet. ```{r} p + facet_grid(rows = vars(smoker)) ``` `stat_weighted_mean()` could be also used for computing proportions as a proportion is technically a mean of binary values (0 or 1). ```{r} ggplot(tips) + aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + scale_y_continuous(labels = scales::percent) + ylab("proportion of smoker") ``` Finally, you can use the **weight** aesthetic to indicate weights to take into account for computing means / proportions. ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex) + geom_bar(stat = "weighted_mean", position = "dodge") + scale_y_continuous(labels = scales::percent) + labs(y = "Proportion who survived") ``` ggstats/inst/doc/stat_cross.R0000644000176200001440000000317315122116404015757 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) library(ggplot2) ## ----------------------------------------------------------------------------- d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + stat_cross() + scale_size_area(max_size = 20) ## ----fig.height=6, fig.width=6------------------------------------------------ ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) ## ----------------------------------------------------------------------------- ggplot(d) + aes(x = Class, y = Survived, weight = Freq) + geom_tile(fill = "white", colour = "black") + geom_text(stat = "cross", mapping = aes(label = after_stat(observed))) + theme_minimal() ## ----------------------------------------------------------------------------- ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = scales::percent(after_stat(col.prop), accuracy = .1), fill = after_stat(std.resid) ) + stat_cross(shape = 22, size = 30) + geom_text(stat = "cross") + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + facet_grid(rows = vars(Sex)) + labs(fill = "Standardized residuals") + theme_minimal() ggstats/inst/doc/stat_weighted_mean.R0000644000176200001440000000363615122116412017431 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) library(ggplot2) ## ----------------------------------------------------------------------------- data(tips, package = "reshape") ggplot(tips) + aes(x = day, y = tip) + geom_point() ## ----------------------------------------------------------------------------- ggplot(tips) + aes(x = day, y = tip) + stat_weighted_mean() ## ----------------------------------------------------------------------------- ggplot(tips) + aes(x = day, y = tip, group = 1) + stat_weighted_mean(geom = "line") ## ----------------------------------------------------------------------------- ggplot(tips) + aes(x = day, y = tip, group = 1) + geom_line(stat = "weighted_mean") ## ----------------------------------------------------------------------------- p <- ggplot(tips) + aes(x = day, y = tip, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + ylab("mean tip") p ## ----------------------------------------------------------------------------- p + facet_grid(rows = vars(smoker)) ## ----------------------------------------------------------------------------- ggplot(tips) + aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + scale_y_continuous(labels = scales::percent) + ylab("proportion of smoker") ## ----------------------------------------------------------------------------- d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex) + geom_bar(stat = "weighted_mean", position = "dodge") + scale_y_continuous(labels = scales::percent) + labs(y = "Proportion who survived") ggstats/inst/doc/stat_cross.html0000644000176200001440000013125515122116404016525 0ustar liggesusers Compute cross-tabulation statistics with stat_cross()

Compute cross-tabulation statistics with stat_cross()

library(ggstats)
library(ggplot2)

This statistic is intended to be used with two discrete variables mapped to x and y aesthetics. It will compute several statistics of a cross-tabulated table using broom::tidy.test() and stats::chisq.test(). More precisely, the computed variables are:

By default, stat_cross() is using ggplot2::geom_points(). If you want to plot the number of observations, you need to map after_stat(observed) to an aesthetic (here size):

d <- as.data.frame(Titanic)
ggplot(d) +
  aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) +
  stat_cross() +
  scale_size_area(max_size = 20)

Note that the weight aesthetic is taken into account by stat_cross().

We can go further using a custom shape and filling points with standardized residual to identify visually cells who are over- or underrepresented.

ggplot(d) +
  aes(
    x = Class, y = Survived, weight = Freq,
    size = after_stat(observed), fill = after_stat(std.resid)
  ) +
  stat_cross(shape = 22) +
  scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) +
  scale_size_area(max_size = 20)

We can easily recreate a cross-tabulated table.

ggplot(d) +
  aes(x = Class, y = Survived, weight = Freq) +
  geom_tile(fill = "white", colour = "black") +
  geom_text(stat = "cross", mapping = aes(label = after_stat(observed))) +
  theme_minimal()

Even more complicated, we want to produce a table showing column proportions and where cells are filled with standardized residuals. Note that stat_cross() could be used with facets. In that case, computation is done separately in each facet.

ggplot(d) +
  aes(
    x = Class, y = Survived, weight = Freq,
    label = scales::percent(after_stat(col.prop), accuracy = .1),
    fill = after_stat(std.resid)
  ) +
  stat_cross(shape = 22, size = 30) +
  geom_text(stat = "cross") +
  scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) +
  facet_grid(rows = vars(Sex)) +
  labs(fill = "Standardized residuals") +
  theme_minimal()

ggstats/inst/doc/stat_prop.html0000644000176200001440000056663015122116411016363 0ustar liggesusers Compute custom proportions with stat_prop()

Compute custom proportions with stat_prop()

library(ggstats)
library(ggplot2)

stat_prop() is a variation of ggplot2::stat_count() allowing to compute custom proportions according to the by aesthetic defining the denominator (i.e. all proportions for a same value of by will sum to 1). The by aesthetic should be a factor. Therefore, stat_prop() requires the by aesthetic and this by aesthetic should be a factor.

Adding labels on a percent stacked bar plot

When using position = "fill" with geom_bar(), you can produce a percent stacked bar plot. However, the proportions corresponding to the y axis are not directly accessible using only ggplot2. With stat_prop(), you can easily add them on the plot.

In the following example, we indicated stat = "prop" to ggplot2::geom_text() to use stat_prop(), we defined the by aesthetic (here we want to compute the proportions separately for each value of x), and we also used ggplot2::position_fill() when calling ggplot2::geom_text().

d <- as.data.frame(Titanic)
p <- ggplot(d) +
  aes(x = Class, fill = Survived, weight = Freq, by = Class) +
  geom_bar(position = "fill") +
  geom_text(stat = "prop", position = position_fill(.5))
p

Note that stat_prop() has properly taken into account the weight aesthetic.

stat_prop() is also compatible with faceting. In that case, proportions are computed separately in each facet.

p + facet_grid(cols = vars(Sex))

Displaying proportions of the total

If you want to display proportions of the total, simply map the by aesthetic to 1. Here an example using a stacked bar chart.

ggplot(d) +
  aes(x = Class, fill = Survived, weight = Freq, by = 1) +
  geom_bar() +
  geom_text(
    aes(label = scales::percent(after_stat(prop), accuracy = 1)),
    stat = "prop",
    position = position_stack(.5)
  )

A dodged bar plot to compare two distributions

A dodged bar plot could be used to compare two distributions.

ggplot(d) +
  aes(x = Class, fill = Sex, weight = Freq, by = Sex) +
  geom_bar(position = "dodge")

On the previous graph, it is difficult to see if first class is over- or under-represented among women, due to the fact they were much more men on the boat. stat_prop() could be used to adjust the graph by displaying instead the proportion within each category (i.e. here the proportion by sex).

ggplot(d) +
  aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) +
  geom_bar(stat = "prop", position = "dodge") +
  scale_y_continuous(labels = scales::percent)

The same example with labels:

ggplot(d) +
  aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) +
  geom_bar(stat = "prop", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  geom_text(
    mapping = aes(
      label = scales::percent(after_stat(prop), accuracy = .1),
      y = after_stat(0.01)
    ),
    vjust = "bottom",
    position = position_dodge(.9),
    stat = "prop"
  )

Displaying unobserved levels

With the complete argument, it is possible to indicate an aesthetic for those statistics should be completed for unobserved values.

d <- diamonds |>
  dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |>
  dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |>
  dplyr::filter(!(cut == "Premium" & clarity == "IF"))
p <- ggplot(d) +
  aes(x = clarity, fill = cut, by = clarity) +
  geom_bar(position = "fill")
p +
  geom_text(
    stat = "prop",
    position = position_fill(.5)
  )

Adding complete = "fill" will generate “0.0%” labels where relevant.

p +
  geom_text(
    stat = "prop",
    position = position_fill(.5),
    complete = "fill"
  )

Using geom_prop_bar() and geom_prop_text()

The dedicated geometries geom_prop_bar() and geom_prop_text() could be used for quick and easy proportional bar plots. They use by default stat_prop() with relevant default values. For example, proportions are computed by x or y if the by aesthetic is not specified. It allows to generate a quick proportional bar plot.

ggplot(diamonds) +
  aes(y = clarity, fill = cut) +
  geom_prop_bar() +
  geom_prop_text()

You can specify a by aesthetic. For example, to reproduce the comparison of the two distributions presented earlier.

d <- as.data.frame(Titanic)
ggplot(d) +
  aes(x = Class, fill = Sex, weight = Freq, by = Sex) +
  geom_prop_bar(position = "dodge") +
  geom_prop_text(
    position = position_dodge(width = .9),
    vjust = - 0.5
  ) +
  scale_y_continuous(labels = scales::percent)

You can also display counts instead of proportions.

ggplot(diamonds) +
  aes(x = clarity, fill = cut) +
  geom_prop_bar(height = "count") +
  geom_prop_text(
    height = "count",
    labels = "count",
    labeller = scales::number
  )
#> Warning in ggplot2::geom_bar(mapping = mapping, data = data, position =
#> position, : Ignoring unknown parameters: `height`
#> Warning in ggplot2::geom_text(mapping = mapping, data = data, position =
#> position, : Ignoring unknown parameters: `height`, `labels`, and `labeller`

ggstats/inst/doc/gglikert.Rmd0000644000176200001440000002037314674033502015735 0ustar liggesusers--- title: "Plot Likert-type items with `gglikert()`" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plot Likert-type items with `gglikert()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(dplyr) library(ggplot2) ``` The purpose of `gglikert()` is to generate a centered bar plot comparing the answers of several questions sharing a common Likert-type scale. ## Generating an example dataset ```{r} likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) likert_levels_dk <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", "Don't know" ) df_dk <- tibble( q1 = sample(likert_levels_dk, 150, replace = TRUE), q2 = sample(likert_levels_dk, 150, replace = TRUE, prob = 6:1), q3 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q4 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q5 = sample(c(likert_levels_dk, NA), 150, replace = TRUE), q6 = sample( likert_levels_dk, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0, 1) ) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels_dk))) ``` ## Quick plot Simply call `gglikert()`. ```{r} gglikert(df) ``` The list of variables to plot (all by default) could by specify with `include`. This argument accepts tidy-select syntax. ```{r} gglikert(df, include = q1:q3) ``` ## Customizing the plot The generated plot is a standard `ggplot2` object. You can therefore use `ggplot2` functions to custom many aspects. ```{r} gglikert(df) + ggtitle("A Likert-type items plot", subtitle = "generated with gglikert()") + scale_fill_brewer(palette = "RdYlBu") ``` ### Sorting the questions You can sort the plot with `sort`. ```{r} gglikert(df, sort = "ascending") ``` By default, the plot is sorted based on the proportion being higher than the center level, i.e. in this case the proportion of answers equal to "Agree" or "Strongly Agree". Alternatively, the questions could be transformed into a score and sorted accorded to their mean. ```{r} gglikert(df, sort = "ascending", sort_method = "mean") ``` ### Sorting the answers You can reverse the order of the answers with `reverse_likert`. ```{r} gglikert(df, reverse_likert = TRUE) ``` ### Proportion labels Proportion labels could be removed with `add_labels = FALSE`. ```{r} gglikert(df, add_labels = FALSE) ``` or customized. ```{r} gglikert( df, labels_size = 3, labels_accuracy = .1, labels_hide_below = .2, labels_color = "white" ) ``` ### Totals on each side By default, totals are added on each side of the plot. In case of an uneven number of answer levels, the central level is not taken into account for computing totals. With `totals_include_center = TRUE`, half of the proportion of the central level will be added on each side. ```{r} gglikert( df, totals_include_center = TRUE, sort = "descending", sort_prop_include_center = TRUE ) ``` Totals could be customized. ```{r} gglikert( df, totals_size = 4, totals_color = "blue", totals_fontface = "italic", totals_hjust = .20 ) ``` Or removed. ```{r} gglikert(df, add_totals = FALSE) ``` ## Variable labels If you are using variable labels (see `labelled::set_variable_labels()`), they will be taken automatically into account by `gglikert()`. ```{r} if (require(labelled)) { df <- df |> set_variable_labels( q1 = "first question", q2 = "second question", q3 = "this is the third question with a quite long variable label" ) } gglikert(df) ``` You can also provide custom variable labels with `variable_labels`. ```{r} gglikert( df, variable_labels = c( q1 = "alternative label for the first question", q6 = "another custom label" ) ) ``` You can control how variable labels are wrapped with `y_label_wrap`. ```{r} gglikert(df, y_label_wrap = 20) gglikert(df, y_label_wrap = 200) ``` ## Custom center By default, Likert plots will be centered, i.e. displaying the same number of categories on each side on the graph. When the number of categories is odd, half of the "central" category is displayed negatively and half positively. It is possible to control where to center the graph, using the `cutoff` argument, representing the number of categories to be displayed negatively: `2` to display the two first categories negatively and the others positively; `2.25` to display the two first categories and a quarter of the third negatively. ```{r} gglikert(df, cutoff = 0) gglikert(df, cutoff = 1) gglikert(df, cutoff = 1.25) gglikert(df, cutoff = 1.75) gglikert(df, cutoff = 2) gglikert(df, cutoff = NULL) gglikert(df, cutoff = 4) gglikert(df, cutoff = 5) ``` ## Symmetric x-axis Simply specify `symmetric = TRUE`. ```{r} gglikert(df, cutoff = 1) gglikert(df, cutoff = 1, symmetric = TRUE) ``` ## Removing certain values Sometimes, the dataset could contain certain values that you should not be displayed. ```{r} gglikert(df_dk) ``` A first option could be to convert the don't knows into `NA`. In such case, the proportions will be computed on non missing. ```{r} df_dk |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) |> gglikert() ``` Or, you could use `exclude_fill_values` to not display specific values, but still counting them in the denominator for computing proportions. ```{r} df_dk |> gglikert(exclude_fill_values = "Don't know") ``` ## Facets To define facets, use `facet_rows` and/or `facet_cols`. ```{r message=FALSE} df_group <- df df_group$group1 <- sample(c("A", "B"), 150, replace = TRUE) df_group$group2 <- sample(c("a", "b", "c"), 150, replace = TRUE) gglikert(df_group, q1:q6, facet_cols = vars(group1), labels_size = 3 ) gglikert(df_group, q1:q2, facet_rows = vars(group1, group2), labels_size = 3 ) gglikert(df_group, q3:q6, facet_cols = vars(group1), facet_rows = vars(group2), labels_size = 3 ) + scale_x_continuous( labels = label_percent_abs(), expand = expansion(0, .2) ) ``` To compare answers by subgroup, you can alternatively map `.question` to facets, and define a grouping variable for `y`. ```{r} gglikert(df_group, q1:q4, y = "group1", facet_rows = vars(.question), labels_size = 3, facet_label_wrap = 15 ) ``` ## Stacked plot For a more classical stacked bar plot, you can use `gglikert_stacked()`. ```{r} gglikert_stacked(df) gglikert_stacked( df, sort = "asc", add_median_line = TRUE, add_labels = FALSE ) gglikert_stacked( df_group, include = q1:q4, y = "group2" ) + facet_grid( rows = vars(.question), labeller = label_wrap_gen(15) ) ``` ## Long format dataset Internally, `gglikert()` is calling `gglikert_data()` to generate a long format dataset combining all questions into two columns, `.question` and `.answer`. ```{r} gglikert_data(df) |> head() ``` Such dataset could be useful for other types of plot, for example for a classic stacked bar plot. ```{r} ggplot(gglikert_data(df)) + aes(y = .question, fill = .answer) + geom_bar(position = "fill") ``` ## Weighted data `gglikert()`, `gglikert_stacked()` and `gglikert_data()` accepts a `weights` argument, allowing to specify statistical weights. ```{r} df$sampling_weights <- runif(nrow(df)) gglikert(df, q1:q4, weights = sampling_weights) ``` ## See also The function `position_likert()` used to center bars. ggstats/inst/doc/geom_diverging.R0000644000176200001440000000675115122116267016574 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) library(dplyr) library(ggplot2) library(patchwork) ## ----------------------------------------------------------------------------- base <- ggplot(diamonds) + aes(y = clarity, fill = cut) + theme(legend.position = "none") p_stack <- base + geom_bar(position = "stack") + ggtitle("position_stack()") p_diverging <- base + geom_bar(position = "diverging") + ggtitle("position_diverging()") p_stack + p_diverging ## ----------------------------------------------------------------------------- p_fill <- base + geom_bar(position = "fill") + ggtitle("position_fill()") p_likert <- base + geom_bar(position = "likert") + ggtitle("position_likert()") p_fill + p_likert ## ----------------------------------------------------------------------------- p_1 <- base + geom_bar(position = position_diverging(cutoff = 1)) + ggtitle("cutoff = 1") p_2 <- base + geom_bar(position = position_diverging(cutoff = 2)) + ggtitle("cutoff = 2") p_null <- base + geom_bar(position = position_diverging(cutoff = NULL)) + ggtitle("cutoff = NULL") p_3.75 <- base + geom_bar(position = position_diverging(cutoff = 3.75)) + ggtitle("cutoff = 3.75") p_5 <- base + geom_bar(position = position_diverging(cutoff = 5)) + ggtitle("cutoff = 5") wrap_plots(p_1, p_2, p_null, p_3.75, p_5) ## ----------------------------------------------------------------------------- wrap_plots( p_1 + scale_fill_likert(cutoff = 1), p_null + scale_fill_likert(), p_3.75 + scale_fill_likert(cutoff = 3.75) ) ## ----------------------------------------------------------------------------- wrap_plots( p_3.75, p_3.75 + scale_x_continuous( limits = symmetric_limits, labels = label_number_abs() ) ) ## ----------------------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "diverging") + geom_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ), stat = "count", position = position_diverging(.5) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ## ----------------------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_diverging() + geom_diverging_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ## ----------------------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text() + scale_fill_likert() + scale_x_continuous( labels = label_percent_abs() ) ## ----------------------------------------------------------------------------- d <- Titanic |> as.data.frame() ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_pyramid() + geom_pyramid_text() + scale_x_continuous( labels = label_percent_abs(), limits = symmetric_limits ) ggstats/inst/doc/geom_diverging.html0000644000176200001440000037754315122116271017344 0ustar liggesusers Geometries for diverging bar plots

Geometries for diverging bar plots

Joseph Larmarange

library(ggstats)
library(dplyr)
#> 
#> Attachement du package : 'dplyr'
#> Les objets suivants sont masqués depuis 'package:stats':
#> 
#>     filter, lag
#> Les objets suivants sont masqués depuis 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(patchwork)

Note : if you are looking for an all-in-one function to display Likert-type items, please refer to gglikert() and vignette("gglikert").

New positions

Diverging bar plots could be achieved using position_diverging() or position_likert().

position_diverging() stacks bars on top of each other and centers them around zero (the same number of categories are displayed on each side).

base <-
  ggplot(diamonds) +
  aes(y = clarity, fill = cut) +
  theme(legend.position = "none")

p_stack <-
  base +
  geom_bar(position = "stack") +
  ggtitle("position_stack()")

p_diverging <-
  base +
  geom_bar(position = "diverging") +
  ggtitle("position_diverging()")

p_stack + p_diverging

position_likert() is similar but uses proportions instead of counts.

p_fill <-
  base +
  geom_bar(position = "fill") +
  ggtitle("position_fill()")

p_likert <-
  base +
  geom_bar(position = "likert") +
  ggtitle("position_likert()")

p_fill + p_likert

By default, the same number of categories is displayed on each side, i.e. if you have 4 categories, 2 will be displayed negatively and 2 positively. If you have an odd number of categories, half of the central category will be displayed negatively and half positively.

The center could be changed with the cutoff argument, representing the number of categories to be displayed negatively: 2 to display negatively the two first categories, 2.5 to display negatively the two first categories and half of the third, 2.2 to display negatively the two first categories and a fifth of the third.

p_1 <-
  base +
  geom_bar(position = position_diverging(cutoff = 1)) +
  ggtitle("cutoff = 1")

p_2 <-
  base +
  geom_bar(position = position_diverging(cutoff = 2)) +
  ggtitle("cutoff = 2")

p_null <-
  base +
  geom_bar(position = position_diverging(cutoff = NULL)) +
  ggtitle("cutoff = NULL")

p_3.75 <-
  base +
  geom_bar(position = position_diverging(cutoff = 3.75)) +
  ggtitle("cutoff = 3.75")

p_5 <-
  base +
  geom_bar(position = position_diverging(cutoff = 5)) +
  ggtitle("cutoff = 5")

wrap_plots(p_1, p_2, p_null, p_3.75, p_5)

New scales

For a diverging bar plot, it is recommended to use a diverging palette, as provided in the Brewer palettes. Sometimes, the number of available colors is insufficient in the palette. In that case, you could use pal_extender() or scale_fill_extended(). However, if you use a custom cutoff, it is also important to change the center of the palette as well.

Therefore, for diverging bar plots, we recommend to use scale_fill_likert().

wrap_plots(
  p_1 + scale_fill_likert(cutoff = 1),
  p_null + scale_fill_likert(),
  p_3.75 + scale_fill_likert(cutoff = 3.75)
)

Improving axes

You may also want have centered axes. That could be easily achieved with symmetric_limits().

You could also use label_number_abs() or label_percent_abs() to display absolute numbers.

wrap_plots(
  p_3.75,
  p_3.75 +
    scale_x_continuous(
      limits = symmetric_limits,
      labels = label_number_abs()
    )
)

New geometries

To facilitate the creation of diverging bar plots, you could use variants of geom_bar() and geom_text().

geom_diverging() & geom_diverging_text()

Let’s consider the following plot:

ggplot(diamonds) +
  aes(y = clarity, fill = cut) +
  geom_bar(position = "diverging") +
  geom_text(
    aes(
      label =
        label_number_abs(hide_below = 800)
        (after_stat(count))
    ),
    stat = "count",
    position = position_diverging(.5)
  ) +
  scale_fill_likert() +
  scale_x_continuous(
    labels = label_number_abs(),
    limits = symmetric_limits
  )

The same could be achieved quicker with geom_diverging() and geom_diverging_text().

ggplot(diamonds) +
  aes(y = clarity, fill = cut) +
  geom_diverging() +
  geom_diverging_text(
    aes(
      label =
        label_number_abs(hide_below = 800)
        (after_stat(count))
    )
  ) +
  scale_fill_likert() +
  scale_x_continuous(
    labels = label_number_abs(),
    limits = symmetric_limits
  )

geom_likert() & geom_likert_text()

geom_likert() and geom_likert_text() works similarly. geom_likert_text() takes advantages of stat_prop() for computing the proportions to be displayed (see vignette("stat_prop")).

ggplot(diamonds) +
  aes(y = clarity, fill = cut) +
  geom_likert() +
  geom_likert_text() +
  scale_fill_likert() +
  scale_x_continuous(
    labels = label_percent_abs()
  )

geom_pyramid() & geom_pyramid_text()

Finally, geom_pyramid() and geom_pyramid_text() are variations adapted to display an age-sex pyramid. It uses proportions of the total.

d <- Titanic |> as.data.frame()
ggplot(d) +
  aes(y = Class, fill = Sex, weight = Freq) +
  geom_pyramid() +
  geom_pyramid_text() +
  scale_x_continuous(
    labels = label_percent_abs(),
    limits = symmetric_limits
  )

ggstats/inst/doc/gglikert.html0000644000176200001440000227647215122116402016164 0ustar liggesusers Plot Likert-type items with gglikert()

Plot Likert-type items with gglikert()

Joseph Larmarange

library(ggstats)
library(dplyr)
library(ggplot2)

The purpose of gglikert() is to generate a centered bar plot comparing the answers of several questions sharing a common Likert-type scale.

Generating an example dataset

likert_levels <- c(
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree"
)
set.seed(42)
df <-
  tibble(
    q1 = sample(likert_levels, 150, replace = TRUE),
    q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1),
    q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5),
    q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5),
    q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0))
  ) |>
  mutate(across(everything(), ~ factor(.x, levels = likert_levels)))

likert_levels_dk <- c(
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree",
  "Don't know"
)
df_dk <-
  tibble(
    q1 = sample(likert_levels_dk, 150, replace = TRUE),
    q2 = sample(likert_levels_dk, 150, replace = TRUE, prob = 6:1),
    q3 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6),
    q4 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6),
    q5 = sample(c(likert_levels_dk, NA), 150, replace = TRUE),
    q6 = sample(
      likert_levels_dk, 150,
      replace = TRUE, prob = c(1, 0, 1, 1, 0, 1)
    )
  ) |>
  mutate(across(everything(), ~ factor(.x, levels = likert_levels_dk)))

Quick plot

Simply call gglikert().

gglikert(df)

The list of variables to plot (all by default) could by specify with include. This argument accepts tidy-select syntax.

gglikert(df, include = q1:q3)

Customizing the plot

The generated plot is a standard ggplot2 object. You can therefore use ggplot2 functions to custom many aspects.

gglikert(df) +
  ggtitle("A Likert-type items plot", subtitle = "generated with gglikert()") +
  scale_fill_brewer(palette = "RdYlBu")
#> Scale for fill is already present.
#> Adding another scale for fill, which will replace the existing scale.

Sorting the questions

You can sort the plot with sort.

gglikert(df, sort = "ascending")

By default, the plot is sorted based on the proportion being higher than the center level, i.e. in this case the proportion of answers equal to “Agree” or “Strongly Agree”. Alternatively, the questions could be transformed into a score and sorted accorded to their mean.

gglikert(df, sort = "ascending", sort_method = "mean")

Sorting the answers

You can reverse the order of the answers with reverse_likert.

gglikert(df, reverse_likert = TRUE)

Proportion labels

Proportion labels could be removed with add_labels = FALSE.

gglikert(df, add_labels = FALSE)

or customized.

gglikert(
  df,
  labels_size = 3,
  labels_accuracy = .1,
  labels_hide_below = .2,
  labels_color = "white"
)

Totals on each side

By default, totals are added on each side of the plot. In case of an uneven number of answer levels, the central level is not taken into account for computing totals. With totals_include_center = TRUE, half of the proportion of the central level will be added on each side.

gglikert(
  df,
  totals_include_center = TRUE,
  sort = "descending",
  sort_prop_include_center = TRUE
)

Totals could be customized.

gglikert(
  df,
  totals_size = 4,
  totals_color = "blue",
  totals_fontface = "italic",
  totals_hjust = .20
)

Or removed.

gglikert(df, add_totals = FALSE)

Variable labels

If you are using variable labels (see labelled::set_variable_labels()), they will be taken automatically into account by gglikert().

if (require(labelled)) {
  df <- df |>
    set_variable_labels(
      q1 = "first question",
      q2 = "second question",
      q3 = "this is the third question with a quite long variable label"
    )
}
gglikert(df)

You can also provide custom variable labels with variable_labels.

gglikert(
  df,
  variable_labels = c(
    q1 = "alternative label for the first question",
    q6 = "another custom label"
  )
)

You can control how variable labels are wrapped with y_label_wrap.

gglikert(df, y_label_wrap = 20)

gglikert(df, y_label_wrap = 200)

Custom center

By default, Likert plots will be centered, i.e. displaying the same number of categories on each side on the graph. When the number of categories is odd, half of the “central” category is displayed negatively and half positively.

It is possible to control where to center the graph, using the cutoff argument, representing the number of categories to be displayed negatively: 2 to display the two first categories negatively and the others positively; 2.25 to display the two first categories and a quarter of the third negatively.

gglikert(df, cutoff = 0)

gglikert(df, cutoff = 1)

gglikert(df, cutoff = 1.25)

gglikert(df, cutoff = 1.75)

gglikert(df, cutoff = 2)

gglikert(df, cutoff = NULL)

gglikert(df, cutoff = 4)

gglikert(df, cutoff = 5)

Symmetric x-axis

Simply specify symmetric = TRUE.

gglikert(df, cutoff = 1)

gglikert(df, cutoff = 1, symmetric = TRUE)

Removing certain values

Sometimes, the dataset could contain certain values that you should not be displayed.

gglikert(df_dk)

A first option could be to convert the don’t knows into NA. In such case, the proportions will be computed on non missing.

df_dk |>
  mutate(across(everything(), ~ factor(.x, levels = likert_levels))) |>
  gglikert()

Or, you could use exclude_fill_values to not display specific values, but still counting them in the denominator for computing proportions.

df_dk |> gglikert(exclude_fill_values = "Don't know")

Facets

To define facets, use facet_rows and/or facet_cols.

df_group <- df
df_group$group1 <- sample(c("A", "B"), 150, replace = TRUE)
df_group$group2 <- sample(c("a", "b", "c"), 150, replace = TRUE)

gglikert(df_group,
  q1:q6,
  facet_cols = vars(group1),
  labels_size = 3
)

gglikert(df_group,
  q1:q2,
  facet_rows = vars(group1, group2),
  labels_size = 3
)

gglikert(df_group,
  q3:q6,
  facet_cols = vars(group1),
  facet_rows = vars(group2),
  labels_size = 3
) +
  scale_x_continuous(
    labels = label_percent_abs(),
    expand = expansion(0, .2)
  )

To compare answers by subgroup, you can alternatively map .question to facets, and define a grouping variable for y.

gglikert(df_group,
  q1:q4,
  y = "group1",
  facet_rows = vars(.question),
  labels_size = 3,
  facet_label_wrap = 15
)

Stacked plot

For a more classical stacked bar plot, you can use gglikert_stacked().

gglikert_stacked(df)


gglikert_stacked(
  df,
  sort = "asc",
  add_median_line = TRUE,
  add_labels = FALSE
)


gglikert_stacked(
  df_group,
  include = q1:q4,
  y = "group2"
) +
  facet_grid(
    rows = vars(.question),
    labeller = label_wrap_gen(15)
  )

Long format dataset

Internally, gglikert() is calling gglikert_data() to generate a long format dataset combining all questions into two columns, .question and .answer.

gglikert_data(df) |>
  head()
#> # A tibble: 6 × 3
#>   .weights .question                                                   .answer  
#>      <dbl> <fct>                                                       <fct>    
#> 1        1 first question                                              Strongly…
#> 2        1 second question                                             Disagree 
#> 3        1 this is the third question with a quite long variable label Agree    
#> 4        1 q4                                                          Disagree 
#> 5        1 q5                                                          Strongly…
#> 6        1 q6                                                          Strongly…

Such dataset could be useful for other types of plot, for example for a classic stacked bar plot.

ggplot(gglikert_data(df)) +
  aes(y = .question, fill = .answer) +
  geom_bar(position = "fill")

Weighted data

gglikert(), gglikert_stacked() and gglikert_data() accepts a weights argument, allowing to specify statistical weights.

df$sampling_weights <- runif(nrow(df))
gglikert(df, q1:q4, weights = sampling_weights)

See also

The function position_likert() used to center bars.

ggstats/inst/doc/ggcoef_model.R0000644000176200001440000001635615122116361016216 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) ## ----include=FALSE------------------------------------------------------------ if ( !broom.helpers::.assert_package("emmeans", boolean = TRUE) ) { knitr::opts_chunk$set(eval = FALSE) } ## ----ggcoef-reg--------------------------------------------------------------- data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) ggcoef_model(mod_simple) ## ----ggcoef-titanic----------------------------------------------------------- d_titanic <- as.data.frame(Titanic) d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) mod_titanic <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial ) ggcoef_model(mod_titanic, exponentiate = TRUE) ## ----------------------------------------------------------------------------- library(labelled) tips_labelled <- tips |> set_variable_labels( day = "Day of the week", time = "Lunch or Dinner", total_bill = "Bill's total" ) mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) ggcoef_model(mod_labelled) ## ----------------------------------------------------------------------------- ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ) ) ## ----------------------------------------------------------------------------- ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ), facet_labeller = ggplot2::label_wrap_gen(10) ) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE) ## ----------------------------------------------------------------------------- ggcoef_model(mod_titanic, exponentiate = TRUE) ggcoef_model( mod_titanic, exponentiate = TRUE, show_p_values = FALSE, signif_stars = FALSE, add_reference_rows = FALSE, categorical_terms_pattern = "{level} (ref: {reference_level})", interaction_sep = " x " ) + ggplot2::scale_y_discrete(labels = scales::label_wrap(15)) ## ----------------------------------------------------------------------------- mod_titanic2 <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial, contrasts = list(Sex = contr.sum, Class = contr.treatment(4, base = 3)) ) ggcoef_model(mod_titanic2, exponentiate = TRUE) ## ----------------------------------------------------------------------------- mod_poly <- lm(Sepal.Length ~ poly(Petal.Width, 3) + Petal.Length, data = iris) ggcoef_model(mod_poly) ## ----------------------------------------------------------------------------- ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = "Sex" ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_dichotomous() ) ggcoef_model( mod_titanic2, exponentiate = TRUE, no_reference_row = broom.helpers::all_categorical(), categorical_terms_pattern = "{level}/{reference_level}" ) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, intercept = TRUE) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, conf.int = FALSE) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, significance = NULL) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, colour = NULL) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, include = c("time", "total_bill")) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, include = dplyr::starts_with("t")) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple, stripped_rows = FALSE) ## ----------------------------------------------------------------------------- ggcoef_model(mod_simple) + ggplot2::xlab("Coefficients") + ggplot2::ggtitle("Custom title") + ggplot2::scale_color_brewer(palette = "Set1") + ggplot2::theme(legend.position = "right") ## ----------------------------------------------------------------------------- ggcoef_table(mod_simple) ggcoef_table(mod_titanic, exponentiate = TRUE) ## ----------------------------------------------------------------------------- ggcoef_table( mod_simple, table_stat = c("label", "estimate", "std.error", "ci"), ci_pattern = "{conf.low} to {conf.high}", table_stat_label = list( estimate = scales::label_number(accuracy = .001), conf.low = scales::label_number(accuracy = .01), conf.high = scales::label_number(accuracy = .01), std.error = scales::label_number(accuracy = .001), label = toupper ), table_header = c("Term", "Coef.", "SE", "CI"), table_widths = c(2, 3) ) ## ----------------------------------------------------------------------------- library(nnet) hec <- as.data.frame(HairEyeColor) mod <- multinom( Hair ~ Eye + Sex, data = hec, weights = hec$Freq ) ## ----fig.height=9, fig.width=6------------------------------------------------ mod |> ggcoef_model(exponentiate = TRUE) mod |> ggcoef_table(exponentiate = TRUE) ## ----fig.height=4, fig.width=6------------------------------------------------ mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted(exponentiate = TRUE) ## ----------------------------------------------------------------------------- mod |> ggcoef_faceted( group_labels = c("Brown" = "Brown\n(ref: Black)"), exponentiate = TRUE ) ## ----------------------------------------------------------------------------- library(pscl) data("bioChemists", package = "pscl") mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) ## ----fig.height=9, fig.width=6------------------------------------------------ mod |> ggcoef_model() mod |> ggcoef_table() ## ----fig.height=4, fig.width=6------------------------------------------------ mod |> ggcoef_dodged(exponentiate = TRUE) mod |> ggcoef_faceted( exponentiate = TRUE, group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated") ) ## ----------------------------------------------------------------------------- mod1 <- lm(Fertility ~ ., data = swiss) mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) ggcoef_compare(models) ggcoef_compare(models, type = "faceted") ## ----fig.height=10, fig.width=8----------------------------------------------- ggcoef_compare(models, type = "table") ## ----echo=FALSE--------------------------------------------------------------- broom.helpers::supported_models |> knitr::kable() ggstats/inst/doc/stat_cross.Rmd0000644000176200001440000000601514357760262016316 0ustar liggesusers--- title: "Compute cross-tabulation statistics with `stat_cross()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute cross-tabulation statistics with `stat_cross()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` This statistic is intended to be used with two discrete variables mapped to **x** and **y** aesthetics. It will compute several statistics of a cross-tabulated table using `broom::tidy.test()` and `stats::chisq.test()`. More precisely, the computed variables are: - **observed**: number of observations in x,y - **prop**: proportion of total - **row.prop**: row proportion - **col.prop**: column proportion - **expected**: expected count under the null hypothesis - **resid**: Pearson's residual - **std.resid**: standardized residual - **row.observed**: total number of observations within row - **col.observed**: total number of observations within column - **total.observed**: total number of observations within the table - **phi**: phi coefficients, see `augment_chisq_add_phi()` By default, `stat_cross()` is using `ggplot2::geom_points()`. If you want to plot the number of observations, you need to map `after_stat(observed)` to an aesthetic (here **size**): ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + stat_cross() + scale_size_area(max_size = 20) ``` Note that the **weight** aesthetic is taken into account by `stat_cross()`. We can go further using a custom shape and filling points with standardized residual to identify visually cells who are over- or underrepresented. ```{r fig.height=6, fig.width=6} ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) ``` We can easily recreate a cross-tabulated table. ```{r} ggplot(d) + aes(x = Class, y = Survived, weight = Freq) + geom_tile(fill = "white", colour = "black") + geom_text(stat = "cross", mapping = aes(label = after_stat(observed))) + theme_minimal() ``` Even more complicated, we want to produce a table showing column proportions and where cells are filled with standardized residuals. Note that `stat_cross()` could be used with facets. In that case, computation is done separately in each facet. ```{r} ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = scales::percent(after_stat(col.prop), accuracy = .1), fill = after_stat(std.resid) ) + stat_cross(shape = 22, size = 30) + geom_text(stat = "cross") + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + facet_grid(rows = vars(Sex)) + labs(fill = "Standardized residuals") + theme_minimal() ``` ggstats/inst/doc/stat_prop.R0000644000176200001440000000650215122116410015602 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) library(ggplot2) ## ----------------------------------------------------------------------------- d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) p ## ----------------------------------------------------------------------------- p + facet_grid(cols = vars(Sex)) ## ----------------------------------------------------------------------------- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = 1) + geom_bar() + geom_text( aes(label = scales::percent(after_stat(prop), accuracy = 1)), stat = "prop", position = position_stack(.5) ) ## ----------------------------------------------------------------------------- ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_bar(position = "dodge") ## ----------------------------------------------------------------------------- ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) ## ----------------------------------------------------------------------------- ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) + geom_text( mapping = aes( label = scales::percent(after_stat(prop), accuracy = .1), y = after_stat(0.01) ), vjust = "bottom", position = position_dodge(.9), stat = "prop" ) ## ----------------------------------------------------------------------------- d <- diamonds |> dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |> dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |> dplyr::filter(!(cut == "Premium" & clarity == "IF")) p <- ggplot(d) + aes(x = clarity, fill = cut, by = clarity) + geom_bar(position = "fill") p + geom_text( stat = "prop", position = position_fill(.5) ) ## ----------------------------------------------------------------------------- p + geom_text( stat = "prop", position = position_fill(.5), complete = "fill" ) ## ----------------------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_prop_bar() + geom_prop_text() ## ----------------------------------------------------------------------------- d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_prop_bar(position = "dodge") + geom_prop_text( position = position_dodge(width = .9), vjust = - 0.5 ) + scale_y_continuous(labels = scales::percent) ## ----------------------------------------------------------------------------- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_prop_bar(height = "count") + geom_prop_text( height = "count", labels = "count", labeller = scales::number ) ggstats/inst/doc/stat_prop.Rmd0000644000176200001440000001242214674033502016134 0ustar liggesusers--- title: "Compute custom proportions with `stat_prop()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Compute custom proportions with `stat_prop()`} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(ggplot2) ``` `stat_prop()` is a variation of `ggplot2::stat_count()` allowing to compute custom proportions according to the **by** aesthetic defining the denominator (i.e. all proportions for a same value of **by** will sum to 1). The **by** aesthetic should be a factor. Therefore, `stat_prop()` requires the **by** aesthetic and this **by** aesthetic should be a factor. ## Adding labels on a percent stacked bar plot When using `position = "fill"` with `geom_bar()`, you can produce a percent stacked bar plot. However, the proportions corresponding to the **y** axis are not directly accessible using only `ggplot2`. With `stat_prop()`, you can easily add them on the plot. In the following example, we indicated `stat = "prop"` to `ggplot2::geom_text()` to use `stat_prop()`, we defined the **by** aesthetic (here we want to compute the proportions separately for each value of **x**), and we also used `ggplot2::position_fill()` when calling `ggplot2::geom_text()`. ```{r} d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) p ``` Note that `stat_prop()` has properly taken into account the **weight** aesthetic. `stat_prop()` is also compatible with faceting. In that case, proportions are computed separately in each facet. ```{r} p + facet_grid(cols = vars(Sex)) ``` ## Displaying proportions of the total If you want to display proportions of the total, simply map the **by** aesthetic to `1`. Here an example using a stacked bar chart. ```{r} ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = 1) + geom_bar() + geom_text( aes(label = scales::percent(after_stat(prop), accuracy = 1)), stat = "prop", position = position_stack(.5) ) ``` ## A dodged bar plot to compare two distributions A dodged bar plot could be used to compare two distributions. ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_bar(position = "dodge") ``` On the previous graph, it is difficult to see if first class is over- or under-represented among women, due to the fact they were much more men on the boat. `stat_prop()` could be used to adjust the graph by displaying instead the proportion within each category (i.e. here the proportion by sex). ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) ``` The same example with labels: ```{r} ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex, y = after_stat(prop)) + geom_bar(stat = "prop", position = "dodge") + scale_y_continuous(labels = scales::percent) + geom_text( mapping = aes( label = scales::percent(after_stat(prop), accuracy = .1), y = after_stat(0.01) ), vjust = "bottom", position = position_dodge(.9), stat = "prop" ) ``` ## Displaying unobserved levels With the `complete` argument, it is possible to indicate an aesthetic for those statistics should be completed for unobserved values. ```{r} d <- diamonds |> dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |> dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |> dplyr::filter(!(cut == "Premium" & clarity == "IF")) p <- ggplot(d) + aes(x = clarity, fill = cut, by = clarity) + geom_bar(position = "fill") p + geom_text( stat = "prop", position = position_fill(.5) ) ``` Adding `complete = "fill"` will generate "0.0%" labels where relevant. ```{r} p + geom_text( stat = "prop", position = position_fill(.5), complete = "fill" ) ``` ## Using `geom_prop_bar()` and `geom_prop_text()` The dedicated geometries `geom_prop_bar()` and `geom_prop_text()` could be used for quick and easy proportional bar plots. They use by default `stat_prop()` with relevant default values. For example, proportions are computed by **x** or **y** if the `by` aesthetic is not specified. It allows to generate a quick proportional bar plot. ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_prop_bar() + geom_prop_text() ``` You can specify a `by` aesthetic. For example, to reproduce the comparison of the two distributions presented earlier. ```{r} d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, fill = Sex, weight = Freq, by = Sex) + geom_prop_bar(position = "dodge") + geom_prop_text( position = position_dodge(width = .9), vjust = - 0.5 ) + scale_y_continuous(labels = scales::percent) ``` You can also display counts instead of proportions. ```{r} ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_prop_bar(height = "count") + geom_prop_text( height = "count", labels = "count", labeller = scales::number ) ``` ggstats/inst/doc/stat_weighted_mean.html0000644000176200001440000012740015122116413020171 0ustar liggesusers Compute weighted mean with stat_weighted_mean()

Compute weighted mean with stat_weighted_mean()

library(ggstats)
library(ggplot2)

stat_weighted_mean() computes mean value of y (taking into account any weight aesthetic if provided) for each value of x. More precisely, it will return a new data frame with one line per unique value of x with the following new variables:

Let’s take an example. The following plot shows all tips received according to the day of the week.

data(tips, package = "reshape")
ggplot(tips) +
  aes(x = day, y = tip) +
  geom_point()

To plot their mean value per day, simply use stat_weighted_mean().

ggplot(tips) +
  aes(x = day, y = tip) +
  stat_weighted_mean()

We can specify the geometry we want using geom argument. Note that for lines, we need to specify the group aesthetic as well.

ggplot(tips) +
  aes(x = day, y = tip, group = 1) +
  stat_weighted_mean(geom = "line")

An alternative is to specify the statistic in ggplot2::geom_line().

ggplot(tips) +
  aes(x = day, y = tip, group = 1) +
  geom_line(stat = "weighted_mean")

Of course, it could be use with other geometries. Here a bar plot.

p <- ggplot(tips) +
  aes(x = day, y = tip, fill = sex) +
  stat_weighted_mean(geom = "bar", position = "dodge") +
  ylab("mean tip")
p

It is very easy to add facets. In that case, computation will be done separately for each facet.

p + facet_grid(rows = vars(smoker))

stat_weighted_mean() could be also used for computing proportions as a proportion is technically a mean of binary values (0 or 1).

ggplot(tips) +
  aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) +
  stat_weighted_mean(geom = "bar", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  ylab("proportion of smoker")

Finally, you can use the weight aesthetic to indicate weights to take into account for computing means / proportions.

d <- as.data.frame(Titanic)
ggplot(d) +
  aes(x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex) +
  geom_bar(stat = "weighted_mean", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  labs(y = "Proportion who survived")

ggstats/inst/doc/geom_diverging.Rmd0000644000176200001440000001272414755045350017117 0ustar liggesusers--- title: "Geometries for diverging bar plots" author: Joseph Larmarange output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Geometries for diverging bar plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(ggstats) library(dplyr) library(ggplot2) library(patchwork) ``` *Note :* if you are looking for an all-in-one function to display Likert-type items, please refer to `gglikert()` and `vignette("gglikert")`. ## New positions Diverging bar plots could be achieved using `position_diverging()` or `position_likert()`. `position_diverging()` stacks bars on top of each other and centers them around zero (the same number of categories are displayed on each side). ```{r} base <- ggplot(diamonds) + aes(y = clarity, fill = cut) + theme(legend.position = "none") p_stack <- base + geom_bar(position = "stack") + ggtitle("position_stack()") p_diverging <- base + geom_bar(position = "diverging") + ggtitle("position_diverging()") p_stack + p_diverging ``` `position_likert()` is similar but uses proportions instead of counts. ```{r} p_fill <- base + geom_bar(position = "fill") + ggtitle("position_fill()") p_likert <- base + geom_bar(position = "likert") + ggtitle("position_likert()") p_fill + p_likert ``` By default, the same number of categories is displayed on each side, i.e. if you have 4 categories, 2 will be displayed negatively and 2 positively. If you have an odd number of categories, half of the central category will be displayed negatively and half positively. The center could be changed with the `cutoff` argument, representing the number of categories to be displayed negatively: `2` to display negatively the two first categories, `2.5` to display negatively the two first categories and half of the third, `2.2` to display negatively the two first categories and a fifth of the third. ```{r} p_1 <- base + geom_bar(position = position_diverging(cutoff = 1)) + ggtitle("cutoff = 1") p_2 <- base + geom_bar(position = position_diverging(cutoff = 2)) + ggtitle("cutoff = 2") p_null <- base + geom_bar(position = position_diverging(cutoff = NULL)) + ggtitle("cutoff = NULL") p_3.75 <- base + geom_bar(position = position_diverging(cutoff = 3.75)) + ggtitle("cutoff = 3.75") p_5 <- base + geom_bar(position = position_diverging(cutoff = 5)) + ggtitle("cutoff = 5") wrap_plots(p_1, p_2, p_null, p_3.75, p_5) ``` ## New scales For a diverging bar plot, it is recommended to use a diverging palette, as provided in the Brewer palettes. Sometimes, the number of available colors is insufficient in the palette. In that case, you could use `pal_extender()` or `scale_fill_extended()`. However, if you use a custom `cutoff`, it is also important to change the center of the palette as well. Therefore, for diverging bar plots, we recommend to use `scale_fill_likert()`. ```{r} wrap_plots( p_1 + scale_fill_likert(cutoff = 1), p_null + scale_fill_likert(), p_3.75 + scale_fill_likert(cutoff = 3.75) ) ``` ## Improving axes You may also want have centered axes. That could be easily achieved with `symmetric_limits()`. You could also use `label_number_abs()` or `label_percent_abs()` to display absolute numbers. ```{r} wrap_plots( p_3.75, p_3.75 + scale_x_continuous( limits = symmetric_limits, labels = label_number_abs() ) ) ``` ## New geometries To facilitate the creation of diverging bar plots, you could use variants of `geom_bar()` and `geom_text()`. ### geom_diverging() & geom_diverging_text() Let's consider the following plot: ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "diverging") + geom_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ), stat = "count", position = position_diverging(.5) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ``` The same could be achieved quicker with `geom_diverging()` and `geom_diverging_text()`. ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_diverging() + geom_diverging_text( aes( label = label_number_abs(hide_below = 800) (after_stat(count)) ) ) + scale_fill_likert() + scale_x_continuous( labels = label_number_abs(), limits = symmetric_limits ) ``` ### geom_likert() & geom_likert_text() `geom_likert()` and `geom_likert_text()` works similarly. `geom_likert_text()` takes advantages of `stat_prop()` for computing the proportions to be displayed (see `vignette("stat_prop")`). ```{r} ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text() + scale_fill_likert() + scale_x_continuous( labels = label_percent_abs() ) ``` ### geom_pyramid() & geom_pyramid_text() Finally, `geom_pyramid()` and `geom_pyramid_text()` are variations adapted to display an age-sex pyramid. It uses proportions of the total. ```{r} d <- Titanic |> as.data.frame() ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_pyramid() + geom_pyramid_text() + scale_x_continuous( labels = label_percent_abs(), limits = symmetric_limits ) ``` ggstats/inst/doc/gglikert.R0000644000176200001440000001462715122116402015407 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ggstats) library(dplyr) library(ggplot2) ## ----------------------------------------------------------------------------- likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) likert_levels_dk <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree", "Don't know" ) df_dk <- tibble( q1 = sample(likert_levels_dk, 150, replace = TRUE), q2 = sample(likert_levels_dk, 150, replace = TRUE, prob = 6:1), q3 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q4 = sample(likert_levels_dk, 150, replace = TRUE, prob = 1:6), q5 = sample(c(likert_levels_dk, NA), 150, replace = TRUE), q6 = sample( likert_levels_dk, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0, 1) ) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels_dk))) ## ----------------------------------------------------------------------------- gglikert(df) ## ----------------------------------------------------------------------------- gglikert(df, include = q1:q3) ## ----------------------------------------------------------------------------- gglikert(df) + ggtitle("A Likert-type items plot", subtitle = "generated with gglikert()") + scale_fill_brewer(palette = "RdYlBu") ## ----------------------------------------------------------------------------- gglikert(df, sort = "ascending") ## ----------------------------------------------------------------------------- gglikert(df, sort = "ascending", sort_method = "mean") ## ----------------------------------------------------------------------------- gglikert(df, reverse_likert = TRUE) ## ----------------------------------------------------------------------------- gglikert(df, add_labels = FALSE) ## ----------------------------------------------------------------------------- gglikert( df, labels_size = 3, labels_accuracy = .1, labels_hide_below = .2, labels_color = "white" ) ## ----------------------------------------------------------------------------- gglikert( df, totals_include_center = TRUE, sort = "descending", sort_prop_include_center = TRUE ) ## ----------------------------------------------------------------------------- gglikert( df, totals_size = 4, totals_color = "blue", totals_fontface = "italic", totals_hjust = .20 ) ## ----------------------------------------------------------------------------- gglikert(df, add_totals = FALSE) ## ----------------------------------------------------------------------------- if (require(labelled)) { df <- df |> set_variable_labels( q1 = "first question", q2 = "second question", q3 = "this is the third question with a quite long variable label" ) } gglikert(df) ## ----------------------------------------------------------------------------- gglikert( df, variable_labels = c( q1 = "alternative label for the first question", q6 = "another custom label" ) ) ## ----------------------------------------------------------------------------- gglikert(df, y_label_wrap = 20) gglikert(df, y_label_wrap = 200) ## ----------------------------------------------------------------------------- gglikert(df, cutoff = 0) gglikert(df, cutoff = 1) gglikert(df, cutoff = 1.25) gglikert(df, cutoff = 1.75) gglikert(df, cutoff = 2) gglikert(df, cutoff = NULL) gglikert(df, cutoff = 4) gglikert(df, cutoff = 5) ## ----------------------------------------------------------------------------- gglikert(df, cutoff = 1) gglikert(df, cutoff = 1, symmetric = TRUE) ## ----------------------------------------------------------------------------- gglikert(df_dk) ## ----------------------------------------------------------------------------- df_dk |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) |> gglikert() ## ----------------------------------------------------------------------------- df_dk |> gglikert(exclude_fill_values = "Don't know") ## ----message=FALSE------------------------------------------------------------ df_group <- df df_group$group1 <- sample(c("A", "B"), 150, replace = TRUE) df_group$group2 <- sample(c("a", "b", "c"), 150, replace = TRUE) gglikert(df_group, q1:q6, facet_cols = vars(group1), labels_size = 3 ) gglikert(df_group, q1:q2, facet_rows = vars(group1, group2), labels_size = 3 ) gglikert(df_group, q3:q6, facet_cols = vars(group1), facet_rows = vars(group2), labels_size = 3 ) + scale_x_continuous( labels = label_percent_abs(), expand = expansion(0, .2) ) ## ----------------------------------------------------------------------------- gglikert(df_group, q1:q4, y = "group1", facet_rows = vars(.question), labels_size = 3, facet_label_wrap = 15 ) ## ----------------------------------------------------------------------------- gglikert_stacked(df) gglikert_stacked( df, sort = "asc", add_median_line = TRUE, add_labels = FALSE ) gglikert_stacked( df_group, include = q1:q4, y = "group2" ) + facet_grid( rows = vars(.question), labeller = label_wrap_gen(15) ) ## ----------------------------------------------------------------------------- gglikert_data(df) |> head() ## ----------------------------------------------------------------------------- ggplot(gglikert_data(df)) + aes(y = .question, fill = .answer) + geom_bar(position = "fill") ## ----------------------------------------------------------------------------- df$sampling_weights <- runif(nrow(df)) gglikert(df, q1:q4, weights = sampling_weights) ggstats/inst/doc/ggcoef_model.html0000644000176200001440000121450215122116361016753 0ustar liggesusers Plot model coefficients with ggcoef_model()

Plot model coefficients with ggcoef_model()

Joseph Larmarange

library(ggstats)

The purpose of ggcoef_model() is to quickly plot the coefficients of a model. It is an updated and improved version of GGally::ggcoef() based on broom.helpers::tidy_plus_plus(). For displaying a nicely formatted table of the same models, look at gtsummary::tbl_regression().

Quick coefficients plot

To work automatically, this function requires the {broom.helpers}. Simply call ggcoef_model() with a model object. It could be the result of stats::lm, stats::glm or any other model covered by {broom.helpers}.

data(tips, package = "reshape")
mod_simple <- lm(tip ~ day + time + total_bill, data = tips)
ggcoef_model(mod_simple)

In the case of a logistic regression (or any other model for which coefficients are usually exponentiated), simply indicated exponentiate = TRUE. Note that a logarithmic scale will be used for the x-axis.

d_titanic <- as.data.frame(Titanic)
d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes"))
mod_titanic <- glm(
  Survived ~ Sex * Age + Class,
  weights = Freq,
  data = d_titanic,
  family = binomial
)
ggcoef_model(mod_titanic, exponentiate = TRUE)

Customizing the plot

Variable labels

You can use the {labelled} package to define variable labels. They will be automatically used by ggcoef_model(). Note that variable labels should be defined before computing the model.

library(labelled)
tips_labelled <- tips |>
  set_variable_labels(
    day = "Day of the week",
    time = "Lunch or Dinner",
    total_bill = "Bill's total"
  )
mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled)
ggcoef_model(mod_labelled)

You can also define custom variable labels directly by passing a named vector to the variable_labels option.

ggcoef_model(
  mod_simple,
  variable_labels = c(
    day = "Week day",
    time = "Time (lunch or dinner ?)",
    total_bill = "Total of the bill"
  )
)

If variable labels are to long, you can pass ggplot2::label_wrap_gen() or any other labeller function to facet_labeller.

ggcoef_model(
  mod_simple,
  variable_labels = c(
    day = "Week day",
    time = "Time (lunch or dinner ?)",
    total_bill = "Total of the bill"
  ),
  facet_labeller = ggplot2::label_wrap_gen(10)
)

Use facet_row = NULL to hide variable names.

ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE)

Term labels

Several options allows you to customize term labels.

ggcoef_model(mod_titanic, exponentiate = TRUE)

ggcoef_model(
  mod_titanic,
  exponentiate = TRUE,
  show_p_values = FALSE,
  signif_stars = FALSE,
  add_reference_rows = FALSE,
  categorical_terms_pattern = "{level} (ref: {reference_level})",
  interaction_sep = " x "
) +
  ggplot2::scale_y_discrete(labels = scales::label_wrap(15))
#> Scale for y is already present.
#> Adding another scale for y, which will replace the existing scale.

By default, for categorical variables using treatment and sum contrasts, reference rows will be added and displayed on the graph.

mod_titanic2 <- glm(
  Survived ~ Sex * Age + Class,
  weights = Freq,
  data = d_titanic,
  family = binomial,
  contrasts = list(Sex = contr.sum, Class = contr.treatment(4, base = 3))
)
ggcoef_model(mod_titanic2, exponentiate = TRUE)

Continuous variables with polynomial terms defined with stats::poly() are also properly managed.

mod_poly <- lm(Sepal.Length ~ poly(Petal.Width, 3) + Petal.Length, data = iris)
ggcoef_model(mod_poly)

Use no_reference_row to indicate which variables should not have a reference row added.

ggcoef_model(
  mod_titanic2,
  exponentiate = TRUE,
  no_reference_row = "Sex"
)

ggcoef_model(
  mod_titanic2,
  exponentiate = TRUE,
  no_reference_row = broom.helpers::all_dichotomous()
)

ggcoef_model(
  mod_titanic2,
  exponentiate = TRUE,
  no_reference_row = broom.helpers::all_categorical(),
  categorical_terms_pattern = "{level}/{reference_level}"
)

Elements to display

Use intercept = TRUE to display intercepts.

ggcoef_model(mod_simple, intercept = TRUE)

You can remove confidence intervals with conf.int = FALSE.

ggcoef_model(mod_simple, conf.int = FALSE)

By default, significant terms (i.e. with a p-value below 5%) are highlighted using two types of dots. You can control the level of significance with significance or remove it with significance = NULL.

ggcoef_model(mod_simple, significance = NULL)

By default, dots are colored by variable. You can deactivate this behavior with colour = NULL.

ggcoef_model(mod_simple, colour = NULL)

You can display only a subset of terms with include.

ggcoef_model(mod_simple, include = c("time", "total_bill"))

It is possible to use tidyselect helpers.

ggcoef_model(mod_simple, include = dplyr::starts_with("t"))

You can remove stripped rows with stripped_rows = FALSE.

ggcoef_model(mod_simple, stripped_rows = FALSE)

Do not hesitate to consult the help file of ggcoef_model() to see all available options.

ggplot2 elements

The plot returned by ggcoef_model() is a classic ggplot2 plot. You can therefore apply ggplot2 functions to it.

ggcoef_model(mod_simple) +
  ggplot2::xlab("Coefficients") +
  ggplot2::ggtitle("Custom title") +
  ggplot2::scale_color_brewer(palette = "Set1") +
  ggplot2::theme(legend.position = "right")
#> Scale for colour is already present.
#> Adding another scale for colour, which will replace the existing scale.

Forest plot with a coefficient table

ggcoef_table() is a variant of ggcoef_model() displaying a coefficient table on the right of the forest plot.

ggcoef_table(mod_simple)

ggcoef_table(mod_titanic, exponentiate = TRUE)

You can easily customize the columns to be displayed.

ggcoef_table(
  mod_simple,
  table_stat = c("label", "estimate", "std.error", "ci"),
  ci_pattern = "{conf.low} to {conf.high}",
  table_stat_label = list(
    estimate = scales::label_number(accuracy = .001),
    conf.low = scales::label_number(accuracy = .01),
    conf.high = scales::label_number(accuracy = .01),
    std.error = scales::label_number(accuracy = .001),
    label = toupper
  ),
  table_header = c("Term", "Coef.", "SE", "CI"),
  table_widths = c(2, 3)
)

Multinomial models

For multinomial models, simply use ggcoef_model() or ggcoef_table(). Additional visualizations are available using ggcoef_dodged() or ggcoef_faceted().

library(nnet)
hec <- as.data.frame(HairEyeColor)
mod <- multinom(
  Hair ~ Eye + Sex,
  data = hec,
  weights = hec$Freq
)
#> # weights:  24 (15 variable)
#> initial  value 820.686262 
#> iter  10 value 669.061500
#> iter  20 value 658.888977
#> final  value 658.885327 
#> converged
mod |> ggcoef_model(exponentiate = TRUE)

mod |> ggcoef_table(exponentiate = TRUE)

mod |> ggcoef_dodged(exponentiate = TRUE)

mod |> ggcoef_faceted(exponentiate = TRUE)

You can use group_labels to customize the label of each level.

mod |>
  ggcoef_faceted(
    group_labels = c("Brown" = "Brown\n(ref: Black)"),
    exponentiate = TRUE
  )

Multi-components models

Multi-components models such as zero-inflated Poisson or beta regression generate a set of terms for each of their components. simply use ggcoef_model() or ggcoef_table(). Additional visualizations are available using ggcoef_dodged() or ggcoef_faceted().

library(pscl)
#> Classes and Methods for R originally developed in the
#> Political Science Computational Laboratory
#> Department of Political Science
#> Stanford University (2002-2015),
#> by and under the direction of Simon Jackman.
#> hurdle and zeroinfl functions by Achim Zeileis.
data("bioChemists", package = "pscl")
mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists)
mod |> ggcoef_model()
#> ℹ <zeroinfl> model detected.
#> ✔ `tidy_zeroinfl()` used instead.
#> ℹ Add `tidy_fun = broom.helpers::tidy_zeroinfl` to quiet these messages.

mod |> ggcoef_table()
#> ℹ <zeroinfl> model detected.
#> ✔ `tidy_zeroinfl()` used instead.
#> ℹ Add `tidy_fun = broom.helpers::tidy_zeroinfl` to quiet these messages.

mod |> ggcoef_dodged(exponentiate = TRUE)
#> ℹ <zeroinfl> model detected.
#> ✔ `tidy_zeroinfl()` used instead.
#> ℹ Add `tidy_fun = broom.helpers::tidy_zeroinfl` to quiet these messages.

mod |> ggcoef_faceted(
  exponentiate = TRUE,
  group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated")
)
#> ℹ <zeroinfl> model detected.
#> ✔ `tidy_zeroinfl()` used instead.
#> ℹ Add `tidy_fun = broom.helpers::tidy_zeroinfl` to quiet these messages.

Comparing several models

You can easily compare several models with ggcoef_compare(). To be noted, ggcoef_compare() is not compatible with multinomial or multi-components models.

mod1 <- lm(Fertility ~ ., data = swiss)
mod2 <- step(mod1, trace = 0)
mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss)
models <- list(
  "Full model" = mod1,
  "Simplified model" = mod2,
  "With interaction" = mod3
)

ggcoef_compare(models)

ggcoef_compare(models, type = "faceted")

ggcoef_compare(models, type = "table")

Advanced users

Advanced users could use their own dataset and pass it to ggcoef_plot(). Such dataset could be produced by ggcoef_model(), ggcoef_dodged(), ggcoef_faceted() or ggcoef_compare() with the option return_data = TRUE or by using broom::tidy() or broom.helpers::tidy_plus_plus().

Supported models

model notes
betareg::betareg() Use tidy_parameters() as tidy_fun with component argument to control with coefficients to return. broom::tidy() does not support the exponentiate argument for betareg models, use tidy_parameters() instead.
biglm::bigglm()
brms::brm() broom.mixed package required
cmprsk::crr() Limited support. It is recommended to use tidycmprsk::crr() instead.
fixest::feglm() May fail with R <= 4.0.
fixest::femlm() May fail with R <= 4.0.
fixest::feNmlm() May fail with R <= 4.0.
fixest::feols() May fail with R <= 4.0.
gam::gam()
geepack::geeglm()
glmmTMB::glmmTMB() broom.mixed package required
glmtoolbox::glmgee()
lavaan::lavaan() Limited support for categorical variables
lfe::felm()
lme4::glmer.nb() broom.mixed package required
lme4::glmer() broom.mixed package required
lme4::lmer() broom.mixed package required
logitr::logitr() Requires logitr >= 0.8.0
MASS::glm.nb()
MASS::polr()
mgcv::gam() Use default tidier broom::tidy() for smooth terms only, or gtsummary::tidy_gam() to include parametric terms
mice::mira Limited support. If mod is a mira object, use tidy_fun = function(x, ...) {mice::pool(x) &#124;> mice::tidy(...)}
mmrm::mmrm()
multgee::nomLORgee() Use tidy_multgee() as tidy_fun.
multgee::ordLORgee() Use tidy_multgee() as tidy_fun.
nnet::multinom()
ordinal::clm() Limited support for models with nominal predictors.
ordinal::clmm() Limited support for models with nominal predictors.
parsnip::model_fit Supported as long as the type of model and the engine is supported.
plm::plm()
pscl::hurdle() Use tidy_zeroinfl() as tidy_fun.
pscl::zeroinfl() Use tidy_zeroinfl() as tidy_fun.
quantreg::rq() If several quantiles are estimated, use tidy_with_broom_or_parameters() tidier, the default tidier used by tidy_plus_plus().
rstanarm::stan_glm() broom.mixed package required
stats::aov() Reference rows are not relevant for such models.
stats::glm()
stats::lm()
stats::nls() Limited support
survey::svycoxph()
survey::svyglm()
survey::svyolr()
survival::cch() Experimental support.
survival::clogit()
survival::coxph()
survival::survreg()
svyVGAM::svy_vglm() Experimental support. It is recommended to use tidy_svy_vglm() as tidy_fun.
tidycmprsk::crr()
VGAM::vgam() Experimental support. It is recommended to use tidy_vgam() as tidy_fun.
VGAM::vglm() Experimental support. It is recommended to use tidy_vgam() as tidy_fun.

Note: this list of models has been tested. {broom.helpers}, and therefore ggcoef_model(), may or may not work properly or partially with other types of models.

ggstats/inst/WORDLIST0000644000176200001440000000032714762623766014122 0ustar liggesusersBaddeley CMD Codecov Colour DOI GGally Lifecycle Likert ORCID POSIXct behaviour colour colours dev geom's geoms ggplot ggproto labeller labelling likert resid th tibble ungrouping unmapped ggstats/README.md0000644000176200001440000001210515122045524013204 0ustar liggesusers # `ggstats`: extension to `ggplot2` for plotting stats [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![R-CMD-check](https://github.com/larmarange/ggstats/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/larmarange/ggstats/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/larmarange/ggstats/graph/badge.svg)](https://app.codecov.io/gh/larmarange/ggstats) [![CRAN status](https://www.r-pkg.org/badges/version/ggstats)](https://CRAN.R-project.org/package=ggstats) [![DOI](https://zenodo.org/badge/547360047.svg)](https://zenodo.org/badge/latestdoi/547360047) The `ggstats` package provides new statistics, new geometries and new positions for `ggplot2` and a suite of functions to facilitate the creation of statistical plots. ## Installation & Documentation To install **stable version**: ``` r install.packages("ggstats") ``` Documentation of stable version: To install **development version**: ``` r remotes::install_github("larmarange/ggstats") ``` Documentation of development version: ## Plot model coefficients ``` r library(ggstats) mod1 <- lm(Fertility ~ ., data = swiss) ggcoef_model(mod1) ``` ``` r ggcoef_table(mod1) ``` ## Comparing several models ``` r mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) ggcoef_compare(models, type = "faceted") ``` ## Compute custom proportions ``` r library(ggplot2) ggplot(as.data.frame(Titanic)) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) + facet_grid(~Sex) ``` ## Compute weighted mean ``` r data(tips, package = "reshape") ggplot(tips) + aes(x = day, y = total_bill, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + ylab("Mean total bill per day and sex") ``` ## Compute cross-tabulation statistics ``` r ggplot(as.data.frame(Titanic)) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) ``` ## Plot survey objects taking into account weights ``` r library(survey, quietly = TRUE) #> #> Attachement du package : 'survey' #> L'objet suivant est masqué depuis 'package:graphics': #> #> dotchart dw <- svydesign( ids = ~1, weights = ~Freq, data = as.data.frame(Titanic) ) ggsurvey(dw) + aes(x = Class, fill = Survived) + geom_bar(position = "fill") + ylab("Weighted proportion of survivors") ``` ## Plot Likert-type items ``` r library(dplyr) #> #> Attachement du package : 'dplyr' #> Les objets suivants sont masqués depuis 'package:stats': #> #> filter, lag #> Les objets suivants sont masqués depuis 'package:base': #> #> intersect, setdiff, setequal, union likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) gglikert(df) ``` ## Connect bars ``` r ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5) + geom_bar_connector(width = .5, linewidth = .25) + theme_minimal() + theme(legend.position = "bottom") ``` ## Generate a cascade plot ``` r diamonds |> ggcascade( all = TRUE, big = carat > .5, "big & ideal" = carat > .5 & cut == "Ideal" ) ``` ggstats/build/0000755000176200001440000000000015122116413013021 5ustar liggesusersggstats/build/vignette.rds0000644000176200001440000000066215122116413015364 0ustar liggesusersS;O0iyP(j1JL*`@5%Ⱦuw3Pc7NClw= avs- zz?JAiA,OOۊ*ȁ"'R tNggstats/man/0000755000176200001440000000000015122045054012477 5ustar liggesusersggstats/man/ggcoef_model.Rd0000644000176200001440000004432615062213141015406 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggcoef_model.R \name{ggcoef_model} \alias{ggcoef_model} \alias{ggcoef_table} \alias{ggcoef_dodged} \alias{ggcoef_faceted} \alias{ggcoef_compare} \alias{ggcoef_plot} \title{Plot model coefficients} \usage{ ggcoef_model( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, show_p_values = TRUE, signif_stars = TRUE, return_data = FALSE, ... ) ggcoef_table( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, show_p_values = FALSE, signif_stars = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_widths = c(3, 2), table_witdhs = deprecated(), ... ) ggcoef_dodged( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, ... ) ggcoef_faceted( model, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), group_by = broom.helpers::auto_group_by(), group_labels = NULL, significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, ... ) ggcoef_compare( models, type = c("dodged", "faceted", "table"), tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), add_pairwise_contrasts = FALSE, pairwise_variables = broom.helpers::all_categorical(), keep_model_terms = FALSE, pairwise_reverse = TRUE, emmeans_args = list(), significance = 1 - conf.level, significance_labels = NULL, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_widths = c(3, 2), return_data = FALSE, ... ) ggcoef_plot( data, x = "estimate", y = "label", exponentiate = FALSE, y_labeller = NULL, point_size = 2, point_stroke = 2, point_fill = "white", colour = NULL, colour_guide = TRUE, colour_lab = "", colour_labels = ggplot2::waiver(), shape = "significance", shape_values = c(16, 21), shape_guide = TRUE, shape_lab = "", errorbar = TRUE, errorbar_height = 0.1, errorbar_coloured = FALSE, stripped_rows = TRUE, strips_odd = "#11111111", strips_even = "#00000000", vline = TRUE, vline_colour = "grey50", dodged = FALSE, dodged_width = 0.8, facet_row = "var_label", facet_col = NULL, facet_labeller = "label_value", plot_title = NULL, x_limits = NULL ) } \arguments{ \item{model}{a regression model object} \item{tidy_fun}{(\code{function})\cr Option to specify a custom tidier function.} \item{tidy_args}{Additional arguments passed to \code{\link[broom.helpers:tidy_plus_plus]{broom.helpers::tidy_plus_plus()}} and to \code{tidy_fun}} \item{conf.int}{(\code{logical})\cr Should confidence intervals be computed? (see \code{\link[broom:reexports]{broom::tidy()}})} \item{conf.level}{the confidence level to use for the confidence interval if \code{conf.int = TRUE}; must be strictly greater than 0 and less than 1; defaults to 0.95, which corresponds to a 95 percent confidence interval} \item{exponentiate}{if \code{TRUE} a logarithmic scale will be used for x-axis} \item{variable_labels}{(\code{\link[gtsummary:syntax]{formula-list-selector}})\cr A named list or a named vector of custom variable labels.} \item{term_labels}{(\code{list} or \code{vector})\cr A named list or a named vector of custom term labels.} \item{interaction_sep}{(\code{string})\cr Separator for interaction terms.} \item{categorical_terms_pattern}{(\code{\link[glue:glue]{glue pattern}})\cr A \link[glue:glue]{glue pattern} for labels of categorical terms with treatment or sum contrasts (see \code{\link[broom.helpers:model_list_terms_levels]{model_list_terms_levels()}}).} \item{add_reference_rows}{(\code{logical})\cr Should reference rows be added?} \item{no_reference_row}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr Variables for those no reference row should be added, when \code{add_reference_rows = TRUE}.} \item{intercept}{(\code{logical})\cr Should the intercept(s) be included?} \item{include}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr Variables to include. Default is \code{everything()}. See also \code{\link[broom.helpers:all_continuous]{all_continuous()}}, \code{\link[broom.helpers:all_categorical]{all_categorical()}}, \code{\link[broom.helpers:all_dichotomous]{all_dichotomous()}} and \code{\link[broom.helpers:all_interaction]{all_interaction()}}.} \item{group_by}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr One or several variables to group by. Default is \code{auto_group_by()}. Use \code{NULL} to force ungrouping.} \item{group_labels}{(\code{string})\cr An optional named vector of custom term labels.} \item{add_pairwise_contrasts}{(\code{logical})\cr Apply \code{\link[broom.helpers:tidy_add_pairwise_contrasts]{tidy_add_pairwise_contrasts()}}?} \item{pairwise_variables}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr Variables to add pairwise contrasts.} \item{keep_model_terms}{(\code{logical})\cr Keep original model terms for variables where pairwise contrasts are added? (default is \code{FALSE})} \item{pairwise_reverse}{(\code{logical})\cr Determines whether to use \code{"pairwise"} (if \code{TRUE}) or \code{"revpairwise"} (if \code{FALSE}), see \code{\link[emmeans:contrast]{emmeans::contrast()}}.} \item{emmeans_args}{(\code{list})\cr List of additional parameter to pass to \code{\link[emmeans:emmeans]{emmeans::emmeans()}} when computing pairwise contrasts.} \item{significance}{level (between 0 and 1) below which a coefficient is consider to be significantly different from 0 (or 1 if \code{exponentiate = TRUE}), \code{NULL} for not highlighting such coefficients} \item{significance_labels}{optional vector with custom labels for significance variable} \item{show_p_values}{if \code{TRUE}, add p-value to labels} \item{signif_stars}{if \code{TRUE}, add significant stars to labels} \item{return_data}{if \code{TRUE}, will return the data.frame used for plotting instead of the plot} \item{...}{parameters passed to \code{\link[=ggcoef_plot]{ggcoef_plot()}}} \item{table_stat}{statistics to display in the table, use any column name returned by the tidier or \code{"ci"} for confidence intervals formatted according to \code{ci_pattern}} \item{table_header}{optional custom headers for the table} \item{table_text_size}{text size for the table} \item{table_stat_label}{optional named list of labeller functions for the displayed statistic (see examples)} \item{ci_pattern}{glue pattern for confidence intervals in the table} \item{table_widths}{relative widths of the forest plot and the coefficients table} \item{table_witdhs}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}\cr use \code{table_widths} instead} \item{models}{named list of models} \item{type}{a dodged plot, a faceted plot or multiple table plots?} \item{data}{a data frame containing data to be plotted, typically the output of \code{ggcoef_model()}, \code{ggcoef_compare()} or \code{ggcoef_multinom()} with the option \code{return_data = TRUE}} \item{x, y}{variables mapped to x and y axis} \item{y_labeller}{optional function to be applied on y labels (see examples)} \item{point_size}{size of the points} \item{point_stroke}{thickness of the points} \item{point_fill}{fill colour for the points} \item{colour}{optional variable name to be mapped to colour aesthetic} \item{colour_guide}{should colour guide be displayed in the legend?} \item{colour_lab}{label of the colour aesthetic in the legend} \item{colour_labels}{labels argument passed to \code{\link[ggplot2:scale_colour_discrete]{ggplot2::scale_colour_discrete()}} and \code{\link[ggplot2:discrete_scale]{ggplot2::discrete_scale()}}} \item{shape}{optional variable name to be mapped to the shape aesthetic} \item{shape_values}{values of the different shapes to use in \code{\link[ggplot2:scale_manual]{ggplot2::scale_shape_manual()}}} \item{shape_guide}{should shape guide be displayed in the legend?} \item{shape_lab}{label of the shape aesthetic in the legend} \item{errorbar}{should error bars be plotted?} \item{errorbar_height}{height of error bars} \item{errorbar_coloured}{should error bars be colored as the points?} \item{stripped_rows}{should stripped rows be displayed in the background?} \item{strips_odd}{color of the odd rows} \item{strips_even}{color of the even rows} \item{vline}{should a vertical line be drawn at 0 (or 1 if \code{exponentiate = TRUE})?} \item{vline_colour}{colour of vertical line} \item{dodged}{should points be dodged (according to the colour aesthetic)?} \item{dodged_width}{width value for \code{\link[ggplot2:position_dodge]{ggplot2::position_dodge()}}} \item{facet_row}{variable name to be used for row facets} \item{facet_col}{optional variable name to be used for column facets} \item{facet_labeller}{labeller function to be used for labeling facets; if labels are too long, you can use \code{\link[ggplot2:labellers]{ggplot2::label_wrap_gen()}} (see examples), more information in the documentation of \code{\link[ggplot2:facet_grid]{ggplot2::facet_grid()}}} \item{plot_title}{an optional plot title} \item{x_limits}{optional limits for the x axis} } \value{ A \code{ggplot2} plot or a \code{tibble} if \code{return_data = TRUE}. } \description{ \code{ggcoef_model()}, \code{ggcoef_table()}, \code{ggcoef_dodged()}, \code{ggcoef_faceted()} and \code{ggcoef_compare()} use \code{\link[broom.helpers:tidy_plus_plus]{broom.helpers::tidy_plus_plus()}} to obtain a \code{tibble} of the model coefficients, apply additional data transformation and then pass the produced \code{tibble} to \code{ggcoef_plot()} to generate the plot. } \details{ For more control, you can use the argument \code{return_data = TRUE} to get the produced \code{tibble}, apply any transformation of your own and then pass your customized \code{tibble} to \code{ggcoef_plot()}. } \section{Functions}{ \itemize{ \item \code{ggcoef_table()}: a variation of \code{\link[=ggcoef_model]{ggcoef_model()}} adding a table with estimates, confidence intervals and p-values \item \code{ggcoef_dodged()}: a dodged variation of \code{\link[=ggcoef_model]{ggcoef_model()}} for multi groups models \item \code{ggcoef_faceted()}: a faceted variation of \code{\link[=ggcoef_model]{ggcoef_model()}} for multi groups models \item \code{ggcoef_compare()}: designed for displaying several models on the same plot. \item \code{ggcoef_plot()}: plot a tidy \code{tibble} of coefficients }} \note{ \code{ggcoef_compare(type = "table")} is not compatible with multi-components models. } \examples{ mod <- lm(Sepal.Length ~ Sepal.Width + Species, data = iris) ggcoef_model(mod) ggcoef_table(mod) \donttest{ ggcoef_table(mod, table_stat = c("estimate", "ci")) ggcoef_table( mod, table_stat_label = list( estimate = scales::label_number(.001) ) ) ggcoef_table(mod, table_text_size = 5, table_widths = c(1, 1)) # a logistic regression example d_titanic <- as.data.frame(Titanic) d_titanic$Survived <- factor(d_titanic$Survived, c("No", "Yes")) mod_titanic <- glm( Survived ~ Sex * Age + Class, weights = Freq, data = d_titanic, family = binomial ) # use 'exponentiate = TRUE' to get the Odds Ratio ggcoef_model(mod_titanic, exponentiate = TRUE) ggcoef_table(mod_titanic, exponentiate = TRUE) # display intercepts ggcoef_model(mod_titanic, exponentiate = TRUE, intercept = TRUE) # customize terms labels ggcoef_model( mod_titanic, exponentiate = TRUE, show_p_values = FALSE, signif_stars = FALSE, add_reference_rows = FALSE, categorical_terms_pattern = "{level} (ref: {reference_level})", interaction_sep = " x ", y_labeller = scales::label_wrap(15) ) # display only a subset of terms ggcoef_model(mod_titanic, exponentiate = TRUE, include = c("Age", "Class")) # do not change points' shape based on significance ggcoef_model(mod_titanic, exponentiate = TRUE, significance = NULL) # a black and white version ggcoef_model( mod_titanic, exponentiate = TRUE, colour = NULL, stripped_rows = FALSE ) # show dichotomous terms on one row ggcoef_model( mod_titanic, exponentiate = TRUE, no_reference_row = broom.helpers::all_dichotomous(), categorical_terms_pattern = "{ifelse(dichotomous, paste0(level, ' / ', reference_level), level)}", show_p_values = FALSE ) } \dontshow{if (requireNamespace("reshape")) withAutoprint(\{ # examplesIf} \donttest{ data(tips, package = "reshape") mod_simple <- lm(tip ~ day + time + total_bill, data = tips) ggcoef_model(mod_simple) # custom variable labels # you can use the labelled package to define variable labels # before computing model if (requireNamespace("labelled")) { tips_labelled <- tips |> labelled::set_variable_labels( day = "Day of the week", time = "Lunch or Dinner", total_bill = "Bill's total" ) mod_labelled <- lm(tip ~ day + time + total_bill, data = tips_labelled) ggcoef_model(mod_labelled) } # you can provide custom variable labels with 'variable_labels' ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ) ) # if labels are too long, you can use 'facet_labeller' to wrap them ggcoef_model( mod_simple, variable_labels = c( day = "Week day", time = "Time (lunch or dinner ?)", total_bill = "Total of the bill" ), facet_labeller = ggplot2::label_wrap_gen(10) ) # do not display variable facets but add colour guide ggcoef_model(mod_simple, facet_row = NULL, colour_guide = TRUE) # works also with with polynomial terms mod_poly <- lm( tip ~ poly(total_bill, 3) + day, data = tips, ) ggcoef_model(mod_poly) # or with different type of contrasts # for sum contrasts, the value of the reference term is computed if (requireNamespace("emmeans")) { mod2 <- lm( tip ~ day + time + sex, data = tips, contrasts = list(time = contr.sum, day = contr.treatment(4, base = 3)) ) ggcoef_model(mod2) } } \dontshow{\}) # examplesIf} \dontshow{if (requireNamespace("nnet") && requireNamespace("gtsummary")) withAutoprint(\{ # examplesIf} \donttest{ # multinomial model mod <- nnet::multinom(grade ~ stage + trt + age, data = gtsummary::trial) ggcoef_model(mod, exponentiate = TRUE) ggcoef_table(mod, group_labels = c(II = "Stage 2 vs. 1")) ggcoef_dodged(mod, exponentiate = TRUE) ggcoef_faceted(mod, exponentiate = TRUE) } \dontshow{\}) # examplesIf} \dontshow{if (requireNamespace("pscl")) withAutoprint(\{ # examplesIf} \donttest{ library(pscl) data("bioChemists", package = "pscl") mod <- zeroinfl(art ~ fem * mar | fem + mar, data = bioChemists) ggcoef_model(mod) ggcoef_table(mod) ggcoef_dodged(mod) ggcoef_faceted( mod, group_labels = c(conditional = "Count", zero_inflated = "Zero-inflated") ) mod2 <- zeroinfl(art ~ fem + mar | 1, data = bioChemists) ggcoef_table(mod2) ggcoef_table(mod2, intercept = TRUE) } \dontshow{\}) # examplesIf} \donttest{ # Use ggcoef_compare() for comparing several models on the same plot mod1 <- lm(Fertility ~ ., data = swiss) mod2 <- step(mod1, trace = 0) mod3 <- lm(Fertility ~ Agriculture + Education * Catholic, data = swiss) models <- list( "Full model" = mod1, "Simplified model" = mod2, "With interaction" = mod3 ) ggcoef_compare(models) ggcoef_compare(models, type = "faceted") ggcoef_compare(models, type = "table") # you can reverse the vertical position of the point by using a negative # value for dodged_width (but it will produce some warnings) ggcoef_compare(models, dodged_width = -.9) } } \seealso{ \code{vignette("ggcoef_model")} } ggstats/man/augment_chisq_add_phi.Rd0000644000176200001440000000226514357760261017307 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat_cross.R \name{augment_chisq_add_phi} \alias{augment_chisq_add_phi} \title{Augment a chi-squared test and compute phi coefficients} \usage{ augment_chisq_add_phi(x) } \arguments{ \item{x}{a chi-squared test as returned by \code{\link[stats:chisq.test]{stats::chisq.test()}}} } \value{ A \code{tibble}. } \description{ Augment a chi-squared test and compute phi coefficients } \details{ Phi coefficients are a measurement of the degree of association between two binary variables. \itemize{ \item A value between -1.0 to -0.7 indicates a strong negative association. \item A value between -0.7 to -0.3 indicates a weak negative association. \item A value between -0.3 to +0.3 indicates a little or no association. \item A value between +0.3 to +0.7 indicates a weak positive association. \item A value between +0.7 to +1.0 indicates a strong positive association. } } \examples{ tab <- xtabs(Freq ~ Sex + Class, data = as.data.frame(Titanic)) augment_chisq_add_phi(chisq.test(tab)) } \seealso{ \code{\link[=stat_cross]{stat_cross()}}, \code{GDAtools::phi.table()} or \code{psych::phi()} } ggstats/man/weighted.median.Rd0000644000176200001440000000435614674013371016042 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weighted_quantile.R \name{weighted.median} \alias{weighted.median} \alias{weighted.quantile} \title{Weighted Median and Quantiles} \source{ These functions are adapted from their homonyms developed by Adrian Baddeley in the \code{spatstat} package. } \usage{ weighted.median(x, w, na.rm = TRUE, type = 2) weighted.quantile(x, w, probs = seq(0, 1, 0.25), na.rm = TRUE, type = 4) } \arguments{ \item{x}{a numeric vector of values} \item{w}{a numeric vector of weights} \item{na.rm}{a logical indicating whether to ignore \code{NA} values} \item{type}{Integer specifying the rule for calculating the median or quantile, corresponding to the rules available for \code{stats:quantile()}. The only valid choices are type=1, 2 or 4. See Details.} \item{probs}{probabilities for which the quantiles should be computed, a numeric vector of values between 0 and 1} } \value{ A numeric vector. } \description{ Compute the median or quantiles a set of numbers which have weights associated with them. } \details{ The \code{i}th observation \code{x[i]} is treated as having a weight proportional to \code{w[i]}. The weighted median is a value \code{m} such that the total weight of data less than or equal to \code{m} is equal to half the total weight. More generally, the weighted quantile with probability \code{p} is a value \code{q} such that the total weight of data less than or equal to \code{q} is equal to \code{p} times the total weight. If there is no such value, then \itemize{ \item if \code{type = 1}, the next largest value is returned (this is the right-continuous inverse of the left-continuous cumulative distribution function); \item if \code{type = 2}, the average of the two surrounding values is returned (the average of the right-continuous and left-continuous inverses); \item if \code{type = 4}, linear interpolation is performed. } Note that the default rule for \code{weighted.median()} is \code{type = 2}, consistent with the traditional definition of the median, while the default for \code{weighted.quantile()} is \code{type = 4}. } \examples{ x <- 1:20 w <- runif(20) weighted.median(x, w) weighted.quantile(x, w) } ggstats/man/ggstats-package.Rd0000644000176200001440000000143114467450345016050 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggstats-package.R \docType{package} \name{ggstats-package} \alias{ggstats} \alias{ggstats-package} \title{ggstats: Extension to 'ggplot2' for Plotting Stats} \description{ Provides new statistics, new geometries and new positions for 'ggplot2' and a suite of functions to facilitate the creation of statistical plots. } \seealso{ Useful links: \itemize{ \item \url{https://larmarange.github.io/ggstats/} \item \url{https://github.com/larmarange/ggstats} \item Report bugs at \url{https://github.com/larmarange/ggstats/issues} } } \author{ \strong{Maintainer}: Joseph Larmarange \email{joseph@larmarange.net} (\href{https://orcid.org/0000-0001-7097-700X}{ORCID}) } \keyword{internal} ggstats/man/position_likert.Rd0000644000176200001440000001233114672600601016210 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/position_likert.R \docType{data} \name{position_likert} \alias{position_likert} \alias{position_diverging} \alias{PositionLikert} \alias{PositionDiverging} \title{Stack objects on top of each another and center them around 0} \usage{ position_likert( vjust = 1, reverse = FALSE, exclude_fill_values = NULL, cutoff = NULL ) position_diverging( vjust = 1, reverse = FALSE, exclude_fill_values = NULL, cutoff = NULL ) } \arguments{ \item{vjust}{Vertical adjustment for geoms that have a position (like points or lines), not a dimension (like bars or areas). Set to \code{0} to align with the bottom, \code{0.5} for the middle, and \code{1} (the default) for the top.} \item{reverse}{If \code{TRUE}, will reverse the default stacking order. This is useful if you're rotating both the plot and legend.} \item{exclude_fill_values}{Vector of values from the variable associated with the \code{fill} aesthetic that should not be displayed (but still taken into account for computing proportions)} \item{cutoff}{number of categories to be displayed negatively (i.e. on the left of the x axis or the bottom of the y axis), could be a decimal value: \code{2} to display negatively the two first categories, \code{2.5} to display negatively the two first categories and half of the third, \code{2.2} to display negatively the two first categories and a fifth of the third (see examples). By default (\code{NULL}), it will be equal to the number of categories divided by 2, i.e. it will be centered.} } \description{ \code{position_diverging()} stacks bars on top of each other and center them around zero (the same number of categories are displayed on each side). \code{position_likert()} uses proportions instead of counts. This type of presentation is commonly used to display Likert-type scales. } \details{ It is recommended to use \code{position_likert()} with \code{stat_prop()} and its \code{complete} argument (see examples). } \examples{ library(ggplot2) ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "fill") + scale_x_continuous(label = scales::label_percent()) + xlab("proportion") ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert() + xlab("proportion") ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "stack") + scale_fill_likert(pal = scales::brewer_pal(palette = "PiYG")) ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "diverging") + scale_x_continuous(label = label_number_abs()) + scale_fill_likert() \donttest{ # Reverse order ------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(reverse = TRUE)) + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert() + xlab("proportion") # Custom center ------------------------------------------------------------- ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(cutoff = 1)) + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert(cutoff = 1) + xlab("proportion") ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(cutoff = 3.75)) + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert(cutoff = 3.75) + xlab("proportion") # Missing items ------------------------------------------------------------- # example with a level not being observed for a specific value of y d <- diamonds d <- d[!(d$cut == "Premium" & d$clarity == "I1"), ] d <- d[!(d$cut \%in\% c("Fair", "Good") & d$clarity == "SI2"), ] # by default, the two lowest bar are not properly centered ggplot(d) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + scale_fill_likert() # use stat_prop() with `complete = "fill"` to fix it ggplot(d) + aes(y = clarity, fill = cut) + geom_bar(position = "likert", stat = "prop", complete = "fill") + scale_fill_likert() # Add labels ---------------------------------------------------------------- custom_label <- function(x) { p <- scales::percent(x, accuracy = 1) p[x < .075] <- "" p } ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + geom_text( aes(by = clarity, label = custom_label(after_stat(prop))), stat = "prop", position = position_likert(vjust = .5) ) + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert() + xlab("proportion") # Do not display specific fill values --------------------------------------- # (but taken into account to compute proportions) ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(exclude_fill_values = "Very Good")) + scale_x_continuous(label = label_percent_abs()) + scale_fill_likert() + xlab("proportion") } } \seealso{ See \code{\link[ggplot2:position_stack]{ggplot2::position_stack()}} and \code{\link[ggplot2:position_stack]{ggplot2::position_fill()}} } \keyword{datasets} ggstats/man/hex_bw.Rd0000644000176200001440000000401215062213141014234 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hex_bw.R \docType{data} \name{hex_bw} \alias{hex_bw} \alias{hex_bw_threshold} \alias{auto_contrast} \title{Identify a suitable font color (black or white) given a background HEX color} \format{ An object of class \code{ggplot2::mapping} (inherits from \code{uneval}, \code{gg}, \code{S7_object}) of length 1. } \source{ Adapted from \code{saros} for \code{hex_code()} and from \url{https://github.com/teunbrand/ggplot_tricks?tab=readme-ov-file#text-contrast} for \code{auto_contrast}. } \usage{ hex_bw(hex_code) hex_bw_threshold(hex_code, values, threshold) auto_contrast } \arguments{ \item{hex_code}{Background color in hex-format.} \item{values}{Values to be compared.} \item{threshold}{Threshold.} } \value{ Either black or white, in hex-format } \description{ You could use \code{auto_contrast} as a shortcut of \code{aes(colour = after_scale(hex_bw(.data$fill)))}. You should use \verb{!!!} to inject it within \code{\link[ggplot2:aes]{ggplot2::aes()}} (see examples). \code{hex_bw_threshold()} is a variation of \code{hex_bw()}. For \code{values} below \code{threshold}, black (\code{"#000000"}) will always be returned, regardless of \code{hex_code}. } \examples{ hex_bw("#0dadfd") library(ggplot2) ggplot(diamonds) + aes(x = cut, fill = color, label = after_stat(count)) + geom_bar() + geom_text( mapping = aes(color = after_scale(hex_bw(.data$fill))), position = position_stack(.5), stat = "count", size = 2 ) ggplot(diamonds) + aes(x = cut, fill = color, label = after_stat(count)) + geom_bar() + geom_text( mapping = auto_contrast, position = position_stack(.5), stat = "count", size = 2 ) ggplot(diamonds) + aes(x = cut, fill = color, label = after_stat(count), !!!auto_contrast) + geom_bar() + geom_text( mapping = auto_contrast, position = position_stack(.5), stat = "count", size = 2 ) } \keyword{datasets} ggstats/man/ggcascade.Rd0000644000176200001440000000520114674034026014675 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggcascade.R \name{ggcascade} \alias{ggcascade} \alias{compute_cascade} \alias{plot_cascade} \title{Cascade plot} \usage{ ggcascade( .data, ..., .weights = NULL, .by = NULL, .nrow = NULL, .ncol = NULL, .add_n = TRUE, .text_size = 4, .arrows = TRUE ) compute_cascade(.data, ..., .weights = NULL, .by = NULL) plot_cascade( .data, .by = NULL, .nrow = NULL, .ncol = NULL, .add_n = TRUE, .text_size = 4, .arrows = TRUE ) } \arguments{ \item{.data}{A data frame, or data frame extension (e.g. a tibble). For \code{plot_cascade()}, the variable displayed on the x-axis should be named \code{"x"} and the number of observations should be named \code{"n"}, like the tibble returned by \code{compute_cascade()}.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of conditions defining the different statuses to be plotted (see examples).} \item{.weights}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optional weights. Should select only one variable.} \item{.by}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> A variable or a set of variables to group by the computation of the cascade, and to generate facets. To select several variables, use \code{\link[dplyr:pick]{dplyr::pick()}} (see examples).} \item{.nrow, .ncol}{Number of rows and columns, for faceted plots.} \item{.add_n}{Display the number of observations?} \item{.text_size}{Size of the labels, passed to \code{\link[ggplot2:geom_text]{ggplot2::geom_text()}}.} \item{.arrows}{Display arrows between statuses?} } \value{ A \code{ggplot2} plot or a \code{tibble}. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \details{ \code{ggcascade()} calls \code{compute_cascade()} to generate a data set passed to \code{plot_cascade()}. Use \code{compute_cascade()} and \code{plot_cascade()} for more controls. } \examples{ ggplot2::diamonds |> ggcascade( all = TRUE, big = carat > .5, "big & ideal" = carat > .5 & cut == "Ideal" ) ggplot2::mpg |> ggcascade( all = TRUE, recent = year > 2000, "recent & economic" = year > 2000 & displ < 3, .by = cyl, .ncol = 3, .arrows = FALSE, .text_size = 3 ) ggplot2::mpg |> ggcascade( all = TRUE, recent = year > 2000, "recent & economic" = year > 2000 & displ < 3, .by = pick(cyl, drv), .add_n = FALSE, .text_size = 2 ) } ggstats/man/stat_prop.Rd0000644000176200001440000002006015062213141014774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat_prop.R \docType{data} \name{stat_prop} \alias{stat_prop} \alias{StatProp} \title{Compute proportions according to custom denominator} \usage{ stat_prop( mapping = NULL, data = NULL, geom = "bar", position = "fill", ..., na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, complete = NULL, default_by = "total" ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{geom}{Override the default connection with \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}}.} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These arguments broadly fall into one of 4 categories below. Notably, further arguments to the \code{position} argument, or aesthetics that are required can \emph{not} be passed through \code{...}. Unknown arguments that are not part of the 4 categories below are ignored. \itemize{ \item Static aesthetics that are not mapped to a scale, but are at a fixed value and apply to the layer as a whole. For example, \code{colour = "red"} or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} section that lists the available options. The 'required' aesthetics cannot be passed on to the \code{params}. Please note that while passing unmapped aesthetics as vectors is technically possible, the order and required length is not guaranteed to be parallel to the input data. \item When constructing a layer using a \verb{stat_*()} function, the \code{...} argument can be used to pass on parameters to the \code{geom} part of the layer. An example of this is \code{stat_density(geom = "area", outline.type = "both")}. The geom's documentation lists which parameters it can accept. \item Inversely, when constructing a layer using a \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters to the \code{stat} part of the layer. An example of this is \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation lists which parameters it can accept. \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through \code{...}. This can be one of the functions described as \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. }} \item{na.rm}{If \code{FALSE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} \item{orientation}{The orientation of the layer. The default (\code{NA}) automatically determines the orientation from the aesthetic mapping. In the rare event that this fails it can be given explicitly by setting \code{orientation} to either \code{"x"} or \code{"y"}. See the \emph{Orientation} section for more detail.} \item{show.legend}{logical. Should this layer be included in the legends? \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} never includes, and \code{TRUE} always includes. It can also be a named logical vector to finely select the aesthetics to display. To include legend keys for all levels, even when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, but unobserved levels are omitted.} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. \code{\link[ggplot2:annotation_borders]{annotation_borders()}}.} \item{complete}{Name (character) of an aesthetic for those statistics should be completed for unobserved values (see example).} \item{default_by}{If the \strong{by} aesthetic is not available, name of another aesthetic that will be used to determine the denominators (e.g. \code{"fill"}), or \code{NULL} or \code{"total"} to compute proportions of the total. To be noted, \code{default_by = "x"} works both for vertical and horizontal bars.} } \value{ A \code{ggplot2} plot with the added statistic. } \description{ \code{stat_prop()} is a variation of \code{\link[ggplot2:geom_bar]{ggplot2::stat_count()}} allowing to compute custom proportions according to the \strong{by} aesthetic defining the denominator (i.e. all proportions for a same value of \strong{by} will sum to 1). If the \strong{by} aesthetic is not specified, denominators will be determined according to the \code{default_by} argument. } \section{Aesthetics}{ \code{stat_prop()} understands the following aesthetics (required aesthetics are in bold): \itemize{ \item \strong{x \emph{or} y} \item by \item weight } } \section{Computed variables}{ \describe{ \item{\code{after_stat(count)}}{number of points in bin} \item{\code{after_stat(denominator)}}{denominator for the proportions} \item{\code{after_stat(prop)}}{computed proportion, i.e. \code{after_stat(count)}/\code{after_stat(denominator)}} } } \examples{ library(ggplot2) d <- as.data.frame(Titanic) p <- ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = Class) + geom_bar(position = "fill") + geom_text(stat = "prop", position = position_fill(.5)) p p + facet_grid(~Sex) ggplot(d) + aes(x = Class, fill = Survived, weight = Freq) + geom_bar(position = "dodge") + geom_text( aes(by = Survived), stat = "prop", position = position_dodge(0.9), vjust = "bottom" ) \donttest{ if (requireNamespace("scales")) { ggplot(d) + aes(x = Class, fill = Survived, weight = Freq, by = 1) + geom_bar() + geom_text( aes(label = scales::percent(after_stat(prop), accuracy = 1)), stat = "prop", position = position_stack(.5) ) } # displaying unobserved levels with complete d <- diamonds |> dplyr::filter(!(cut == "Ideal" & clarity == "I1")) |> dplyr::filter(!(cut == "Very Good" & clarity == "VS2")) |> dplyr::filter(!(cut == "Premium" & clarity == "IF")) p <- ggplot(d) + aes(x = clarity, fill = cut, by = clarity) + geom_bar(position = "fill") p + geom_text(stat = "prop", position = position_fill(.5)) p + geom_text(stat = "prop", position = position_fill(.5), complete = "fill") } } \seealso{ \code{vignette("stat_prop")}, \code{\link[ggplot2:geom_bar]{ggplot2::stat_count()}}. For an alternative approach, see \url{https://github.com/tidyverse/ggplot2/issues/5505#issuecomment-1791324008}. } \keyword{datasets} ggstats/man/ggsurvey.Rd0000644000176200001440000000305115062213141014635 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggsurvey.R \name{ggsurvey} \alias{ggsurvey} \title{Easy ggplot2 with survey objects} \usage{ ggsurvey(design = NULL, mapping = NULL, ...) } \arguments{ \item{design}{A survey design object, usually created with \code{\link[survey:svydesign]{survey::svydesign()}}} \item{mapping}{Default list of aesthetic mappings to use for plot, to be created with \code{\link[ggplot2:aes]{ggplot2::aes()}}.} \item{...}{Other arguments passed on to methods. Not currently used.} } \value{ A \code{ggplot2} plot. } \description{ A function to facilitate \code{ggplot2} graphs using a survey object. It will initiate a ggplot and map survey weights to the corresponding aesthetic. } \details{ Graphs will be correct as long as only weights are required to compute the graph. However, statistic or geometry requiring correct variance computation (like \code{\link[ggplot2:geom_smooth]{ggplot2::geom_smooth()}}) will be statistically incorrect. } \examples{ \dontshow{if (requireNamespace("survey")) withAutoprint(\{ # examplesIf} data(api, package = "survey") dstrat <- survey::svydesign( id = ~1, strata = ~stype, weights = ~pw, data = apistrat, fpc = ~fpc ) ggsurvey(dstrat) + ggplot2::aes(x = cnum, y = dnum) + ggplot2::geom_count() d <- as.data.frame(Titanic) dw <- survey::svydesign(ids = ~1, weights = ~Freq, data = d) ggsurvey(dw) + ggplot2::aes(x = Class, fill = Survived) + ggplot2::geom_bar(position = "fill") \dontshow{\}) # examplesIf} } ggstats/man/gglikert.Rd0000644000176200001440000002322315122045054014600 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/gglikert.R \name{gglikert} \alias{gglikert} \alias{gglikert_data} \alias{gglikert_stacked} \title{Plotting Likert-type items} \usage{ gglikert( data, include = dplyr::everything(), weights = NULL, y = ".question", variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c("prop", "prop_lower", "mean", "median"), sort_prop_include_center = totals_include_center, factor_to_sort = ".question", exclude_fill_values = NULL, cutoff = NULL, data_fun = NULL, add_labels = TRUE, labels_size = 3.5, labels_color = "auto", labels_accuracy = 1, labels_hide_below = 0.05, add_totals = TRUE, totals_size = labels_size, totals_color = "black", totals_accuracy = labels_accuracy, totals_fontface = "bold", totals_include_center = FALSE, totals_hjust = 0.1, y_reverse = TRUE, y_label_wrap = 50, reverse_likert = FALSE, width = 0.9, facet_rows = NULL, facet_cols = NULL, facet_label_wrap = 50, symmetric = FALSE ) gglikert_data( data, include = dplyr::everything(), weights = NULL, variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c("prop", "prop_lower", "mean", "median"), sort_prop_include_center = TRUE, factor_to_sort = ".question", exclude_fill_values = NULL, cutoff = NULL, data_fun = NULL ) gglikert_stacked( data, include = dplyr::everything(), weights = NULL, y = ".question", variable_labels = NULL, sort = c("none", "ascending", "descending"), sort_method = c("prop", "prop_lower", "mean", "median"), sort_prop_include_center = FALSE, factor_to_sort = ".question", data_fun = NULL, add_labels = TRUE, labels_size = 3.5, labels_color = "auto", labels_accuracy = 1, labels_hide_below = 0.05, add_median_line = FALSE, y_reverse = TRUE, y_label_wrap = 50, reverse_fill = TRUE, width = 0.9 ) } \arguments{ \item{data}{a data frame, a data frame extension (e.g. a tibble), or a survey design object} \item{include}{variables to include, accepts \link[dplyr:select]{tidy-select} syntax} \item{weights}{optional variable name of a weighting variable, accepts \link[dplyr:select]{tidy-select} syntax} \item{y}{name of the variable to be plotted on \code{y} axis (relevant when \code{.question} is mapped to "facets, see examples), accepts \link[dplyr:select]{tidy-select} syntax} \item{variable_labels}{a named list or a named vector of custom variable labels} \item{sort}{should the factor defined by \code{factor_to_sort} be sorted according to the answers (see \code{sort_method})? One of "none" (default), "ascending" or "descending"} \item{sort_method}{method used to sort the variables: \code{"prop"} sort according to the proportion of answers higher than the centered level, \code{"prop_lower"} according to the proportion lower than the centered level, \code{"mean"} considers answer as a score and sort according to the mean score, \code{"median"} used the median and the majority judgment rule for tie-breaking.} \item{sort_prop_include_center}{when sorting with \code{"prop"} and if the number of levels is uneven, should half of the central level be taken into account to compute the proportion?} \item{factor_to_sort}{name of the factor column to sort if \code{sort} is not equal to \code{"none"}; by default the list of questions passed to \code{include}; should be one factor column of the tibble returned by \code{gglikert_data()}; accepts \link[dplyr:select]{tidy-select} syntax} \item{exclude_fill_values}{Vector of values that should not be displayed (but still taken into account for computing proportions), see \code{\link[=position_likert]{position_likert()}}} \item{cutoff}{number of categories to be displayed negatively (i.e. on the left of the x axis or the bottom of the y axis), could be a decimal value: \code{2} to display negatively the two first categories, \code{2.5} to display negatively the two first categories and half of the third, \code{2.2} to display negatively the two first categories and a fifth of the third (see examples). By default (\code{NULL}), it will be equal to the number of categories divided by 2, i.e. it will be centered.} \item{data_fun}{for advanced usage, custom function to be applied to the generated dataset at the end of \code{gglikert_data()}} \item{add_labels}{should percentage labels be added to the plot?} \item{labels_size}{size of the percentage labels} \item{labels_color}{color of the percentage labels (\code{"auto"} to use \code{hex_bw()} to determine a font color based on background color)} \item{labels_accuracy}{accuracy of the percentages, see \code{\link[scales:label_percent]{scales::label_percent()}}} \item{labels_hide_below}{if provided, values below will be masked, see \code{\link[=label_percent_abs]{label_percent_abs()}}} \item{add_totals}{should the total proportions of negative and positive answers be added to plot? \strong{This option is not compatible with facets!}} \item{totals_size}{size of the total proportions} \item{totals_color}{color of the total proportions} \item{totals_accuracy}{accuracy of the total proportions, see \code{\link[scales:label_percent]{scales::label_percent()}}} \item{totals_fontface}{font face of the total proportions} \item{totals_include_center}{if the number of levels is uneven, should half of the center level be added to the total proportions?} \item{totals_hjust}{horizontal adjustment of totals labels on the x axis} \item{y_reverse}{should the y axis be reversed?} \item{y_label_wrap}{number of characters per line for y axis labels, see \code{\link[scales:label_wrap]{scales::label_wrap()}}} \item{reverse_likert}{if \code{TRUE}, will reverse the default stacking order, see \code{\link[=position_likert]{position_likert()}}} \item{width}{bar width, see \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}}} \item{facet_rows, facet_cols}{A set of variables or expressions quoted by \code{\link[ggplot2:vars]{ggplot2::vars()}} and defining faceting groups on the rows or columns dimension (see examples)} \item{facet_label_wrap}{number of characters per line for facet labels, see \code{\link[ggplot2:labellers]{ggplot2::label_wrap_gen()}}} \item{symmetric}{should the x-axis be symmetric?} \item{add_median_line}{add a vertical line at 50\%?} \item{reverse_fill}{if \code{TRUE}, will reverse the default stacking order, see \code{\link[ggplot2:position_stack]{ggplot2::position_fill()}}} } \value{ A \code{ggplot2} plot or a \code{tibble}. } \description{ Combines several factor variables using the same list of ordered levels (e.g. Likert-type scales) into a unique data frame and generates a centered bar plot. } \details{ You could use \code{gglikert_data()} to just produce the dataset to be plotted. If variable labels have been defined (see \code{\link[labelled:var_label]{labelled::var_label()}}), they will be considered. You can also pass custom variables labels with the \code{variable_labels} argument. } \examples{ library(ggplot2) library(dplyr) likert_levels <- c( "Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree" ) set.seed(42) df <- tibble( q1 = sample(likert_levels, 150, replace = TRUE), q2 = sample(likert_levels, 150, replace = TRUE, prob = 5:1), q3 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q4 = sample(likert_levels, 150, replace = TRUE, prob = 1:5), q5 = sample(c(likert_levels, NA), 150, replace = TRUE), q6 = sample(likert_levels, 150, replace = TRUE, prob = c(1, 0, 1, 1, 0)) ) |> mutate(across(everything(), ~ factor(.x, levels = likert_levels))) gglikert(df) gglikert(df, include = q1:3) + scale_fill_likert(pal = scales::brewer_pal(palette = "PRGn")) gglikert(df, sort = "ascending") \donttest{ gglikert(df, sort = "ascending", sort_prop_include_center = TRUE) gglikert(df, sort = "ascending", sort_method = "mean") gglikert(df, reverse_likert = TRUE) gglikert(df, add_totals = FALSE, add_labels = FALSE) gglikert( df, totals_include_center = TRUE, totals_hjust = .25, totals_size = 4.5, totals_fontface = "italic", totals_accuracy = .01, labels_accuracy = 1, labels_size = 2.5, labels_hide_below = .25 ) gglikert(df, exclude_fill_values = "Neither agree nor disagree") if (require("labelled")) { df |> set_variable_labels( q1 = "First question", q2 = "Second question" ) |> gglikert( variable_labels = c( q4 = "a custom label", q6 = "a very very very very very very very very very very long label" ), y_label_wrap = 25 ) } # Facets df_group <- df df_group$group <- sample(c("A", "B"), 150, replace = TRUE) gglikert(df_group, q1:q6, facet_rows = vars(group)) gglikert(df_group, q1:q6, facet_cols = vars(group)) gglikert(df_group, q1:q6, y = "group", facet_rows = vars(.question)) # Custom function to be applied on data f <- function(d) { d$.question <- forcats::fct_relevel(d$.question, "q5", "q2") d } gglikert(df, include = q1:q6, data_fun = f) # Custom center gglikert(df, cutoff = 2) gglikert(df, cutoff = 1) gglikert(df, cutoff = 1, symmetric = TRUE) } gglikert_stacked(df, q1:q6) gglikert_stacked(df, q1:q6, add_median_line = TRUE, sort = "asc") \donttest{ gglikert_stacked(df_group, q1:q6, y = "group", add_median_line = TRUE) + facet_grid(rows = vars(.question)) } } \seealso{ \code{vignette("gglikert")}, \code{\link[=position_likert]{position_likert()}}, \code{\link[=stat_prop]{stat_prop()}} } ggstats/man/weighted.sum.Rd0000644000176200001440000000076314674033502015405 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weighted_sum.R \name{weighted.sum} \alias{weighted.sum} \title{Weighted Sum} \usage{ weighted.sum(x, w, na.rm = TRUE) } \arguments{ \item{x}{a numeric vector of values} \item{w}{a numeric vector of weights} \item{na.rm}{a logical indicating whether to ignore \code{NA} values} } \value{ A numeric vector. } \description{ Weighted Sum } \examples{ x <- 1:20 w <- runif(20) weighted.sum(x, w) } ggstats/man/geom_connector.Rd0000644000176200001440000002014615062213140015766 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/geom_connector.R \docType{data} \name{geom_connector} \alias{geom_connector} \alias{geom_bar_connector} \alias{GeomConnector} \title{Connect bars / points} \usage{ geom_connector( mapping = NULL, data = NULL, stat = "identity", position = "identity", width = 0.1, continuous = FALSE, na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, ... ) geom_bar_connector( mapping = NULL, data = NULL, stat = "prop", position = "stack", width = 0.9, continuous = FALSE, add_baseline = TRUE, na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE, ... ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{stat}{The statistical transformation to use on the data for this layer. When using a \verb{geom_*()} function to construct a layer, the \code{stat} argument can be used to override the default coupling between geoms and stats. The \code{stat} argument accepts the following: \itemize{ \item A \code{Stat} ggproto subclass, for example \code{StatCount}. \item A string naming the stat. To give the stat as a string, strip the function name of the \code{stat_} prefix. For example, to use \code{stat_count()}, give the stat as \code{"count"}. \item For more information and other ways to specify the stat, see the \link[ggplot2:layer_stats]{layer stat} documentation. }} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{width}{Bar width (see examples).} \item{continuous}{Should connect segments be continuous?} \item{na.rm}{If \code{FALSE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} \item{orientation}{The orientation of the layer. The default (\code{NA}) automatically determines the orientation from the aesthetic mapping. In the rare event that this fails it can be given explicitly by setting \code{orientation} to either \code{"x"} or \code{"y"}. See the \emph{Orientation} section for more detail.} \item{show.legend}{logical. Should this layer be included in the legends? \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} never includes, and \code{TRUE} always includes. It can also be a named logical vector to finely select the aesthetics to display. To include legend keys for all levels, even when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, but unobserved levels are omitted.} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. \code{\link[ggplot2:annotation_borders]{annotation_borders()}}.} \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These arguments broadly fall into one of 4 categories below. Notably, further arguments to the \code{position} argument, or aesthetics that are required can \emph{not} be passed through \code{...}. Unknown arguments that are not part of the 4 categories below are ignored. \itemize{ \item Static aesthetics that are not mapped to a scale, but are at a fixed value and apply to the layer as a whole. For example, \code{colour = "red"} or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} section that lists the available options. The 'required' aesthetics cannot be passed on to the \code{params}. Please note that while passing unmapped aesthetics as vectors is technically possible, the order and required length is not guaranteed to be parallel to the input data. \item When constructing a layer using a \verb{stat_*()} function, the \code{...} argument can be used to pass on parameters to the \code{geom} part of the layer. An example of this is \code{stat_density(geom = "area", outline.type = "both")}. The geom's documentation lists which parameters it can accept. \item Inversely, when constructing a layer using a \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters to the \code{stat} part of the layer. An example of this is \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation lists which parameters it can accept. \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through \code{...}. This can be one of the functions described as \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. }} \item{add_baseline}{Add connectors at baseline?} } \description{ \code{geom_connector()} is a variation of \code{\link[ggplot2:geom_path]{ggplot2::geom_step()}}. Its variant \code{geom_bar_connector()} is particularly adapted to connect bars. } \examples{ library(ggplot2) # geom_bar_connector() ----------- ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5) + geom_bar_connector(width = .5, linewidth = .25) + theme_minimal() + theme(legend.position = "bottom") \donttest{ ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5) + geom_bar_connector( width = .5, continuous = TRUE, colour = "red", linetype = "dotted", add_baseline = FALSE, ) + theme(legend.position = "bottom") ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5, position = "fill") + geom_bar_connector(width = .5, position = "fill") + theme(legend.position = "bottom") ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_bar(width = .5, position = "diverging") + geom_bar_connector(width = .5, position = "diverging", linewidth = .25) + theme(legend.position = "bottom") # geom_connector() ----------- ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector() + geom_point() ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(continuous = TRUE) + geom_point() ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(continuous = TRUE, width = .3) + geom_point() ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = 0) + geom_point() ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = Inf) + geom_point() ggplot(mtcars) + aes(x = wt, y = mpg, colour = factor(cyl)) + geom_connector(width = Inf, continuous = TRUE) + geom_point() } } \keyword{datasets} ggstats/man/round_any.Rd0000644000176200001440000000111314600506645014770 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/round_any.R \name{round_any} \alias{round_any} \title{Round to multiple of any number.} \source{ adapted from \code{plyr} } \usage{ round_any(x, accuracy, f = round) } \arguments{ \item{x}{numeric or date-time (POSIXct) vector to round} \item{accuracy}{number to round to; for POSIXct objects, a number of seconds} \item{f}{rounding function: \code{\link{floor}}, \code{\link{ceiling}} or \code{\link{round}}} } \description{ Round to multiple of any number. } \examples{ round_any(1.865, accuracy = .25) } ggstats/man/signif_stars.Rd0000644000176200001440000000165114357760261015501 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/signif_stars.R \name{signif_stars} \alias{signif_stars} \title{Significance Stars} \usage{ signif_stars(x, three = 0.001, two = 0.01, one = 0.05, point = 0.1) } \arguments{ \item{x}{numeric values that will be compared to the \code{point}, \code{one}, \code{two}, and \code{three} values} \item{three}{threshold below which to display three stars} \item{two}{threshold below which to display two stars} \item{one}{threshold below which to display one star} \item{point}{threshold below which to display one point (\code{NULL} to deactivate)} } \value{ Character vector containing the appropriate number of stars for each \code{x} value. } \description{ Calculate significance stars } \examples{ x <- c(0.5, 0.1, 0.05, 0.01, 0.001) signif_stars(x) signif_stars(x, one = .15, point = NULL) } \author{ Joseph Larmarange } ggstats/man/pal_extender.Rd0000644000176200001440000000345314625277577015475 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pal_extender.R \name{pal_extender} \alias{pal_extender} \alias{scale_fill_extended} \alias{scale_colour_extended} \title{Extend a discrete colour palette} \usage{ pal_extender(pal = scales::brewer_pal(palette = "BrBG")) scale_fill_extended( name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), aesthetics = "fill" ) scale_colour_extended( name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), aesthetics = "colour" ) } \arguments{ \item{pal}{A palette function, such as returned by \link[scales:pal_brewer]{scales::brewer_pal}, taking a number of colours as entry and returning a list of colours.} \item{name}{The name of the scale. Used as the axis or legend title. If \code{waiver()}, the default, the name of the scale is taken from the first mapping used for that aesthetic. If \code{NULL}, the legend title will be omitted.} \item{...}{Other arguments passed on to \code{discrete_scale()} to control name, limits, breaks, labels and so forth.} \item{aesthetics}{Character string or vector of character strings listing the name(s) of the aesthetic(s) that this scale works with. This can be useful, for example, to apply colour settings to the colour and fill aesthetics at the same time, via \code{aesthetics = c("colour", "fill")}.} } \value{ A palette function. } \description{ If the palette returns less colours than requested, the list of colours will be expanded using \code{\link[scales:pal_gradient_n]{scales::pal_gradient_n()}}. To be used with a sequential or diverging palette. Not relevant for qualitative palettes. } \examples{ pal <- scales::pal_brewer(palette = "PiYG") scales::show_col(pal(16)) scales::show_col(pal_extender(pal)(16)) } ggstats/man/symmetric_limits.Rd0000644000176200001440000000216214657111214016370 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symmetric_limits.R \name{symmetric_limits} \alias{symmetric_limits} \title{Symmetric limits} \source{ Adapted from the homonym function in \code{{ggpmisc}} } \usage{ symmetric_limits(x) } \arguments{ \item{x}{a vector of numeric values, possibly a range, from which to compute enclosing range} } \value{ A numeric vector of length two with the new limits, which are always such that the absolute value of upper and lower limits is the same. } \description{ Expand scale limits to make them symmetric around zero. Can be passed as argument to parameter \code{limits} of continuous scales from packages \code{{ggplot2}} or \code{{scales}}. Can be also used to obtain an enclosing symmetric range for numeric vectors. } \examples{ library(ggplot2) ggplot(iris) + aes(x = Sepal.Length - 5, y = Sepal.Width - 3, colour = Species) + geom_vline(xintercept = 0) + geom_hline(yintercept = 0) + geom_point() last_plot() + scale_x_continuous(limits = symmetric_limits) + scale_y_continuous(limits = symmetric_limits) } ggstats/man/stat_weighted_mean.Rd0000644000176200001440000001576515062213141016634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat_weighted_mean.R \docType{data} \name{stat_weighted_mean} \alias{stat_weighted_mean} \alias{StatWeightedMean} \title{Compute weighted y mean} \usage{ stat_weighted_mean( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., na.rm = FALSE, orientation = NA, show.legend = NA, inherit.aes = TRUE ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{geom}{Override the default connection with \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}.} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These arguments broadly fall into one of 4 categories below. Notably, further arguments to the \code{position} argument, or aesthetics that are required can \emph{not} be passed through \code{...}. Unknown arguments that are not part of the 4 categories below are ignored. \itemize{ \item Static aesthetics that are not mapped to a scale, but are at a fixed value and apply to the layer as a whole. For example, \code{colour = "red"} or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} section that lists the available options. The 'required' aesthetics cannot be passed on to the \code{params}. Please note that while passing unmapped aesthetics as vectors is technically possible, the order and required length is not guaranteed to be parallel to the input data. \item When constructing a layer using a \verb{stat_*()} function, the \code{...} argument can be used to pass on parameters to the \code{geom} part of the layer. An example of this is \code{stat_density(geom = "area", outline.type = "both")}. The geom's documentation lists which parameters it can accept. \item Inversely, when constructing a layer using a \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters to the \code{stat} part of the layer. An example of this is \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation lists which parameters it can accept. \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through \code{...}. This can be one of the functions described as \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. }} \item{na.rm}{If \code{FALSE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} \item{orientation}{The orientation of the layer. The default (\code{NA}) automatically determines the orientation from the aesthetic mapping. In the rare event that this fails it can be given explicitly by setting \code{orientation} to either \code{"x"} or \code{"y"}. See the \emph{Orientation} section for more detail.} \item{show.legend}{logical. Should this layer be included in the legends? \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} never includes, and \code{TRUE} always includes. It can also be a named logical vector to finely select the aesthetics to display. To include legend keys for all levels, even when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, but unobserved levels are omitted.} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. \code{\link[ggplot2:annotation_borders]{annotation_borders()}}.} } \value{ A \code{ggplot2} plot with the added statistic. } \description{ This statistic will compute the mean of \strong{y} aesthetic for each unique value of \strong{x}, taking into account \strong{weight} aesthetic if provided. } \section{Computed variables}{ \describe{ \item{y}{weighted y (numerator / denominator)} \item{numerator}{numerator} \item{denominator}{denominator} } } \examples{ \dontshow{if (requireNamespace("reshape")) withAutoprint(\{ # examplesIf} library(ggplot2) data(tips, package = "reshape") ggplot(tips) + aes(x = day, y = total_bill) + geom_point() ggplot(tips) + aes(x = day, y = total_bill) + stat_weighted_mean() \donttest{ ggplot(tips) + aes(x = day, y = total_bill, group = 1) + stat_weighted_mean(geom = "line") ggplot(tips) + aes(x = day, y = total_bill, colour = sex, group = sex) + stat_weighted_mean(geom = "line") ggplot(tips) + aes(x = day, y = total_bill, fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") # computing a proportion on the fly if (requireNamespace("scales")) { ggplot(tips) + aes(x = day, y = as.integer(smoker == "Yes"), fill = sex) + stat_weighted_mean(geom = "bar", position = "dodge") + scale_y_continuous(labels = scales::percent) } } \dontshow{\}) # examplesIf} \dontshow{if (requireNamespace("scales")) withAutoprint(\{ # examplesIf} library(ggplot2) # taking into account some weights d <- as.data.frame(Titanic) ggplot(d) + aes( x = Class, y = as.integer(Survived == "Yes"), weight = Freq, fill = Sex ) + geom_bar(stat = "weighted_mean", position = "dodge") + scale_y_continuous(labels = scales::percent) + labs(y = "Survived") \dontshow{\}) # examplesIf} } \seealso{ \code{vignette("stat_weighted_mean")} } \keyword{datasets} ggstats/man/figures/0000755000176200001440000000000014733753650014162 5ustar liggesusersggstats/man/figures/README-unnamed-chunk-4-1.png0000644000176200001440000001476715122045515020661 0ustar liggesusersPNG  IHDRMR/MPLTE:f:::f:fff}::::f:::::::f:::ff:f:f::MMMMMnMMMnMnMff:fff:f::ffff:ffffnMMnMnnMnnnnMMMnMnMn::::fff:ff:fېnMnnnnȫff::ffې۶ȎMې:ېf۶f۶ېnkvmfȎې۶Cj" pHYsodPIDATxWG[nAvf3Dױ8/0=?n=U-H2jíVK(7]Dg~.e(@tʠ#P]Q7+Xa` t%W1u`J0A:`@u* *0(T#PT79NrxyωY~6/l`MmJ+bחg~MER6[v쬙+s%-T[1?,];4`@;Nsu3I ?'?4`@ۥ\bo߲;1@ݏ _1)^r(\}%P#5)NUAI~IAE EJkB,t:eYTV(zT;>?&:.A hK=}]cIsDRYi#l9"[3#P f8TGP@"PBE:@u*, ``@u* T(T0@a"ݵ.<>qc7aCnDG.mz|5j-X@QwڝxGwϘYh':x߅HRիBr4>\=>{pZ1(n(8kPK:*@=y_<՘eKS|2Ѻs<$79 hMҸTIzz݀^kP_Π?| F4v_s3b{#Y[o[ נN P*>/;3lxUAN]jjEsDR]Vۈw+3۲.p_j}s@ 戤5ʭǻcM,եjgRKNعk5@{_r[}  @_$.5Ykf}$p}XUR{Fa&Й>Йk0#P:d~>г*@>rxT 40d[)_z]NA=ܬ^W _s3b{#\yOTn]|y05Ӆ ԭ}>j5h|kUB_ =8'i..R/I0K0Gth6`@u* *0(T#PTGP@%% P#Bp@% PJ0A: (L *(T#P fI3#P f8TGp@ BE:@u TY9"* T3+PV(+hʠ#P2AG Mua` t%W1u`J0A:`@u* *0(T#PT7)NSjnyoc^܏Nzvm`Mt%ы!. 戤6K,紳fJ觪kSyngPse5oh6@zΙdV%x.:q@ 戤<W޷ק<:cVfI9vvKw/ (L0@a v^&Mw ݴodT5(@uAW3.ͻ]>pXhTǿ$I0K0G@% PJ0A:`@u  P*(T#PTG̒(!P f8TGp@% PB&@u7h_i$#H: GjGwz<8{xE_~9ݓ ۾t?Ds hbJvDG}Y%Z(I7W;;Gց*ؿ>xgܛ ۷Js nb[N2vװ5nuℏh(ӷو.Yt5;ĪrxΠDD'Aנ8|5hji h&fRFxHAӗO[?<^KmO >5ps׼ze0K0G6 %`@u@@3@ 3#P`DP@"PBE:`0@aJ0A:`@u* *0(T#PTGP@%% P#Bp@% PJ0A: (L *(T#P fI3#P f8TGpp@Y [(@tʠ#P2AWykDz. la:MЧy8#Bp@% PJ0A: (L *M ̘7'7_܏:ՕO̺_GkRf7Vy^_=qM0GdR4?UKfZ joȾ"l9"mɳf+-sm{=P<Ϲ@ L Ԣ7V|@s?h63huP{l_O0)5pLNAa"P]U"_A;Uu zyu`M>A&ۦthoޝ3EI`0@aJ0A:`@u* *0(T#PTGP@%% P#Bp@% PJ0A: (L %nuhCnV·Zv r_>q*P7o +;0YFtto`+ޱߐG}Y} `[_;[vr|x .}X$yn ]8P#Tu7WO֤LG1loDo?ց"u%@E#lDE1аɤ˝mqaM8+(7#=;G4kP#4gpw P`„_s3b{#YoU&po1&V>XŇ=ezΥKYV0/I:0_`0ZfS/I0K0G h6`@u* *0(T#PTGP@%% P#Bp@% PJ0A: (L *(T#P fI3#P f8TGp@ BE:@u TY9"* T3+PV(+hʠ#P2AG Mua` 40["P闌_9"* T3#P`DPnR'f=ƔZ|q?ĬDQ9pgxy9D+=z}ecU@Ý1`کVzkSjJ1{b~W;2쬙FT; }Z2']0K0G$M!X9يv{ɞ0]]?)٬R^sfx?ˎ1']0AT@W;{^&{7ͺig}T׭y>MS]z; tewim;}|D 4`t@E;+?A@ힷr ERњY~iuV:U7Ov4>d譪kj=\yZ_īxY;߫.$P-XmH*#¿K0A:`@u* *0(T#PTGP@%% P#Bp@% PJ0A: (L З/_Gzvt7kǃ[QMjD_D'Q7bo!ѿv#:7g %&L#P~#n}UnAk`#/r Z$yyrBPA\=>{p2:Qyցp$(5m+˧jOcJvs:WqS͆tU]h#k NAz0 5轁b{#YoUѿ &V>XŇ=eμ>%,xP{7a7}ЛPlJ3%.A:kM0G@% PJ0A: (L *(T#P fI3#P f8TGp@ BE:@u TY9"* T3#P`DP@"PBE:`0@aJ0A:`@u*  0E:]0kJpÑ{p/fe(@tʠ#P ?){þ]7in29yqe(@tʠ#P2AG :e(@tʠ#P2AG :e(@t$>]IENDB`ggstats/man/figures/README-unnamed-chunk-4-2.png0000644000176200001440000001543215122045515020650 0ustar liggesusersPNG  IHDRMR/PLTE:f:::f:fff}7`777`7777`777::::f:::::::f:::ff:f:f::MMMMMnMMMnnMnMnM``7```ff:fff:f::ffff:ffffnMMnMnnMnnnnnnnnn7777`MMMnMnMnnnnȎ::::fff:ff:fې```nMnnȫ﫫ﳳff:ff:ffې۶ȎMȎnȫnȫȫΈ7ې:ېf۶f۶ېn䫎Ȏȫk`ΈvmfȎې۶WO pHYsodIDATxݍ}qMBDBO@LsNܦo/ m '!!=m DMkLQ, yV&iv4+=oOڝ;;NfPr:t%(9@JNPr:t%(9@JNVuooyt롟7 NggngέttuZ }/=|ջ+K\:s^}J5n}Ѕ[:%s_3Oc"7[zufOTς+f 8+hg pB_Ȕ οm}Q{Vtџ~#ޥֹ[g?E@HfUuƟAO?KsEҟ>]/7/8f2BUƈٜ7  =F̠x5oLh?YŇiagn3ho߃Οt߃+q J<:q]<]טnWߘAN%\5&d}dm/9tu~J]Y@ԡ >xy@K;gZj9* c$ Q:>pUP Zg.p!?'n^O~rpGsGzPSZ9/awi (AsO3o&̣7AԟAc#sb zo| ކNޫ? Fg&@{s*Ӟh,4 EfN+/h*DH?> !e$ Fm4f PWxEQ&UO'xͿ0NI_G~~T@uQz]S@er,drPT@-PY$Y@erPT@-"I@-g{wx@'{A'h_9j+Uy]5靇&Gm:|3MAF +`Qdp#IQ]}wZ*Cћ"у!l|@E@lP=zp⌬+vP/(GƇ;x|l3ngИ&}-1M{kz.@Ho[ UZ^HXPh6:@ԡ ER,Zj9* c$ ,Zj9X$* ,Zj9Ij9* ,ZE,Zj9* {dm/(7t%(9@\*~3@o+ t糺 U](kR,Zj9X$hAS)UCnwɜPYM5hadwhz"}j y 5̓Az+xnVA'n?|ZTݝþT@ow ozNR@Y$h+2F/=HjϿcX s]Q As)@b@&@W@3&gs̠&[訽󓦹իty"ɡvR ӻ8J* ,ZE,Zj9* c$ ,Zj9+_U].8yECmPUB->*nz޶vأ Ez }@^@|̠vwHA|_yt;-W|f*ZZFBEaAE#v#􃳎4f*o--!P=xa|.zG̃FlH_G'7 ٶAKkq]/)?N?pZ_ŗ^xOdNz2;o fX]kERtrG^')9./Irhzᡧhh.)en3imUh6-|*-/Yġ lu(fCU(X@erPT@-"I@-PY@erP˱HPT@-PY@er,drPT@-PY$Y@erPT@-"/_3s(Qn%(9@JNPr:ӹt/U%?3g?Xi92h6:TuU/hi PT@-PY@e4ՠufw0Oz}uӲ$Zxs~cѓ4՞tpBNyMfVA}'sPoP=gi{:{o:fe7:2u#X 8G/=OCf؈z:b E@{Asn AATJA7vSoh4{Ptf.o]={ΏcU-/IUT^u(fC4@@Y$* ,Zj9Ij9* ,ZNH뮻VY$9T%U/>/:@mо  q%~2>W6wñs>л* uNa׼5hBE{sYE@g%HoO'\h,1oA|o;U5ڵJ3hi 5kfA3 _Xw4լ aЬub{w7U{Zoܱ~{G7 @R]h殒hƚuϠ렱^$5`@!f͠Ht+n—?]܋5n!{^f P?\].ERh6:@ԡ U],brPT@-PY$Y@erPT@-"I@-PY@erP˱HPT@-PY@er,drPT@-PY܋$ks|Dt%(9@\6*~3@o+ t糺 luEi_v@erPT@Ӎ.*usѻ뫃~aE,/~X~# tA ԡ{jFml~v'ngAӟ't 4=~E,J  yjv̪`uPa*he^ ,ZO&,ZE,Zj9* c$ ,Zj9X$6oj9W.yJ&NEtajc~O%~l<@O-6FrJͶ@IloXgpٹ߈0h>~tj`#z73-F>W tqg6 d>~n4 l1 6 E@%^OcHKcw54ۦϠOyzKwͫLN܃.4qEKs.Њ̴2_$Π'E+<@sA +e&׼*4>png}4{yZeӼsܺ(Ѡat+ Kf:W|BdT@|s@@z4w^jfh1^$ՏF~'u4觏ub3MS߼jz j=t%(9@JNPr:t%(9@JNPr:t%(9@JNPr:t%(9@H/6+ʁe M6,w4pg*j~V02h14o܂Ҍhb\3.@#OƝq? h毴 Ө--̫BSn@>vr/ ^43e}[x@-HYn{u|moR(9@JNPr:t%(9@JNPr:t%(9@JNPr:t%(9w=dKCIENDB`ggstats/man/figures/README-unnamed-chunk-7-1.png0000644000176200001440000001232715122045520020646 0ustar liggesusersPNG  IHDRMR/ PLTE:f:f:f333::::f:::f:f:MMMMMnMMMnMnMff:fff:f::ff:fffffffnMMnMnnMnnnnnnMMMnMnMM::::fff۶nMnnnMȫff:fffې۶ȎMې:ېfnvmfȎې Y pHYsodrIDATx흍V@ nInKi֑ 6XJ ]8/AO=ֻڤA={e'Ai: B!P4 &2o n uZu;%z P[J RJuRBE(-@QGo)%\N :zK)!P[J RJuRBE(-@QGo)%:zK)!P[J &؄@#mJ@c!Фh,@BhR4M BI!X4) &@c!Фh,@BhR4M BI!X4) &@c!Фh,bh,J[J hRB(Fo)%J[J hRB(Fo)%J[J hRtϲ˽s& @kw廫& @kU~I6yuR~rc$h-AUtXLaNT4DZ)@d;@4{^O^JE~TpTPh*,m jjM?Ӿc렂ڀ@S!$ݕh*T)J4MCw% զn ơMjS7@] @Bq讄@SAM!8tWB Pmh+!T6uS4ݕh*TZpSզ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpS@7GĕtD-%`զ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpSզ%@wxI;%z Pmj^NTZpSզ%@QVQTQTQTK:get׿{@-ƽe(Uj@5ޏz[/'Pj@峷rt*^ԲZ:cP+~G t~"0gskyGZ@'/-բ+tB}q~Jբ:F.MEݥ@47V+.3h:ytq$+IxAVBfT6^tvʗ$P}2fx?͋_;@-#EfT6ҁf#w ˨=j@OXLVBk:eOwC@QVh-5qOjzw&g>ϗquPBrW'z+|kg?;t?Y PGT{ >;?CGeuS+h{7Ϲ@wfŋBpGQo h2c!y;/pGZR.A?PVo |w+G?<z@Ck Tͨl Tͨl TͨlB77Q@] jYl'PԢ:O^d^@Qwo=?;ɮsu^=j^-hqCYvɻ9w7xTYo$z jYݱC@F^L,^?zsuosa^,ځ>wO =I\E@0{|6@BkA/o3:O3^jYVdoof g3Rq3|7zeZ"mz@ی uczuVfL1@ՌʦZ4P.3RMu0_bE݂zCEq#[1pGZ-`{v0g=j@]W Բ xjZ:lXzx4 ?%Pj@!Pj@uPMjFeS-gԲ#+hjY-hqU'Vţ,uPMjFeS-(xAx$@-1(ꠚ@ՌʦZ6P~Bݑ4ZVKZ&P5&P5V]/Ԃj XAQV6Z:A*et\ETK:; Pt!Pj@/SIj@k@fT6ruH-(Q:רGTw!P^W-(Q:רGRZ ZVQT:jZ-AYjY-tJe&ԫfT6ҁf#wAQ{fc^_-h%jYMjFeSMjFeSMjFeS-l̄zZv+o'Pj@sex}Q@kǠW;hPOҁVՙml:3 련Բ+h-Բ@Ռʦ@Ռʦ@Ռʦ@Ռʦg'jY@/}@՝ wKeu'"0'_T:ynie Zp8@- Tͨl;YvPw4PwviptxM2dXi/ sP9Vڋ(+E@u :J{QPc(α^@TXi/ sP9Vڋ(+E@u :J{QPc(α^@TXi/ sP9Vڋ(+E@u :J{QPc(α^@TXi/Q+A1h8.v|"ɮn!>յW=]h287coԭ-ђ_;@7\\c\.\c;.7EժDb|Uvqկ<rzJ$A@nj\G`hu_vQgl/x#;p_H{8t~k=?x]~`x[sel7}iɌ^GȼRn/k&7޼.A&5?濶ʼnսن֚ʄ~cMWu)ju~⌍=I:u=ဲ7Tu~6n~,G}~DjKnT b^F$uH.|=|rJWQTH;etg}\*yVGy ^.ɥR >+*zą]iF۠S5.|~m^Kgd4~DcCz2/٬^Σ }qBw޴UQ@ =qZG3V~I頴H<˥߅Ʊ7y/uXUsxxۍ'X' 4?3gbdPt4z30I@_`&Xb -A}Plhu0좾tPԥ.j}+t==%QMAꠧ9:/㱃2sUM ~$L&bA䳺2!U=B 5kH0}CI^V*[g96y}hηL 2}El4:kZ~V4 u"!_mRYdؓhJ{QPc(g9Vڋ(+E@u :J{QPc(α^@TXi/ sP9Vڋ(+E@u :J{QPc(α^@TXi/ sP9Vڋ(+E@u :J{QPcL:UUq1, ũ;@#.S w8F\p(N5qQjçDA%w|gs(tt:^*vE@c ֱ]QPXŮ(ubW@:V+ jPU(Z*vE@c ֱ]QPXŮ(ubW@:V+ jPU(Z*vE@c ֱ]QPXŮ(ubW@:V+ jPU{ΨzD FU_IsmPT_|OW^!Nҋ߿)c5fpzqv@Oݺ;5K6g2 |kWЗk >\#w@NbĂW>IA7BN@iK>:jS՝نtTukVe9_0P;p_w]{vnrɠ>==~_з9_?DN5{izvgN}sfݧN t{LhEMzno/臯PtR-ٺj]BSu|@K#R4*zaZ1LNt<c4t%Is0 3hsP |9uK~:AR9^"9htCq$dwT- z~Ei'c:-O( ubW@:V+ jPU(Z*vE@c ֱ]QPXŮ(ubW@:V+ jPU(Z*vE@c ֱ]QPXŮ(ubW@:V+ j $TV*F\p(N5qQj48phEqЈTFmwB= {S=C@T{@ 0P=  hP}@4LT(jh>&*@@|@P  ZO( PP-'P@@(( aӦ*٧ܾN~+6o0;~q0;.@Oݺ9l-w|m7&4.]L1drõ+Z'ğW;%B& _sIϴͅ?=$F~XO!GGDS|A@5GOdV=~Ŋۏ߿~ۚ:.۝ߜ~pWZ|v ^l'cKۯX\^g ~K:i3u_,N hf0U)U*:tӽz#Ϥ֪F @T}:qU N~6O,}|\.Jb_zu667B@/\+FIJ;0E@agп?`^uj@ @tQ@}]s:+A4Ѐ@|:hz2Z3Fij3SaA ,Z @37~j47P9zᚋddr꫁`O@~Y4puzبbc5NB@?.ҩuK ד@}v`6-,H\*yJ9d˨\*4Mr].PsˆɥnFm1r-q"A)Lާ)e恎ռ-z6cCzu#yt/%sQs%\`xסK<' Bert\灦ǪhM@h6'X' S_$ыMwNk@W^\T&@ h;[C2Gc95feTMAeAGewPēNuߘ'j oKDAs|uSPB>huSHJ^m,7`org*[9(-ƛF]#Ҳ)Ai]$Iyc2UXQ1z /@@|@P  ZO( PP-'P@@(( a@T{@ 0P=  hP}@4L48phEqЈTũ;@#.S w8F\p-+&wӴmPmP}@4LP]P-'P@@P}@4LP]P-'P@@P}@4LP]P-'P@@P}@4LP]P-'P@$(TucaTuzD:@]7;.@Oݺb\96@ 㢥;KW$k >pȀ 񧭉ĂA!XX"D/x. bS3 ;Sg 4]I/5GOd=~ /8sx^G+-:o/MsWK.7ݚjU:b׷OTugeB@GV5\+)jtK:yC7pzظ%LI~6O,}|\.2&w$Wźf$ʄʀNC?@j]B=͡ }aBwތSp@Ih\ud@yZԝ'Ηx2Nȡ4qNǒƱ7fON2 ڞyuXC5Ib]:@i6J:ͷB6u"TH:PCߘ:4gceS 4-&tLdo1Ѥ;hv. ٭yPvqEoTNsPvzu;eߡxܨ|sPZFQ}sP1I @ Y5@}ģ TW"NXu'P@@P}@4LP]P-'P@@P}@4LP]P-'P@@P}@4LP]P-'P@@P}eũư(F\p(N5qQj48phEq@KoUFM:nE3V < " a jO( (j>& 9  h((( a jO( (j>& 9 X3f ]i\+1hRo_/B4w yxqp|q큁0Z[wg2  ׮/WK&Fg M$~Q|*X"_,xU:h75隣}E}/L&{/cY#:~x}o]~`x[sz%e3Re yQWKur}s몃4P{?[TuP<.uB:sz֊u'T εbdY1mjMUJQcUдt 9?.9-o^_~`򿕳Ozjn@_0uknsЍ7S/sP tMr@x+Ar%T tCb♞?.Zt6[*>'>/DlÑ\nTCB0/$֠@_&D]Gu?H;etg}\*yVGy ^.ɥR >nzؕP)}TMӌx7j]B=Exu}MYzR]SQ~=4.>yd::.x7}zHVu:>r+[BT>wJKxN-BXVMS f+Pƒz i.5g虧X' Tm7:MrL$E7X5@wBerH6&fA˼{vA}OIFxIuhpzy&yA.h_][%Qf 4z'}lh$E…*#MMzJ7wOWG|;TQsm,os3YT΀sPvzu5]-?^ZɋB+MeQPn _Kx~;8q=%:@h( (j>& 9  h((( a jO( (j>& 9  hDQUũư(F\p(N5qQj48phEqЈT>s7yNٽQof4P?V(+E@Ƣ JcQPc(X@TXi, 4P?V(+E@Ƣ JcQPc(X@TXi, 4P?V(+E@Ƣ JcQPc(X@TXi, 4uzagӲ}yW"mpWV="rm_1APu J]^!Nvu5K66 %9u(EKMwH&F}2J޸rzp&SHvf:5G|0K6/p=]o Sߜl9+2o/MsWK.sڀrt!Tt{wL<{|kX&mFHqT `%$N~|'rbinu㶁(r%=oP xIDAT"]B腝kȲ*ZkC~oA_07l~b_ԕS>teBc?B@uN%<ڤ'Z7Tkn/,yz|mށn"=iTJK.J zxCN'3)Nn>A?;]9haaDRR)b|Z6glK%@<Mr].bT1 gBtVe͕yw m[tsнg\`x@sPtFdZ5_(2/™P"頜*/dYZƱ7bߩ/uuT=4ߊHb&ָm6a'Bv{uJ)јK*^5g!q|yIgu&ߘ-&tLL&!6 !k{>~6sPZ42v*栴,⹧Qm49(-ƛF]g4OHݨw!h tP 4d+Ew4P?V(+E@Ƣ JcQPc(X@TXi, 4P?V(+E@Ƣ JcQPc(X@TXi, 4P?V(+E@Ƣ Jcљ48ЈTũ;@#.S w8F\p(N5qQj48phEqA-0%UpZQ%3 (M """" :Mw$Aza>+̐) zP~l!уkŧ?rL9ϙ]߿Oч8|+!9c>PS7'鑿#qU73Cs|z'=_~?ݴv!3Ѕ;taZfI#$*3qA? @s?\;% AgT_g'1x8yH f ƌTJ@姯Q%{rZ?T TJdRxUqsp=NfDU+ 0?%S"qYHfqOc;jP;[(Y*A'Z>3 T&%b $_3wJdOߡƯ^E)%/ht=+rbNbnsY"PiudaS)~Fa>wSʫx3wJ@d/H){_ȃky>yOW7бlG@s?^U|[V>*Vqwp~NlZ<l_-T@b{ҟ4K7f׭ T- ?Vs ;4Op X*WgRuⶰ^ ߥܿSAs͚RkִoX|907f܁ZHWGLOޓVOrs1' T}dRK;eZ&)`qn uHX n`ʁL]+tH@Z* h*:no:suM9jP(Lmuuuuuuuuuuuuuuu?2?:!1IENDB`ggstats/man/figures/lifecycle-questioning.svg0000644000176200001440000000171414357760261021206 0ustar liggesuserslifecyclelifecyclequestioningquestioning ggstats/man/figures/lifecycle-stable.svg0000644000176200001440000000167414357760261020120 0ustar liggesuserslifecyclelifecyclestablestable ggstats/man/figures/README-unnamed-chunk-5-1.png0000644000176200001440000001616115122045517020652 0ustar liggesusersPNG  IHDRMR/>PLTE:f:::f:fff8::::f:::::::f:::ff:f:f::MMMMMnMMMnMaff:fff:f::f:fffff:ffffnMMnMnnMnnnMMMnM::::fff:ff:fېېnMȫff::ffې۶ȎMې:ېf۶f۶ې۶ndvmfȎې۶+e{ pHYsodIDATx흏y =iUǖSmZ9dVώV( ΰ{㏁ey{3fg815@P XAk (` 5@P Xs *C|П6/'6LO3W7~Z=LIS{7=Rw>T";wb[%}7'cOz-kLhU A Ln߮>)WޕZ 衁[5\J`.h"iS˟VZ<\ƵM,A뀠q@P-'!hmbyR:i84]RV A.)k4+l|tfA=)!񂎕8ʏFj@тV7VSڟb[侾?j?z&4NGWp_{>w>>:uϩMOϞ'_5)o꿚Ǽ$ M,+|Н,5^ : GLUr'I*5Vq#__+_Kr' h*N.W~ur9%IDPVo{o߱oAylm˓T%x_.gOB@բVJ AS!h%e~AY Z*\'>`-ZGcBP}nw$(mb]f.ZR,ReW*ee-襤Tc \}\-$YYWz$ M M M M_yH*)`Ay~ M,A뀠q@P-'!hmbyZjQy Q%e4蒲BTtIY!h* 幚&)6<A!h4E%DBРK AS!h%e4蒲 j@բz&u@8 \J-(8z (` 5ouD!(^~[[xL;Z|KTj1bTj۲Vsy6<A J:D JᨒBTtIY!h* R5}~~PqI Q%eԬwB{6I M,O4NL/3zhfgϳn?n{@+3g3ԋ h:ZQb:w7Π根[C֠݉r+="b9wac jr/Q@t>:OF5V5w 6I%R53QjW\+5?R%_fv׫As^PrkPJ֠fB:SGoWJAs+xJM<6I!h4E$ M,A뀠q@P-j5/![ЯZyAKe΂~X<%^AUZ7q!h%e,rVP,(6Iq`&S/PZAgahų$ M,Mz? ZAs5hxHz7A Ak: $!(`AyI M,A뀠q@P-'!hmbyZjQy Q%e4蒲BTtIY!h* 幚&)6<A!h4E%DBРK AS!h%e4蒲 j@բz&u@8 \J-(8z (` 5ouD!(^V|K,v:nňQnʳ[!!h\բz&,(*(J AS!h%e4蒲 gJ/y@OU?]%zRBTIY=]ܭt;IjAylmS'틩:-|@S7%2zRBTIYc矵w)h:!/#h5MRmbXAK^fp>PMAzu~: jctb$Vб<[C=j_fPܝgsc5R;gTkěV]s Ѕ^POB@4<mj:;{ 6I%RנV -/w4.Aw @ВAUjfNЍ]8zRBTIY&j@բz&u@8 URV A.) zH>蒲Z{L6I<蒲~*5W,(y5O&$ zD$rO޼I h-B+<&nDyIIY-M兠t7>h M_yH*)`Ay~ M,A뀠q@P-'!hmbyZjQy Q%e4蒲BTtIY!h* 幚&)6<A!h4E%DBРK AS!h%e4蒲 j@բz&u@8 \J-(8z (` 5ouD!(^V|K,v:nňQnʳ[!!h\բz&,(*(J AS!h%e4蒲 :UTkuX-\yװe)wJtN֟N)<3$rYc9U}s`2n gE1a%)ٓ46a *'%D Zwgp.άvÁR-@I"*h5MRmb:V^TOB@ϳ'!hmbyZjQy Q%e[{ntIY)@Z:bO?KJh (KʚwƍzԠIJ4!hrC'glDY!hj0n39oSҀ[ޢ h* 4]RV\"!h4E$ M,A뀠q@P-j5/!4]RV A.)+MAUi_Lii~Z~@+9>ىhͿdd$?4zf%[C:f[<sO+K(%Ak5ryAs=>S@hy?ɿ Zuԓ46P :V'F3zdTW3h_:a\(נM\/K|#r__gԙ:A\!(8(^PT A|{ă gjM\,6I݉_0@P.,hbO3wLu8-={_71plE@10νWi'廖VTY8"o X,f @P XAk (` 5@P XAk (` 5@P XAk`?z{IENDB`ggstats/man/figures/lifecycle-experimental.svg0000644000176200001440000000171614357760261021340 0ustar liggesuserslifecyclelifecycleexperimentalexperimental ggstats/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171214357760261020737 0ustar liggesuserslifecyclelifecycledeprecateddeprecated ggstats/man/figures/lifecycle-superseded.svg0000644000176200001440000000171314357760261021003 0ustar liggesusers lifecyclelifecyclesupersededsuperseded ggstats/man/figures/lifecycle-archived.svg0000644000176200001440000000170714357760261020430 0ustar liggesusers lifecyclelifecyclearchivedarchived ggstats/man/figures/README-unnamed-chunk-8-1.png0000644000176200001440000001257615122045520020655 0ustar liggesusersPNG  IHDRMR/PLTE:f:f333::::f::f::::MMMMMnMMMnMnMff:fff:f:fnMMnMnnMnnnnnn䊀MMMnMnMn::::ff:f۶ېnMnnMȫfwpȎMȎnȎې:nfȎې۶Es pHYsod%IDATxݍ_qPYC9˺4MnJ6˶fd80F߲ L0փO}WQD9=XY((3GiP PR?u]:Y 44-tQ P(@EW+֥FkP*;ih"ZZM] P(@EW+ PѕJTte@@g{Y6Asr5h?:g-d7^Щ8ٽ4pQtvdi+8#N{eH@uTe@@g{C}Ф*Z4ys?(>pNM!C΃U:htJTt%@*] P(@EW+彙\%J@]-iT+ PѕJTt%@*] P(@EW+ PѕJTt%@*/еu@S rQ}#@PRA#@PRA#@PRA#@PRA#@PRA#@PRA,*ucvHe8@-R4VeX@՘ɳ:Ӫ lo눧'VP`eX@ѐ}д*j7<8p}P΃UhtJTt%@*] P(@EW+ PѕJTtgò$Qå4JTt%@*] P(@EW+ PѕJTt%@*] P(@EW+ PѕָZy5M'КK(h[j$Q_ݝx-~^O"@]c٤MPm~oғUkPzih{#@$&x^~>?h>Vh~7ӷmЪգ>bٽ4j@~3;Dh>VW.o{5lGGv苫=h>VW.o2鎑i'M}P%Sh>Vg#m1GuZ5]\;hzwZ [K#pP]@ yPh| }@hu+@pWgХp_|4bM|b oV7it*;ih"Zj>\ P(@EW+ PѕJPѕ]:@Ө(@EW+ PѕJTt%@*] P(@EW+ PѕJTt%@*] P(@EW+}Gҭr? P*64-]5/@S(@EW+ PѕJTt%@*] P(@EW+ PѕJTt%@*]Ie|~Wt4|[N{e8@dxWTmӳlGGv+:H#N{e8@MFCFд*>hZŷ8;7;E?T А*σnσN4'@%WFZSFР+ Pѕ-7ʿ 8KD @Wh>̩}\3к|%hʕ1 (T1ο[gRe@C}LaW+O&WF 8Z+cZ7DtռM2^u>*2^>Tt?}聎A=Hzа+Z?DtM2Zm[yi'2 Hv+:<.'{c;h{WtouۈL;h1t2&R IAӪ >W㦾V8O2 F`M2 5* PѕJTt%@*] P(@EW+ PѕJTt%@*] P(@EW+ Pѕ^<@F%@*] P(@EW+ PѕJTt%@*]!P+_qzo[JjeUQbrtDsл=hGTE-P}mh_@[">@E՜E"tM3k@ f7  &ՇBV1"v:+h"v:+h"v:+hz/oZ &-@T}ЋnZ &z,;T6[mvP >gS jƭëj3Q)hAkZ_lZ &4?}Q|u@$9+hot=sGTΊZګ- Z? @]eY6 sovOxqeb&xiNgE 5#loXF-н<_iwV{5ml'%СݔK3-2EGYITWb͠9)`iyZ&z*H͙*2V#ttuL @]%ƚ9*IPJi9o$))ͧj'Sl<84v0O3-'KSPUI*$uꘓTWH SZ:r*N. }qb:珎FF>+_Wx'DԤ4J@G[)S\2LgO܌c;~@]|]6»kj,?[M|ĕv)XlTϋW(j6["|lwmGIV}~yд+{<] P(@EW+ PѕJTt%@*] P(@EW+ Pѕ= @i]eޮv]e@:˳0RqϽi]e#}ghtw; @h!.*{A}.*{;@(@EW+ PѕJTt%@*] P(@EW+ PѕJTt%@*GvFWХp; @񖏫p @hpO>zvFW'vFW#ЛvFW|4pwU{:] P(@EW+ PѕJTt%@*] P(@EW+ Pѕ=u|1@94~Uv?l.o=htuz1@(v]z *{A *cFW/P׷4U+ PѕJTt%@*] P(@EW+ PѕJTt%@*g,R2 n+U+|Deh574^gcZ@g{[ *Zv,HoHtsJzJte샒Rw0% oW'/p]:Ul(K?:@/~W<ӇO< 4 ǟU!8Mk'/ϿHU対~ ~c6@;P=x!/{jZ //cgu&^u²MSM\&)LObUZ>{sirW\woͦI >t?sY^5^FP=l6*suٛ_^]IͶQ<>fzjL~wÏͦI qYyv4l?ZPv4of[n8gmRwRL˯|g8cN%=,+g 7GPjJ-إz/^os~{4%+2GfS5SfWvȍڦ>w7ՃN& @WfJ_ԈiwEGJDDtJDDtJDDtJDDtJDDtaK>IENDB`ggstats/man/figures/lifecycle-defunct.svg0000644000176200001440000000170414357760261020270 0ustar liggesuserslifecyclelifecycledefunctdefunct ggstats/man/figures/README-unnamed-chunk-9-1.png0000644000176200001440000001412315122045521020645 0ustar liggesusersPNG  IHDRMR/PLTE:f:f333::::f:::::MMMMMnMMMnMnMff:fff:ffnMMnMnnMnnnnnnMMMnMnM::::f۶ېnMnnnMȫffېȎMȎnې:nvmfȎېݍ pHYsod IDATx {YvEmp;A03$tf'cM3#?[Uz!!RU}7hO-%pvBAIAPt$%IAIY[k/m>&x>AFkRM炠NZ`ϵIm)ަsAPpQ$oǹ S8(sm{[\)Akl6-q.5 \ޖm8u G[|MboK6} :ow n|*}<=.ߌnt+[ ?kOaeZ V@&~*xV<˳97 3K=T~EPMIP>rVJxzX.?WJ[<AARW ,\FPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%o]سJxm[<5(W>+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳRGPl<+%oq5J[AͳR-Γ/?i/=D4JvlA^vmZ"hp%[ [</wO†prTܾ@o~G4}~-~x+f[7GS?iE6|W0?etoȂ>LjHX,ς=T~G7IS (k4Jv5h6v?LAoʣ+%o؂ 5~Gag\Ɏ-h}83fZiv唥WcAG>[dV$J{ pz$l A4JvlA;{\Ɏ.h hp%At%\FP] WAW•lEЕp%;IfGP\FP] W Z~W|!doTWc Z+\Ɏ/h}W :{@4JvlA}PnWw9)J6"J[P6k.'%\^!`goX+GZXNJ\buwmf85i쥂Vx`|{d9)J AGbs/勗w;],'%\^*huǂ>+!w7eu/|8<}处+-i=( ˵Aq;@.d4 =:4܆-xWIE3?"hI W fO773/"snrR•쥂Vg:{Kna_޻NG5\^*h=) ?ǻY_Z{8Ww{x?I W :y'lH}q<N3Wfje*++m%3uki؂6"/؂ WAW•vdG7,p%;|c处++JRA:6oa9)JrAFrdJMd#(+Q-:A؂ Wc i&{AW :xWŽ^bEfA^%~ӣMqA 5l"lEЦ{wKO} :| j^hgyS+ }/֠y<=ö׋-Z}]y33/{gA \n*hGam&9Թ^AWc &UgAЕ.N3AkAӀ++JrA/fxUl4 ]P[NJ[PN3p%A'处+ٱ-M|处+ٱtMdV4 ˓rR•-԰ofC^la9)JʽCǜme9)JvAڳ"]=fja9)JvAw_ `/ L,#h I WNdr;|jM| I W Zg6wO!.vd o[|L-,'%\n.h AP/~处+-oö_(A.'%\n$1߿xdoXs9)JvlAkAӀ+<(*~hNF-$)ax%A; 6g#hٖ)aCsJ6vN35g#hAl0ڜA hs6vmF39 ASDɎ*hHZ"$JvlAys=6v]ؒbK ;Fx]s6k3{`#h4Ss6vmF34 Iy: h(ٱ?g8ZA|sm?G9ZfyЍLaxHs6vmF3t0~2Al%l0$5g#h9IZfkhL{e4 I؂A;̶|f5>.iHdG 7Fil0ڜf% 4 I؂/`9!iHdtn&Hz<=.~<ˆ?eGS?iHdG4 y" +̛3]Ė:ٙy#Utxo-}=˳b/TRxlAV:;7}<=,(k4խAϙ5~(#hVeiŏtwV< _94[\ hy|%ã@AӀ+-^I5~Gx-&gW ʻf-SкA4J6gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJ) g oa ڜxkPMd#}VJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-Y)~#yVJj8gd-CJqG囩{AȆ.&& (?MMxӿ!U/둏ս! %6kR\$5(n>|vkR\$(&x>AFkRM炠N[/hQ~|DkAmAPp% (I:J$h\bnO}r뽚αSߐHZN1 |6HTAC0/ð7P/,'W*&q~4|Ŋ4gocAB?]^V+0⾺4?ԛo%IM|V,OŲ9oz ʙc Gp77&A E4\h* yy6}3ک =g_<~>[' X/<4UW W,WSOOnj%Ab((˄kЬVo~74zAþQdAK?GF.U]=GGcn| M7zA^8AoYy{0%mAZmw ]O>?*[/?v̮Atnɗ[^FQ|y~q[TnFf΃7o\fuuKnYVS$w-_cV[f$%IAIAPt$%IAIAPt$%IyCfpIENDB`ggstats/man/figures/README-unnamed-chunk-11-1.png0000644000176200001440000002011615122045523020717 0ustar liggesusersPNG  IHDRMR/PLTE:f:f:f!::::f::f:::f::;RDTMMMMMnMMMnM]cff:fff:f:ffffffffnMMnMnnMnnnnnMMMnMnM::::f:f۶nMnnnȫff:ې۶ȎMې:n%fȎې۶' pHYsodIDATx u`&nZ𵱱>\6v I$iq`LKt=c#iZYstq43wfT c8`P1eL@!Pt1>Vdn@iz4Q!D=,Z@hMâuz4Q!D=,Z@hMâuz4Q!D=,Z@hMâuz4Q!D=,Z@hMâuz. hݜ`@fVqWD@>?|^qWD@c"ET`@#)JP)Qsh$B28+"H@%c" 紭b"j" G&j" T PpqWDTZ2_2E{E 2N{E fDqWDȀ:!RTA@kӊ7Ւ^EZ VJ ͭWxDE4Հɷ@Q-8+"P(Qs?KV$׋OEOlh93T9\E(nC0QE'|\t@@7!PО(nCB-Qnv͹"еzID tUE_L{T**tk\t-RsE+"ugG5@-n 5@-"P+E̮@lZ45PjZ3P~T쁎|nTsEk"udw)6@W,b 5@W,"PCE:96@+| 5@+"PKE: I6@W+x 5@W+"PSE:Y6@*t 5@*-Rt TF=9-_B,VbPst")@E@F=u\%ZꄚsE"e'_.{ 5@W)"PsE: h@Uxst"PPst"WD86@W(r/,\ Et{I\h  5@ykw*?ƛsEɋ|G=B͹"Et%Ԝ+M^D&!Ҿ@Q\h" Pj."PE((Ԝ+M\_@*\B͹ 嗏[O=یQZDFRGO>v_l4FaQN@eB͹ tZA_ZIQXDVV9Ŀi΋vM} YO_6fϷj ?UнVPjn; eEjhG:{!h zJDqb1 %Ԝ+@<(-ObG \h¢jht͹"ЄES#=4j+"PE;  jhT9W(E4$h/Po*M 5@SEPs4QQohǝ\hjhp͹"DEbxs4Mp74|7@)>REVݜ+&1ޜ+FԹeF\h4(;Pd 5@c|ihoƒP` 5@cQlYt@ PB{N9WIQ6PjFP @xs4&5<7Js4̀ƖPs4\ziFR@>@E{e \@4r7@ l-sE276\h8RMJ %Ԝ+ z2 4@h hQ\h0R-Z #믘sEXz^ %Ԝ+ F4AKCMQ@'0Qc9WL%colK{r'"1k 4=*0?B1,ʽD 49Rc{h%@ˡ>#PdˀsxBw %pFn 4vZM` *"ck)Jd41IN(*/P'*߰޴R'@Yh'*9J }<"1ϝP|W":z@K(*r@Bq}E.:0PJӯU@@I{ ߠù*,-\ E&. *^B*r@}-*[lav Ud'f7Ȗ}hw"Ei/ {}[#PTK "B쀾yu4qe&UD41Bm<*PEwǕK%*ZB3گFXM<,P-r@]-*YBz}s=/XXBޚ>lt;w%*XBj>h@ŗ쀾ghdg tK _B3MtI쀾]ztWt6"ɖuB/ζ@5!Pw:hˊWle!|Wc,Khv@xCQɶLf |ś-:--\F%2+ﭬc$Ŗ쀶ޱr_@U@Ф[&XBE1:."uo _B3ZFѸC)"Kh~@W_^SV@=.wu#Ɂ^>@N( /5ugfPK2@14hYg(Ud mTiƖwc@> y+ hh*xwPKO3rd %PEg$]7fj^6@%€<Z=}xr2]P@D4@?<^>*_~h|<(,Ty4no/MWϞA$EiHF,pj#:wHF`/xP?ߪI/ t كVT޺gPe4/+hVPIP#%@i7yǨ :G}b?VFF'X: P JiB`HʁfX< w T#4yL]yP@sLq"eU 4m tLhx,0PPe~j$@ykv;b"KG4HlPP$ɀfY3 gT'@q?ͪ-sa@UB trG7IyF(S nwцU%awAw81PL+l Jxm9PTklֆ %a@=A]  H{LSe2^(ؙO;Z[6)B@>1P`:{/:.h:{9Q-wIn@Y(g:Kl% %AZ@:B cr:E[6RE9H|ٖ 4@AzΊ@:@G/=C!swAl-;7BO9½W#@U.?|6eV6 ذ'I s ] ACly1r]6-J}@uvvc24"@.z2rm"ag&K@q%-J} >S %>k6Z -z %.H-S@!=ˁ?D>즎 :+;O.Ph3YP #dMu>" j4@wL;$@K쓚od K D@½hMqXE"do#O•9EE4@[vt&Q O.-lDarG}hyMdԷm)yPl%۬,?a$@BԷz*  oD.@[( PP튼7 XDE@eBs[γئ|+vLzc$c7 --i Hz"- x[@=BG J %@=RU>9xl"beA8h?u #nN@^ . =6c$qh3qˁ6E ćX;B3Zl2eTt&@M{ =8P];h~A>nfфTh@Gh=4P]蒢3Q1JlZ)9AmIJCuIKvEH;X#JB}L>&)^y0֣@ynMr!=יGZ& ȦEH"* h6-Bz!=M_g %h6-Bz!=mt<zt^ayQPf"^s*z?J6ilZ+BzƯ|]/ѳ҆cAd@RL;!=KV-Bz!=~5ɰ6ϦEH"U4\Dg(Ie"^HJ!= KϾWxFȘ 4@7,BzBon{峿o"'T/iS+BzE-|N~IۣWĊ]J!=ѢjkDI&m"^agu ={EHφE:dl"^sq@# I{EHe-jm"^sa@+g8& ,Bz!=:I{EH6i`c4ڤEH"Rzcl"^sQ@2z6i`c,]PXMX+Bztc!=l~? J`(& ,Bz!=4:c!=ؤEH"bl"^sQ@gؤEH"€al"^ғ P" Vsy@#06i`c('ϮؤEH"碀 I{EHO6@7Fal"^C4W\MX+Bztc!= ؤEH"@'16i`cmݺWխiƈܜIKe@~w|q1"gln"<>*_~(36i ^c˯y~lQ ,Bz!=銼Y[Chije\IJŤhuΓŤhGd1i,ڮgAbX]}se\D;hMâuz4Q!D=,Z@hMâuz4Q!D=,Z@hMâuz0(c:ʘ2C(c:ʘ2!m7G{J_=AUT mSiVS[tP{tjWW_ 5П-Ӥ=TV17{ /]~H"ݘڒr- @OVz+OS~?(AϒAU`6&E1u%.Z4QmMf=Av_76E1u%-ZtamM)z:aӶ`H"͘ڒ-Zu|v}{ W}ۂ!MTcjKS4U(^~ԳdPo_;j]ۂ!MtcjJ ]Dԏ(7K|{ ?=h[0QvLuIA* Z?w|R=~"ܳlP͡m[2QvLI*y #2C(c:ʘ2Cy?vBg -]? b+w/Eq|;}u{CͽwW?Vk_\oVf_{x+=J!Ocs=z*5C;5hux>]/ns_Y[GjOM/} vQ]|V~UϛkMjz͉V]@qgo^8@m;( Zg=6l FYcpQ@+u[,vhw&6\U@q4<}tpR)]W=rhwtj .C|ux?ͭ~߇ۑD/^??\AovWmѶgO8@GmӬeuk2޽^!9(G7G/o~ہMڎm=ϞjsA@scJ6 xqo~nf7ooM^A?̞l-q.8Ŀۨ'ZE\ψ@돁V"_m̞|-qvm{udmMCzT*Z?V?lwOW}~wv&@T@q4ٓ%9izښ %t%(] P39@; 4 ,AT.2(c:ʘ2C(c:ʘ2C(c:ʘ2C(c:ʘ2dCEsgcnd;@PP jT (AA5 APP jT (F;/F^mFѬ}93|4 8m^]鏦ooW'^AwGNP+m7fcQ}vuwyf+^A7g\w4%m4iFiZ}m'@It6mwdhO`A[Y{]t^n6P{M^îvc ^PH6_7ف}6r4hAA7 APP͡/Ym{Ԏ+>8&V5/c< Kͪٚ?ssd;hgiutj'x3Mr?4`uaR)܂ny3A/][T b =U"\N8Gno\fDF :<;Vr 3RJ5q A8} DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@,CA0ACDPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DPQP@DP]ʳkt+s4kw2\xn9ϛfl AAddf1kV͸ڽRH=\4ls`OnA֜A/][To 3Jl=vP't3L "# tjwحf-4AA&oTM2ܪT{AzpL(( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "((( "(.`XLAAFATF^[ۏ9EvLq6A hI4 oD˳΋ n@r]3iDݼiui͎)Aa3=[KϾ zӪ6 -֌h:!C4ZHN4\ h94fzI;gтH SArGU]?Mk~ԟCB"BV@[{ yiV4 Awf2f.#n?n8AK^ % |IAPP"((( "((( "((( NQ ,2QP 2 * 2 * 2 * 2 * 2 * 2 * 2 *3: ݕʹ1܆1M] ATdt55ᜌMFj惆*22 lh_\jzAEF&}mwt.}hG&x-5vBiNP7tmR_Jf jq5 "9x39f-4AT&UmݕI枱ݠ#hٌNBgZ>hrge 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 * 2 *S8 s`IRcWN0AC-5 AAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$ATFAAe$R3X0^p! (IB\y'yӾѬ! (HB[߭Қi_\jzAEF'{_lt.}hXCؕ_P1[s^P7tmR_J02 gv퉫q5 "$G5vY 9 (:}#ٵs>f3rZ6yo3uW&{vcz:h*.J]>6:IWصor,`- KثtT A >zBz34LÂBRAJA0  }:mUiXPS)hR'ez&o$ALÂBJA{k L_#0  }*U$ ((H"$ ((H"$ ((H"$ X C_8P*EOAh7EP9P*"fAT3ԁ *@B@PJfLg2aLӽvWj~AP=4Q_\+ud$Aˍ\AwA+>usK+գ&A%Aˍ\A9+EkcAW0! (aFdOVrzHO3t4tqQ&zi:vAA0C@zcf~$hkF?JAs7&h$.z#ϣ4wz$A`_~jzo vG#hH6fA ϾG#hHZ;-۹sdY#ҟG?JAsG?Go?2<4ӿH:Q;=SޓϮ?;Q;=n4[ {Ϟ5#%C@# ۗ!/<̔(yIЌC7=AG=JAs7$hy8qQ#%C@`7EP9P*"fAT3ԁ *@B@P u! (a:PA0C@rT EP9P*/c!hݷrAE=C! :(AKAE4w:BRD! ACDC@ctx AEн{4oz sB Dhx-T4XT EP9P*"fAT3ԁ *@B@P u! (a:PA0C@rT EP9P*"fAT3ԁ *@B@P u! (a:PA0C@rT EP9P*"fAT3ԁ óiKO=1O޴5 ]Qt-/{=ɐs^7 ?vE_x}&ϬZO?鉷O;+g*3a /k_nes}^w~jv|ڭA_-8)O{{mgvwZYl|{IZ`~y f~hOW1W? \>['T~grB߿;g/sh!CQ?cVDSg~ڙ'O_3 ˘0V'޺6,ա[>pFFEgb_Pw5/oVl<^.~>/)zUw}Gwm*wmGbޞ&>?h䰠筟x'A{lWGo><xƾ/m3](icEA?_S:耠9蠠hrr栉sPEtf3U4? /步癃I7N]g_sw?{UBAwЁ\ BgD5Z@]NZ.$Aw/G'H~t|% ?/h/E %zNNy/ ߨ/o8EcGм! (a:PA0~X0^+P? (f9'yӾѬP?އ*{5ӾfՌ+P?އ*{u` Jol؏*֜A/][Ts mոnVx *jgl7pL>hrg DŽ$ (AA5 APP jT (AA5 APP jT (AA5 APP j&OЭJ3(Vik0?Tb ||vmW/-xaƕZ~ s7~-:<{ ôƩ߳[y>P Aۯݯ$ :m3,0+_NvJ RbT$_W'QЪ5QOZ80̸bS}}W^AJjm_x)htzVeWk/HQM$ :Y'vP3X$A+]j'w>ps=a+_]3uv RbT$[o :xy=|U| Ĩ&H)Eom;Qh AU D( AA5 APP jT (AA5 APP jT (AA5 APP jT (AA5JIENDB`ggstats/man/figures/lifecycle-maturing.svg0000644000176200001440000000170614357760261020470 0ustar liggesuserslifecyclelifecyclematuringmaturing ggstats/man/figures/README-unnamed-chunk-6-1.png0000644000176200001440000002170715122045520020647 0ustar liggesusersPNG  IHDRMR/PLTE,:Nf=++,+n0M:LLfk,kNnn?b?b?b333888,88=8C^::::f:::::????b?b?b?MMMMMnMMMnMnMbb?bbb?b??bb?bbbbbbccc,cTmff:fff:ffnMMnMnnMnnnnnn????bbbٌemMMMnMnM::::f۶ېbb?bٽ٫nMnnnMȫ0e=vmffې?bٟȎMȎnCvmٟbٽٟٽې:nT,e=vMv^vmfȎې+ pHYsod IDATx흏y DX )vF[96 8-64m Ƥ"mZҦi"ZFm$6*"Q!tigfgvwF;ݓ<7sG* (Ahu@PD"ZIIIIIIIII)A?>y/,I-A;?}u :!h !/OD+TjOy4e#`zП~;"RT by֝W?/ƐoS?UI1AV>-#8 CEkۗ;I1AoOUKD&>w0TΠ-A+$1b~j){`\eH*Ǩ ]Z咾o0TDTtAP byΟү[sgR1VH$cmW"tDR< hp"tDR< hp"tDR< hp"tDR< hp"tDR<UW.0;54FE  "NM Qf&èSAaT٩ɀ0*d@`avj2 h00;54FE  "NM Qf&èSAaT٩ɀ0*d@`avj2HA7/֕{wBE蝚 JЍJSfv1;"NM!?I3eLbvBE蝘N_ eNAs/A>&pTt2S/?{n(t$y^o=I1{2ze+$EiS,gރoތv۠ נWtz=cO/ď|xLoe.ao6}bڋX֋g)^F;(%c>(cp-=L4\P* 耊Xy=wD]L_Dx;mA *Jr&]u~eN%hY\nQ@WHv4\MЕY_(CС%v9B>t&fsf3=)wE@*$dCS$QJ?tL`/4*+6V90uh9`Osԡ!!CCP?CP~n\ߐjsɈg67.8⹷xX땎xfoJpÂ)1Y~BZ69j$l*t 7,gΈmp a*yCP?Z.!@'oMr !(As'h>Yt}##A1ykAß!(As&h6Yźgs6UӔ*^4[.C2;F<7!NؽL9"yCPwIPwijtLjf2IŢ=4;;{KŸzq7RkPaVr.zdU}#G6τZUCPL*N*{&lժZEjP8ql'lժZES#=qKi'lժZEaS!]4%hECP!]4%hECP!]4%hECP!]4%hECP!]4%hECP!]tk"rb0vlt 8^x@^2].tbQ\,N.. A!(Aǂ(|4Z/. A!(AG^>l-.t$ؗ@KKCPhJБ2_/. A!(Aǁ3]ECPA1K{ݝ. A!(AǁWtv\)c.yUPj!q) +. A!(A/ }}Õvz;]ECP/א[C^{!]4%h" vjJ!(A^xvjJ!(Ag/6W{>ZE^{Z/={A/qUb;&;G/J`k*/誔Qrn*hv6 *p_si^;V^A^SjzVUq ʀ2b̛~ R,ACU4! N>9[r?]Kn?$'grAXRdy/uڇRh잜}0RdTxמ^K>9{B8RA)<DF̧c++ӣA!}( =?(͞rQX^&1Vс0K󞿛 *̿4 v jǂ v(V@E4,_a\OVu/ՂnN T{Ƽ*:v^A>yB=ҷJ)x#h /=cjCPhuPяe鵃ZT=&R٣$~x Ƽ*:v~AUIu$TݻW]|yILn>QJyUt A ADK 9`j>9>yZԭvd@0izp@P"QAu+"!($CPy*I> VD`;CP2OU{U_GAq Z-/z>@CPA+4/z7}r6A'" A920A!(@s.z7): A2蓷=VA 4'Y2Gz֕{jm\A!h]Ay:P}cvJmYw6@PZSh*)Kߦuuy~YႽBڂ޾uN zDv$$iޗjS{;_𲈌 4E{cOdiqE |dP"L͊StzKXm6NőBC _=!~ʕ 4O* #CĸZ&Glrl ٱ }A}SktZؙR93&=֙C\-&B.?Z7@PJP2<NXP%xgVgzS̵n>AT]{Fvr`qT^o?(&TdJ$Љ.*}T|V 8ً)qw%Vѻ*IM*cSR1L@'EjkYRPj#RoR&`4nH|]qshg"ykA]SoeW4n\hz=]|Y 'K9!hS|zy ih3g S<ɻ!~ XpÝ$9YWOI3mz2<;XЊAe=1n A ޳&oq:$oJ4~ A4 Az7guB4$ha4i:40uh9`Osԡ!!CCP?CP~ A4 Ai:40u E`;Ǡ9V>ӎ#2h A݊lgJ !(Lsԡ!!CCP?CP~ A4 Ai:40u Z8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< CP8 Ap Z01j:< [W'冀w~ A!hft|\zBik" (~A.7˄!5]pk" (~A7/S3 &NL,"/4h ;ttdAY>ϢȠ*4PbݺrQoͮC!hdT{A[.́ďDxaVN^qQT^SL!)xaøJxzL;;UA}PDO=7<mNirZ ;֙LNGVP: h*̘:ON˔)6Z V8<BAuO>$Z4&S@!hpl *NRICg#$:Hc6U4؞T^ 229YqrJ p,Ot2Qa< trh;a BP2 ZU"od$I,։ [1ҟ*: A!h0BP2 h A:40uh9`Osԡ!!CCP?CPnY^ ?GibdP c 3d42C5As:RB]6]:41`$6Ymf utiOGIT֙v At48;珱WʦKCP~:\uϘZEԡӋV@:TKƼ*:/MIyUt AzBsԡ!!CCP?CP~ A4 Ai:40uh9`Osԡ!!CCP?CP~ A4 Ai:40uh9`|n]鞼W\7Dh7k k Aݺ< (t½ Ze+B_ЍGo*Qm&](avȠh*B#hkPa,avtŬ1_,VcѨ̙}Pa,avbTcѨSAaT٩ɀ0*d@`avj2 h00;54FE  "NM Qf&èSAaT٩ɀ0*d@`avj2Xt?*b耠RE 4~ѱ^/:K1t4+(Q3 (Ahu@PD)A *;|qβ!b}3/:,œx QE~u -õ,,_^w)~à"jF3.֕A!ti~¿!Pf/J _#Zрo.&li,TDh/+G#WEU!Kb]dh8|&~ \,*^4*hMcö诃K U s24[Om@*V4I+]4l␉}[W񭫿?quȴeTDh/Nv^:.݆L՟ǧhrP"Ea%m._zoMDxIDATf|V"EtI݊۾1Du+c޴a;kl:o)PHC@E |hu@PD"ZꀠVE: P$׮s}txzRstB Z==gV_ty._"b)VV[z,9 Z=~;[}Mfk᷈xAAGiV3 h0נO޾NB'ggoxAU݋WGӤ{נq}PW^$|or*n>IQdL})-1"ZꀠVE: (Ahu@PD"Z6 G[*:IENDB`ggstats/man/geom_prop_bar.Rd0000644000176200001440000001032714733753650015623 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat_prop.R \name{geom_prop_bar} \alias{geom_prop_bar} \alias{geom_prop_text} \alias{geom_prop_connector} \title{Convenient geometries for proportion bar plots} \usage{ geom_prop_bar( mapping = NULL, data = NULL, position = "stack", ..., width = 0.9, complete = NULL, default_by = "x" ) geom_prop_text( mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = ggplot2::position_stack(0.5), ..., complete = NULL, default_by = "x" ) geom_prop_connector( mapping = NULL, data = NULL, position = "stack", ..., width = 0.9, complete = "fill", default_by = "x" ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{...}{Additional parameters passed to \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}}, \code{\link[ggplot2:geom_text]{ggplot2::geom_text()}} or \code{\link[=geom_bar_connector]{geom_bar_connector()}}.} \item{width}{Bar width (\code{0.9} by default).} \item{complete}{Name (character) of an aesthetic for those statistics should be completed for unobserved values (see example).} \item{default_by}{If the \strong{by} aesthetic is not available, name of another aesthetic that will be used to determine the denominators (e.g. \code{"fill"}), or \code{NULL} or \code{"total"} to compute proportions of the total. To be noted, \code{default_by = "x"} works both for vertical and horizontal bars.} } \description{ \code{geom_prop_bar()}, \code{geom_prop_text()} and \code{geom_prop_connector()} are variations of \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}}, \code{\link[ggplot2:geom_text]{ggplot2::geom_text()}} and \code{\link[=geom_bar_connector]{geom_bar_connector()}} using \code{\link[=stat_prop]{stat_prop()}}, with custom default aesthetics: \code{after_stat(prop)} for \strong{x} or \strong{y}, and \code{scales::percent(after_stat(prop))} for \strong{label}. } \examples{ library(ggplot2) d <- as.data.frame(Titanic) ggplot(d) + aes(x = Class, fill = Survived, weight = Freq) + geom_prop_bar() + geom_prop_text() + geom_prop_connector() ggplot(d) + aes(y = Class, fill = Survived, weight = Freq) + geom_prop_bar(width = .5) + geom_prop_text() + geom_prop_connector(width = .5, linetype = "dotted") ggplot(d) + aes( x = Class, fill = Survived, weight = Freq, y = after_stat(count), label = after_stat(count) ) + geom_prop_bar() + geom_prop_text() + geom_prop_connector() } \seealso{ \code{\link[=geom_bar_connector]{geom_bar_connector()}} } ggstats/man/geom_diverging.Rd0000644000176200001440000000664514702251241015766 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/geom_diverging.R \name{geom_diverging} \alias{geom_diverging} \alias{geom_likert} \alias{geom_pyramid} \alias{geom_diverging_text} \alias{geom_likert_text} \alias{geom_pyramid_text} \title{Geometries for diverging bar plots} \usage{ geom_diverging( mapping = NULL, data = NULL, position = "diverging", ..., complete = "fill", default_by = "total" ) geom_likert( mapping = NULL, data = NULL, position = "likert", ..., complete = "fill", default_by = "x" ) geom_pyramid( mapping = NULL, data = NULL, position = "diverging", ..., complete = NULL, default_by = "total" ) geom_diverging_text( mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_diverging(0.5), ..., complete = "fill", default_by = "total" ) geom_likert_text( mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_likert(0.5), ..., complete = "fill", default_by = "x" ) geom_pyramid_text( mapping = ggplot2::aes(!!!auto_contrast), data = NULL, position = position_diverging(0.5), ..., complete = NULL, default_by = "total" ) } \arguments{ \item{mapping}{Optional set of aesthetic mappings.} \item{data}{The data to be displayed in this layers.} \item{position}{A position adjustment to use on the data for this layer.} \item{...}{Other arguments passed on to \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}}} \item{complete}{An aesthetic for those unobserved values should be completed, see \code{\link[=stat_prop]{stat_prop()}}.} \item{default_by}{Name of an aesthetic determining denominators by default, see \code{\link[=stat_prop]{stat_prop()}}.} } \description{ These geometries are variations of \code{\link[ggplot2:geom_bar]{ggplot2::geom_bar()}} and \code{\link[ggplot2:geom_text]{ggplot2::geom_text()}} but provides different set of default values. } \details{ \itemize{ \item \code{geom_diverging()} is designed for stacked diverging bar plots, using \code{\link[=position_diverging]{position_diverging()}}. \item \code{geom_likert()} is designed for Likert-type items. Using \code{\link[=position_likert]{position_likert()}} (each bar sums to 100\%). \item \code{geom_pyramid()} is similar to \code{geom_diverging()} but uses proportions of the total instead of counts. } To add labels on the bar plots, simply use \code{geom_diverging_text()}, \code{geom_likert_text()}, or \code{geom_pyramid_text()}. All these geometries relies on \code{\link[=stat_prop]{stat_prop()}}. } \examples{ library(ggplot2) ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_diverging() ggplot(diamonds) + aes(x = clarity, fill = cut) + geom_diverging(position = position_diverging(cutoff = 4)) ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text() ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_likert() + geom_likert_text( aes( label = label_percent_abs(accuracy = 1, hide_below = .10)( after_stat(prop) ), colour = after_scale(hex_bw(.data$fill)) ) ) d <- Titanic |> as.data.frame() ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_diverging() + geom_diverging_text() ggplot(d) + aes(y = Class, fill = Sex, weight = Freq) + geom_pyramid() + geom_pyramid_text() } ggstats/man/ggcoef_multicomponents.Rd0000644000176200001440000001271115031235710017541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/deprecated.R \name{ggcoef_multicomponents} \alias{ggcoef_multicomponents} \alias{ggcoef_multinom} \title{Deprecated functions} \usage{ ggcoef_multicomponents( model, type = c("dodged", "faceted", "table"), component_col = "component", component_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_witdhs = c(3, 2), ... ) ggcoef_multinom( model, type = c("dodged", "faceted", "table"), y.level_label = NULL, tidy_fun = broom.helpers::tidy_with_broom_or_parameters, tidy_args = NULL, conf.int = TRUE, conf.level = 0.95, exponentiate = FALSE, variable_labels = NULL, term_labels = NULL, interaction_sep = " * ", categorical_terms_pattern = "{level}", add_reference_rows = TRUE, no_reference_row = NULL, intercept = FALSE, include = dplyr::everything(), significance = 1 - conf.level, significance_labels = NULL, return_data = FALSE, table_stat = c("estimate", "ci", "p.value"), table_header = NULL, table_text_size = 3, table_stat_label = NULL, ci_pattern = "{conf.low}, {conf.high}", table_witdhs = c(3, 2), ... ) } \arguments{ \item{model}{a regression model object} \item{type}{a dodged plot, a faceted plot or multiple table plots?} \item{component_col}{name of the component column} \item{component_label}{an optional named vector for labeling components} \item{tidy_fun}{(\code{function})\cr Option to specify a custom tidier function.} \item{tidy_args}{Additional arguments passed to \code{\link[broom.helpers:tidy_plus_plus]{broom.helpers::tidy_plus_plus()}} and to \code{tidy_fun}} \item{conf.int}{(\code{logical})\cr Should confidence intervals be computed? (see \code{\link[broom:reexports]{broom::tidy()}})} \item{conf.level}{the confidence level to use for the confidence interval if \code{conf.int = TRUE}; must be strictly greater than 0 and less than 1; defaults to 0.95, which corresponds to a 95 percent confidence interval} \item{exponentiate}{if \code{TRUE} a logarithmic scale will be used for x-axis} \item{variable_labels}{(\code{\link[gtsummary:syntax]{formula-list-selector}})\cr A named list or a named vector of custom variable labels.} \item{term_labels}{(\code{list} or \code{vector})\cr A named list or a named vector of custom term labels.} \item{interaction_sep}{(\code{string})\cr Separator for interaction terms.} \item{categorical_terms_pattern}{(\code{\link[glue:glue]{glue pattern}})\cr A \link[glue:glue]{glue pattern} for labels of categorical terms with treatment or sum contrasts (see \code{\link[broom.helpers:model_list_terms_levels]{model_list_terms_levels()}}).} \item{add_reference_rows}{(\code{logical})\cr Should reference rows be added?} \item{no_reference_row}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr Variables for those no reference row should be added, when \code{add_reference_rows = TRUE}.} \item{intercept}{(\code{logical})\cr Should the intercept(s) be included?} \item{include}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr Variables to include. Default is \code{everything()}. See also \code{\link[broom.helpers:all_continuous]{all_continuous()}}, \code{\link[broom.helpers:all_categorical]{all_categorical()}}, \code{\link[broom.helpers:all_dichotomous]{all_dichotomous()}} and \code{\link[broom.helpers:all_interaction]{all_interaction()}}.} \item{significance}{level (between 0 and 1) below which a coefficient is consider to be significantly different from 0 (or 1 if \code{exponentiate = TRUE}), \code{NULL} for not highlighting such coefficients} \item{significance_labels}{optional vector with custom labels for significance variable} \item{return_data}{if \code{TRUE}, will return the data.frame used for plotting instead of the plot} \item{table_stat}{statistics to display in the table, use any column name returned by the tidier or \code{"ci"} for confidence intervals formatted according to \code{ci_pattern}} \item{table_header}{optional custom headers for the table} \item{table_text_size}{text size for the table} \item{table_stat_label}{optional named list of labeller functions for the displayed statistic (see examples)} \item{ci_pattern}{glue pattern for confidence intervals in the table} \item{table_witdhs}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}\cr use \code{table_widths} instead} \item{...}{parameters passed to \code{\link[=ggcoef_plot]{ggcoef_plot()}}} \item{y.level_label}{an optional named vector for labeling \code{y.level} (see examples)} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} } ggstats/man/scale_fill_likert.Rd0000644000176200001440000000435014657065717016463 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/scale_fill_likert.R \name{scale_fill_likert} \alias{scale_fill_likert} \alias{likert_pal} \title{Colour scale for Likert-type plots} \usage{ scale_fill_likert( name = waiver(), ..., pal = scales::brewer_pal(palette = "BrBG"), cutoff = NULL, aesthetics = "fill" ) likert_pal(pal = scales::brewer_pal(palette = "BrBG"), cutoff = NULL) } \arguments{ \item{name}{The name of the scale. Used as the axis or legend title. If \code{waiver()}, the default, the name of the scale is taken from the first mapping used for that aesthetic. If \code{NULL}, the legend title will be omitted.} \item{...}{Other arguments passed on to \code{discrete_scale()} to control name, limits, breaks, labels and so forth.} \item{pal}{A palette function taking a number of colours as entry and returning a list of colours (see examples), ideally a diverging palette} \item{cutoff}{Number of categories displayed negatively (see \code{\link[=position_likert]{position_likert()}}) and therefore changing the center of the colour scale (see examples).} \item{aesthetics}{Character string or vector of character strings listing the name(s) of the aesthetic(s) that this scale works with. This can be useful, for example, to apply colour settings to the colour and fill aesthetics at the same time, via \code{aesthetics = c("colour", "fill")}.} } \description{ This scale is similar to other diverging discrete colour scales, but allows to change the "center" of the scale using \code{cutoff} argument, as used by \code{\link[=position_likert]{position_likert()}}. } \examples{ library(ggplot2) ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + scale_x_continuous(label = label_percent_abs()) + xlab("proportion") ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = "likert") + scale_x_continuous(label = label_percent_abs()) + xlab("proportion") + scale_fill_likert() ggplot(diamonds) + aes(y = clarity, fill = cut) + geom_bar(position = position_likert(cutoff = 1)) + scale_x_continuous(label = label_percent_abs()) + xlab("proportion") + scale_fill_likert(cutoff = 1) } ggstats/man/stat_cross.Rd0000644000176200001440000001656615062213141015165 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stat_cross.R \docType{data} \name{stat_cross} \alias{stat_cross} \alias{StatCross} \title{Compute cross-tabulation statistics} \usage{ stat_cross( mapping = NULL, data = NULL, geom = "point", position = "identity", ..., na.rm = TRUE, show.legend = NA, inherit.aes = TRUE, keep.zero.cells = FALSE ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{geom}{Override the default connection with \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}}.} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These arguments broadly fall into one of 4 categories below. Notably, further arguments to the \code{position} argument, or aesthetics that are required can \emph{not} be passed through \code{...}. Unknown arguments that are not part of the 4 categories below are ignored. \itemize{ \item Static aesthetics that are not mapped to a scale, but are at a fixed value and apply to the layer as a whole. For example, \code{colour = "red"} or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} section that lists the available options. The 'required' aesthetics cannot be passed on to the \code{params}. Please note that while passing unmapped aesthetics as vectors is technically possible, the order and required length is not guaranteed to be parallel to the input data. \item When constructing a layer using a \verb{stat_*()} function, the \code{...} argument can be used to pass on parameters to the \code{geom} part of the layer. An example of this is \code{stat_density(geom = "area", outline.type = "both")}. The geom's documentation lists which parameters it can accept. \item Inversely, when constructing a layer using a \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters to the \code{stat} part of the layer. An example of this is \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation lists which parameters it can accept. \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through \code{...}. This can be one of the functions described as \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. }} \item{na.rm}{If \code{TRUE}, the default, missing values are removed with a warning. If \code{TRUE}, missing values are silently removed.} \item{show.legend}{logical. Should this layer be included in the legends? \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} never includes, and \code{TRUE} always includes. It can also be a named logical vector to finely select the aesthetics to display. To include legend keys for all levels, even when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, but unobserved levels are omitted.} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. \code{\link[ggplot2:annotation_borders]{annotation_borders()}}.} \item{keep.zero.cells}{If \code{TRUE}, cells with no observations are kept.} } \value{ A \code{ggplot2} plot with the added statistic. } \description{ Computes statistics of a 2-dimensional matrix using \link[broom:augment.htest]{broom::augment.htest}. } \section{Aesthetics}{ \code{stat_cross()} requires the \strong{x} and the \strong{y} aesthetics. } \section{Computed variables}{ \describe{ \item{observed}{number of observations in x,y} \item{prop}{proportion of total} \item{row.prop}{row proportion} \item{col.prop}{column proportion} \item{expected}{expected count under the null hypothesis} \item{resid}{Pearson's residual} \item{std.resid}{standardized residual} \item{row.observed}{total number of observations within row} \item{col.observed}{total number of observations within column} \item{total.observed}{total number of observations within the table} \item{phi}{phi coefficients, see \code{\link[=augment_chisq_add_phi]{augment_chisq_add_phi()}}} } } \examples{ library(ggplot2) d <- as.data.frame(Titanic) # plot number of observations ggplot(d) + aes(x = Class, y = Survived, weight = Freq, size = after_stat(observed)) + stat_cross() + scale_size_area(max_size = 20) # custom shape and fill colour based on chi-squared residuals ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(std.resid) ) + stat_cross(shape = 22) + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + scale_size_area(max_size = 20) \donttest{ # custom shape and fill colour based on phi coeffients ggplot(d) + aes( x = Class, y = Survived, weight = Freq, size = after_stat(observed), fill = after_stat(phi) ) + stat_cross(shape = 22) + scale_fill_steps2(show.limits = TRUE) + scale_size_area(max_size = 20) # plotting the number of observations as a table ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = after_stat(observed) ) + geom_text(stat = "cross") # Row proportions with standardized residuals ggplot(d) + aes( x = Class, y = Survived, weight = Freq, label = scales::percent(after_stat(row.prop)), size = NULL, fill = after_stat(std.resid) ) + stat_cross(shape = 22, size = 30) + geom_text(stat = "cross") + scale_fill_steps2(breaks = c(-3, -2, 2, 3), show.limits = TRUE) + facet_grid(Sex ~ .) + labs(fill = "Standardized residuals") + theme_minimal() } } \seealso{ \code{vignette("stat_cross")} } \keyword{datasets} ggstats/man/geom_stripped_rows.Rd0000644000176200001440000001557615062213140016713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/geom_stripped_rows.R \name{geom_stripped_rows} \alias{geom_stripped_rows} \alias{geom_stripped_cols} \title{Alternating Background Color} \usage{ geom_stripped_rows( mapping = NULL, data = NULL, stat = "identity", position = "identity", ..., show.legend = NA, inherit.aes = TRUE, xfrom = -Inf, xto = Inf, width = 1, nudge_y = 0 ) geom_stripped_cols( mapping = NULL, data = NULL, stat = "identity", position = "identity", ..., show.legend = NA, inherit.aes = TRUE, yfrom = -Inf, yto = Inf, width = 1, nudge_x = 0 ) } \arguments{ \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and \code{inherit.aes = TRUE} (the default), it is combined with the default mapping at the top level of the plot. You must supply \code{mapping} if there is no plot mapping.} \item{data}{The data to be displayed in this layer. There are three options: If \code{NULL}, the default, the data is inherited from the plot data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. A \code{data.frame}, or other object, will override the plot data. All objects will be fortified to produce a data frame. See \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. A \code{function} will be called with a single argument, the plot data. The return value must be a \code{data.frame}, and will be used as the layer data. A \code{function} can be created from a \code{formula} (e.g. \code{~ head(.x, 10)}).} \item{stat}{The statistical transformation to use on the data for this layer. When using a \verb{geom_*()} function to construct a layer, the \code{stat} argument can be used to override the default coupling between geoms and stats. The \code{stat} argument accepts the following: \itemize{ \item A \code{Stat} ggproto subclass, for example \code{StatCount}. \item A string naming the stat. To give the stat as a string, strip the function name of the \code{stat_} prefix. For example, to use \code{stat_count()}, give the stat as \code{"count"}. \item For more information and other ways to specify the stat, see the \link[ggplot2:layer_stats]{layer stat} documentation. }} \item{position}{A position adjustment to use on the data for this layer. This can be used in various ways, including to prevent overplotting and improving the display. The \code{position} argument accepts the following: \itemize{ \item The result of calling a position function, such as \code{position_jitter()}. This method allows for passing extra arguments to the position. \item A string naming the position adjustment. To give the position as a string, strip the function name of the \code{position_} prefix. For example, to use \code{position_jitter()}, give the position as \code{"jitter"}. \item For more information and other ways to specify the position, see the \link[ggplot2:layer_positions]{layer position} documentation. }} \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These arguments broadly fall into one of 4 categories below. Notably, further arguments to the \code{position} argument, or aesthetics that are required can \emph{not} be passed through \code{...}. Unknown arguments that are not part of the 4 categories below are ignored. \itemize{ \item Static aesthetics that are not mapped to a scale, but are at a fixed value and apply to the layer as a whole. For example, \code{colour = "red"} or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} section that lists the available options. The 'required' aesthetics cannot be passed on to the \code{params}. Please note that while passing unmapped aesthetics as vectors is technically possible, the order and required length is not guaranteed to be parallel to the input data. \item When constructing a layer using a \verb{stat_*()} function, the \code{...} argument can be used to pass on parameters to the \code{geom} part of the layer. An example of this is \code{stat_density(geom = "area", outline.type = "both")}. The geom's documentation lists which parameters it can accept. \item Inversely, when constructing a layer using a \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters to the \code{stat} part of the layer. An example of this is \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation lists which parameters it can accept. \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through \code{...}. This can be one of the functions described as \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. }} \item{show.legend}{logical. Should this layer be included in the legends? \code{NA}, the default, includes if any aesthetics are mapped. \code{FALSE} never includes, and \code{TRUE} always includes. It can also be a named logical vector to finely select the aesthetics to display. To include legend keys for all levels, even when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, but unobserved levels are omitted.} \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather than combining with them. This is most useful for helper functions that define both data and aesthetics and shouldn't inherit behaviour from the default plot specification, e.g. \code{\link[ggplot2:annotation_borders]{annotation_borders()}}.} \item{xfrom, xto}{limitation of the strips along the x-axis} \item{width}{width of the strips} \item{yfrom, yto}{limitation of the strips along the y-axis} \item{nudge_x, nudge_y}{horizontal or vertical adjustment to nudge strips by} } \value{ A \code{ggplot2} plot with the added geometry. } \description{ Add alternating background color along the y-axis. The geom takes default aesthetics \code{odd} and \code{even} that receive color codes. } \examples{ \dontshow{if (requireNamespace("reshape")) withAutoprint(\{ # examplesIf} data(tips, package = "reshape") library(ggplot2) p <- ggplot(tips) + aes(x = time, y = day) + geom_count() + theme_light() p p + geom_stripped_rows() p + geom_stripped_cols() p + geom_stripped_rows() + geom_stripped_cols() \donttest{ p <- ggplot(tips) + aes(x = total_bill, y = day) + geom_count() + theme_light() p p + geom_stripped_rows() p + geom_stripped_rows() + scale_y_discrete(expand = expansion(0, 0.5)) p + geom_stripped_rows(xfrom = 10, xto = 35) p + geom_stripped_rows(odd = "blue", even = "yellow") p + geom_stripped_rows(odd = "blue", even = "yellow", alpha = .1) p + geom_stripped_rows(odd = "#00FF0022", even = "#FF000022") p + geom_stripped_cols() p + geom_stripped_cols(width = 10) p + geom_stripped_cols(width = 10, nudge_x = 5) } \dontshow{\}) # examplesIf} } ggstats/man/label_number_abs.Rd0000644000176200001440000000216714527332015016254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/label_number_abs.R \name{label_number_abs} \alias{label_number_abs} \alias{label_percent_abs} \title{Label absolute values} \usage{ label_number_abs(..., hide_below = NULL) label_percent_abs(..., hide_below = NULL) } \arguments{ \item{...}{arguments passed to \code{\link[scales:label_number]{scales::label_number()}} or \code{\link[scales:label_percent]{scales::label_percent()}}} \item{hide_below}{if provided, values below \code{hide_below} will be masked (i.e. an empty string \code{""} will be returned)} } \value{ A "labelling" function, , i.e. a function that takes a vector and returns a character vector of same length giving a label for each input value. } \description{ Label absolute values } \examples{ x <- c(-0.2, -.05, 0, .07, .25, .66) scales::label_number()(x) label_number_abs()(x) scales::label_percent()(x) label_percent_abs()(x) label_percent_abs(hide_below = .1)(x) } \seealso{ \code{\link[scales:label_number]{scales::label_number()}}, \code{\link[scales:label_percent]{scales::label_percent()}} } ggstats/DESCRIPTION0000644000176200001440000000251715122161450013436 0ustar liggesusersPackage: ggstats Type: Package Title: Extension to 'ggplot2' for Plotting Stats Version: 0.12.0 Authors@R: c( person( "Joseph", "Larmarange", , "joseph@larmarange.net", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7097-700X") ) ) Description: Provides new statistics, new geometries and new positions for 'ggplot2' and a suite of functions to facilitate the creation of statistical plots. License: GPL (>= 3) URL: https://larmarange.github.io/ggstats/, https://github.com/larmarange/ggstats BugReports: https://github.com/larmarange/ggstats/issues Depends: R (>= 4.2) Imports: cli, dplyr, forcats, ggplot2 (>= 4.0.0), lifecycle, patchwork, purrr, rlang, scales, stats, stringr, utils, tidyr Suggests: betareg, broom, broom.helpers (>= 1.20.0), emmeans, glue, gtsummary, knitr, labelled (>= 2.11.0), reshape, rmarkdown, nnet, parameters, pscl, testthat (>= 3.0.0), spelling, survey, survival, vdiffr Encoding: UTF-8 RoxygenNote: 7.3.3 Config/testthat/edition: 3 Language: en-US VignetteBuilder: knitr NeedsCompilation: no Packaged: 2025-12-22 01:21:21 UTC; josep Author: Joseph Larmarange [aut, cre] (ORCID: ) Maintainer: Joseph Larmarange Repository: CRAN Date/Publication: 2025-12-22 06:20:24 UTC