sparsevctrs/0000755000176200001440000000000014744256122012640 5ustar liggesuserssparsevctrs/tests/0000755000176200001440000000000014741321215013773 5ustar liggesuserssparsevctrs/tests/testthat/0000755000176200001440000000000014744256121015641 5ustar liggesuserssparsevctrs/tests/testthat/test-has_sparse_elements.R0000644000176200001440000000026314741321215022760 0ustar liggesuserstest_that("has_sparse_elements() works", { expect_false(has_sparse_elements(mtcars)) mtcars$sparse <- sparse_integer(1, 1, 32) expect_true(has_sparse_elements(mtcars)) }) sparsevctrs/tests/testthat/test-sparse_character.R0000644000176200001440000001473414741321215022255 0ustar liggesuserstest_that("input checking is done correctly", { # value expect_snapshot( error = TRUE, sparse_character(1, 1, 1) ) expect_snapshot( error = TRUE, sparse_character(0.5, 1, 1) ) expect_snapshot( error = TRUE, sparse_character(NULL, 1, 1) ) expect_snapshot( error = TRUE, sparse_character(Inf, 1, 1) ) expect_snapshot( error = TRUE, sparse_character(NaN, 1, 1) ) # position expect_snapshot( error = TRUE, sparse_character("A", 1.5, 1) ) expect_snapshot( error = TRUE, sparse_character("A", "1", 1) ) expect_snapshot( error = TRUE, sparse_character("A", NULL, 1) ) expect_snapshot( error = TRUE, sparse_character("A", NA, 1) ) expect_snapshot( error = TRUE, sparse_character("A", Inf, 1) ) expect_snapshot( error = TRUE, sparse_character("A", NaN, 1) ) # length expect_no_error( sparse_character(character(0), integer(0), 0) ) expect_snapshot( error = TRUE, sparse_character(numeric(0), integer(0), -10) ) expect_snapshot( error = TRUE, sparse_character(numeric(0), integer(0), 10000000000) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), c(1, 10)) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), 1.5) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), "1") ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), NA) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), Inf) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), NULL) ) expect_snapshot( error = TRUE, sparse_character(character(0), integer(0), NaN) ) # Length restriction expect_snapshot( error = TRUE, sparse_character(letters[1:4], 1:6, 10) ) expect_snapshot( error = TRUE, sparse_character("A", 1:6, 10) ) # duplicates in position expect_snapshot( error = TRUE, sparse_character(letters[1:4], c(1, 1, 5, 6), 10) ) expect_snapshot( error = TRUE, sparse_character(letters, rep(1, 26), 100) ) # Ordered position expect_snapshot( error = TRUE, sparse_character(c("A", "B"), c(3, 1), 5) ) # Too large position values expect_snapshot( error = TRUE, sparse_character("A", 10, 5) ) expect_snapshot( error = TRUE, sparse_character(rep("A", 50), seq(25, 74), 50) ) # Too large position values expect_snapshot( error = TRUE, sparse_character("A", 0, 5) ) expect_snapshot( error = TRUE, sparse_character(rep("A", 101), seq(-50, 50), 100) ) # Too large position values expect_snapshot( error = TRUE, sparse_character("", 1, 10) ) expect_snapshot( error = TRUE, sparse_character(rep(c("A", ""), 5), 1:10, 50) ) }) test_that("length() works with sparse_character()", { expect_identical( length(sparse_character(character(), integer(), 0)), 0L ) expect_identical( length(sparse_character("A", 1, 10)), 10L ) expect_identical( length(sparse_character("A", 1, 100)), 100L ) }) test_that("single subsetting works with sparse_character()", { x_sparse <- sparse_character(value = c("A", NA, "B"), position = c(1, 5, 8), 10) x_dense <- c("A", "", "", "", NA, "", "", "B", "", "") for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[0], x_dense[0]) expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_]) expect_identical(x_sparse[NULL], x_dense[NULL]) expect_identical(x_sparse[NaN], x_dense[NaN]) expect_identical(x_sparse[100], x_dense[100]) expect_identical(x_sparse[Inf], x_dense[Inf]) expect_identical(x_sparse["not a number"], x_dense["not a number"]) expect_identical(x_sparse[1.6], x_dense[1.6]) expect_identical(x_sparse[2.6], x_dense[2.6]) }) test_that("multiple subsetting works with sparse_character()", { x_sparse <- sparse_character(value = c("A", NA, "B"), position = c(1, 5, 8), 10) x_dense <- c("A", "", "", "", NA, "", "", "B", "", "") expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[-1], x_dense[-1]) expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) expect_identical(x_sparse[FALSE], x_dense[FALSE]) expect_identical(x_sparse[TRUE], x_dense[TRUE]) expect_identical(x_sparse[NA], x_dense[NA]) expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)]) expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)]) expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)]) expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)]) }) test_that("materialization works with sparse_character()", { x_sparse <- sparse_character(value = c("A", NA, "B"), position = c(1, 5, 8), 10) x_dense <- c("A", "", "", "", NA, "", "", "B", "", "") expect_identical(x_sparse[], x_dense) }) test_that("default argument is working", { expect_snapshot( error = TRUE, sparse_character("A", 1, 10, default = letters) ) expect_snapshot( error = TRUE, sparse_character("A", 1, 10, default = TRUE) ) expect_snapshot( error = TRUE, sparse_character(c("A", "B", "C"), c(1, 4, 6), 10, default = "A") ) x_sparse <- sparse_character( value = c("A", NA, "B"), position = c(1, 5, 8), length = 10, default = "H" ) x_dense <- c("A", "H", "H", "H", NA, "H", "H", "B", "H", "H") for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[], x_dense) }) test_that("verbose testing", { withr::local_options("sparsevctrs.verbose_materialize" = TRUE) x <- sparse_character("A", 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 2) x <- sparse_character("A", 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 3) x <- sparse_character("A", 1, 1) expect_snapshot( error = TRUE, { tmp <- x[] } ) }) test_that("printing works #48", { expect_snapshot( sparse_character("A", 1, 10)[] ) }) sparsevctrs/tests/testthat/test-sparse_mean.R0000644000176200001440000000166614741321215021241 0ustar liggesuserstest_that("sparse_mean() works", { x <- sparse_double(10, 5, 1000) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(mean(x, na.rm = TRUE), sparse_mean(x, na_rm = TRUE)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(mean(x, na.rm = TRUE), sparse_mean(x, na_rm = TRUE)) x <- sparse_double(numeric(), integer(), 1000) expect_equal(mean(x), sparse_mean(x)) x <- sparse_double(numeric(), integer(), 1000, default = 100) expect_equal(mean(x), sparse_mean(x)) }) sparsevctrs/tests/testthat/test-sparse_integer.R0000644000176200001440000002360414741321215021752 0ustar liggesuserstest_that("input checking is done correctly", { expect_identical( sparse_integer(1L, 1, 1), sparse_integer(1, 1, 1) ) # value expect_snapshot( error = TRUE, sparse_integer("1", 1, 1) ) expect_snapshot( error = TRUE, sparse_integer(0.5, 1, 1) ) expect_snapshot( error = TRUE, sparse_integer(NULL, 1, 1) ) expect_snapshot( error = TRUE, sparse_integer(Inf, 1, 1) ) expect_snapshot( error = TRUE, sparse_integer(NaN, 1, 1) ) # position expect_snapshot( error = TRUE, sparse_integer(1, 1.5, 1) ) expect_snapshot( error = TRUE, sparse_integer(1, "1", 1) ) expect_snapshot( error = TRUE, sparse_integer(1, NULL, 1) ) expect_snapshot( error = TRUE, sparse_integer(1, NA, 1) ) expect_snapshot( error = TRUE, sparse_integer(1, Inf, 1) ) expect_snapshot( error = TRUE, sparse_integer(1, NaN, 1) ) # length expect_no_error( sparse_integer(integer(0), integer(0), 0) ) expect_snapshot( error = TRUE, sparse_integer(numeric(0), integer(0), -10) ) expect_snapshot( error = TRUE, sparse_integer(numeric(0), integer(0), 10000000000) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), c(1, 10)) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), 1.5) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), "1") ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), NA) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), Inf) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), NULL) ) expect_snapshot( error = TRUE, sparse_integer(integer(0), integer(0), NaN) ) # Length restriction expect_snapshot( error = TRUE, sparse_integer(1:4, 1:6, 10) ) expect_snapshot( error = TRUE, sparse_integer(1, 1:6, 10) ) # duplicates in position expect_snapshot( error = TRUE, sparse_integer(1:4, c(1, 1, 5, 6), 10) ) expect_snapshot( error = TRUE, sparse_integer(1:100, rep(1, 100), 100) ) # Ordered position expect_snapshot( error = TRUE, sparse_integer(c(1, 2), c(3, 1), 5) ) # Too large position values expect_snapshot( error = TRUE, sparse_integer(1, 10, 5) ) expect_snapshot( error = TRUE, sparse_integer(rep(1, 50), seq(25, 74), 50) ) # Too large position values expect_snapshot( error = TRUE, sparse_integer(1, 0, 5) ) expect_snapshot( error = TRUE, sparse_integer(rep(1, 101), seq(-50, 50), 100) ) # Too large position values expect_snapshot( error = TRUE, sparse_integer(0, 1, 10) ) expect_snapshot( error = TRUE, sparse_integer(rep(c(1, 0), 5), 1:10, 50) ) }) test_that("length() works with sparse_integer()", { expect_identical( length(sparse_integer(integer(), integer(), 0)), 0L ) expect_identical( length(sparse_integer(1, 1, 10)), 10L ) expect_identical( length(sparse_integer(1, 1, 100)), 100L ) }) test_that("single subsetting works with sparse_integer()", { x_sparse <- sparse_integer(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10L, 0L, 0L, 0L, NA, 0L, 0L, 20L, 0L, 0L) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[0], x_dense[0]) expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_]) expect_identical(x_sparse[NULL], x_dense[NULL]) expect_identical(x_sparse[NaN], x_dense[NaN]) expect_identical(x_sparse[100], x_dense[100]) expect_identical(x_sparse[Inf], x_dense[Inf]) expect_identical(x_sparse["not a number"], x_dense["not a number"]) expect_identical(x_sparse[1.6], x_dense[1.6]) expect_identical(x_sparse[2.6], x_dense[2.6]) }) test_that("multiple subsetting works with sparse_integer()", { x_sparse <- sparse_integer(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10L, 0L, 0L, 0L, NA, 0L, 0L, 20L, 0L, 0L) expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[-1], x_dense[-1]) expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) expect_identical(x_sparse[FALSE], x_dense[FALSE]) expect_identical(x_sparse[TRUE], x_dense[TRUE]) expect_identical(x_sparse[NA], x_dense[NA]) expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)]) expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)]) expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)]) expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)]) }) test_that("materialization works with sparse_integer()", { x_sparse <- sparse_integer(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10L, 0L, 0L, 0L, NA, 0L, 0L, 20L, 0L, 0L) expect_identical(x_sparse[], x_dense) }) test_that("sorting works with sparse_integer()", { x_sparse <- sparse_integer(integer(), integer(), 10) expect_true(is_sparse_integer(sort(x_sparse))) x_sparse <- sparse_integer(NA, 4, 10) expect_identical( sort(x_sparse), rep(0L, 9) ) x_sparse <- sparse_integer(integer(), integer(), 10) expect_true(is_sparse_integer(sort(x_sparse))) x_sparse <- sparse_integer(c(1, 4, 5), c(1, 4, 7), 7) expect_false(is_sparse_integer(sort(x_sparse))) x_sparse <- sparse_integer(c(1, 5), c(1, 7), 7) expect_false(is_sparse_integer(sort(x_sparse))) x_sparse <- sparse_integer(c(-1, 5), c(1, 7), 7) expect_true(is_sparse_integer(sort(x_sparse))) }) test_that("min method works with sparse_integer()", { expect_snapshot( res <- min(sparse_integer(integer(), integer(), 0)) ) expect_identical(res, Inf) expect_identical( min(sparse_integer(integer(), integer(), 1000000000)), 0L ) expect_identical( min(sparse_integer(-10, 10, 1000000000)), -10L ) expect_identical( min(sparse_integer(-10, 10, 1000000000, default = -100)), -100L ) expect_identical( min(sparse_integer(c(10:14, 16:20), 11:20, 1000000000, default = 15L)), 10L ) expect_identical( min(sparse_integer(NA, 10, 1000000000)), NA_integer_ ) expect_identical( min(sparse_integer(c(11:19, NA), 11:20, 1000000000)), NA_integer_ ) expect_identical( min(sparse_integer(NA, 10, 1000000000), na.rm = TRUE), 0L ) expect_identical( min(sparse_integer(c(-10, 11:19, NA), 10:20, 1000000000), na.rm = TRUE), -10L ) }) test_that("max method works with sparse_integer()", { expect_snapshot( res <- max(sparse_integer(integer(), integer(), 0)) ) expect_identical(res, -Inf) expect_identical( max(sparse_integer(integer(), integer(), 1000000000)), 0L ) expect_identical( max(sparse_integer(10, 10, 1000000000)), 10L ) expect_identical( max(sparse_integer(10, 10, 1000000000, default = 100)), 100L ) expect_identical( max(sparse_integer(c(10:14, 16:20), 11:20, 1000000000, default = 15)), 20L ) expect_identical( max(sparse_integer(NA, 10, 1000000000)), NA_integer_ ) expect_identical( max(sparse_integer(c(11:19, NA), 11:20, 1000000000)), NA_integer_ ) expect_identical( max(sparse_integer(NA, 10, 1000000000), na.rm = TRUE), 0L ) expect_identical( max(sparse_integer(c(-10, 11:19, NA), 10:20, 1000000000), na.rm = TRUE), 19L ) }) test_that("anyNA method works with sparse_integer", { expect_false( anyNA(sparse_integer(integer(), integer(), 1000000000)) ) expect_false( anyNA(sparse_integer(1, 1, 1000000000)) ) expect_true( anyNA(sparse_integer(NA, 1, 1000000000)) ) expect_true( anyNA(sparse_integer(c(-10, 11:19, NA), 10:20, 1000000000)) ) }) test_that("sum method works with sparse_integer", { expect_identical( sum(sparse_integer(integer(), integer(), 0)), 0L ) expect_identical( sum(sparse_integer(integer(), integer(), 1000000000)), 0L ) expect_identical( sum(sparse_integer(integer(), integer(), 1000000000, default = 2)), 2000000000L ) expect_identical( sum(sparse_integer(c(1, 54, 10), c(1, 5, 10), 10)), 65L ) expect_identical( sum(sparse_integer(c(1, 54, 10), c(1, 5, 10), 10, default = -1)), 58L ) expect_identical( sum(sparse_integer(c(1, 54, NA), c(1, 5, 10), 10)), NA_integer_ ) expect_identical( sum(sparse_integer(c(1, 54, NA), c(1, 5, 10), 10), na.rm = TRUE), 55L ) }) test_that("default argument is working", { expect_snapshot( error = TRUE, sparse_integer(1, 1, 10, default = 1:10) ) expect_snapshot( error = TRUE, sparse_integer(1, 1, 10, default = TRUE) ) expect_snapshot( error = TRUE, sparse_integer(c(1, 1, 4), c(1, 4, 6), 10, default = 1) ) x_sparse <- sparse_integer( value = c(10, NA, 20), position = c(1, 5, 8), length = 10, default = 4 ) x_dense <- c(10L, 4L, 4L, 4L, NA, 4L, 4L, 20L, 4L, 4L) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[], x_dense) }) test_that("verbose testing", { withr::local_options("sparsevctrs.verbose_materialize" = TRUE) x <- sparse_integer(1, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 2) x <- sparse_integer(1, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 3) x <- sparse_integer(1, 1, 1) expect_snapshot( error = TRUE, { tmp <- x[] } ) }) test_that("printing works #48", { expect_snapshot( sparse_integer(1, 1, 10) + 1 ) }) sparsevctrs/tests/testthat/test-sparse_median.R0000644000176200001440000000265614741321215021556 0ustar liggesuserstest_that("sparse_median() works", { x <- sparse_double(10, 5, 1000) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) x <- sparse_double(numeric(), integer(), 1000) expect_equal(median(x), sparse_median(x)) x <- sparse_double(numeric(), integer(), 1000, default = 100) expect_equal(median(x), sparse_median(x)) }) test_that("sparse_median() edge cases", { x <- sparse_double(c(10, 10), c(1, 2), 4) expect_equal(median(x), sparse_median(x)) x <- sparse_double(c(10, NA), c(1, 2), 4) expect_equal(median(x), sparse_median(x)) expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) x <- sparse_double(c(10, 10, NA), c(1, 2, 3), 5) expect_equal(median(x), sparse_median(x)) expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE)) }) sparsevctrs/tests/testthat/test-sparse_sd.R0000644000176200001440000000172014741321215020716 0ustar liggesuserstest_that("sparse_sd() works", { x <- sparse_double(10, 5, 1000) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE)) x <- sparse_double(numeric(), integer(), 1000) expect_equal(sd(x), sparse_sd(x)) x <- sparse_double(numeric(), integer(), 1000, default = 100) expect_equal(sd(x), sparse_sd(x)) }) test_that("multiplication works", { expect_equal(2 * 2, 4) }) sparsevctrs/tests/testthat/test-sparse_logical.R0000644000176200001440000001546114741321215021731 0ustar liggesuserstest_that("input checking is done correctly", { # value expect_snapshot( error = TRUE, sparse_logical("1", 1, 1) ) expect_snapshot( error = TRUE, sparse_logical(1, 1, 1) ) expect_snapshot( error = TRUE, sparse_logical(NULL, 1, 1) ) expect_snapshot( error = TRUE, sparse_logical(Inf, 1, 1) ) expect_snapshot( error = TRUE, sparse_logical(NaN, 1, 1) ) # position expect_snapshot( error = TRUE, sparse_logical(TRUE, 1.5, 1) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, "1", 1) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, NULL, 1) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, NA, 1) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, Inf, 1) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, NaN, 1) ) # length expect_no_error( sparse_logical(logical(0), integer(0), 0) ) expect_snapshot( error = TRUE, sparse_logical(numeric(0), integer(0), -10) ) expect_snapshot( error = TRUE, sparse_logical(numeric(0), integer(0), 10000000000) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), c(1, 10)) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), 1.5) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), "1") ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), NA) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), Inf) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), NULL) ) expect_snapshot( error = TRUE, sparse_logical(logical(0), integer(0), NaN) ) # Length restriction expect_snapshot( error = TRUE, sparse_logical(c(TRUE, TRUE), 1:6, 10) ) expect_snapshot( error = TRUE, sparse_logical(TRUE, 1:6, 10) ) # duplicates in position expect_snapshot( error = TRUE, sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10) ) expect_snapshot( error = TRUE, sparse_logical(rep(TRUE, 100), rep(1, 100), 100) ) # Ordered position expect_snapshot( error = TRUE, sparse_logical(c(TRUE, TRUE), c(3, 1), 5) ) # Too large position values expect_snapshot( error = TRUE, sparse_logical(TRUE, 10, 5) ) expect_snapshot( error = TRUE, sparse_logical(rep(TRUE, 50), seq(25, 74), 50) ) # Too large position values expect_snapshot( error = TRUE, sparse_logical(TRUE, 0, 5) ) expect_snapshot( error = TRUE, sparse_logical(rep(TRUE, 101), seq(-50, 50), 100) ) }) test_that("length() works with sparse_logical()", { expect_identical( length(sparse_logical(logical(), integer(), 0)), 0L ) expect_identical( length(sparse_logical(TRUE, 1, 10)), 10L ) expect_identical( length(sparse_logical(TRUE, 1, 100)), 100L ) }) test_that("single subsetting works with sparse_logical()", { x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[0], x_dense[0]) expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_]) expect_identical(x_sparse[NULL], x_dense[NULL]) expect_identical(x_sparse[NaN], x_dense[NaN]) expect_identical(x_sparse[100], x_dense[100]) expect_identical(x_sparse[Inf], x_dense[Inf]) expect_identical(x_sparse["not a number"], x_dense["not a number"]) expect_identical(x_sparse[1.6], x_dense[1.6]) expect_identical(x_sparse[2.6], x_dense[2.6]) }) test_that("multiple subsetting works with sparse_logical()", { x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[-1], x_dense[-1]) expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) expect_identical(x_sparse[FALSE], x_dense[FALSE]) expect_identical(x_sparse[TRUE], x_dense[TRUE]) expect_identical(x_sparse[NA], x_dense[NA]) expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)]) expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)]) expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)]) expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)]) }) test_that("materialization works with sparse_logical()", { x_sparse <- sparse_logical(value = c(TRUE, NA, TRUE), position = c(1, 5, 8), 10) x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE) expect_identical(x_sparse[], x_dense) }) test_that("sorting works with sparse_logical()", { x_sparse <- sparse_logical(logical(), integer(), 10) expect_true(is_sparse_logical(sort(x_sparse))) x_sparse <- sparse_logical(NA, 4, 10) expect_identical( sort(x_sparse), rep(FALSE, 9) ) x_sparse <- sparse_logical(logical(), integer(), 10) expect_true(is_sparse_logical(sort(x_sparse))) x_sparse <- sparse_logical(c(TRUE, TRUE, TRUE), c(1, 4, 7), 7) expect_true(is_sparse_logical(sort(x_sparse))) x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7) expect_true(is_sparse_logical(sort(x_sparse))) x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7) expect_true(is_sparse_logical(sort(x_sparse))) }) test_that("default argument is working", { expect_snapshot( error = TRUE, sparse_logical(TRUE, 1, 10, default = TRUE) ) expect_snapshot( error = TRUE, sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE) ) x_sparse <- sparse_logical( value = c(FALSE, NA, FALSE), position = c(1, 5, 8), length = 10, default = TRUE ) x_dense <- c(FALSE, TRUE, TRUE, TRUE, NA, TRUE, TRUE, FALSE, TRUE, TRUE) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[], x_dense) }) test_that("verbose testing", { withr::local_options("sparsevctrs.verbose_materialize" = TRUE) x <- sparse_logical(TRUE, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 2) x <- sparse_logical(TRUE, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 3) x <- sparse_logical(TRUE, 1, 1) expect_snapshot( error = TRUE, { tmp <- x[] } ) }) sparsevctrs/tests/testthat/test-sparse_dummy.R0000644000176200001440000001362514741321215021452 0ustar liggesusers# one_hot = TRUE -------------------------------------------------------------- test_that("sparse_dummy(one_hot = TRUE) works with single level", { x <- factor(c("a", "a", "a")) exp <- list( a = c(1L, 1L, 1L) ) res <- sparse_dummy(x, one_hot = TRUE) expect_identical( res, exp ) expect_true(is.integer(res$a)) expect_false(is_sparse_vector(res$a)) }) test_that("sparse_dummy(one_hot = FALSE) works zero length input", { x <- factor(character()) exp <- structure(list(), names = character(0)) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) }) ## anyNA = FALSE --------------------------------------------------------------- test_that("sparse_dummy(one_hot = TRUE) works with no NAs", { x <- factor(c("a", "b", "c", "d", "a")) exp <- list( a = sparse_integer(c(1, 1), c(1, 5), 5), b = sparse_integer(1, 2, 5), c = sparse_integer(1, 3, 5), d = sparse_integer(1, 4, 5) ) res <- sparse_dummy(x, one_hot = TRUE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) test_that("sparse_dummy(one_hot = TRUE) works with no NAs and unseen levels", { x <- factor(c("a", "b", "c", "d", "a"), levels = letters[1:6]) exp <- list( a = sparse_integer(c(1, 1), c(1, 5), 5), b = sparse_integer(1, 2, 5), c = sparse_integer(1, 3, 5), d = sparse_integer(1, 4, 5), e = sparse_integer(integer(), integer(), 5), f = sparse_integer(integer(), integer(), 5) ) res <- sparse_dummy(x, one_hot = TRUE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) ## anyNA = TRUE ---------------------------------------------------------------- test_that("sparse_dummy(one_hot = TRUE) works with NA", { x <- factor(c("a", NA, "b", "c", "a", NA)) exp <- list( a = sparse_integer(c(1, NA, 1, NA), c(1, 2, 5, 6), 6), b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6), c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6) ) res <- sparse_dummy(x, one_hot = TRUE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) test_that("sparse_dummy(one_hot = TRUE) works with NA and unseen levels", { x <- factor(c("a", NA, "b", "c", "a", NA), levels = letters[1:5]) exp <- list( a = sparse_integer(c(1, NA, 1, NA), c(1, 2, 5, 6), 6), b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6), c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6), d = sparse_integer(c(NA, NA), c(2, 6), 6), e = sparse_integer(c(NA, NA), c(2, 6), 6) ) res <- sparse_dummy(x, one_hot = TRUE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) # one_hot = FALSE --------------------------------------------------------------- test_that("sparse_dummy(one_hot = FALSE) works with single level", { x <- factor(c("a", "a", "a")) exp <- structure(list(), names = character(0)) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) }) test_that("sparse_dummy(one_hot = FALSE) works with two levels", { x <- factor(c("a", "b", "a")) exp <- list( b = c(0L, 1L, 0L) ) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) expect_true(is.integer(res$b)) expect_true(is_sparse_vector(res$b)) }) test_that("sparse_dummy(one_hot = TRUE) works zero length input", { x <- factor(character()) exp <- structure(list(), names = character(0)) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) }) ## anyNA = FALSE --------------------------------------------------------------- test_that("sparse_dummy(one_hot = FALSE) works with no NAs", { x <- factor(c("a", "b", "c", "d", "a")) exp <- list( b = sparse_integer(1, 2, 5), c = sparse_integer(1, 3, 5), d = sparse_integer(1, 4, 5) ) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) test_that("sparse_dummy(one_hot = FALSE) works with no NAs and unseen levels", { x <- factor(c("a", "b", "c", "d", "a"), levels = letters[1:6]) exp <- list( b = sparse_integer(1, 2, 5), c = sparse_integer(1, 3, 5), d = sparse_integer(1, 4, 5), e = sparse_integer(integer(), integer(), 5), f = sparse_integer(integer(), integer(), 5) ) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) ## anyNA = TRUE ---------------------------------------------------------------- test_that("sparse_dummy(one_hot = FALSE) works with NA", { x <- factor(c("a", NA, "b", "c", "a", NA)) exp <- list( b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6), c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6) ) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) test_that("sparse_dummy(one_hot = FALSE) works with NA and unseen levels", { x <- factor(c("a", NA, "b", "c", "a", NA), levels = letters[1:5]) exp <- list( b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6), c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6), d = sparse_integer(c(NA, NA), c(2, 6), 6), e = sparse_integer(c(NA, NA), c(2, 6), 6) ) res <- sparse_dummy(x, one_hot = FALSE) expect_identical( res, exp ) expect_true( all(vapply(res, is_sparse_integer, logical(1))) ) }) # Other ------------------------------------------------------------------------ test_that("sparse_dummy() errors with wrong input", { expect_snapshot( error = TRUE, sparse_dummy(letters) ) expect_snapshot( error = TRUE, sparse_dummy(mtcars) ) expect_snapshot( error = TRUE, sparse_dummy(1:5) ) expect_snapshot( error = TRUE, sparse_dummy(NULL) ) }) sparsevctrs/tests/testthat/test-type-predicates.R0000644000176200001440000000271614741321215022043 0ustar liggesuserstest_that("is_sparse_vector works", { expect_true(is_sparse_vector(sparse_double(1, 1, 1))) expect_true(is_sparse_vector(sparse_integer(1, 1, 1))) expect_false(is_sparse_vector(c(1, 1, 1))) expect_false(is_sparse_vector(1:10)) expect_false(is_sparse_vector(NULL)) }) test_that("is_sparse_numeric works", { expect_true(is_sparse_numeric(sparse_double(1, 1, 1))) expect_true(is_sparse_numeric(sparse_integer(1, 1, 1))) expect_false(is_sparse_numeric(c(1, 1, 1))) expect_false(is_sparse_numeric(1:10)) expect_false(is_sparse_numeric(NULL)) }) test_that("is_sparse_double works", { expect_true(is_sparse_double(sparse_double(1, 1, 1))) expect_false(is_sparse_double(c(1, 1, 1))) expect_false(is_sparse_double(1:10)) expect_false(is_sparse_double(NULL)) }) test_that("is_sparse_integer works", { expect_true(is_sparse_integer(sparse_integer(1, 1, 1))) expect_false(is_sparse_integer(c(1, 1, 1))) expect_false(is_sparse_integer(1:10)) expect_false(is_sparse_integer(NULL)) }) test_that("is_sparse_character works", { expect_true(is_sparse_character(sparse_character("A", 1, 1))) expect_false(is_sparse_character(c(1, 1, 1))) expect_false(is_sparse_character(1:10)) expect_false(is_sparse_character(NULL)) }) test_that("is_sparse_logical works", { expect_true(is_sparse_logical(sparse_logical(TRUE, 1, 1))) expect_false(is_sparse_logical(c(1, 1, 1))) expect_false(is_sparse_logical(1:10)) expect_false(is_sparse_logical(NULL)) }) sparsevctrs/tests/testthat/test-coerce-vector.R0000644000176200001440000000226014741321215021473 0ustar liggesuserstest_that("as_sparse_double works", { x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, NA) x_sparse <- as_sparse_double(x_dense) expect_true(is_sparse_double(x_sparse)) expect_identical(x_sparse, x_dense) x_dense <- c(3L, 0L, 2L, 0L, 0L, 0L, 4L, 0L, 0L, NA) x_sparse <- as_sparse_double(x_dense) expect_true(is_sparse_double(x_sparse)) expect_identical(x_sparse, as.numeric(x_dense)) }) test_that("as_sparse_integer works", { x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, NA) x_sparse <- as_sparse_integer(x_dense) expect_true(is_sparse_integer(x_sparse)) expect_identical(x_sparse, as.integer(x_dense)) x_dense <- c(3L, 0L, 2L, 0L, 0L, 0L, 4L, 0L, 0L, NA) x_sparse <- as_sparse_integer(x_dense) expect_true(is_sparse_integer(x_sparse)) expect_identical(x_sparse, x_dense) }) test_that("as_sparse_integer works", { x_dense <- c("A", "", "B", "", "", "", "C", "", "", NA) x_sparse <- as_sparse_character(x_dense) expect_true(is_sparse_character(x_sparse)) }) test_that("as_sparse_logical works", { x_dense <- c(FALSE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, FALSE, FALSE, FALSE) x_sparse <- as_sparse_logical(x_dense) expect_true(is_sparse_logical(x_sparse)) }) sparsevctrs/tests/testthat/test-sparse_var.R0000644000176200001440000000164314741321215021104 0ustar liggesuserstest_that("sparse_var() works", { x <- sparse_double(10, 5, 1000) expect_equal(var(x), sparse_var(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000) expect_equal(var(x), sparse_var(x)) x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20) expect_equal(var(x), sparse_var(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(var(x), sparse_var(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(var(x), sparse_var(x)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000) expect_equal(var(x, na.rm = TRUE), sparse_var(x, na_rm = TRUE)) x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100) expect_equal(var(x, na.rm = TRUE), sparse_var(x, na_rm = TRUE)) x <- sparse_double(numeric(), integer(), 1000) expect_equal(var(x), sparse_var(x)) x <- sparse_double(numeric(), integer(), 1000, default = 100) expect_equal(var(x), sparse_var(x)) }) sparsevctrs/tests/testthat/test-extractors.R0000644000176200001440000000230414741321215021130 0ustar liggesuserstest_that("sparse_positions works with altrep_sparse_double", { expect_identical( sparse_positions(sparse_double(1, 5, 10)), 5L ) expect_identical( sparse_positions(sparse_double(1:3, 5:7, 10)), 5:7 ) }) test_that("sparse_positions works with numeric vectors", { expect_identical( sparse_positions(c(1, 6, 4, 2)), seq_len(4) ) expect_identical( sparse_positions(101:200), 1:100 ) }) test_that("sparse_values works with altrep_sparse_double", { expect_identical( sparse_values(sparse_double(1, 5, 10)), 1 ) expect_identical( sparse_values(sparse_double(1:3, 5:7, 10)), c(1, 2, 3) ) }) test_that("sparse_values works with numeric vectors", { expect_identical( sparse_values(c(1, 6, 4, 2)), c(1, 6, 4, 2) ) expect_identical( sparse_values(101:200), 101:200 ) }) test_that("sparse_default works with altrep_sparse_double", { expect_identical( sparse_default(sparse_double(1, 5, 10)), 0 ) expect_identical( sparse_default(sparse_double(1, 5, 10, default = 11)), 11 ) }) test_that("sparse_values works with numeric vectors", { expect_identical( sparse_default(c(1, 6, 4, 2)), NA ) }) sparsevctrs/tests/testthat/_snaps/0000755000176200001440000000000014741572451017130 5ustar liggesuserssparsevctrs/tests/testthat/_snaps/sparsity.md0000644000176200001440000000030614741572451021327 0ustar liggesusers# works with data.frames sample arg Code sparsity(mtcars, sample = 0.4) Condition Error in `sparsity()`: ! `sample` must be a whole number or `NULL`, not the number 0.4. sparsevctrs/tests/testthat/_snaps/sparse_double.md0000644000176200001440000001547014741321215022276 0ustar liggesusers# input checking is done correctly Code sparse_double("1", 1, 1) Condition Error in `sparse_double()`: ! `values` must be a numeric vector, not a string. --- Code sparse_double(NULL, 1, 1) Condition Error in `sparse_double()`: ! `values` must be a numeric vector, not NULL. --- Code sparse_double(Inf, 1, 1) Condition Error in `sparse_double()`: x `values` must not contain infinite values. i Infinite values at index: 1. --- Code sparse_double(NaN, 1, 1) Condition Error in `sparse_double()`: x `values` must not contain NaN values. i NaN values at index: 1. --- Code sparse_double(1, 1.5, 1) Condition Error in `sparse_double()`: x `positions` must contain integer values. i Non-integer values at index: 1. --- Code sparse_double(1, "1", 1) Condition Error in `sparse_double()`: ! `positions` must be a integer vector, not a string. --- Code sparse_double(1, NULL, 1) Condition Error in `sparse_double()`: ! `positions` must be a integer vector, not NULL. --- Code sparse_double(1, NA, 1) Condition Error in `sparse_double()`: ! `positions` must be a integer vector, not `NA`. --- Code sparse_double(1, Inf, 1) Condition Error in `sparse_double()`: x `positions` must not contain infinite values. i Infinite values at index: 1. --- Code sparse_double(1, NaN, 1) Condition Error in `sparse_double()`: x `positions` must not contain NaN values. i NaN values at index: 1. --- Code sparse_double(numeric(0), integer(0), -10) Condition Error in `sparse_double()`: ! `length` must be a whole number larger than or equal to 0, not the number -10. --- Code sparse_double(numeric(0), integer(0), 1e+10) Condition Error in `sparse_double()`: ! `length` must be less than 2147483647, not 1e+10. --- Code sparse_double(numeric(0), integer(0), c(1, 10)) Condition Error in `sparse_double()`: ! `length` must be a whole number, not a double vector. --- Code sparse_double(numeric(0), integer(0), 1.5) Condition Error in `sparse_double()`: ! `length` must be a whole number, not the number 1.5. --- Code sparse_double(numeric(0), integer(0), "1") Condition Error in `sparse_double()`: ! `length` must be a whole number, not the string "1". --- Code sparse_double(numeric(0), integer(0), NA) Condition Error in `sparse_double()`: ! `length` must be a whole number, not `NA`. --- Code sparse_double(numeric(0), integer(0), Inf) Condition Error in `sparse_double()`: ! `length` must be a whole number, not `Inf`. --- Code sparse_double(numeric(0), integer(0), NULL) Condition Error in `sparse_double()`: ! `length` must be a whole number, not `NULL`. --- Code sparse_double(numeric(0), integer(0), NaN) Condition Error in `sparse_double()`: ! `length` must be a whole number, not `NaN`. --- Code sparse_double(1:4, 1:6, 10) Condition Error in `sparse_double()`: ! `value` (4) and `positions` (6) must have the same length. --- Code sparse_double(1, 1:6, 10) Condition Error in `sparse_double()`: ! `value` (1) and `positions` (6) must have the same length. --- Code sparse_double(1:4, c(1, 1, 5, 6), 10) Condition Error in `sparse_double()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2. --- Code sparse_double(1:100, rep(1, 100), 100) Condition Error in `sparse_double()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100. --- Code sparse_double(c(1, 2), c(3, 1), 5) Condition Error in `sparse_double()`: ! `positions` must be sorted in increasing order. --- Code sparse_double(1, 10, 5) Condition Error in `sparse_double()`: x `positions` value must not be larger than `length`. i Offending values at index: 1. --- Code sparse_double(rep(1, 50), seq(25, 74), 50) Condition Error in `sparse_double()`: x `positions` value must not be larger than `length`. i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. --- Code sparse_double(1, 0, 5) Condition Error in `sparse_double()`: x `positions` value must positive. i Non-positive values at index: 1. --- Code sparse_double(rep(1, 101), seq(-50, 50), 100) Condition Error in `sparse_double()`: x `positions` value must positive. i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. --- Code sparse_double(0, 1, 10) Condition Error in `sparse_double()`: x `values` value must not be equal to the default 0. i 0 values at index: 1. --- Code sparse_double(rep(c(1, 0), 5), 1:10, 50) Condition Error in `sparse_double()`: x `values` value must not be equal to the default 0. i 0 values at index: 2, 4, 6, 8, and 10. # min method works with sparse_double() Code res <- min(sparse_double(integer(), integer(), 0)) Condition Warning: no non-missing arguments to min; returning Inf # max method works with sparse_double() Code res <- max(sparse_double(integer(), integer(), 0)) Condition Warning: no non-missing arguments to max; returning -Inf # default argument is working Code sparse_double(1, 1, 10, default = 1:10) Condition Error in `sparse_double()`: ! `default` must be a number, not an integer vector. --- Code sparse_double(1, 1, 10, default = TRUE) Condition Error in `sparse_double()`: ! `default` must be a number, not `TRUE`. --- Code sparse_double(c(1, 1, 4), c(1, 4, 6), 10, default = 1) Condition Error in `sparse_double()`: x `values` value must not be equal to the default 1. i 1 values at index: 1 and 2. # verbose testing Code tmp <- x[] Output sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Warning: sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Error: ! sparsevctrs: Sparse vector materialized # printing works #48 Code sparse_double(1, 1, 10) + 1 Output [1] 2 1 1 1 1 1 1 1 1 1 sparsevctrs/tests/testthat/_snaps/sparse_integer.md0000644000176200001440000001616314741321215022461 0ustar liggesusers# input checking is done correctly Code sparse_integer("1", 1, 1) Condition Error in `sparse_integer()`: ! Can't convert `values` to . --- Code sparse_integer(0.5, 1, 1) Condition Error in `sparse_integer()`: ! Can't convert from `values` to due to loss of precision. * Locations: 1 --- Code sparse_integer(NULL, 1, 1) Condition Error in `sparse_integer()`: ! `value` (0) and `positions` (1) must have the same length. --- Code sparse_integer(Inf, 1, 1) Condition Error in `sparse_integer()`: ! Can't convert from `values` to due to loss of precision. * Locations: 1 --- Code sparse_integer(NaN, 1, 1) Condition Error in `sparse_integer()`: x `values` must not contain NaN values. i NaN values at index: 1. --- Code sparse_integer(1, 1.5, 1) Condition Error in `sparse_integer()`: x `positions` must contain integer values. i Non-integer values at index: 1. --- Code sparse_integer(1, "1", 1) Condition Error in `sparse_integer()`: ! `positions` must be a integer vector, not a string. --- Code sparse_integer(1, NULL, 1) Condition Error in `sparse_integer()`: ! `positions` must be a integer vector, not NULL. --- Code sparse_integer(1, NA, 1) Condition Error in `sparse_integer()`: ! `positions` must be a integer vector, not `NA`. --- Code sparse_integer(1, Inf, 1) Condition Error in `sparse_integer()`: x `positions` must not contain infinite values. i Infinite values at index: 1. --- Code sparse_integer(1, NaN, 1) Condition Error in `sparse_integer()`: x `positions` must not contain NaN values. i NaN values at index: 1. --- Code sparse_integer(numeric(0), integer(0), -10) Condition Error in `sparse_integer()`: ! `length` must be a whole number larger than or equal to 0, not the number -10. --- Code sparse_integer(numeric(0), integer(0), 1e+10) Condition Error in `sparse_integer()`: ! `length` must be less than 2147483647, not 1e+10. --- Code sparse_integer(integer(0), integer(0), c(1, 10)) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not a double vector. --- Code sparse_integer(integer(0), integer(0), 1.5) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not the number 1.5. --- Code sparse_integer(integer(0), integer(0), "1") Condition Error in `sparse_integer()`: ! `length` must be a whole number, not the string "1". --- Code sparse_integer(integer(0), integer(0), NA) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not `NA`. --- Code sparse_integer(integer(0), integer(0), Inf) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not `Inf`. --- Code sparse_integer(integer(0), integer(0), NULL) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not `NULL`. --- Code sparse_integer(integer(0), integer(0), NaN) Condition Error in `sparse_integer()`: ! `length` must be a whole number, not `NaN`. --- Code sparse_integer(1:4, 1:6, 10) Condition Error in `sparse_integer()`: ! `value` (4) and `positions` (6) must have the same length. --- Code sparse_integer(1, 1:6, 10) Condition Error in `sparse_integer()`: ! `value` (1) and `positions` (6) must have the same length. --- Code sparse_integer(1:4, c(1, 1, 5, 6), 10) Condition Error in `sparse_integer()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2. --- Code sparse_integer(1:100, rep(1, 100), 100) Condition Error in `sparse_integer()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100. --- Code sparse_integer(c(1, 2), c(3, 1), 5) Condition Error in `sparse_integer()`: ! `positions` must be sorted in increasing order. --- Code sparse_integer(1, 10, 5) Condition Error in `sparse_integer()`: x `positions` value must not be larger than `length`. i Offending values at index: 1. --- Code sparse_integer(rep(1, 50), seq(25, 74), 50) Condition Error in `sparse_integer()`: x `positions` value must not be larger than `length`. i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. --- Code sparse_integer(1, 0, 5) Condition Error in `sparse_integer()`: x `positions` value must positive. i Non-positive values at index: 1. --- Code sparse_integer(rep(1, 101), seq(-50, 50), 100) Condition Error in `sparse_integer()`: x `positions` value must positive. i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. --- Code sparse_integer(0, 1, 10) Condition Error in `sparse_integer()`: x `values` value must not be equal to the default 0. i 0 values at index: 1. --- Code sparse_integer(rep(c(1, 0), 5), 1:10, 50) Condition Error in `sparse_integer()`: x `values` value must not be equal to the default 0. i 0 values at index: 2, 4, 6, 8, and 10. # min method works with sparse_integer() Code res <- min(sparse_integer(integer(), integer(), 0)) Condition Warning: no non-missing arguments to min; returning Inf # max method works with sparse_integer() Code res <- max(sparse_integer(integer(), integer(), 0)) Condition Warning: no non-missing arguments to max; returning -Inf # default argument is working Code sparse_integer(1, 1, 10, default = 1:10) Condition Error in `sparse_integer()`: ! `default` must be a whole number, not an integer vector. --- Code sparse_integer(1, 1, 10, default = TRUE) Condition Error in `sparse_integer()`: ! `default` must be a whole number, not `TRUE`. --- Code sparse_integer(c(1, 1, 4), c(1, 4, 6), 10, default = 1) Condition Error in `sparse_integer()`: x `values` value must not be equal to the default 1. i 1 values at index: 1 and 2. # verbose testing Code tmp <- x[] Output sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Warning: sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Error: ! sparsevctrs: Sparse vector materialized # printing works #48 Code sparse_integer(1, 1, 10) + 1 Output [1] 2 1 1 1 1 1 1 1 1 1 sparsevctrs/tests/testthat/_snaps/sparse_logical.md0000644000176200001440000001412014741321215022425 0ustar liggesusers# input checking is done correctly Code sparse_logical("1", 1, 1) Condition Error in `sparse_logical()`: ! `values` must be a logical vector, not a string. --- Code sparse_logical(1, 1, 1) Condition Error in `sparse_logical()`: ! `values` must be a logical vector, not a number. --- Code sparse_logical(NULL, 1, 1) Condition Error in `sparse_logical()`: ! `values` must be a logical vector, not NULL. --- Code sparse_logical(Inf, 1, 1) Condition Error in `sparse_logical()`: ! `values` must be a logical vector, not a number. --- Code sparse_logical(NaN, 1, 1) Condition Error in `sparse_logical()`: ! `values` must be a logical vector, not a numeric `NA`. --- Code sparse_logical(TRUE, 1.5, 1) Condition Error in `sparse_logical()`: x `positions` must contain integer values. i Non-integer values at index: 1. --- Code sparse_logical(TRUE, "1", 1) Condition Error in `sparse_logical()`: ! `positions` must be a integer vector, not a string. --- Code sparse_logical(TRUE, NULL, 1) Condition Error in `sparse_logical()`: ! `positions` must be a integer vector, not NULL. --- Code sparse_logical(TRUE, NA, 1) Condition Error in `sparse_logical()`: ! `positions` must be a integer vector, not `NA`. --- Code sparse_logical(TRUE, Inf, 1) Condition Error in `sparse_logical()`: x `positions` must not contain infinite values. i Infinite values at index: 1. --- Code sparse_logical(TRUE, NaN, 1) Condition Error in `sparse_logical()`: x `positions` must not contain NaN values. i NaN values at index: 1. --- Code sparse_logical(numeric(0), integer(0), -10) Condition Error in `sparse_logical()`: ! `length` must be a whole number larger than or equal to 0, not the number -10. --- Code sparse_logical(numeric(0), integer(0), 1e+10) Condition Error in `sparse_logical()`: ! `length` must be less than 2147483647, not 1e+10. --- Code sparse_logical(logical(0), integer(0), c(1, 10)) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not a double vector. --- Code sparse_logical(logical(0), integer(0), 1.5) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not the number 1.5. --- Code sparse_logical(logical(0), integer(0), "1") Condition Error in `sparse_logical()`: ! `length` must be a whole number, not the string "1". --- Code sparse_logical(logical(0), integer(0), NA) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not `NA`. --- Code sparse_logical(logical(0), integer(0), Inf) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not `Inf`. --- Code sparse_logical(logical(0), integer(0), NULL) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not `NULL`. --- Code sparse_logical(logical(0), integer(0), NaN) Condition Error in `sparse_logical()`: ! `length` must be a whole number, not `NaN`. --- Code sparse_logical(c(TRUE, TRUE), 1:6, 10) Condition Error in `sparse_logical()`: ! `value` (2) and `positions` (6) must have the same length. --- Code sparse_logical(TRUE, 1:6, 10) Condition Error in `sparse_logical()`: ! `value` (1) and `positions` (6) must have the same length. --- Code sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10) Condition Error in `sparse_logical()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2. --- Code sparse_logical(rep(TRUE, 100), rep(1, 100), 100) Condition Error in `sparse_logical()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100. --- Code sparse_logical(c(TRUE, TRUE), c(3, 1), 5) Condition Error in `sparse_logical()`: ! `positions` must be sorted in increasing order. --- Code sparse_logical(TRUE, 10, 5) Condition Error in `sparse_logical()`: x `positions` value must not be larger than `length`. i Offending values at index: 1. --- Code sparse_logical(rep(TRUE, 50), seq(25, 74), 50) Condition Error in `sparse_logical()`: x `positions` value must not be larger than `length`. i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. --- Code sparse_logical(TRUE, 0, 5) Condition Error in `sparse_logical()`: x `positions` value must positive. i Non-positive values at index: 1. --- Code sparse_logical(rep(TRUE, 101), seq(-50, 50), 100) Condition Error in `sparse_logical()`: x `positions` value must positive. i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. # default argument is working Code sparse_logical(TRUE, 1, 10, default = TRUE) Condition Error in `sparse_logical()`: x `values` value must not be equal to the default TRUE. i TRUE values at index: 1. --- Code sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE) Condition Error in `sparse_logical()`: x `values` value must not be equal to the default TRUE. i TRUE values at index: 1 and 2. # verbose testing Code tmp <- x[] Output sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Warning: sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Error: ! sparsevctrs: Sparse vector materialized sparsevctrs/tests/testthat/_snaps/sparse_dummy.md0000644000176200001440000000111014741321215022141 0ustar liggesusers# sparse_dummy() errors with wrong input Code sparse_dummy(letters) Condition Error in `sparse_dummy()`: ! `x` must be a factor, not a character vector. --- Code sparse_dummy(mtcars) Condition Error in `sparse_dummy()`: ! `x` must be a factor, not a data frame. --- Code sparse_dummy(1:5) Condition Error in `sparse_dummy()`: ! `x` must be a factor, not an integer vector. --- Code sparse_dummy(NULL) Condition Error in `sparse_dummy()`: ! `x` must be a factor, not NULL. sparsevctrs/tests/testthat/_snaps/sparse_character.md0000644000176200001440000001553314741321215022760 0ustar liggesusers# input checking is done correctly Code sparse_character(1, 1, 1) Condition Error in `sparse_character()`: ! Can't convert `values` to . --- Code sparse_character(0.5, 1, 1) Condition Error in `sparse_character()`: ! Can't convert `values` to . --- Code sparse_character(NULL, 1, 1) Condition Error in `sparse_character()`: ! `value` (0) and `positions` (1) must have the same length. --- Code sparse_character(Inf, 1, 1) Condition Error in `sparse_character()`: ! Can't convert `values` to . --- Code sparse_character(NaN, 1, 1) Condition Error in `sparse_character()`: ! Can't convert `values` to . --- Code sparse_character("A", 1.5, 1) Condition Error in `sparse_character()`: x `positions` must contain integer values. i Non-integer values at index: 1. --- Code sparse_character("A", "1", 1) Condition Error in `sparse_character()`: ! `positions` must be a integer vector, not a string. --- Code sparse_character("A", NULL, 1) Condition Error in `sparse_character()`: ! `positions` must be a integer vector, not NULL. --- Code sparse_character("A", NA, 1) Condition Error in `sparse_character()`: ! `positions` must be a integer vector, not `NA`. --- Code sparse_character("A", Inf, 1) Condition Error in `sparse_character()`: x `positions` must not contain infinite values. i Infinite values at index: 1. --- Code sparse_character("A", NaN, 1) Condition Error in `sparse_character()`: x `positions` must not contain NaN values. i NaN values at index: 1. --- Code sparse_character(numeric(0), integer(0), -10) Condition Error in `sparse_character()`: ! `length` must be a whole number larger than or equal to 0, not the number -10. --- Code sparse_character(numeric(0), integer(0), 1e+10) Condition Error in `sparse_character()`: ! `length` must be less than 2147483647, not 1e+10. --- Code sparse_character(character(0), integer(0), c(1, 10)) Condition Error in `sparse_character()`: ! `length` must be a whole number, not a double vector. --- Code sparse_character(character(0), integer(0), 1.5) Condition Error in `sparse_character()`: ! `length` must be a whole number, not the number 1.5. --- Code sparse_character(character(0), integer(0), "1") Condition Error in `sparse_character()`: ! `length` must be a whole number, not the string "1". --- Code sparse_character(character(0), integer(0), NA) Condition Error in `sparse_character()`: ! `length` must be a whole number, not `NA`. --- Code sparse_character(character(0), integer(0), Inf) Condition Error in `sparse_character()`: ! `length` must be a whole number, not `Inf`. --- Code sparse_character(character(0), integer(0), NULL) Condition Error in `sparse_character()`: ! `length` must be a whole number, not `NULL`. --- Code sparse_character(character(0), integer(0), NaN) Condition Error in `sparse_character()`: ! `length` must be a whole number, not `NaN`. --- Code sparse_character(letters[1:4], 1:6, 10) Condition Error in `sparse_character()`: ! `value` (4) and `positions` (6) must have the same length. --- Code sparse_character("A", 1:6, 10) Condition Error in `sparse_character()`: ! `value` (1) and `positions` (6) must have the same length. --- Code sparse_character(letters[1:4], c(1, 1, 5, 6), 10) Condition Error in `sparse_character()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2. --- Code sparse_character(letters, rep(1, 26), 100) Condition Error in `sparse_character()`: x `positions` must not contain any duplicate values. i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 25, and 26. --- Code sparse_character(c("A", "B"), c(3, 1), 5) Condition Error in `sparse_character()`: ! `positions` must be sorted in increasing order. --- Code sparse_character("A", 10, 5) Condition Error in `sparse_character()`: x `positions` value must not be larger than `length`. i Offending values at index: 1. --- Code sparse_character(rep("A", 50), seq(25, 74), 50) Condition Error in `sparse_character()`: x `positions` value must not be larger than `length`. i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50. --- Code sparse_character("A", 0, 5) Condition Error in `sparse_character()`: x `positions` value must positive. i Non-positive values at index: 1. --- Code sparse_character(rep("A", 101), seq(-50, 50), 100) Condition Error in `sparse_character()`: x `positions` value must positive. i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51. --- Code sparse_character("", 1, 10) Condition Error in `sparse_character()`: x `values` value must not be equal to the default . i values at index: 1. --- Code sparse_character(rep(c("A", ""), 5), 1:10, 50) Condition Error in `sparse_character()`: x `values` value must not be equal to the default . i values at index: 2, 4, 6, 8, and 10. # default argument is working Code sparse_character("A", 1, 10, default = letters) Condition Error in `sparse_character()`: ! `default` must be a single string, not a character vector. --- Code sparse_character("A", 1, 10, default = TRUE) Condition Error in `sparse_character()`: ! `default` must be a single string, not `TRUE`. --- Code sparse_character(c("A", "B", "C"), c(1, 4, 6), 10, default = "A") Condition Error in `sparse_character()`: x `values` value must not be equal to the default A. i A values at index: 1. # verbose testing Code tmp <- x[] Output sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Warning: sparsevctrs: Sparse vector materialized Code tmp <- x[] --- Code tmp <- x[] Condition Error: ! sparsevctrs: Sparse vector materialized # printing works #48 Code sparse_character("A", 1, 10)[] Output [1] "A" "" "" "" "" "" "" "" "" "" sparsevctrs/tests/testthat/_snaps/coerce.md0000644000176200001440000000473014741321215020704 0ustar liggesusers# coerce_to_sparse_matrix() errors on wrong input Code coerce_to_sparse_matrix(1:10) Condition Error in `coerce_to_sparse_matrix()`: ! `x` must be a , not an integer vector. --- Code coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10)) Condition Error in `coerce_to_sparse_matrix()`: ! `x` must be a , not a double matrix. --- Code coerce_to_sparse_matrix(iris) Condition Error in `coerce_to_sparse_matrix()`: x All columns of `x` must be numeric. i Non-numeric columns: Species. # coerce_to_sparse_matrix() materializes non-zero defaulted columns Code res <- coerce_to_sparse_matrix(sparse_df) Output sparsevctrs: Sparse vector materialized sparsevctrs: Sparse vector materialized # coerce_to_sparse_matrix() can pass through error call Code tmp_fun(1) Condition Error in `tmp_fun()`: ! `x` must be a , not a number. --- Code tmp_fun(1) Condition Error in `tmp_fun()`: ! `x` must be a , not a number. # coerce_to_sparse_data_frame() errors with no column names Code coerce_to_sparse_data_frame(sparse_mat) Condition Error in `coerce_to_sparse_data_frame()`: ! `x` must have column names. # coerce_to_sparse_data_frame() errors with wrong input Code coerce_to_sparse_data_frame(mtcars) Condition Error in `coerce_to_sparse_data_frame()`: ! `x` must be a , not a data frame. --- Code coerce_to_sparse_data_frame(1:10) Condition Error in `coerce_to_sparse_data_frame()`: ! `x` must be a , not an integer vector. # coerce_to_sparse_data_frame() can pass through error call Code tmp_fun(1) Condition Error in `tmp_fun()`: ! `x` must be a , not a number. # coerce_to_sparse_tibble() errors with no column names Code coerce_to_sparse_tibble(sparse_mat) Condition Error in `coerce_to_sparse_tibble()`: ! `x` must have column names. # coerce_to_sparse_tibble() errors with wrong input Code coerce_to_sparse_tibble(mtcars) Condition Error in `coerce_to_sparse_tibble()`: ! `x` must be a , not a data frame. --- Code coerce_to_sparse_tibble(1:10) Condition Error in `coerce_to_sparse_tibble()`: ! `x` must be a , not an integer vector. sparsevctrs/tests/testthat/test-coerce.R0000644000176200001440000002704314741321215020201 0ustar liggesusers### coerce_to_sparse_matrix ---------------------------------------------------- test_that("coerce_to_sparse_matrix() works", { skip_if_not_installed("Matrix") sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(sparse_df) <- letters[1:10] sparse_df <- as.data.frame(sparse_df) res <- coerce_to_sparse_matrix(sparse_df) expect_s4_class(res, "dgCMatrix") expect_identical(dim(res), c(10L, 10L)) exp <- Matrix::diag(1:10, 10, 10) exp <- Matrix::Matrix(exp, sparse = TRUE) exp <- as(exp, "generalMatrix") exp <- as(exp, "CsparseMatrix") colnames(exp) <- colnames(res) rownames(exp) <- rownames(res) expect_identical(res, exp) }) test_that("coerce_to_sparse_matrix() with zero rows and columns", { skip_if_not_installed("Matrix") dat <- data.frame() exp <- Matrix::Matrix(nrow = 0, ncol = 0, sparse = TRUE) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame(x = integer(), y = integer()) exp <- Matrix::Matrix(nrow = 0, ncol = 2, sparse = TRUE) colnames(exp) <- c("x", "y") expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame(x = 1:2)[, integer()] exp <- Matrix::Matrix(nrow = 2, ncol = 0, sparse = TRUE) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() works with single all sparse vector", { skip_if_not_installed("Matrix") exp <- Matrix::Matrix(0, nrow = 10, ncol = 1, sparse = TRUE) colnames(exp) <- c("x") dat <- data.frame(x = rep(0, 10)) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame(x = sparse_integer(integer(), integer(), 10)) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() works with multiple all sparse vector", { skip_if_not_installed("Matrix") exp <- Matrix::Matrix(0, nrow = 10, ncol = 2, sparse = TRUE) colnames(exp) <- c("x", "y") dat <- data.frame(x = rep(0, 10), y = rep(0, 10)) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(integer(), integer(), 10), y = sparse_integer(integer(), integer(), 10) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() works with sparse between dense", { skip_if_not_installed("Matrix") exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 1), nrow = 2, ncol = 3, sparse = TRUE) colnames(exp) <- c("x", "y", "z") dat <- data.frame( x = c(1, 0), y = c(0, 0), z = c(0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 2), y = c(0, 0), z = c(0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = c(1, 0), y = c(0, 0), z = sparse_integer(1, 2, 2) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 2), y = c(0, 0), z = sparse_integer(1, 2, 2) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = c(1, 0), y = sparse_integer(integer(), integer(), 2), z = c(0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 2), y = sparse_integer(integer(), integer(), 2), z = c(0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = c(1, 0), y = sparse_integer(integer(), integer(), 2), z = sparse_integer(1, 2, 2) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 2), y = sparse_integer(integer(), integer(), 2), z = sparse_integer(1, 2, 2) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() works with sparse before dense", { skip_if_not_installed("Matrix") exp <- Matrix::Matrix(c(0, 0, 0, 0, 0, 1), nrow = 3, ncol = 2, sparse = TRUE) colnames(exp) <- c("x", "y") dat <- data.frame( x = c(0, 0, 0), y = c(0, 0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = c(0, 0, 0), y = sparse_integer(1, 3, 3) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(integer(), integer(), 3), y = c(0, 0, 1) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(integer(), integer(), 3), y = sparse_integer(1, 3, 3) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() works with sparse after dense", { skip_if_not_installed("Matrix") exp <- Matrix::Matrix(c(1, 0, 0, 0, 0, 0), nrow = 3, ncol = 2, sparse = TRUE) colnames(exp) <- c("x", "y") dat <- data.frame( x = c(1, 0, 0), y = c(0, 0, 0) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 3), y = c(0, 0, 0) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = c(1, 0, 0), y = sparse_integer(integer(), integer(), 3) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) dat <- data.frame( x = sparse_integer(1, 1, 3), y = sparse_integer(integer(), integer(), 3) ) expect_identical( coerce_to_sparse_matrix(dat), exp ) }) test_that("coerce_to_sparse_matrix() errors on wrong input", { skip_if_not_installed("Matrix") expect_snapshot( error = TRUE, coerce_to_sparse_matrix(1:10) ) expect_snapshot( error = TRUE, coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10)) ) expect_snapshot( error = TRUE, coerce_to_sparse_matrix(iris) ) }) test_that("coerce_to_sparse_matrix() will divert for non-sparse data.frames", { skip_if_not_installed("Matrix") expect_identical( coerce_to_sparse_matrix(mtcars), Matrix::Matrix(as.matrix(mtcars), sparse = TRUE) ) }) test_that("coerce_to_sparse_matrix() materializes non-zero defaulted columns", { skip_if_not_installed("Matrix") withr::local_options("sparsevctrs.verbose_materialize" = TRUE) sparse_df <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(sparse_df) <- letters[1:10] sparse_df <- as.data.frame(sparse_df) sparse_df$nonzero1 <- sparse_double(1, 1, 10, default = 10) sparse_df$nonzero2 <- sparse_double(1, 1, 10, default = 20) expect_snapshot( res <- coerce_to_sparse_matrix(sparse_df) ) withr::local_options("sparsevctrs.verbose_materialize" = NULL) expect_s4_class(res, "dgCMatrix") expect_identical(dim(res), c(10L, 12L)) exp <- Matrix::diag(1:10, 10, 10) exp <- Matrix::Matrix(exp, sparse = TRUE) exp <- as(exp, "generalMatrix") exp <- as(exp, "CsparseMatrix") exp <- cbind(exp, sparse_df$nonzero1) exp <- cbind(exp, sparse_df$nonzero2) colnames(exp) <- colnames(res) rownames(exp) <- rownames(res) expect_identical(res, exp) }) test_that("coerce_to_sparse_matrix() can pass through error call", { tmp_fun <- function(x) { coerce_to_sparse_matrix(x, call = rlang::caller_env(0)) } expect_snapshot( error = TRUE, tmp_fun(1) ) }) ### coerce_to_sparse_data_frame ------------------------------------------------ test_that("coerce_to_sparse_data_frame() works", { skip_if_not_installed("Matrix") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) sparse_mat <- as(sparse_mat, "generalMatrix") sparse_mat <- as(sparse_mat, "CsparseMatrix") colnames(sparse_mat) <- letters[1:10] rownames(sparse_mat) <- 1:10 res <- coerce_to_sparse_data_frame(sparse_mat) exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(exp) <- letters[1:10] exp <- as.data.frame(exp) expect_identical(res, exp) }) test_that("coerce_to_sparse_data_frame() works with non-dgCMatrix input", { skip_if_not_installed("Matrix") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) colnames(sparse_mat) <- letters[1:10] rownames(sparse_mat) <- 1:10 res <- coerce_to_sparse_data_frame(sparse_mat) exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(exp) <- letters[1:10] exp <- as.data.frame(exp) expect_identical(res, exp) }) test_that("coerce_to_sparse_data_frame() errors with no column names", { skip_if_not_installed("Matrix") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) expect_snapshot( error = TRUE, coerce_to_sparse_data_frame(sparse_mat) ) }) test_that("coerce_to_sparse_data_frame() errors with wrong input", { expect_snapshot( error = TRUE, coerce_to_sparse_data_frame(mtcars) ) expect_snapshot( error = TRUE, coerce_to_sparse_data_frame(1:10) ) }) test_that("coerce_to_sparse_data_frame() can pass through error call", { tmp_fun <- function(x) { coerce_to_sparse_data_frame(x, call = rlang::caller_env(0)) } expect_snapshot( error = TRUE, tmp_fun(1) ) }) ### coerce_to_sparse_tibble ---------------------------------------------------- test_that("coerce_to_sparse_tibble() works", { skip_if_not_installed("Matrix") skip_if_not_installed("tibble") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) sparse_mat <- as(sparse_mat, "generalMatrix") sparse_mat <- as(sparse_mat, "CsparseMatrix") colnames(sparse_mat) <- letters[1:10] rownames(sparse_mat) <- 1:10 res <- coerce_to_sparse_tibble(sparse_mat) exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(exp) <- letters[1:10] exp <- tibble::as_tibble(exp) expect_identical(res, exp) }) test_that("coerce_to_sparse_tibble() works with non-dgCMatrix input", { skip_if_not_installed("Matrix") skip_if_not_installed("tibble") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) colnames(sparse_mat) <- letters[1:10] rownames(sparse_mat) <- 1:10 res <- coerce_to_sparse_tibble(sparse_mat) exp <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(exp) <- letters[1:10] exp <- tibble::as_tibble(exp) expect_identical(res, exp) }) test_that("coerce_to_sparse_tibble() errors with no column names", { skip_if_not_installed("Matrix") skip_if_not_installed("tibble") sparse_mat <- Matrix::diag(1:10, 10, 10) sparse_mat <- Matrix::Matrix(sparse_mat, sparse = TRUE) expect_snapshot( error = TRUE, coerce_to_sparse_tibble(sparse_mat) ) }) test_that("coerce_to_sparse_tibble() errors with wrong input", { expect_snapshot( error = TRUE, coerce_to_sparse_tibble(mtcars) ) expect_snapshot( error = TRUE, coerce_to_sparse_tibble(1:10) ) }) test_that("coerce_to_sparse_matrix() can pass through error call", { tmp_fun <- function(x) { coerce_to_sparse_tibble(x, call = rlang::caller_env(0)) } expect_snapshot( error = TRUE, tmp_fun(1) ) }) ### .sparse_matrix_to_list ----------------------------------------------------- test_that(".sparse_matrix_to_list() handles fully sparse columns (#69)", { skip_if_not_installed("Matrix") x_mat <- matrix( c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), nrow = 3 ) colnames(x_mat) <- letters[1:6] x_df <- as.data.frame(x_mat) x_mat_sparse <- Matrix::Matrix(x_mat, sparse = TRUE) expect_identical( coerce_to_sparse_data_frame(x_mat_sparse), x_df ) }) sparsevctrs/tests/testthat/test-sparsity.R0000644000176200001440000000353714741572451020633 0ustar liggesuserstest_that("works with data.frames", { mtcars_exp_sparsity <- mean(mtcars == 0) expect_identical( sparsity(mtcars), mtcars_exp_sparsity ) }) test_that("works with non-numeric data.frames", { vs <- mtcars$vs mtcars$vs <- 4 mtcars_exp_sparsity <- mean(mtcars == 0) mtcars$vs <- as.character(vs) expect_identical( sparsity(mtcars), mtcars_exp_sparsity ) mtcars$vs <- as.logical(vs) expect_identical( sparsity(mtcars), mtcars_exp_sparsity ) mtcars$vs <- ifelse(vs == 1, 1, NA) expect_identical( sparsity(mtcars), mtcars_exp_sparsity ) }) test_that("works with data.frames sample arg", { set.seed(1234) exp <- mean(mtcars[sample(32, 10), ] == 0) set.seed(1234) expect_identical( sparsity(mtcars, sample = 10), exp ) set.seed(1234) exp <- mean(mtcars == 0) set.seed(1234) expect_identical( sparsity(mtcars, sample = 1000), exp ) expect_snapshot( error = TRUE, sparsity(mtcars, sample = 0.4) ) }) test_that("works with matrices", { mtcars_mat <- as.matrix(mtcars) mtcars_exp_sparsity <- mean(mtcars_mat == 0) expect_identical( sparsity(mtcars_mat), mtcars_exp_sparsity ) mtcars_mat[1, 1] <- NA expect_identical( sparsity(mtcars_mat), mtcars_exp_sparsity ) mtcars_lgl <- apply(mtcars_mat, 2, as.logical) expect_identical( sparsity(mtcars_lgl), 0 ) mtcars_chr <- apply(mtcars_mat, 2, as.character) expect_identical( sparsity(mtcars_chr), 0 ) }) test_that("works with sparse matrices", { mtcars_sparse_mat <- coerce_to_sparse_matrix(mtcars) mtcars_exp_sparsity <- mean(as.logical(mtcars_sparse_mat == 0)) expect_equal( sparsity(mtcars_sparse_mat), mtcars_exp_sparsity ) mtcars_sparse_mat[1, 1] <- NA expect_equal( sparsity(mtcars_sparse_mat), mtcars_exp_sparsity ) })sparsevctrs/tests/testthat/test-sparse_double.R0000644000176200001440000002316514741321215021571 0ustar liggesuserstest_that("input checking is done correctly", { expect_identical( sparse_double(1L, 1, 1), sparse_double(1, 1, 1) ) # value expect_snapshot( error = TRUE, sparse_double("1", 1, 1) ) expect_snapshot( error = TRUE, sparse_double(NULL, 1, 1) ) expect_snapshot( error = TRUE, sparse_double(Inf, 1, 1) ) expect_snapshot( error = TRUE, sparse_double(NaN, 1, 1) ) # position expect_snapshot( error = TRUE, sparse_double(1, 1.5, 1) ) expect_snapshot( error = TRUE, sparse_double(1, "1", 1) ) expect_snapshot( error = TRUE, sparse_double(1, NULL, 1) ) expect_snapshot( error = TRUE, sparse_double(1, NA, 1) ) expect_snapshot( error = TRUE, sparse_double(1, Inf, 1) ) expect_snapshot( error = TRUE, sparse_double(1, NaN, 1) ) # length expect_no_error( sparse_double(numeric(0), integer(0), 0) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), -10) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), 10000000000) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), c(1, 10)) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), 1.5) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), "1") ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), NA) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), Inf) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), NULL) ) expect_snapshot( error = TRUE, sparse_double(numeric(0), integer(0), NaN) ) # Length restriction expect_snapshot( error = TRUE, sparse_double(1:4, 1:6, 10) ) expect_snapshot( error = TRUE, sparse_double(1, 1:6, 10) ) # duplicates in position expect_snapshot( error = TRUE, sparse_double(1:4, c(1, 1, 5, 6), 10) ) expect_snapshot( error = TRUE, sparse_double(1:100, rep(1, 100), 100) ) # Ordered position expect_snapshot( error = TRUE, sparse_double(c(1, 2), c(3, 1), 5) ) # Too large position values expect_snapshot( error = TRUE, sparse_double(1, 10, 5) ) expect_snapshot( error = TRUE, sparse_double(rep(1, 50), seq(25, 74), 50) ) # Too large position values expect_snapshot( error = TRUE, sparse_double(1, 0, 5) ) expect_snapshot( error = TRUE, sparse_double(rep(1, 101), seq(-50, 50), 100) ) # Too large position values expect_snapshot( error = TRUE, sparse_double(0, 1, 10) ) expect_snapshot( error = TRUE, sparse_double(rep(c(1, 0), 5), 1:10, 50) ) }) test_that("length() works with sparse_double()", { expect_identical( length(sparse_double(numeric(), integer(), 0)), 0L ) expect_identical( length(sparse_double(1, 1, 10)), 10L ) expect_identical( length(sparse_double(1, 1, 100)), 100L ) }) test_that("single subsetting works with sparse_double()", { x_sparse <- sparse_double(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10, 0, 0, 0, NA, 0, 0, 20, 0, 0) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[0], x_dense[0]) expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_]) expect_identical(x_sparse[NULL], x_dense[NULL]) expect_identical(x_sparse[NaN], x_dense[NaN]) expect_identical(x_sparse[100], x_dense[100]) expect_identical(x_sparse[Inf], x_dense[Inf]) expect_identical(x_sparse["not a number"], x_dense["not a number"]) expect_identical(x_sparse[1.6], x_dense[1.6]) expect_identical(x_sparse[2.6], x_dense[2.6]) }) test_that("multiple subsetting works with sparse_double()", { x_sparse <- sparse_double(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10, 0, 0, 0, NA, 0, 0, 20, 0, 0) expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[-1], x_dense[-1]) expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)]) expect_identical(x_sparse[FALSE], x_dense[FALSE]) expect_identical(x_sparse[TRUE], x_dense[TRUE]) expect_identical(x_sparse[NA], x_dense[NA]) expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)]) expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)]) expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)]) expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)]) expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)]) }) test_that("materialization works with sparse_double()", { x_sparse <- sparse_double(value = c(10, NA, 20), position = c(1, 5, 8), 10) x_dense <- c(10, 0, 0, 0, NA, 0, 0, 20, 0, 0) expect_identical(x_sparse[], x_dense) }) test_that("sorting works with sparse_double()", { x_sparse <- sparse_double(numeric(), integer(), 10) expect_true(is_sparse_double(sort(x_sparse))) x_sparse <- sparse_double(NA, 4, 10) expect_identical( sort(x_sparse), rep(0, 9) ) x_sparse <- sparse_double(numeric(), integer(), 10) expect_true(is_sparse_double(sort(x_sparse))) x_sparse <- sparse_double(c(1, 4, 5), c(1, 4, 7), 7) expect_false(is_sparse_double(sort(x_sparse))) x_sparse <- sparse_double(c(1, 5), c(1, 7), 7) expect_false(is_sparse_double(sort(x_sparse))) x_sparse <- sparse_double(c(-1, 5), c(1, 7), 7) expect_true(is_sparse_double(sort(x_sparse))) }) test_that("min method works with sparse_double()", { expect_snapshot( res <- min(sparse_double(integer(), integer(), 0)) ) expect_identical(res, Inf) expect_identical( min(sparse_double(numeric(), integer(), 1000000000)), 0 ) expect_identical( min(sparse_double(-10, 10, 1000000000)), -10 ) expect_identical( min(sparse_double(-10, 10, 1000000000, default = -100)), -100 ) expect_identical( min(sparse_double(11:20, 11:20, 1000000000, default = 15.5)), 11 ) expect_identical( min(sparse_double(NA, 10, 1000000000)), NA_real_ ) expect_identical( min(sparse_double(c(11:19, NA), 11:20, 1000000000)), NA_real_ ) expect_identical( min(sparse_double(NA, 10, 1000000000), na.rm = TRUE), 0 ) expect_identical( min(sparse_double(c(-10, 11:19, NA), 10:20, 1000000000), na.rm = TRUE), -10 ) }) test_that("max method works with sparse_double()", { expect_snapshot( res <- max(sparse_double(integer(), integer(), 0)) ) expect_identical( max(sparse_double(numeric(), integer(), 1000000000)), 0 ) expect_identical( max(sparse_double(10, 10, 1000000000)), 10 ) expect_identical( max(sparse_double(10, 10, 1000000000, default = 100)), 100 ) expect_identical( max(sparse_double(11:20, 11:20, 1000000000, default = 15.5)), 20 ) expect_identical( max(sparse_double(NA, 10, 1000000000)), NA_real_ ) expect_identical( max(sparse_double(c(11:19, NA), 11:20, 1000000000)), NA_real_ ) expect_identical( max(sparse_double(NA, 10, 1000000000), na.rm = TRUE), 0 ) expect_identical( max(sparse_double(c(-10, 11:19, NA), 10:20, 1000000000), na.rm = TRUE), 19 ) }) test_that("anyNA method works with sparse_double", { expect_false( anyNA(sparse_double(numeric(), integer(), 1000000000)) ) expect_false( anyNA(sparse_double(1, 1, 1000000000)) ) expect_true( anyNA(sparse_double(NA, 1, 1000000000)) ) expect_true( anyNA(sparse_double(c(-10, 11:19, NA), 10:20, 1000000000)) ) }) test_that("sum method works with sparse_double", { expect_identical( sum(sparse_double(numeric(), integer(), 0)), 0 ) expect_identical( sum(sparse_double(numeric(), integer(), 1000000000)), 0 ) expect_identical( sum(sparse_double(numeric(), integer(), 1000000000, default = 0.0001)), 100000 ) expect_identical( sum(sparse_double(c(1, 5.4, 10), c(1, 5, 10), 10)), 16.4 ) expect_identical( sum(sparse_double(c(1, 5.4, 10), c(1, 5, 10), 10, default = -1)), 16.4 - 7 ) expect_identical( sum(sparse_double(c(1, 5.4, NA), c(1, 5, 10), 10)), NA_real_ ) expect_identical( sum(sparse_double(c(1, 5.4, NA), c(1, 5, 10), 10), na.rm = TRUE), 6.4 ) }) test_that("default argument is working", { expect_snapshot( error = TRUE, sparse_double(1, 1, 10, default = 1:10) ) expect_snapshot( error = TRUE, sparse_double(1, 1, 10, default = TRUE) ) expect_snapshot( error = TRUE, sparse_double(c(1, 1, 4), c(1, 4, 6), 10, default = 1) ) x_sparse <- sparse_double( value = c(10, NA, 20), position = c(1, 5, 8), length = 10, default = 4 ) x_dense <- c(10, 4, 4, 4, NA, 4, 4, 20, 4, 4) for (i in seq_len(10)) { expect_identical(x_sparse[i], x_dense[i]) } expect_identical(x_sparse[1:2], x_dense[1:2]) expect_identical(x_sparse[3:7], x_dense[3:7]) expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)]) expect_identical(x_sparse[], x_dense) }) test_that("verbose testing", { withr::local_options("sparsevctrs.verbose_materialize" = TRUE) x <- sparse_double(1, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 2) x <- sparse_double(1, 1, 1) expect_snapshot({ tmp <- x[] tmp <- x[] }) withr::local_options("sparsevctrs.verbose_materialize" = 3) x <- sparse_double(1, 1, 1) expect_snapshot( error = TRUE, { tmp <- x[] } ) }) test_that("printing works #48", { expect_snapshot( sparse_double(1, 1, 10) + 1 ) }) sparsevctrs/tests/testthat.R0000644000176200001440000000062214741321215015756 0ustar liggesusers# This file is part of the standard setup for testthat. # It is recommended that you do not modify it. # # Where should you do additional test configuration? # Learn more about the roles of various files in: # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview # * https://testthat.r-lib.org/articles/special-files.html library(testthat) library(sparsevctrs) test_check("sparsevctrs") sparsevctrs/MD50000644000176200001440000001144114744256122013151 0ustar liggesusers749d222507c5e0dd80da932753745aa1 *DESCRIPTION b832734d304931a348bf15d62146cfee *LICENSE bf4721792d68665627ee17e169b1ef6d *NAMESPACE eadf750fac240dbb932dac227e82d187 *NEWS.md a2ab5a0b69a546ff6564c0b614859eba *R/coerce-vector.R 836971611a8ba2772c2d84306d48c595 *R/coerce.R 858c11e42460abba00bf583dbcd43659 *R/extractors.R ba70039dbc865f6f9e01051d7adfe831 *R/has_sparse_elements.R c80a9eb1427c585807cecf618b6f3870 *R/import-standalone-obj-type.R c40f882046a958444c6058a9e2cb9a3b *R/import-standalone-types-check.R 7e9df0ff50d8f9d200adc4eda625bbe5 *R/options.R 05f6224a8682ea184e26629dd63a7faf *R/sparse_character.R 9f838adfc38f3ed69ee93260692fe10f *R/sparse_double.R 95a0a73c512330dd9f12542736f00abe *R/sparse_dummy.R 774511bc770658b86fbc2a3a9cda415a *R/sparse_integer.R 89de433e39a24aeaf31e54cca60ce7e1 *R/sparse_logical.R 862fc9c8fbaf4d824cae29cbfd198ac2 *R/sparse_mean.R dc7e77d386f7af85a323917fd2540165 *R/sparse_median.R e0c107b1e9b9f794f31d48619cdd4cb5 *R/sparse_sd.R 0cc03c9a18660f75a1472904b7957ada *R/sparse_var.R 61821d602c72a0ffb131fb2e78606daf *R/sparsevctrs-package.R 14f340cd98f9575136dc59d2afed9da7 *R/sparsity.R 58798dbfa28ebefd26dba717559e6a8e *R/type-predicates.R e6920021fd54b0f3f26eba80fc854b44 *R/validate-input.R 1980715cb88254de6adf5869ad3d414d *README.md 084f2d2f4e5e51c1e5023a8ae7f8e42e *build/vignette.rds 2dcdc01e5955c178044a9c4660c94b77 *inst/doc/design.R 2a8c8deb6b66483c407724c3ef2a529f *inst/doc/design.Rmd 3357a01d99faf30819293ca413888287 *inst/doc/design.html 3c2ee07d71aba836f9c3856df5580f36 *man/coerce-vector.Rd ccf664624ede1bbd6d9474816fa5f923 *man/coerce_to_sparse_data_frame.Rd d0f3ec0137a0a9da51cb1f588262ff9d *man/coerce_to_sparse_matrix.Rd 6501c5076d86749a741f4b79647454fb *man/coerce_to_sparse_tibble.Rd 81b431d8dcaaccf793b10ce495bde028 *man/extractors.Rd 99c17dca4de2c79f9648e1a972861d8e *man/figures/logo.png 408f57f41dfb4b85e46f491a07712202 *man/has_sparse_elements.Rd 4bd13d4ebc04ef302968451b5553b3fa *man/sparse_character.Rd c2249ab831fbe8b20df82a4404357a99 *man/sparse_double.Rd 6ba93a672e5f518368fb37e3ad5a79eb *man/sparse_dummy.Rd f9bae0c2f64397d523db90db3df9ec33 *man/sparse_integer.Rd ac5568574a190b285dd3e706816e1fd0 *man/sparse_logical.Rd 5b29384d0bcc0d0a7c004788d7ddcbbe *man/sparse_mean.Rd fe284b156aecae433b083c753171cab3 *man/sparse_median.Rd 87188817918e0d2776f7f02dc5d45ff6 *man/sparse_sd.Rd 2e7badaf7ebb21bdb05bfcbaab6892d5 *man/sparse_var.Rd 1f8a43fd2fbac8e4267eaf6d8b07f44f *man/sparsevctrs-package.Rd 8b2633cec9dfa624f1e64551cd47eae9 *man/sparsevctrs_options.Rd f5528b313662effb7a86fcdace64adf5 *man/sparsity.Rd 9121b3734097957bed241cfe883a2225 *man/type-predicates.Rd 542c3e13a81b8c335dfd084478ab68dc *src/altrep-sparse-double.c 3872077dcbfd4d2b873274143d888181 *src/altrep-sparse-integer.c ee78e46f755d5858d0579ba4536c3d05 *src/altrep-sparse-logical.c e041fd3e49a366e3c3cea30d11dfa92d *src/altrep-sparse-string.c 05965ddb356d763e4cf625759f512f94 *src/init.c 256ae7a48cfa4932572b98480c927f3e *src/sparse-dummy.c c0b2c933ac676c8709d25ce083aa88b0 *src/sparse-dummy.h aaa11683a74953af6afdca569ef68238 *src/sparse-extractors.c 48959c1d662835c3c714540c68b8bced *src/sparse-extractors.h bdf8520f6456d42213edf51cbbf5fcff *src/sparse-utils.c ced2ab3654b1c27ba3c63f1d3b606a8c *src/sparse-utils.h da33a9c985baf18422f6b14b86bcd957 *tests/testthat.R d4ca423ad880b8d8f931899cfb402512 *tests/testthat/_snaps/coerce.md f3f3aa549ba0fe101075f35ed9285136 *tests/testthat/_snaps/sparse_character.md 3b400f8043e2a23f882ad9d1c9714fdd *tests/testthat/_snaps/sparse_double.md 40662a0e0e462a85b61a875a439bd382 *tests/testthat/_snaps/sparse_dummy.md fdcb1733cb0a6677d5c137236eed0c7c *tests/testthat/_snaps/sparse_integer.md 068201327ae131eaa2d8a73fef095e8b *tests/testthat/_snaps/sparse_logical.md df7ca6ccb30c42568877e1903d348942 *tests/testthat/_snaps/sparsity.md 81ab1373f081f7dd4f7dbcf88fc38900 *tests/testthat/test-coerce-vector.R 1146df108b3fdb01665386e43585e172 *tests/testthat/test-coerce.R 2de166470bde87f7914abfbba12b9fa1 *tests/testthat/test-extractors.R 4c7264534c25cae760cf85a191e01bd7 *tests/testthat/test-has_sparse_elements.R 36f6be1a254fd80d66fedcb744054e3d *tests/testthat/test-sparse_character.R 9ea5e6f4bda12a233be7c3f27caded6f *tests/testthat/test-sparse_double.R e18e95f798dcd108faaa0f1557292824 *tests/testthat/test-sparse_dummy.R ebc7fc31c2fcccd4d1cfc360c6654ea5 *tests/testthat/test-sparse_integer.R cbd1f9c0cc0e791f130e4b4cf9cfb24e *tests/testthat/test-sparse_logical.R 645f7831217a502902bf7e7f5c2e4cb6 *tests/testthat/test-sparse_mean.R 4981aea74a3635f324a70ef3a716c8f3 *tests/testthat/test-sparse_median.R 286b83e8b93e7a9841b370c3fffe191e *tests/testthat/test-sparse_sd.R 1400abca951291fc6447e73585747344 *tests/testthat/test-sparse_var.R 473f088a320f0d8c47307ebd6e6d22b6 *tests/testthat/test-sparsity.R 873eca5bc4792edd8010b28de319ba62 *tests/testthat/test-type-predicates.R 2a8c8deb6b66483c407724c3ef2a529f *vignettes/design.Rmd sparsevctrs/R/0000755000176200001440000000000014741572451013044 5ustar liggesuserssparsevctrs/R/import-standalone-types-check.R0000644000176200001440000002761614741321215021046 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-types-check.R # last-updated: 2023-03-13 # license: https://unlicense.org # dependencies: standalone-obj-type.R # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2023-03-13: # - Improved error messages of number checkers (@teunbrand) # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). # - Added `check_data_frame()` (@mgirlich). # # 2023-03-07: # - Added dependency on rlang (>= 1.1.0). # # 2023-02-15: # - Added `check_logical()`. # # - `check_bool()`, `check_number_whole()`, and # `check_number_decimal()` are now implemented in C. # # - For efficiency, `check_number_whole()` and # `check_number_decimal()` now take a `NULL` default for `min` and # `max`. This makes it possible to bypass unnecessary type-checking # and comparisons in the default case of no bounds checks. # # 2022-10-07: # - `check_number_whole()` and `_decimal()` no longer treat # non-numeric types such as factors or dates as numbers. Numeric # types are detected with `is.numeric()`. # # 2022-10-04: # - Added `check_name()` that forbids the empty string. # `check_string()` allows the empty string by default. # # 2022-09-28: # - Removed `what` arguments. # - Added `allow_na` and `allow_null` arguments. # - Added `allow_decimal` and `allow_infinite` arguments. # - Improved errors with absent arguments. # # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Added changelog. # # nocov start # Scalars ----------------------------------------------------------------- .standalone_types_check_dot_call <- .Call check_bool <- function(x, ..., allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x) && .standalone_types_check_dot_call(ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null)) { return(invisible(NULL)) } stop_input_type( x, c("`TRUE`", "`FALSE`"), ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_string <- function(x, ..., allow_empty = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = allow_empty, allow_na = allow_na, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a single string", ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .rlang_check_is_string <- function(x, allow_empty, allow_na, allow_null) { if (is_string(x)) { if (allow_empty || !is_string(x, "")) { return(TRUE) } } if (allow_null && is_null(x)) { return(TRUE) } if (allow_na && (identical(x, NA) || identical(x, na_chr))) { return(TRUE) } FALSE } check_name <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = FALSE, allow_na = FALSE, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a valid name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } IS_NUMBER_true <- 0 IS_NUMBER_false <- 1 IS_NUMBER_oob <- 2 check_number_decimal <- function(x, ..., min = NULL, max = NULL, allow_infinite = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = TRUE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = TRUE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_number_whole <- function(x, ..., min = NULL, max = NULL, allow_infinite = FALSE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = FALSE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = FALSE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .stop_not_number <- function(x, ..., exit_code, allow_decimal, min, max, allow_na, allow_null, arg, call) { if (allow_decimal) { what <- "a number" } else { what <- "a whole number" } if (exit_code == IS_NUMBER_oob) { min <- min %||% -Inf max <- max %||% Inf if (min > -Inf && max < Inf) { what <- sprintf("%s between %s and %s", what, min, max) } else if (x < min) { what <- sprintf("%s larger than or equal to %s", what, min) } else if (x > max) { what <- sprintf("%s smaller than or equal to %s", what, max) } else { abort("Unexpected state in OOB check", .internal = TRUE) } } stop_input_type( x, what, ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_symbol <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a symbol", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_arg <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an argument name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_call <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_call(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a defused call", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_environment <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_environment(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an environment", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_function <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_function(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_closure <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_closure(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an R function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_formula <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_formula(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a formula", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } # Vectors ----------------------------------------------------------------- check_character <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_character(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a character vector", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_logical <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_logical(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a logical vector", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_data_frame <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is.data.frame(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a data frame", ..., allow_null = allow_null, arg = arg, call = call ) } # nocov end sparsevctrs/R/sparse_sd.R0000644000176200001440000000174314741321215015145 0ustar liggesusers#' Calculate standard diviation from sparse vectors #' #' @param x A sparse numeric vector. #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. #' #' @details #' This function, as with any of the other helper functions assumes that the #' input `x` is a sparse numeric vector. This is done for performance reasons, #' and it is thus the users responsibility to perform input checking. #' #' Much like [sd()] it uses the denominator `n-1`. #' #' @return single numeric value. #' #' @examples #' sparse_sd( #' sparse_double(1000, 1, 1000) #' ) #' #' sparse_sd( #' sparse_double(1000, 1, 1000, default = 1) #' ) #' #' sparse_sd( #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_sd( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_sd( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), #' na_rm = TRUE #' ) #' #' @export sparse_sd <- function(x, na_rm = FALSE) { sqrt(sparse_var(x, na_rm = na_rm)) } sparsevctrs/R/sparsevctrs-package.R0000644000176200001440000000027214741321215017126 0ustar liggesusers#' @keywords internal "_PACKAGE" #' @import rlang #' @keywords internal NULL ## usethis namespace: start #' @useDynLib sparsevctrs, .registration = TRUE ## usethis namespace: end NULL sparsevctrs/R/sparse_dummy.R0000644000176200001440000000324214741321215015666 0ustar liggesusers#' Generate sparse dummy variables #' #' @param x A factor. #' @param one_hot A single logical value. Should the first factor level be #' included or not. Defaults to `FALSE`. #' #' @details #' Only factor variables can be used with [sparse_dummy()]. A call to #' `as.factor()` would be required for any other type of data. #' #' If only a single level is present after `one_hot` takes effect. Then the #' vector produced won't be sparse. #' #' A missing value at the `i`th element will produce missing values for all #' dummy variables at thr `i`th position. #' #' @return A list of sparse integer dummy variables. #' #' @examples #' x <- factor(c("a", "a", "b", "c", "d", "b")) #' #' sparse_dummy(x, one_hot = FALSE) #' #' x <- factor(c("a", "a", "b", "c", "d", "b")) #' #' sparse_dummy(x, one_hot = TRUE) #' #' x <- factor(c("a", NA, "b", "c", "d", NA)) #' #' sparse_dummy(x, one_hot = FALSE) #' #' x <- factor(c("a", NA, "b", "c", "d", NA)) #' #' sparse_dummy(x, one_hot = TRUE) #' @export sparse_dummy <- function(x, one_hot = TRUE) { if (!is.factor(x)) { cli::cli_abort("{.arg x} must be a factor, not {.obj_type_friendly {x}}.") } lvls <- levels(x) x <- as.integer(x) if (!one_hot) { lvls <- lvls[-1] x <- x - 1L } n_lvls <- length(lvls) if (n_lvls == 1 && one_hot) { res <- list(rep(1L, length(x))) names(res) <- lvls return(res) } counts <- tabulate(x, nbins = n_lvls) if (anyNA(x)) { n_missing <- sum(is.na(x)) counts <- counts + n_missing res <- .Call(ffi_sparse_dummy_na, x, lvls, counts, one_hot) } else { res <- .Call(ffi_sparse_dummy, x, lvls, counts, one_hot) } names(res) <- lvls res } sparsevctrs/R/sparse_double.R0000644000176200001440000000514314741321215016007 0ustar liggesusers#' Create sparse double vector #' #' Construction of vectors where only values and positions are recorded. The #' Length and default values determine all other information. #' #' @param values double vector, values of non-zero entries. #' @param positions integer vector, indices of non-zero entries. #' @param length integer value, Length of vector. #' @param default double value, value at indices not specified by `positions`. #' Defaults to `0`. Cannot be `NA`. #' #' @details #' #' `values` and `positions` are expected to be the same length, and are allowed #' to both have zero length. #' #' Allowed values for `value` is double and integer values. integer values will #' be coerced to doubles. Missing values such as `NA` and `NA_real_` are #' allowed. Everything else is disallowed, This includes `Inf` and `NaN`. The #' values are also not allowed to take the same value as `default`. #' #' `positions` should be integers or integer-like doubles. Everything else is #' not allowed. Positions should furthermore be positive (`0` not allowed), #' unique, and in increasing order. Lastly they should all be smaller that #' `length`. #' #' For developers: #' #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a #' message each time a sparse vector has been forced to materialize. #' #' @return sparse double vector #' #' @seealso [sparse_integer()] [sparse_character()] #' #' @examples #' sparse_double(numeric(), integer(), 10) #' #' sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) #' #' str( #' sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000) #' ) #' @export sparse_double <- function(values, positions, length, default = 0) { check_number_decimal(default) validate_length(length) if (!is.integer(length)) { length <- as.integer(length) } if (is.integer(default)) { default <- as.numeric(default) } if (identical(values, NA)) { values <- NA_real_ } validate_values_double(values) if (is.integer(values)) { values <- as.double(values) } validate_positions(positions, length, len_values = length(values)) positions <- as.integer(positions) if (any(values == default, na.rm = TRUE)) { offenders <- which(values == default) cli::cli_abort( c( x = "{.arg values} value must not be equal to the default {default}.", i = "{default} values at index: {offenders}." ) ) } new_sparse_double(values, positions, length, default) } new_sparse_double <- function(values, positions, length, default) { x <- list( values, positions, length, default ) .Call(ffi_altrep_new_sparse_double, x) } sparsevctrs/R/coerce.R0000644000176200001440000001350314741321215014417 0ustar liggesusers#' Coerce sparse data frame to sparse matrix #' #' Turning data frame with sparse columns into sparse matrix using #' [Matrix::sparseMatrix()]. #' #' @param x a data frame or tibble with sparse columns. #' @inheritParams rlang::args_error_context #' #' @details #' No checking is currently do to `x` to determine whether it contains sparse #' columns or not. Thus it works with any data frame. Needless to say, creating #' a sparse matrix out of a dense data frame is not ideal. #' #' @return sparse matrix #' #' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_tibble()] #' @examplesIf rlang::is_installed("Matrix") #' sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) #' names(sparse_tbl) <- letters[1:10] #' sparse_tbl <- as.data.frame(sparse_tbl) #' sparse_tbl #' #' res <- coerce_to_sparse_matrix(sparse_tbl) #' res #' @export coerce_to_sparse_matrix <- function(x, call = rlang::caller_env(0)) { rlang::check_installed("Matrix") if (!inherits(x, "data.frame")) { cli::cli_abort( "{.arg x} must be a {.cls data.frame}, not {.obj_type_friendly {x}}.", call = call ) } if (!all(vapply(x, is.numeric, logical(1)))) { offenders <- which(!vapply(x, is.numeric, logical(1))) offenders <- names(x)[offenders] cli::cli_abort( c( x = "All columns of {.arg x} must be numeric.", i = "Non-numeric columns: {.field {offenders}}." ), call = call ) } if (!any(vapply(x, is_sparse_numeric, logical(1)))) { res <- as.matrix(x) res <- Matrix::Matrix(res, sparse = TRUE) return(res) } if (!all(vapply(x, sparse_default, numeric(1)) == 0, na.rm = TRUE)) { offenders <- which(vapply(x, sparse_default, numeric(1)) != 0) for (i in offenders) { x[[i]] <- x[[i]][] } } all_positions <- lapply(x, sparse_positions) all_values <- lapply(x, sparse_values) all_rows <- rep(seq_along(x), times = lengths(all_positions)) all_positions <- unlist(all_positions, use.names = FALSE) all_values <- unlist(all_values, use.names = FALSE) # TODO: maybe faster to do this above? non_zero <- all_values != 0 all_rows <- all_rows[non_zero] all_positions <- all_positions[non_zero] all_values <- all_values[non_zero] n_row <- nrow(x) n_col <- ncol(x) if (identical(rownames(x), as.character(seq_len(nrow(x))))) { row_names <- NULL } else { row_names <- rownames(x) } res <- Matrix::sparseMatrix( i = all_positions, j = all_rows, x = all_values, dims = c(n_row, n_col), dimnames = list(row_names, colnames(x)) ) res } #' Coerce sparse matrix to tibble with sparse columns #' #' Turning a sparse matrix into a tibble. #' #' @param x sparse matrix. #' @inheritParams rlang::args_error_context #' #' @details #' The only requirement from the sparse matrix is that it contains column names. #' #' @return tibble with sparse columns #' #' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_matrix()] #' @examplesIf rlang::is_installed("tibble") #' set.seed(1234) #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) #' colnames(mat) <- letters[1:10] #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) #' sparse_mat #' #' res <- coerce_to_sparse_tibble(sparse_mat) #' res #' #' # All columns are sparse #' vapply(res, is_sparse_vector, logical(1)) #' @export coerce_to_sparse_tibble <- function(x, call = rlang::caller_env(0)) { rlang::check_installed("tibble") if (!any(methods::is(x) == "sparseMatrix")) { cli::cli_abort( "{.arg x} must be a {.cls sparseMatrix}, not {.obj_type_friendly {x}}.", call = call ) } if (!methods::is(x, "dgCMatrix")) { x <- methods::as(x, "generalMatrix") x <- methods::as(x, "CsparseMatrix") } if (is.null(colnames(x))) { cli::cli_abort( "{.arg x} must have column names.", call = call ) } res <- .sparse_matrix_to_list(x) res <- tibble::as_tibble(res) res } #' Coerce sparse matrix to data frame with sparse columns #' #' Turning a sparse matrix into a data frame #' #' @param x sparse matrix. #' @inheritParams rlang::args_error_context #' #' @details #' The only requirement from the sparse matrix is that it contains column names. #' #' @return data.frame with sparse columns #' #' @seealso [coerce_to_sparse_tibble()] [coerce_to_sparse_matrix()] #' @examplesIf rlang::is_installed("Matrix") #' set.seed(1234) #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) #' colnames(mat) <- letters[1:10] #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) #' sparse_mat #' #' res <- coerce_to_sparse_data_frame(sparse_mat) #' res #' #' # All columns are sparse #' vapply(res, is_sparse_vector, logical(1)) #' @export coerce_to_sparse_data_frame <- function(x, call = rlang::caller_env(0)) { if (!any(methods::is(x) == "sparseMatrix")) { cli::cli_abort( "{.arg x} must be a {.cls sparseMatrix}, not {.obj_type_friendly {x}}.", call = call ) } if (!methods::is(x, "dgCMatrix")) { x <- methods::as(x, "generalMatrix") x <- methods::as(x, "CsparseMatrix") } if (is.null(colnames(x))) { cli::cli_abort( "{.arg x} must have column names.", call = call ) } res <- .sparse_matrix_to_list(x) res <- as.data.frame(res) res } .sparse_matrix_to_list <- function(x) { values <- x@x x_positions <- x@i n_nonzero <- diff(x@p) x_length <- nrow(x) res <- list() start <- 1 for (i in seq_along(n_nonzero)) { if (n_nonzero[i] == 0) { res[[i]] <- sparse_double( values = double(), positions = double(), length = x_length ) next } index <- seq(start, start + n_nonzero[i] - 1) res[[i]] <- sparse_double( values = values[index], positions = x_positions[index] + 1, length = x_length ) start <- start + n_nonzero[i] } names(res) <- colnames(x) res } sparsevctrs/R/type-predicates.R0000644000176200001440000000433314741321215016262 0ustar liggesusers#' Sparse vector type checkers #' #' Helper functions to determine whether an vector is a sparse vector or not. #' #' @param x value to be checked. #' #' @details #' `is_sparse_vector()` is a general function that detects any type of sparse #' vector created with this package. `is_sparse_double()`, #' `is_sparse_integer()`, `is_sparse_character()`, and `is_sparse_logical()` are #' more specific functions that only detects the type. `is_sparse_numeric()` #' matches both sparse integers and doubles. #' #' @return single logical value #' #' @examples #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) #' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) #' #' is_sparse_vector(x_sparse) #' is_sparse_vector(x_dense) #' #' is_sparse_double(x_sparse) #' is_sparse_double(x_dense) #' #' is_sparse_character(x_sparse) #' is_sparse_character(x_dense) #' #' # Forced materialization #' is_sparse_vector(x_sparse[]) #' @name type-predicates NULL #' @rdname type-predicates #' @export is_sparse_vector <- function(x) { .Call(ffi_is_sparse_vector, x) } #' @rdname type-predicates #' @export is_sparse_numeric <- function(x) { res <- .Call(ffi_extract_altrep_class, x) if (is.null(res)) { return(FALSE) } res <- as.character(res[[1]]) res == "altrep_sparse_double" || res == "altrep_sparse_integer" } #' @rdname type-predicates #' @export is_sparse_double <- function(x) { res <- .Call(ffi_extract_altrep_class, x) if (is.null(res)) { return(FALSE) } res <- as.character(res[[1]]) res == "altrep_sparse_double" } #' @rdname type-predicates #' @export is_sparse_integer <- function(x) { res <- .Call(ffi_extract_altrep_class, x) if (is.null(res)) { return(FALSE) } res <- as.character(res[[1]]) res == "altrep_sparse_integer" } #' @rdname type-predicates #' @export is_sparse_character <- function(x) { res <- .Call(ffi_extract_altrep_class, x) if (is.null(res)) { return(FALSE) } res <- as.character(res[[1]]) res == "altrep_sparse_string" } #' @rdname type-predicates #' @export is_sparse_logical <- function(x) { res <- .Call(ffi_extract_altrep_class, x) if (is.null(res)) { return(FALSE) } res <- as.character(res[[1]]) res == "altrep_sparse_logical" } sparsevctrs/R/import-standalone-obj-type.R0000644000176200001440000002072714741321215020354 0ustar liggesusers# Standalone file: do not edit by hand # Source: # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-obj-type.R # last-updated: 2023-05-01 # license: https://unlicense.org # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2023-05-01: # - `obj_type_friendly()` now only displays the first class of S3 objects. # # 2023-03-30: # - `stop_input_type()` now handles `I()` input literally in `arg`. # # 2022-10-04: # - `obj_type_friendly(value = TRUE)` now shows numeric scalars # literally. # - `stop_friendly_type()` now takes `show_value`, passed to # `obj_type_friendly()` as the `value` argument. # # 2022-10-03: # - Added `allow_na` and `allow_null` arguments. # - `NULL` is now backticked. # - Better friendly type for infinities and `NaN`. # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Prefixed usage of rlang functions with `rlang::`. # # 2022-06-22: # - `friendly_type_of()` is now `obj_type_friendly()`. # - Added `obj_type_oo()`. # # 2021-12-20: # - Added support for scalar values and empty vectors. # - Added `stop_input_type()` # # 2021-06-30: # - Added support for missing arguments. # # 2021-04-19: # - Added support for matrices and arrays (#141). # - Added documentation. # - Added changelog. # # nocov start #' Return English-friendly type #' @param x Any R object. #' @param value Whether to describe the value of `x`. Special values #' like `NA` or `""` are always described. #' @param length Whether to mention the length of vectors and lists. #' @return A string describing the type. Starts with an indefinite #' article, e.g. "an integer vector". #' @noRd obj_type_friendly <- function(x, value = TRUE) { if (is_missing(x)) { return("absent") } if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- class(x)[[1L]] } return(sprintf("a <%s> object", type)) } if (!is_vector(x)) { return(.rlang_as_friendly_type(typeof(x))) } n_dim <- length(dim(x)) if (!n_dim) { if (!is_list(x) && length(x) == 1) { if (is_na(x)) { return(switch( typeof(x), logical = "`NA`", integer = "an integer `NA`", double = if (is.nan(x)) { "`NaN`" } else { "a numeric `NA`" }, complex = "a complex `NA`", character = "a character `NA`", .rlang_stop_unexpected_typeof(x) )) } show_infinites <- function(x) { if (x > 0) { "`Inf`" } else { "`-Inf`" } } str_encode <- function(x, width = 30, ...) { if (nchar(x) > width) { x <- substr(x, 1, width - 3) x <- paste0(x, "...") } encodeString(x, ...) } if (value) { if (is.numeric(x) && is.infinite(x)) { return(show_infinites(x)) } if (is.numeric(x) || is.complex(x)) { number <- as.character(round(x, 2)) what <- if (is.complex(x)) "the complex number" else "the number" return(paste(what, number)) } return(switch( typeof(x), logical = if (x) "`TRUE`" else "`FALSE`", character = { what <- if (nzchar(x)) "the string" else "the empty string" paste(what, str_encode(x, quote = "\"")) }, raw = paste("the raw value", as.character(x)), .rlang_stop_unexpected_typeof(x) )) } return(switch( typeof(x), logical = "a logical value", integer = "an integer", double = if (is.infinite(x)) show_infinites(x) else "a number", complex = "a complex number", character = if (nzchar(x)) "a string" else "\"\"", raw = "a raw value", .rlang_stop_unexpected_typeof(x) )) } if (length(x) == 0) { return(switch( typeof(x), logical = "an empty logical vector", integer = "an empty integer vector", double = "an empty numeric vector", complex = "an empty complex vector", character = "an empty character vector", raw = "an empty raw vector", list = "an empty list", .rlang_stop_unexpected_typeof(x) )) } } vec_type_friendly(x) } vec_type_friendly <- function(x, length = FALSE) { if (!is_vector(x)) { abort("`x` must be a vector.") } type <- typeof(x) n_dim <- length(dim(x)) add_length <- function(type) { if (length && !n_dim) { paste0(type, sprintf(" of length %s", length(x))) } else { type } } if (type == "list") { if (n_dim < 2) { return(add_length("a list")) } else if (is.data.frame(x)) { return("a data frame") } else if (n_dim == 2) { return("a list matrix") } else { return("a list array") } } type <- switch( type, logical = "a logical %s", integer = "an integer %s", numeric = , double = "a double %s", complex = "a complex %s", character = "a character %s", raw = "a raw %s", type = paste0("a ", type, " %s") ) if (n_dim < 2) { kind <- "vector" } else if (n_dim == 2) { kind <- "matrix" } else { kind <- "array" } out <- sprintf(type, kind) if (n_dim >= 2) { out } else { add_length(out) } } .rlang_as_friendly_type <- function(type) { switch( type, list = "a list", NULL = "`NULL`", environment = "an environment", externalptr = "a pointer", weakref = "a weak reference", S4 = "an S4 object", name = , symbol = "a symbol", language = "a call", pairlist = "a pairlist node", expression = "an expression vector", char = "an internal string", promise = "an internal promise", ... = "an internal dots object", any = "an internal `any` object", bytecode = "an internal bytecode object", primitive = , builtin = , special = "a primitive function", closure = "a function", type ) } .rlang_stop_unexpected_typeof <- function(x, call = caller_env()) { abort( sprintf("Unexpected type <%s>.", typeof(x)), call = call ) } #' Return OO type #' @param x Any R object. #' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, #' `"R6"`, or `"R7"`. #' @noRd obj_type_oo <- function(x) { if (!is.object(x)) { return("bare") } class <- inherits(x, c("R6", "R7_object"), which = TRUE) if (class[[1]]) { "R6" } else if (class[[2]]) { "R7" } else if (isS4(x)) { "S4" } else { "S3" } } #' @param x The object type which does not conform to `what`. Its #' `obj_type_friendly()` is taken and mentioned in the error message. #' @param what The friendly expected type as a string. Can be a #' character vector of expected types, in which case the error #' message mentions all of them in an "or" enumeration. #' @param show_value Passed to `value` argument of `obj_type_friendly()`. #' @param ... Arguments passed to [abort()]. #' @inheritParams args_error_context #' @noRd stop_input_type <- function(x, what, ..., allow_na = FALSE, allow_null = FALSE, show_value = TRUE, arg = caller_arg(x), call = caller_env()) { # From standalone-cli.R cli <- env_get_list( nms = c("format_arg", "format_code"), last = topenv(), default = function(x) sprintf("`%s`", x), inherit = TRUE ) if (allow_na) { what <- c(what, cli$format_code("NA")) } if (allow_null) { what <- c(what, cli$format_code("NULL")) } if (length(what)) { what <- oxford_comma(what) } if (inherits(arg, "AsIs")) { format_arg <- identity } else { format_arg <- cli$format_arg } message <- sprintf( "%s must be %s, not %s.", format_arg(arg), what, obj_type_friendly(x, value = show_value) ) abort(message, ..., call = call, arg = arg) } oxford_comma <- function(chr, sep = ", ", final = "or") { n <- length(chr) if (n < 2) { return(chr) } head <- chr[seq_len(n - 1)] last <- chr[n] head <- paste(head, collapse = sep) # Write a or b. But a, b, or c. if (n > 2) { paste0(head, sep, final, " ", last) } else { paste0(head, " ", final, " ", last) } } # nocov end sparsevctrs/R/options.R0000644000176200001440000000201014741321215014641 0ustar liggesusers#' sparsevctrs options #' #' These options can be set with `options()`. #' #' ## sparsevctrs.verbose_materialize #' #' This option is meant to be used as a diagnostic tool. Materialization of #' sparse vectors are done silently by default. This can make it hard to #' determine if your code is doing what you want. #' #' Setting `sparsevctrs.verbose_materialize` is a way to alert when #' materialization occurs. Note that only the first materialization is counted #' for the options below, as the materialized vector is cached. #' #' Setting `sparsevctrs.verbose_materialize = 1` or #' `sparsevctrs.verbose_materialize = TRUE` will result in a message being #' emitted each time a sparse vector is materialized. #' #' Setting `sparsevctrs.verbose_materialize = 2` will result in a warning being #' thrown each time a sparse vector is materialized. #' #' Setting `sparsevctrs.verbose_materialize = 3` will result in an error being #' thrown each time a sparse vector is materialized. #' #' @name sparsevctrs_options NULLsparsevctrs/R/sparsity.R0000644000176200001440000000467314741572451015057 0ustar liggesusers#' Calculate sparsity of data frames, matrices, and sparse matrices #' #' Turning data frame with sparse columns into sparse matrix using #' [Matrix::sparseMatrix()]. #' #' @param x a data frame, matrix of sparse matrix. #' @param sample a integer or `NULL`. Number of rows to sample to estimate #' sparsity. If `NULL` then no sampling is performed. Will not be used when #' `x` is a sparse matrix. Defaults to `NULL`. #' #' @details #' Only numeric 0s are considered zeroes in this calculations. Missing values, #' logical vectors and then string `"0"` aren't counted. #' #' @return a single number, between 0 and 1. #' #' @examples #' #' # data frame #' sparsity(mtcars) #' #' # Matrix #' set.seed(1234) #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) #' colnames(mat) <- letters[1:10] #' #' sparsity(mat) #' #' # Sparse matrix #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) #' #' sparsity(sparse_mat) #' @export sparsity <- function(x, sample = NULL) { check_number_whole(sample, min = 1, allow_null = TRUE) x_type <- input_type(x) if (x_type != "sparse_matrix") { nrows <- nrow(x) if (!is.null(sample)) { if (nrows < sample) { sample <- nrows } x <- x[sample(nrows, sample), ] } } res <- switch( x_type, data.frame = sparsity_df(x), matrix = sparsity_mat(x), sparse_matrix = sparsity_sparse_mat(x) ) res } input_type <- function(x, call = rlang::caller_env()) { if (is.data.frame(x)) { return("data.frame") } else if (is.matrix(x)) { return("matrix") } else if (any(methods::is(x) == "sparseMatrix")) { return("sparse_matrix") } else { cli::cli_abort( "{.arg x} must be a data frame, matrix, or sparse matrix, Not {.obj_type_friendly {x}}.", call = call ) } } count_zeroes <- function(x) { if (!is.numeric(x)) { return(0) } if (is_sparse_vector(x)) { default <- sparse_default(x) values <- sparse_values(x) len <- length(x) if (default == 0) { res <- len - length(values) } else { res <- length(values) } } else { res <- sum(x == 0, na.rm = TRUE) } res } sparsity_df <- function(x) { res <- vapply(x, count_zeroes, double(1)) res <- sum(res) / (nrow(x) * ncol(x)) res } sparsity_mat <- function(x) { if (!is.numeric(x)) { return(0) } sum(x == 0, na.rm = TRUE) / length(x) } sparsity_sparse_mat <- function(x) { 1 - (length(x@x) / length(x)) } sparsevctrs/R/extractors.R0000644000176200001440000000256714741321215015365 0ustar liggesusers#' Information extraction from sparse vectors #' #' Extract positions, values, and default from sparse vectors without the need #' to materialize vector. #' #' @details #' #' `sparse_default()` returns `NA` when applied to non-sparse vectors. This is #' done to have an indicator of non-sparsity. #' #' @param x vector to be extracted from. #' #' @details #' for ease of use, these functions also works on non-sparse variables. #' #' @return vectors of requested attributes #' #' @examples #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) #' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) #' #' sparse_positions(x_sparse) #' sparse_values(x_sparse) #' sparse_default(x_sparse) #' #' sparse_positions(x_dense) #' sparse_values(x_dense) #' sparse_default(x_dense) #' #' x_sparse_3 <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10, default = 3) #' sparse_default(x_sparse_3) #' @name extractors NULL #' @rdname extractors #' @export sparse_positions <- function(x) { if (!is_sparse_vector(x)) { return(seq_along(x)) } .Call(ffi_altrep_sparse_positions, x) } #' @rdname extractors #' @export sparse_values <- function(x) { if (!is_sparse_vector(x)) { return(x) } .Call(ffi_altrep_sparse_values, x) } #' @rdname extractors #' @export sparse_default <- function(x) { if (!is_sparse_vector(x)) { return(NA) } .Call(ffi_altrep_sparse_default, x) } sparsevctrs/R/sparse_logical.R0000644000176200001440000000456114741321215016152 0ustar liggesusers#' Create sparse logical vector #' #' Construction of vectors where only values and positions are recorded. The #' Length and default values determine all other information. #' #' @param values logical vector, values of non-zero entries. #' @param positions integer vector, indices of non-zero entries. #' @param length integer value, Length of vector. #' @param default logical value, value at indices not specified by `positions`. #' Defaults to `FALSE`. Cannot be `NA`. #' #' @details #' #' `values` and `positions` are expected to be the same length, and are allowed #' to both have zero length. #' #' Allowed values for `value` are logical values. Missing values such as `NA` #' and `NA_real_` are allowed. Everything else is disallowed, The values are #' also not allowed to take the same value as `default`. #' #' `positions` should be integers or integer-like doubles. Everything else is #' not allowed. Positions should furthermore be positive (`0` not allowed), #' unique, and in increasing order. Lastly they should all be smaller that #' `length`. #' #' For developers: #' #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a #' message each time a sparse vector has been forced to materialize. #' #' @return sparse logical vector #' #' @seealso [sparse_double()] [sparse_integer()] [sparse_character()] #' #' @examples #' sparse_logical(logical(), integer(), 10) #' #' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10) #' #' str( #' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000) #' ) #' @export sparse_logical <- function(values, positions, length, default = FALSE) { check_bool(default) validate_length(length) if (!is.integer(length)) { length <- as.integer(length) } validate_values_logical(values) validate_positions(positions, length, len_values = length(values)) positions <- as.integer(positions) if (any(values == default, na.rm = TRUE)) { offenders <- which(values == default) cli::cli_abort( c( x = "{.arg values} value must not be equal to the default {default}.", i = "{default} values at index: {offenders}." ) ) } new_sparse_logical(values, positions, length, default) } new_sparse_logical <- function(values, positions, length, default) { x <- list( values, positions, length, default ) .Call(ffi_altrep_new_sparse_logical, x) } sparsevctrs/R/coerce-vector.R0000644000176200001440000000403314741321215015715 0ustar liggesusers#' Coerce numeric vector to sparse double #' #' Takes a numeric vector, integer or double, and turn it into a sparse double #' vector. #' #' @param x a numeric vector. #' @param default default value to use. Defaults to `0`. #' #' The values of `x` must be double or integer. It must not contain any `Inf` or #' `NaN` values. #' #' @return sparse vectors #' #' @examples #' x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, 0) #' x_sparse <- as_sparse_double(x_dense) #' x_sparse #' #' is_sparse_double(x_sparse) #' @name coerce-vector NULL #' @rdname coerce-vector #' @export as_sparse_double <- function(x, default = 0) { if (is_sparse_double(x)) { return(x) } validate_values_double(x) check_number_decimal(default) index <- which(x != default | is.na(x)) sparse_double( values = x[index], positions = index, length = length(x), default = default ) } #' @rdname coerce-vector #' @export as_sparse_integer <- function(x, default = 0L) { if (is_sparse_integer(x)) { return(x) } validate_values_integer(x) check_number_whole(default) values <- vctrs::vec_cast(x, integer()) default <- vctrs::vec_cast(default, integer()) index <- which(x != default | is.na(x)) sparse_integer( values = x[index], positions = index, length = length(x), default = default ) } #' @rdname coerce-vector #' @export as_sparse_character <- function(x, default = "") { if (is_sparse_character(x)) { return(x) } check_string(default) values <- vctrs::vec_cast(x, character()) default <- vctrs::vec_cast(default, character()) index <- which(x != default | is.na(x)) sparse_character( values = x[index], positions = index, length = length(x), default = default ) } #' @rdname coerce-vector #' @export as_sparse_logical <- function(x, default = FALSE) { if (is_sparse_logical(x)) { return(x) } check_bool(default) index <- which(x != default | is.na(x)) sparse_logical( values = x[index], positions = index, length = length(x), default = default ) }sparsevctrs/R/has_sparse_elements.R0000644000176200001440000000212414741321215017200 0ustar liggesusers#' Check for sparse elements #' #' This function checks to see if a data.frame, tibble or list contains one or #' more sparse vectors. #' #' @param x a data frame, tibble, or list. #' #' @details #' The checking in this function is done using [is_sparse_vector()], but is #' implemented using an early exit pattern to provide fast performance for wide #' data.frames. #' #' This function does not test whether `x` is a data.frame, tibble or list. It #' simply iterates over the elements and sees if they are sparse vectors. #' #' @return A single logical value. #' #' @examplesIf rlang::is_installed("Matrix") #' set.seed(1234) #' n_cols <- 10000 #' mat <- matrix(sample(0:1, n_cols * 10, TRUE, c(0.9, 0.1)), ncol = n_cols) #' colnames(mat) <- as.character(seq_len(n_cols)) #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) #' #' res <- coerce_to_sparse_tibble(sparse_mat) #' has_sparse_elements(res) #' #' has_sparse_elements(mtcars) #' @export has_sparse_elements <- function(x) { res <- FALSE for (elt in x) { if (is_sparse_vector(elt)) { res <- TRUE break } } res }sparsevctrs/R/sparse_median.R0000644000176200001440000000244514741321215015774 0ustar liggesusers#' Calculate median from sparse vectors #' #' @param x A sparse numeric vector. #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. #' #' @details #' This function, as with any of the other helper functions assumes that the #' input `x` is a sparse numeric vector. This is done for performance reasons, #' and it is thus the users responsibility to perform input checking. #' #' @return single numeric value. #' #' @examples #' sparse_median( #' sparse_double(1000, 1, 1000) #' ) #' #' sparse_median( #' sparse_double(1000, 1, 1000, default = 1) #' ) #' #' sparse_median( #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_median( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_median( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), #' na_rm = TRUE #' ) #' #' @export sparse_median <- function(x, na_rm = FALSE) { default <- sparse_default(x) values <- sparse_values(x) values_len <- length(values) if (values_len == 0) { return(default) } x_len <- length(x) if ((x_len / 2) > values_len) { if (na_rm) { return(default) } else { if (any(is.na(values))) { return(NA_real_) } else { return(default) } } } stats::median(x, na.rm = na_rm) } sparsevctrs/R/sparse_mean.R0000644000176200001440000000237114741321215015455 0ustar liggesusers#' Calculate mean from sparse vectors #' #' @param x A sparse numeric vector. #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. #' #' @details #' This function, as with any of the other helper functions assumes that the #' input `x` is a sparse numeric vector. This is done for performance reasons, #' and it is thus the users responsibility to perform input checking. #' #' @return single numeric value. #' #' @examples #' sparse_mean( #' sparse_double(1000, 1, 1000) #' ) #' #' sparse_mean( #' sparse_double(1000, 1, 1000, default = 1) #' ) #' #' sparse_mean( #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_mean( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_mean( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), #' na_rm = TRUE #' ) #' #' @export sparse_mean <- function(x, na_rm = FALSE) { default <- sparse_default(x) values <- sparse_values(x) len_values <- length(values) if (len_values == 0) { return(default) } x_len <- length(x) res <- sum(values, na.rm = na_rm) if (default != 0) { res <- res + (x_len - len_values) * default } if (na_rm) { x_len <- x_len - sum(is.na(values)) } res <- res / x_len res } sparsevctrs/R/sparse_integer.R0000644000176200001440000000542514741321215016175 0ustar liggesusers#' Create sparse integer vector #' #' Construction of vectors where only values and positions are recorded. The #' Length and default values determine all other information. #' #' @param values integer vector, values of non-zero entries. #' @param positions integer vector, indices of non-zero entries. #' @param length integer value, Length of vector. #' @param default integer value, value at indices not specified by `positions`. #' Defaults to `0L`. Cannot be `NA`. #' #' @details #' #' `values` and `positions` are expected to be the same length, and are allowed #' to both have zero length. #' #' Allowed values for `value` is integer values. This means that the double #' vector `c(1, 5, 4)` is accepted as it can be losslessly converted to the #' integer vector `c(1L, 5L, 4L)`. Missing values such as `NA` and `NA_real_` #' are allowed. Everything else is disallowed, This includes `Inf` and `NaN`. #' The values are also not allowed to take the same value as `default`. #' #' `positions` should be integers or integer-like doubles. Everything else is #' not allowed. Positions should furthermore be positive (`0` not allowed), #' unique, and in increasing order. Lastly they should all be smaller that #' `length`. #' #' For developers: #' #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a #' message each time a sparse vector has been forced to materialize. #' #' @return sparse integer vector #' #' @seealso [sparse_double()] [sparse_character()] #' #' @examples #' sparse_integer(integer(), integer(), 10) #' #' sparse_integer(c(4, 5, 7), c(2, 5, 10), 10) #' #' str( #' sparse_integer(c(4, 5, 7), c(2, 5, 10), 1000000000) #' ) #' @export sparse_integer <- function(values, positions, length, default = 0L) { check_number_whole(default) validate_length(length) if (!is.integer(length)) { length <- as.integer(length) } if (any(is.nan(values))) { offenders <- which(is.nan(values)) cli::cli_abort( c( x = "{.arg values} must not contain NaN values.", i = "NaN values at index: {offenders}." ) ) } values <- vctrs::vec_cast(values, integer()) default <- vctrs::vec_cast(default, integer()) validate_positions(positions, length, len_values = length(values)) positions <- as.integer(positions) if (any(values == default, na.rm = TRUE)) { offenders <- which(values == default) cli::cli_abort( c( x = "{.arg values} value must not be equal to the default {default}.", i = "{default} values at index: {offenders}." ) ) } new_sparse_integer(values, positions, length, default) } new_sparse_integer <- function(values, positions, length, default) { x <- list( values, positions, length, default ) .Call(ffi_altrep_new_sparse_integer, x) } sparsevctrs/R/validate-input.R0000644000176200001440000000770314741321215016112 0ustar liggesusersvalidate_positions <- function(positions, length, len_values, call = rlang::caller_env()) { if (!is.numeric(positions)) { cli::cli_abort( "{.arg positions} must be a integer vector, \\ not {.obj_type_friendly {positions}}.", call = call ) } if (any(is.infinite(positions))) { offenders <- which(is.infinite(positions)) cli::cli_abort( c( x = "{.arg positions} must not contain infinite values.", i = "Infinite values at index: {offenders}." ), call = call ) } if (any(is.nan(positions))) { offenders <- which(is.nan(positions)) cli::cli_abort( c( x = "{.arg positions} must not contain NaN values.", i = "NaN values at index: {offenders}." ), call = call ) } if (!is.integer(positions)) { if (any(round(positions) != positions, na.rm = TRUE)) { offenders <- which(round(positions) != positions) cli::cli_abort( c( x = "{.arg positions} must contain integer values.", i = "Non-integer values at index: {offenders}." ), call = call ) } } len_positions <- length(positions) if (len_values != len_positions) { cli::cli_abort( "{.arg value} ({len_values}) and {.arg positions} ({len_positions}) \\ must have the same length.", call = call ) } if (anyDuplicated(positions) > 0) { offenders <- which(duplicated(positions)) cli::cli_abort( c( x = "{.arg positions} must not contain any duplicate values.", i = "Duplicate values at index: {offenders}." ), call = call ) } if (is.unsorted(positions)) { cli::cli_abort( "{.arg positions} must be sorted in increasing order.", call = call ) } if (len_positions > 0 && max(positions) > length) { offenders <- which(positions > length) cli::cli_abort( c( x = "{.arg positions} value must not be larger than {.arg length}.", i = "Offending values at index: {offenders}." ), call = call ) } if (len_positions > 0 && min(positions) < 1) { offenders <- which(positions < 1) cli::cli_abort( c( x = "{.arg positions} value must positive.", i = "Non-positive values at index: {offenders}." ), call = call ) } } validate_values_double <- function(values, call = rlang::caller_env()) { if (!is.numeric(values)) { cli::cli_abort( "{.arg values} must be a numeric vector, \\ not {.obj_type_friendly {values}}.", call = call ) } if (any(is.infinite(values))) { offenders <- which(is.infinite(values)) cli::cli_abort( c( x = "{.arg values} must not contain infinite values.", i = "Infinite values at index: {offenders}." ), call = call ) } if (any(is.nan(values))) { offenders <- which(is.nan(values)) cli::cli_abort( c( x = "{.arg values} must not contain NaN values.", i = "NaN values at index: {offenders}." ), call = call ) } } validate_values_integer <- function(values, call = rlang::caller_env()) { values <- vctrs::vec_cast(values, integer()) if (!is.integer(values)) { cli::cli_abort( "{.arg values} must be a integer vector, \\ not {.obj_type_friendly {values}}.", call = call ) } } validate_values_logical <- function(values, call = rlang::caller_env()) { if (!is.logical(values)) { cli::cli_abort( "{.arg values} must be a logical vector, \\ not {.obj_type_friendly {values}}.", call = call ) } } validate_length <- function(length, call = rlang::caller_env()) { check_number_whole(length, min = 0, call = call) if (length > .Machine$integer.max) { cli::cli_abort( "{.arg length} must be less than {(.Machine$integer.max)}, not {length}.", call = call ) } }sparsevctrs/R/sparse_character.R0000644000176200001440000000472514741321215016476 0ustar liggesusers#' Create sparse character vector #' #' Construction of vectors where only values and positions are recorded. The #' Length and default values determine all other information. #' #' @param values integer vector, values of non-zero entries. #' @param positions integer vector, indices of non-zero entries. #' @param length integer value, Length of vector. #' @param default integer value, value at indices not specified by `positions`. #' Defaults to `""`. Cannot be `NA`. #' #' @details #' #' `values` and `positions` are expected to be the same length, and are allowed #' to both have zero length. #' #' Allowed values for `value` are character values. Missing values such as `NA` #' and `NA_real_` are allowed as they are turned into `NA_character_`. #' Everything else is disallowed. The values are also not allowed to take the #' same value as `default`. #' #' `positions` should be integers or integer-like doubles. Everything else is #' not allowed. Positions should furthermore be positive (`0` not allowed), #' unique, and in increasing order. Lastly they should all be smaller that #' `length`. #' #' For developers: #' #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a #' message each time a sparse vector has been forced to materialize. #' #' @return sparse character vector #' #' @seealso [sparse_double()] [sparse_integer()] #' #' @examples #' sparse_character(character(), integer(), 10) #' #' sparse_character(c("A", "C", "E"), c(2, 5, 10), 10) #' #' str( #' sparse_character(c("A", "C", "E"), c(2, 5, 10), 1000000000) #' ) #' @export sparse_character <- function(values, positions, length, default = "") { check_string(default) validate_length(length) if (!is.integer(length)) { length <- as.integer(length) } values <- vctrs::vec_cast(values, character()) default <- vctrs::vec_cast(default, character()) validate_positions(positions, length, len_values = length(values)) positions <- as.integer(positions) if (any(values == default, na.rm = TRUE)) { offenders <- which(values == default) cli::cli_abort( c( x = "{.arg values} value must not be equal to the default {default}.", i = "{default} values at index: {offenders}." ) ) } new_sparse_character(values, positions, length, default) } new_sparse_character <- function(values, positions, length, default) { x <- list( values, positions, length, default ) .Call(ffi_altrep_new_sparse_string, x) } sparsevctrs/R/sparse_var.R0000644000176200001440000000257214741321215015330 0ustar liggesusers#' Calculate variance from sparse vectors #' #' @param x A sparse numeric vector. #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`. #' #' @details #' This function, as with any of the other helper functions assumes that the #' input `x` is a sparse numeric vector. This is done for performance reasons, #' and it is thus the users responsibility to perform input checking. #' #' Much like [var()] it uses the denominator `n-1`. #' #' @return single numeric value. #' #' @examples #' sparse_var( #' sparse_double(1000, 1, 1000) #' ) #' #' sparse_var( #' sparse_double(1000, 1, 1000, default = 1) #' ) #' #' sparse_var( #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_var( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) #' ) #' #' sparse_var( #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), #' na_rm = TRUE #' ) #' #' @export sparse_var <- function(x, na_rm = FALSE) { values <- sparse_values(x) len_values <- length(values) if (len_values == 0) { return(0) } default <- sparse_default(x) x_len <- length(x) mean <- sparse_mean(x, na_rm = na_rm) res <- sum((values - mean) ^ 2, na.rm = na_rm) res <- res + (default - mean) ^ 2 * (x_len - len_values) denominator <- x_len - 1 if (na_rm) { denominator <- denominator - sum(is.na(values)) } res <- res / denominator res } sparsevctrs/vignettes/0000755000176200001440000000000014744254164014654 5ustar liggesuserssparsevctrs/vignettes/design.Rmd0000644000176200001440000001150314741321215016556 0ustar liggesusers--- title: "Design behind sparsevctrs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Design behind sparsevctrs} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(sparsevctrs) ``` The sparsevctrs package produces 3 things; ALTREP classes, matrix/data.frame converting functions, helper functions. This document outlines the rationale behind each of these and the decisions behind them. The primary objective of this package is to provide tools to work with sparse data in data.frames/tibbles. The next highest priority is execution speed. This means that algorithms and methods in this package are written to minimize memory allocations whenever possible, once that is done, running the code as fast as we can. These choices are made because this package was written to deal with tasks that were otherwise not possible due to memory constraints. ## Altrep Functions The functions `sparse_double()` and its relatives are used to construct sparse vectors of the noted type. To work they all need 4 pieces of information: - `values` - `positions` - `length` - `default` (defaults to 0) The values need to match the type of the function name or be easily coerced into the type (double -> integer). The positions should be integers or doubles that can losslessly be turned into integers. The length should be a single non-negative integer-like value. Values and positions are paired, and will thus be expected to be the same length, furthermore, positions are expected to be sorted in increasing order with no duplicates. The ordering is done to let the various extraction methods work as efficiently as possible. These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. The input of these functions mirrors the values stored in the ALTREP class that they produce. ## Converting Functions 3 functions fall into this category: - `coerce_to_sparse_data_frame()` - `coerce_to_sparse_tibble()` - `coerce_to_sparse_matrix()` the first two take a sparse matrix from the Matrix package and produce a data.frame/tibble with sparse columns. The last one takes a data.frame/tibble with sparse columns and produces a sparse matrix using the Matrix package. These functions are expected to be inverse of each other, such that `coerce_to_sparse_matrix(coerce_to_sparse_data_frame(x))` returns `x` back. They are made to be highly performant both in terms of speed and memory consumption, Meaning that sparsity is applied when appropriate. These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. It is in part why data.frames with sparse vectors with different can't be used with `coerce_to_sparse_matrix()` yet. ## Helper Functions There are 3 types of helper functions. First, we have the `is_*` family of functions. The specific `is_sparse_double()` and more general `is_sparse_vector()` can be used as a way to determine whether a vector is an ALTREP sparse vector. This is otherwise hard to tell as `as.numeric()` can't tell the difference. Secondly, we have the extraction functions. They are `sparse_values()` and `sparse_positions()`. These extract the values and positions respectively, without materializing the whole dense vector. These functions are made to work with non-sparse vectors as well to make them more ergonomic for the user. Internally they call `is_sparse_vector()`, so the choice to return something useful as the alternative wasn't hard. There is no `sparse_length()` function as `length()` works with these types of The last type of helper function is less clearly defined and is expanded as needed. The functions provide alternatives to functions that don't have ALTREP support. Such as `mean()`. Calling `mean()` on a sparse vector will force materialization, and then calculate the mean. This is memory inefficient as it could have been calculated like so. ```r sum(sparse_values(x)) / length(x) ``` These functions, all starting with the name prefix `sparse_*`, are made to work with non-sparse vectors for the same reasons listed above regarding ergonomic use. ## FAQ > Why aren't the results returned as {vctrs} classes? As it stands right now, it is viewed to be beneficial to have the users not be alerted to these vectors as they are expected to be used internally in packages and rarely by the end user. Furthermore having these sparse vectors produce the same result as dense vectors with `class()` is a big plus. > Will this package try to replace the {Matrix} package? Not at all. The sparse vector types provided in this package mimic those created with `Matrix::sparseVector()`. They work with different types and allow for different defaults. None of the matrix operations will be reimplemented here.sparsevctrs/src/0000755000176200001440000000000014744254164013433 5ustar liggesuserssparsevctrs/src/sparse-extractors.c0000644000176200001440000000045114741321215017255 0ustar liggesusers#include "sparse-extractors.h" SEXP ffi_altrep_sparse_positions(SEXP x) { SEXP out = extract_pos(x); return out; } SEXP ffi_altrep_sparse_values(SEXP x) { SEXP out = extract_val(x); return out; } SEXP ffi_altrep_sparse_default(SEXP x) { SEXP out = extract_default(x); return out; } sparsevctrs/src/sparse-utils.c0000644000176200001440000000734014741321215016223 0ustar liggesusers#include "sparse-utils.h" SEXP extract_val(SEXP x) { SEXP data1 = R_altrep_data1(x); SEXP out = VECTOR_ELT(data1, 0); return out; } SEXP extract_pos(SEXP x) { SEXP data1 = R_altrep_data1(x); SEXP out = VECTOR_ELT(data1, 1); return out; } R_xlen_t extract_len(SEXP x) { SEXP data1 = R_altrep_data1(x); SEXP len = VECTOR_ELT(data1, 2); R_xlen_t out = (R_xlen_t) INTEGER_ELT(len, 0); return out; } SEXP extract_default(SEXP x) { SEXP data1 = R_altrep_data1(x); SEXP out = VECTOR_ELT(data1, 3); return out; } double extract_default_double(SEXP x) { SEXP default_val = extract_default(x); double out = REAL_ELT(default_val, 0); return out; } int extract_default_integer(SEXP x) { SEXP default_val = extract_default(x); int out = INTEGER_ELT(default_val, 0); return out; } SEXP extract_default_string(SEXP x) { SEXP default_val = extract_default(x); SEXP out = STRING_ELT(default_val, 0); return out; } Rboolean extract_default_logical(SEXP x) { SEXP default_val = extract_default(x); Rboolean out = LOGICAL_ELT(default_val, 0); return out; } bool is_altrep(SEXP x) { return (bool) ALTREP(x); } SEXP ffi_extract_altrep_class(SEXP x) { if (!is_altrep(x)) { return (R_NilValue); } return ATTRIB(ALTREP_CLASS(x)); } static inline SEXP altrep_package(SEXP x) { return VECTOR_ELT(Rf_PairToVectorList(ATTRIB(ALTREP_CLASS(x))), 1); } SEXP ffi_is_sparse_vector(SEXP x) { if (!is_altrep(x)) { return (Rf_ScalarLogical(FALSE)); } return Rf_ScalarLogical(altrep_package(x) == Rf_install("sparsevctrs")); } static inline R_xlen_t midpoint(R_xlen_t lhs, R_xlen_t rhs) { return lhs + (rhs - lhs) / 2; } R_xlen_t binary_search(int needle, const int* v_haystack, R_xlen_t size) { R_xlen_t loc_lower_bound = 0; R_xlen_t loc_upper_bound = size - 1; while (loc_lower_bound <= loc_upper_bound) { const R_xlen_t loc_middle_bound = midpoint(loc_lower_bound, loc_upper_bound); const R_xlen_t haystack = v_haystack[loc_middle_bound]; if (needle == haystack) { return loc_middle_bound; } else if (needle < haystack) { loc_upper_bound = loc_middle_bound - 1; } else { // needle > haystack loc_lower_bound = loc_middle_bound + 1; } } return size; } bool is_index_handleable(SEXP x) { if (TYPEOF(x) != INTSXP) { // i.e. can't handle indexing for long vectors return false; } R_xlen_t size = Rf_xlength(x); const int* v_x = INTEGER_RO(x); for (R_xlen_t i = 0; i < size; ++i) { const int elt = v_x[i]; if (elt == NA_INTEGER) { continue; } if (elt == 0) { // `0` indices would create a result with a size `< length(indx)`, and we // can't easily handle that right now return false; } if (elt < 0) { // Pathological case, should never happen return false; } } return true; } void verbose_materialize(void) { SEXP option = Rf_GetOption1(Rf_install("sparsevctrs.verbose_materialize")); if (!Rf_isNull(option)) { if (TYPEOF(option) == LGLSXP) { Rprintf("sparsevctrs: Sparse vector materialized\n"); } if (TYPEOF(option) == REALSXP) { if (*REAL_RO(option) == 3) { Rf_error("sparsevctrs: Sparse vector materialized"); } else if (*REAL_RO(option) == 2) { Rf_warning("sparsevctrs: Sparse vector materialized"); } else { Rprintf("sparsevctrs: Sparse vector materialized\n"); } } if (TYPEOF(option) == INTSXP) { if (*INTEGER_RO(option) == 3) { Rf_error("sparsevctrs: Sparse vector materialized"); } else if (*INTEGER_RO(option) == 2) { Rf_warning("sparsevctrs: Sparse vector materialized"); } else { Rprintf("sparsevctrs: Sparse vector materialized\n"); } } } } sparsevctrs/src/init.c0000644000176200001440000000366114741321215014535 0ustar liggesusers#include #include "sparse-extractors.h" #include "sparse-utils.h" #include "sparse-dummy.h" // Defined in altrep-sparse-double.c extern SEXP ffi_altrep_new_sparse_double(SEXP); extern void sparsevctrs_init_altrep_sparse_double(DllInfo*); // Defined in altrep-sparse-integer.c extern SEXP ffi_altrep_new_sparse_integer(SEXP); extern void sparsevctrs_init_altrep_sparse_integer(DllInfo*); // Defined in altrep-sparse-string.c extern SEXP ffi_altrep_new_sparse_string(SEXP); extern void sparsevctrs_init_altrep_sparse_string(DllInfo*); // Defined in altrep-sparse-logical.c extern SEXP ffi_altrep_new_sparse_logical(SEXP); extern void sparsevctrs_init_altrep_sparse_logical(DllInfo*); static const R_CallMethodDef CallEntries[] = { {"ffi_altrep_new_sparse_double", (DL_FUNC) &ffi_altrep_new_sparse_double, 1 }, {"ffi_altrep_new_sparse_integer", (DL_FUNC) &ffi_altrep_new_sparse_integer, 1}, {"ffi_altrep_new_sparse_string", (DL_FUNC) &ffi_altrep_new_sparse_string, 1 }, {"ffi_altrep_new_sparse_logical", (DL_FUNC) &ffi_altrep_new_sparse_logical, 1}, {"ffi_altrep_sparse_positions", (DL_FUNC) &ffi_altrep_sparse_positions, 1}, {"ffi_altrep_sparse_values", (DL_FUNC) &ffi_altrep_sparse_values, 1}, {"ffi_altrep_sparse_default", (DL_FUNC) &ffi_altrep_sparse_default, 1}, {"ffi_extract_altrep_class", (DL_FUNC) &ffi_extract_altrep_class, 1}, {"ffi_is_sparse_vector", (DL_FUNC) &ffi_is_sparse_vector, 1}, {"ffi_sparse_dummy", (DL_FUNC) &ffi_sparse_dummy, 4}, {"ffi_sparse_dummy_na", (DL_FUNC) &ffi_sparse_dummy_na, 4}, {NULL, NULL, 0} }; void R_init_sparsevctrs(DllInfo* dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); // altrep classes sparsevctrs_init_altrep_sparse_double(dll); sparsevctrs_init_altrep_sparse_integer(dll); sparsevctrs_init_altrep_sparse_string(dll); sparsevctrs_init_altrep_sparse_logical(dll); } sparsevctrs/src/altrep-sparse-logical.c0000644000176200001440000002115214741321215017757 0ustar liggesusers#define R_NO_REMAP #include #include #include "sparse-utils.h" // Initialised at load time R_altrep_class_t altrep_sparse_logical_class; SEXP ffi_altrep_new_sparse_logical(SEXP x) { return R_new_altrep(altrep_sparse_logical_class, x, R_NilValue); } SEXP alrep_sparse_logical_Materialize(SEXP x) { SEXP out = R_altrep_data2(x); if (out != R_NilValue) { return out; } verbose_materialize(); SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t len = extract_len(x); const int v_default_val = extract_default_logical(x); out = PROTECT(Rf_allocVector(LGLSXP, len)); int* v_out = LOGICAL(out); for (R_xlen_t i = 0; i < len; ++i) { v_out[i] = v_default_val; } const R_xlen_t n_positions = Rf_xlength(pos); for (R_xlen_t i = 0; i < n_positions; ++i) { const int loc = v_pos[i] - 1; v_out[loc] = v_val[i]; } R_set_altrep_data2(x, out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // ALTVEC void* altrep_sparse_logical_Dataptr(SEXP x, Rboolean writeable) { return DATAPTR(alrep_sparse_logical_Materialize(x)); } const void* altrep_sparse_logical_Dataptr_or_null(SEXP x) { SEXP out = R_altrep_data2(x); if (out == R_NilValue) { return NULL; } else { return DATAPTR(out); } } static SEXP altrep_sparse_logical_Extract_subset(SEXP x, SEXP indx, SEXP call) { if (!is_index_handleable(indx)) { return NULL; } const R_xlen_t len = extract_len(x); SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t n_pos = Rf_xlength(pos); const int* v_indx = INTEGER_RO(indx); const R_xlen_t size = Rf_xlength(indx); R_xlen_t n_hits = 0; SEXP matches = PROTECT(Rf_allocVector(INTSXP, size)); int* v_matches = INTEGER(matches); for (R_xlen_t i = 0; i < size; ++i) { // 1 indexed! const int index = v_indx[i]; if (index == NA_INTEGER) { v_matches[i] = NA_INTEGER; ++n_hits; continue; } if (index > len) { // (Uses `>` not `>=` because `index` is 1 indexed) // OOB v_matches[i] = NA_INTEGER; ++n_hits; continue; } const R_xlen_t loc = binary_search(index, v_pos, n_pos); if (loc == n_pos) { // Not in `pos`, gets default value v_matches[i] = (int) n_pos; continue; } // Did find in `pos` v_matches[i] = (int) loc; ++n_hits; } SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); SEXP out_val = Rf_allocVector(LGLSXP, n_hits); SET_VECTOR_ELT(out, 0, out_val); int* v_out_val = LOGICAL(out_val); SEXP out_pos = Rf_allocVector(INTSXP, n_hits); SET_VECTOR_ELT(out, 1, out_pos); int* v_out_pos = INTEGER(out_pos); SEXP out_length = Rf_ScalarInteger((int) size); SET_VECTOR_ELT(out, 2, out_length); SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { const int match = v_matches[i]; if (match == (int) n_pos) { // Default value case continue; } if (match == NA_INTEGER) { v_out_val[i_out] = NA_LOGICAL; v_out_pos[i_out] = (int) i + 1; ++i_out; continue; } // Otherwise we have a hit from `pos` v_out_val[i_out] = v_val[match]; v_out_pos[i_out] = (int) i + 1; ++i_out; } SEXP altrep = ffi_altrep_new_sparse_logical(out); UNPROTECT(2); return altrep; } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_sparse_logical_Length(SEXP x) { R_xlen_t out = extract_len(x); return out; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_sparse_logical_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int) ) { Rprintf( "sparsevctrs_altrep_sparse_logical (materialized=%s, length=%i)\n", R_altrep_data2(x) != R_NilValue ? "T" : "F", (int) extract_len(x) ); return TRUE; } SEXP altrep_sparse_logical_Duplicate(SEXP x, Rboolean deep) { SEXP data1 = R_altrep_data1(x); SEXP data2 = R_altrep_data2(x); /* If deep or already materialized, do the default behavior */ if (deep || data2 != R_NilValue) { return NULL; } return ffi_altrep_new_sparse_logical(data1); } // ----------------------------------------------------------------------------- // ALTLOGICAL static int altrep_sparse_logical_Elt(SEXP x, R_xlen_t i) { SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t size = Rf_xlength(pos); const R_xlen_t len = extract_len(x); const int v_default_val = extract_default_logical(x); if (i > len) { // OOB of vector itself return NA_LOGICAL; } // TODO: Add `r_xlen_t_to_int()` const int needle = (int) i + 1; const R_xlen_t loc = binary_search(needle, v_pos, size); if (loc == size) { // Can't find it, must be the default value return v_default_val; } else { // Look it up in `val` return LOGICAL_ELT(val, loc); } } int altrep_sparse_logical_Is_sorted(SEXP x) { SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t pos_len = Rf_xlength(pos); SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); const int v_default_val = extract_default_logical(x); // zero length vector are by def sorted if (pos_len == 0) { return TRUE; } // 1 length vector are by def sorted if (pos_len == 1) { if (R_IsNA(v_val[0])) { // unless equal to NA return FALSE; } else { return TRUE; } } int current_value; if (v_pos[0] == 1) { current_value = v_val[0]; } else { current_value = v_default_val; } for (R_xlen_t i = 0; i < pos_len; i++) { if (R_IsNA(v_val[i])) { return FALSE; } if (v_val[i] < current_value) { return FALSE; } current_value = v_val[i]; if (i + 1 == pos_len) { break; } // If there is a gap between values check against default if ((v_pos[i + 1] - v_pos[i]) > 1) { if (v_default_val < current_value) { return FALSE; } current_value = v_default_val; } } return TRUE; } static int altrep_sparse_logical_No_NA_method(SEXP x) { const SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i])) { return FALSE; } } return TRUE; } static SEXP altrep_sparse_logical_Sum_method(SEXP x, Rboolean na_rm) { const SEXP val = extract_val(x); const int* v_val = LOGICAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); const R_xlen_t len = extract_len(x); int sum = 0; if (len == 0) { return Rf_ScalarLogical(sum); } for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i])) { if (na_rm) { continue; } else { return Rf_ScalarLogical(NA_LOGICAL); } } sum = sum + v_val[i]; } // default can be non-zero const int v_default_val = extract_default_integer(x); if (v_default_val != 0) { sum = sum + (len - val_len) * v_default_val; } return Rf_ScalarLogical(sum); } // ----------------------------------------------------------------------------- void sparsevctrs_init_altrep_sparse_logical(DllInfo* dll) { altrep_sparse_logical_class = R_make_altlogical_class("altrep_sparse_logical", "sparsevctrs", dll); // ALTVEC R_set_altvec_Dataptr_method( altrep_sparse_logical_class, altrep_sparse_logical_Dataptr ); R_set_altvec_Dataptr_or_null_method( altrep_sparse_logical_class, altrep_sparse_logical_Dataptr_or_null ); R_set_altvec_Extract_subset_method( altrep_sparse_logical_class, altrep_sparse_logical_Extract_subset ); // ALTREP R_set_altrep_Length_method( altrep_sparse_logical_class, altrep_sparse_logical_Length ); R_set_altrep_Inspect_method( altrep_sparse_logical_class, altrep_sparse_logical_Inspect ); R_set_altrep_Duplicate_method( altrep_sparse_logical_class, altrep_sparse_logical_Duplicate ); // ALTLOGICAL R_set_altlogical_Elt_method( altrep_sparse_logical_class, altrep_sparse_logical_Elt ); R_set_altlogical_Is_sorted_method( altrep_sparse_logical_class, altrep_sparse_logical_Is_sorted ); R_set_altlogical_No_NA_method( altrep_sparse_logical_class, altrep_sparse_logical_No_NA_method ); R_set_altlogical_Sum_method( altrep_sparse_logical_class, altrep_sparse_logical_Sum_method ); } sparsevctrs/src/sparse-utils.h0000644000176200001440000000122614741321215016225 0ustar liggesusers#ifndef SPARSEVCTRS_SPARSE_UTILS_H #define SPARSEVCTRS_SPARSE_UTILS_H #define R_NO_REMAP #include #include SEXP extract_val(SEXP x); SEXP extract_pos(SEXP x); R_xlen_t extract_len(SEXP x); SEXP extract_default(SEXP x); double extract_default_double(SEXP x); int extract_default_integer(SEXP x); SEXP extract_default_string(SEXP x); Rboolean extract_default_logical(SEXP x); bool is_altrep(SEXP x); SEXP ffi_extract_altrep_class(SEXP x); SEXP ffi_is_sparse_vector(SEXP x); R_xlen_t binary_search(int needle, const int* v_haystack, R_xlen_t size); bool is_index_handleable(SEXP x); void verbose_materialize(void); #endif sparsevctrs/src/sparse-dummy.h0000644000176200001440000000044314741321215016220 0ustar liggesusers#ifndef SPARSEVCTRS_SPARSE_DUMMY_H #define SPARSEVCTRS_SPARSE_DUMMY_H #define R_NO_REMAP #include #include "sparse-utils.h" SEXP ffi_sparse_dummy(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot); SEXP ffi_sparse_dummy_na(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot); #endif sparsevctrs/src/sparse-extractors.h0000644000176200001440000000043214741321215017261 0ustar liggesusers#ifndef SPARSEVCTRS_SPARSE_EXTRACTORS_H #define SPARSEVCTRS_SPARSE_EXTRACTORS_H #define R_NO_REMAP #include #include "sparse-utils.h" SEXP ffi_altrep_sparse_positions(SEXP x); SEXP ffi_altrep_sparse_values(SEXP x); SEXP ffi_altrep_sparse_default(SEXP x); #endif sparsevctrs/src/altrep-sparse-integer.c0000644000176200001440000002457314741321215020014 0ustar liggesusers#define R_NO_REMAP #include #include #include "sparse-utils.h" // Initialised at load time R_altrep_class_t altrep_sparse_integer_class; SEXP ffi_altrep_new_sparse_integer(SEXP x) { return R_new_altrep(altrep_sparse_integer_class, x, R_NilValue); } SEXP alrep_sparse_integer_Materialize(SEXP x) { SEXP out = R_altrep_data2(x); if (out != R_NilValue) { return out; } verbose_materialize(); SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t len = extract_len(x); const int v_default_val = extract_default_integer(x); out = PROTECT(Rf_allocVector(INTSXP, len)); int* v_out = INTEGER(out); for (R_xlen_t i = 0; i < len; ++i) { v_out[i] = v_default_val; } const R_xlen_t n_positions = Rf_xlength(pos); for (R_xlen_t i = 0; i < n_positions; ++i) { const int loc = v_pos[i] - 1; v_out[loc] = v_val[i]; } R_set_altrep_data2(x, out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // ALTVEC void* altrep_sparse_integer_Dataptr(SEXP x, Rboolean writeable) { return DATAPTR(alrep_sparse_integer_Materialize(x)); } const void* altrep_sparse_integer_Dataptr_or_null(SEXP x) { SEXP out = R_altrep_data2(x); if (out == R_NilValue) { return NULL; } else { return DATAPTR(out); } } static SEXP altrep_sparse_integer_Extract_subset(SEXP x, SEXP indx, SEXP call) { if (!is_index_handleable(indx)) { return NULL; } const R_xlen_t len = extract_len(x); SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t n_pos = Rf_xlength(pos); const int* v_indx = INTEGER_RO(indx); const R_xlen_t size = Rf_xlength(indx); R_xlen_t n_hits = 0; SEXP matches = PROTECT(Rf_allocVector(INTSXP, size)); int* v_matches = INTEGER(matches); for (R_xlen_t i = 0; i < size; ++i) { // 1 indexed! const int index = v_indx[i]; if (index == NA_INTEGER) { v_matches[i] = NA_INTEGER; ++n_hits; continue; } if (index > len) { // (Uses `>` not `>=` because `index` is 1 indexed) // OOB v_matches[i] = NA_INTEGER; ++n_hits; continue; } const R_xlen_t loc = binary_search(index, v_pos, n_pos); if (loc == n_pos) { // Not in `pos`, gets default value v_matches[i] = (int) n_pos; continue; } // Did find in `pos` v_matches[i] = (int) loc; ++n_hits; } SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); SEXP out_val = Rf_allocVector(INTSXP, n_hits); SET_VECTOR_ELT(out, 0, out_val); int* v_out_val = INTEGER(out_val); SEXP out_pos = Rf_allocVector(INTSXP, n_hits); SET_VECTOR_ELT(out, 1, out_pos); int* v_out_pos = INTEGER(out_pos); SEXP out_length = Rf_ScalarInteger((int) size); SET_VECTOR_ELT(out, 2, out_length); SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { const int match = v_matches[i]; if (match == (int) n_pos) { // Default value case continue; } if (match == NA_INTEGER) { v_out_val[i_out] = NA_INTEGER; v_out_pos[i_out] = (int) i + 1; ++i_out; continue; } // Otherwise we have a hit from `pos` v_out_val[i_out] = v_val[match]; v_out_pos[i_out] = (int) i + 1; ++i_out; } SEXP altrep = ffi_altrep_new_sparse_integer(out); UNPROTECT(2); return altrep; } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_sparse_integer_Length(SEXP x) { R_xlen_t out = extract_len(x); return out; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_sparse_integer_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int) ) { Rprintf( "sparsevctrs_altrep_sparse_integer (materialized=%s, length=%i)\n", R_altrep_data2(x) != R_NilValue ? "T" : "F", (int) extract_len(x) ); return TRUE; } SEXP altrep_sparse_integer_Duplicate(SEXP x, Rboolean deep) { SEXP data1 = R_altrep_data1(x); SEXP data2 = R_altrep_data2(x); /* If deep or already materialized, do the default behavior */ if (deep || data2 != R_NilValue) { return NULL; } return ffi_altrep_new_sparse_integer(data1); } // ----------------------------------------------------------------------------- // ALTINTEGER static int altrep_sparse_integer_Elt(SEXP x, R_xlen_t i) { SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t size = Rf_xlength(pos); const R_xlen_t len = extract_len(x); const int v_default_val = extract_default_integer(x); if (i > len) { // OOB of vector itself return NA_INTEGER; } // TODO: Add `r_xlen_t_to_int()` const int needle = (int) i + 1; const R_xlen_t loc = binary_search(needle, v_pos, size); if (loc == size) { // Can't find it, must be the default value return v_default_val; } else { // Look it up in `val` return INTEGER_ELT(val, loc); } } int altrep_sparse_integer_Is_sorted(SEXP x) { SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t pos_len = Rf_xlength(pos); SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); const int v_default_val = extract_default_integer(x); // zero length vector are by def sorted if (pos_len == 0) { return TRUE; } // 1 length vector are by def sorted if (pos_len == 1) { if (v_val[0] == R_NaInt) { // unless equal to NA return FALSE; } else { return TRUE; } } int current_value; if (v_pos[0] == 1) { current_value = v_val[0]; } else { current_value = v_default_val; } for (R_xlen_t i = 0; i < pos_len; i++) { if (v_val[i] == R_NaInt) { return FALSE; } if (v_val[i] < current_value) { return FALSE; } current_value = v_val[i]; if (i + 1 == pos_len) { break; } // If there is a gap between values check against default if ((v_pos[i + 1] - v_pos[i]) > 1) { if (v_default_val < current_value) { return FALSE; } current_value = v_default_val; } } return TRUE; } static SEXP altrep_sparse_integer_Min_method(SEXP x, Rboolean na_rm) { int min = INT_MAX; if (extract_len(x) == 0) { Rf_warning("no non-missing arguments to min; returning Inf"); return Rf_ScalarReal(R_PosInf); } const SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); const R_xlen_t val_len = Rf_xlength(val); const int v_default_val = extract_default_integer(x); if (val_len == 0) { min = v_default_val; } if (v_default_val < min) { min = v_default_val; } for (R_xlen_t i = 0; i < val_len; i++) { if (v_val[i] == R_NaInt) { if (na_rm) { continue; } else { return Rf_ScalarInteger(NA_INTEGER); } } if (v_val[i] < min) { min = v_val[i]; } } return Rf_ScalarInteger(min); } static SEXP altrep_sparse_integer_Max_method(SEXP x, Rboolean na_rm) { int max = INT_MIN; if (extract_len(x) == 0) { Rf_warning("no non-missing arguments to max; returning -Inf"); return Rf_ScalarReal(R_NegInf); } const SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); const R_xlen_t val_len = Rf_xlength(val); const int v_default_val = extract_default_integer(x); if (val_len == 0) { max = v_default_val; } if (v_default_val > max) { max = v_default_val; } for (R_xlen_t i = 0; i < val_len; i++) { if (v_val[i] == R_NaInt) { if (na_rm) { continue; } else { return Rf_ScalarInteger(NA_INTEGER); } } if (v_val[i] > max) { max = v_val[i]; } } return Rf_ScalarInteger(max); } static int altrep_sparse_integer_No_NA_method(SEXP x) { const SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); const R_xlen_t val_len = Rf_xlength(val); for (R_xlen_t i = 0; i < val_len; i++) { if (v_val[i] == R_NaInt) { return FALSE; } } return TRUE; } static SEXP altrep_sparse_integer_Sum_method(SEXP x, Rboolean na_rm) { const SEXP val = extract_val(x); const int* v_val = INTEGER_RO(val); const R_xlen_t val_len = Rf_xlength(val); const R_xlen_t len = extract_len(x); int sum = 0; if (len == 0) { return Rf_ScalarInteger(sum); } for (R_xlen_t i = 0; i < val_len; i++) { if (v_val[i] == R_NaInt) { if (na_rm) { continue; } else { return Rf_ScalarInteger(NA_INTEGER); } } sum = sum + v_val[i]; } // default can be non-zero const int v_default_val = extract_default_integer(x); if (v_default_val != 0) { sum = sum + (len - val_len) * v_default_val; } return Rf_ScalarInteger(sum); } // ----------------------------------------------------------------------------- void sparsevctrs_init_altrep_sparse_integer(DllInfo* dll) { altrep_sparse_integer_class = R_make_altinteger_class("altrep_sparse_integer", "sparsevctrs", dll); // ALTVEC R_set_altvec_Dataptr_method( altrep_sparse_integer_class, altrep_sparse_integer_Dataptr ); R_set_altvec_Dataptr_or_null_method( altrep_sparse_integer_class, altrep_sparse_integer_Dataptr_or_null ); R_set_altvec_Extract_subset_method( altrep_sparse_integer_class, altrep_sparse_integer_Extract_subset ); // ALTREP R_set_altrep_Length_method( altrep_sparse_integer_class, altrep_sparse_integer_Length ); R_set_altrep_Inspect_method( altrep_sparse_integer_class, altrep_sparse_integer_Inspect ); R_set_altrep_Duplicate_method( altrep_sparse_integer_class, altrep_sparse_integer_Duplicate ); // ALTINTEGER R_set_altinteger_Elt_method( altrep_sparse_integer_class, altrep_sparse_integer_Elt ); R_set_altinteger_Is_sorted_method( altrep_sparse_integer_class, altrep_sparse_integer_Is_sorted ); R_set_altinteger_Min_method( altrep_sparse_integer_class, altrep_sparse_integer_Min_method ); R_set_altinteger_Max_method( altrep_sparse_integer_class, altrep_sparse_integer_Max_method ); R_set_altinteger_No_NA_method( altrep_sparse_integer_class, altrep_sparse_integer_No_NA_method ); R_set_altinteger_Sum_method( altrep_sparse_integer_class, altrep_sparse_integer_Sum_method ); } sparsevctrs/src/sparse-dummy.c0000644000176200001440000001544214744243770016234 0ustar liggesusers#include "sparse-dummy.h" // Defined in altrep-sparse-integer.c extern SEXP ffi_altrep_new_sparse_integer(SEXP); extern void sparsevctrs_init_altrep_sparse_integer(DllInfo*); SEXP create_dummy(SEXP pos, R_xlen_t length) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); const R_xlen_t pos_len = Rf_length(pos); // values SEXP out_val = Rf_allocVector(INTSXP, pos_len); SET_VECTOR_ELT(out, 0, out_val); int* v_out_val = INTEGER(out_val); for (R_xlen_t i = 0; i < pos_len; ++i) { v_out_val[i] = 1; } // positions SET_VECTOR_ELT(out, 1, pos); // length const SEXP out_length = Rf_ScalarInteger((int) length); SET_VECTOR_ELT(out, 2, out_length); // default const SEXP out_default = Rf_ScalarInteger(0); SET_VECTOR_ELT(out, 3, out_default); UNPROTECT(1); return ffi_altrep_new_sparse_integer(out); } SEXP create_dummy_na(SEXP values, SEXP pos, R_xlen_t length) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); // values SET_VECTOR_ELT(out, 0, values); // positions SET_VECTOR_ELT(out, 1, pos); // length const SEXP out_length = Rf_ScalarInteger((int) length); SET_VECTOR_ELT(out, 2, out_length); // default const SEXP out_default = Rf_ScalarInteger(0); SET_VECTOR_ELT(out, 3, out_default); UNPROTECT(1); return ffi_altrep_new_sparse_integer(out); } SEXP ffi_sparse_dummy(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot) { const R_xlen_t n_lvls = Rf_length(lvls); const R_xlen_t len = Rf_length(x); const int* v_x = INTEGER_RO(x); // Generate list of integer vectors. One vector for each level, with its // length equal to the number of occurances of that level. SEXP out = PROTECT(Rf_allocVector(VECSXP, n_lvls)); for (R_xlen_t i = 0; i < n_lvls; ++i) { R_xlen_t n_val = INTEGER_ELT(counts, i); SET_VECTOR_ELT(out, i, Rf_allocVector(INTSXP, n_val)); } // Vector of positions to keep track of how far into each position vector we // are. Initialize to 0 to indicate first position. SEXP pos_index = PROTECT(Rf_allocVector(INTSXP, n_lvls)); int* v_pos_index = INTEGER(pos_index); for (R_xlen_t i = 0; i < n_lvls; ++i) { SET_INTEGER_ELT(pos_index, i, 0); } // Itterate over input, find its position index, and place the position value // at the index. Increment specific index. if ((Rboolean) LOGICAL_ELT(one_hot, 0) == TRUE) { for (R_xlen_t i = 0; i < len; ++i) { int current_val = v_x[i] - 1; int index = v_pos_index[current_val]; SEXP pos_vec = VECTOR_ELT(out, current_val); int* v_pos_vec = INTEGER(pos_vec); // we need the result to be 1-indexed v_pos_vec[index] = i + 1; v_pos_index[current_val]++; } } else { for (R_xlen_t i = 0; i < len; ++i) { int current_val = v_x[i] - 1; if (current_val == -1) { continue; } int index = v_pos_index[current_val]; SEXP pos_vec = VECTOR_ELT(out, current_val); int* v_pos_vec = INTEGER(pos_vec); // we need the result to be 1-indexed v_pos_vec[index] = i + 1; v_pos_index[current_val]++; } } // Turn list of integer vectors with positions, into list of sparse integer // vectors. for (R_xlen_t i = 0; i < n_lvls; ++i) { SEXP positions = VECTOR_ELT(out, i); SEXP dummy = create_dummy(positions, len); SET_VECTOR_ELT(out, i, dummy); } UNPROTECT(2); return out; } SEXP ffi_sparse_dummy_na(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot) { const R_xlen_t n_lvls = Rf_length(lvls); const R_xlen_t len = Rf_length(x); const int* v_x = INTEGER_RO(x); // Generate lists of integer vectors. One vector for each level, with its // length equal to the number of occurances of that level. SEXP out_positions = PROTECT(Rf_allocVector(VECSXP, n_lvls)); SEXP out_values = PROTECT(Rf_allocVector(VECSXP, n_lvls)); for (R_xlen_t i = 0; i < n_lvls; ++i) { R_xlen_t n_val = INTEGER_ELT(counts, i); SET_VECTOR_ELT(out_values, i, Rf_allocVector(INTSXP, n_val)); SET_VECTOR_ELT(out_positions, i, Rf_allocVector(INTSXP, n_val)); } // Vector of positions to keep track of how far into each position vector we // are. Initialize to 0 to indicate first position. SEXP pos_index = PROTECT(Rf_allocVector(INTSXP, n_lvls)); int* v_pos_index = INTEGER(pos_index); for (R_xlen_t i = 0; i < n_lvls; ++i) { SET_INTEGER_ELT(pos_index, i, 0); } // Itterate over input, find its position index, and place the position value // at the index. Increment specific index. if ((Rboolean) LOGICAL_ELT(one_hot, 0) == TRUE) { for (R_xlen_t i = 0; i < len; ++i) { int current_val = v_x[i]; if (current_val == R_NaInt) { for (R_xlen_t j = 0; j < n_lvls; ++j) { int index = v_pos_index[j]; SEXP pos_vec = VECTOR_ELT(out_positions, j); int* v_pos_vec = INTEGER(pos_vec); SEXP val_vec = VECTOR_ELT(out_values, j); int* v_val_vec = INTEGER(val_vec); v_pos_vec[index] = i + 1; v_val_vec[index] = R_NaInt; v_pos_index[j]++; } } else { --current_val; int index = v_pos_index[current_val]; SEXP pos_vec = VECTOR_ELT(out_positions, current_val); int* v_pos_vec = INTEGER(pos_vec); SEXP val_vec = VECTOR_ELT(out_values, current_val); int* v_val_vec = INTEGER(val_vec); // we need the result to be 1-indexed v_pos_vec[index] = i + 1; v_val_vec[index] = 1; v_pos_index[current_val]++; } } } else { for (R_xlen_t i = 0; i < len; ++i) { int current_val = v_x[i]; if (current_val == R_NaInt) { for (R_xlen_t j = 0; j < n_lvls; ++j) { int index = v_pos_index[j]; SEXP pos_vec = VECTOR_ELT(out_positions, j); int* v_pos_vec = INTEGER(pos_vec); SEXP val_vec = VECTOR_ELT(out_values, j); int* v_val_vec = INTEGER(val_vec); v_pos_vec[index] = i + 1; v_val_vec[index] = R_NaInt; v_pos_index[j]++; } } else { --current_val; if (current_val == -1) { continue; } int index = v_pos_index[current_val]; SEXP pos_vec = VECTOR_ELT(out_positions, current_val); int* v_pos_vec = INTEGER(pos_vec); SEXP val_vec = VECTOR_ELT(out_values, current_val); int* v_val_vec = INTEGER(val_vec); // we need the result to be 1-indexed v_pos_vec[index] = i + 1; v_val_vec[index] = 1; v_pos_index[current_val]++; } } } // Turn list of integer vectors with positions, into list of sparse integer // vectors. for (R_xlen_t i = 0; i < n_lvls; ++i) { SEXP positions = VECTOR_ELT(out_positions, i); SEXP values = VECTOR_ELT(out_values, i); SEXP dummy = create_dummy_na(values, positions, len); SET_VECTOR_ELT(out_positions, i, dummy); } UNPROTECT(3); return out_positions; } sparsevctrs/src/altrep-sparse-string.c0000644000176200001440000001723414741321215017661 0ustar liggesusers#define R_NO_REMAP #include #include #include "sparse-utils.h" // Initialised at load time R_altrep_class_t altrep_sparse_string_class; SEXP ffi_altrep_new_sparse_string(SEXP x) { return R_new_altrep(altrep_sparse_string_class, x, R_NilValue); } SEXP alrep_sparse_string_Materialize(SEXP x) { SEXP out = R_altrep_data2(x); if (out != R_NilValue) { return out; } verbose_materialize(); SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t len = extract_len(x); const SEXP v_default_val = extract_default_string(x); out = PROTECT(Rf_allocVector(STRSXP, len)); for (R_xlen_t i = 0; i < len; ++i) { SET_STRING_ELT(out, i, v_default_val); } const R_xlen_t n_positions = Rf_xlength(pos); for (R_xlen_t i = 0; i < n_positions; ++i) { const int loc = v_pos[i] - 1; SET_STRING_ELT(out, loc, STRING_ELT(val, i)); } R_set_altrep_data2(x, out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // ALTVEC void* altrep_sparse_string_Dataptr(SEXP x, Rboolean writeable) { return DATAPTR(alrep_sparse_string_Materialize(x)); } const void* altrep_sparse_string_Dataptr_or_null(SEXP x) { SEXP out = R_altrep_data2(x); if (out == R_NilValue) { return NULL; } else { return DATAPTR(out); } } static SEXP altrep_sparse_string_Extract_subset(SEXP x, SEXP indx, SEXP call) { if (!is_index_handleable(indx)) { return NULL; } const R_xlen_t len = extract_len(x); SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t n_pos = Rf_xlength(pos); const int* v_indx = INTEGER_RO(indx); const R_xlen_t size = Rf_xlength(indx); R_xlen_t n_hits = 0; SEXP matches = PROTECT(Rf_allocVector(INTSXP, size)); int* v_matches = INTEGER(matches); for (R_xlen_t i = 0; i < size; ++i) { // 1 indexed! const int index = v_indx[i]; if (index == NA_INTEGER) { v_matches[i] = NA_INTEGER; ++n_hits; continue; } if (index > len) { // (Uses `>` not `>=` because `index` is 1 indexed) // OOB v_matches[i] = NA_INTEGER; ++n_hits; continue; } const R_xlen_t loc = binary_search(index, v_pos, n_pos); if (loc == n_pos) { // Not in `pos`, gets default value v_matches[i] = (int) n_pos; continue; } // Did find in `pos` v_matches[i] = (int) loc; ++n_hits; } SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); SEXP out_val = Rf_allocVector(STRSXP, n_hits); SET_VECTOR_ELT(out, 0, out_val); SEXP out_pos = Rf_allocVector(INTSXP, n_hits); SET_VECTOR_ELT(out, 1, out_pos); int* v_out_pos = INTEGER(out_pos); SEXP out_length = Rf_ScalarInteger((int) size); SET_VECTOR_ELT(out, 2, out_length); SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { const int match = v_matches[i]; if (match == (int) n_pos) { // Default value case continue; } if (match == NA_INTEGER) { SET_STRING_ELT(out_val, i_out, NA_STRING); v_out_pos[i_out] = (int) i + 1; ++i_out; continue; } // Otherwise we have a hit from `pos` SET_STRING_ELT(out_val, i_out, STRING_ELT(val, match)); v_out_pos[i_out] = (int) i + 1; ++i_out; } SEXP altrep = ffi_altrep_new_sparse_string(out); UNPROTECT(2); return altrep; } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_sparse_string_Length(SEXP x) { R_xlen_t out = extract_len(x); return out; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_sparse_string_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int) ) { Rprintf( "sparsevctrs_altrep_sparse_string (materialized=%s, length=%i)\n", R_altrep_data2(x) != R_NilValue ? "T" : "F", (int) extract_len(x) ); return TRUE; } SEXP altrep_sparse_string_Duplicate(SEXP x, Rboolean deep) { SEXP data1 = R_altrep_data1(x); SEXP data2 = R_altrep_data2(x); /* If deep or already materialized, do the default behavior */ if (deep || data2 != R_NilValue) { return NULL; } return ffi_altrep_new_sparse_string(data1); } // ----------------------------------------------------------------------------- // ALTSTRING static SEXP altrep_sparse_string_Elt(SEXP x, R_xlen_t i) { SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t size = Rf_xlength(pos); const R_xlen_t len = extract_len(x); SEXP v_default_val = extract_default_string(x); if (i > len) { // OOB of vector itself return NA_STRING; } // TODO: Add `r_xlen_t_to_int()` const int needle = (int) i + 1; const R_xlen_t loc = binary_search(needle, v_pos, size); if (loc == size) { // Can't find it, must be the default value return v_default_val; } else { // Look it up in `val` return STRING_ELT(val, loc); } } int altrep_sparse_string_Is_sorted(SEXP x) { Rprintf("const char *, ..."); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t pos_len = Rf_xlength(pos); SEXP val = extract_val(x); SEXP v_default_val = extract_default_string(x); // zero length vector are by def sorted if (pos_len == 0) { return TRUE; } // 1 length vector are by def sorted if (pos_len == 1) { if (STRING_ELT(val, 0) == NA_STRING) { // unless equal to NA return FALSE; } else { return TRUE; } } SEXP current_value; if (v_pos[0] == 1) { current_value = STRING_ELT(val, 0); } else { current_value = v_default_val; } for (R_xlen_t i = 0; i < pos_len; i++) { if (STRING_ELT(val, i) == NA_STRING) { return FALSE; } if (STRING_ELT(val, i) < current_value) { return FALSE; } current_value = STRING_ELT(val, i); if (i + 1 == pos_len) { break; } // If there is a gap between values check against default if ((v_pos[i + 1] - v_pos[i]) > 1) { if (v_default_val < current_value) { return FALSE; } current_value = v_default_val; } } return TRUE; } void altrep_sparse_string_Set_elt(SEXP x, R_xlen_t i, SEXP value) { SEXP out = R_altrep_data2(x); if (out == R_NilValue) { out = alrep_sparse_string_Materialize(x); } SET_STRING_ELT(out, i, value); } // ----------------------------------------------------------------------------- void sparsevctrs_init_altrep_sparse_string(DllInfo* dll) { altrep_sparse_string_class = R_make_altstring_class("altrep_sparse_string", "sparsevctrs", dll); // ALTVEC R_set_altvec_Dataptr_method( altrep_sparse_string_class, altrep_sparse_string_Dataptr ); R_set_altvec_Dataptr_or_null_method( altrep_sparse_string_class, altrep_sparse_string_Dataptr_or_null ); R_set_altvec_Extract_subset_method( altrep_sparse_string_class, altrep_sparse_string_Extract_subset ); // ALTREP R_set_altrep_Length_method( altrep_sparse_string_class, altrep_sparse_string_Length ); R_set_altrep_Inspect_method( altrep_sparse_string_class, altrep_sparse_string_Inspect ); R_set_altrep_Duplicate_method( altrep_sparse_string_class, altrep_sparse_string_Duplicate ); // ALTSTRING R_set_altstring_Elt_method( altrep_sparse_string_class, altrep_sparse_string_Elt ); R_set_altstring_Is_sorted_method( altrep_sparse_string_class, altrep_sparse_string_Is_sorted ); R_set_altstring_Set_elt_method( altrep_sparse_string_class, altrep_sparse_string_Set_elt ); } sparsevctrs/src/altrep-sparse-double.c0000644000176200001440000002424514741321215017625 0ustar liggesusers#define R_NO_REMAP #include #include #include "sparse-utils.h" // Initialised at load time R_altrep_class_t altrep_sparse_double_class; SEXP ffi_altrep_new_sparse_double(SEXP x) { return R_new_altrep(altrep_sparse_double_class, x, R_NilValue); } SEXP alrep_sparse_double_Materialize(SEXP x) { SEXP out = R_altrep_data2(x); if (out != R_NilValue) { return out; } verbose_materialize(); SEXP val = extract_val(x); const double* v_val = REAL_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t len = extract_len(x); const double v_default_val = extract_default_double(x); out = PROTECT(Rf_allocVector(REALSXP, len)); double* v_out = REAL(out); for (R_xlen_t i = 0; i < len; ++i) { v_out[i] = v_default_val; } const R_xlen_t n_positions = Rf_xlength(pos); for (R_xlen_t i = 0; i < n_positions; ++i) { const int loc = v_pos[i] - 1; v_out[loc] = v_val[i]; } R_set_altrep_data2(x, out); UNPROTECT(1); return out; } // ----------------------------------------------------------------------------- // ALTVEC void* altrep_sparse_double_Dataptr(SEXP x, Rboolean writeable) { return DATAPTR(alrep_sparse_double_Materialize(x)); } const void* altrep_sparse_double_Dataptr_or_null(SEXP x) { SEXP out = R_altrep_data2(x); if (out == R_NilValue) { return NULL; } else { return DATAPTR(out); } } static SEXP altrep_sparse_double_Extract_subset(SEXP x, SEXP indx, SEXP call) { if (!is_index_handleable(indx)) { return NULL; } const R_xlen_t len = extract_len(x); SEXP val = extract_val(x); const double* v_val = REAL_RO(val); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t n_pos = Rf_xlength(pos); const int* v_indx = INTEGER_RO(indx); const R_xlen_t size = Rf_xlength(indx); R_xlen_t n_hits = 0; SEXP matches = PROTECT(Rf_allocVector(INTSXP, size)); int* v_matches = INTEGER(matches); for (R_xlen_t i = 0; i < size; ++i) { // 1 indexed! const int index = v_indx[i]; if (index == NA_INTEGER) { v_matches[i] = NA_INTEGER; ++n_hits; continue; } if (index > len) { // (Uses `>` not `>=` because `index` is 1 indexed) // OOB v_matches[i] = NA_INTEGER; ++n_hits; continue; } const R_xlen_t loc = binary_search(index, v_pos, n_pos); if (loc == n_pos) { // Not in `pos`, gets default value v_matches[i] = (int) n_pos; continue; } // Did find in `pos` v_matches[i] = (int) loc; ++n_hits; } SEXP out = PROTECT(Rf_allocVector(VECSXP, 4)); SEXP out_val = Rf_allocVector(REALSXP, n_hits); SET_VECTOR_ELT(out, 0, out_val); double* v_out_val = REAL(out_val); SEXP out_pos = Rf_allocVector(INTSXP, n_hits); SET_VECTOR_ELT(out, 1, out_pos); int* v_out_pos = INTEGER(out_pos); SEXP out_length = Rf_ScalarInteger((int) size); SET_VECTOR_ELT(out, 2, out_length); SEXP out_default = extract_default(x); SET_VECTOR_ELT(out, 3, out_default); R_xlen_t i_out = 0; for (R_xlen_t i = 0; i < size; ++i) { const int match = v_matches[i]; if (match == (int) n_pos) { // Default value case continue; } if (match == NA_INTEGER) { v_out_val[i_out] = NA_REAL; v_out_pos[i_out] = (int) i + 1; ++i_out; continue; } // Otherwise we have a hit from `pos` v_out_val[i_out] = v_val[match]; v_out_pos[i_out] = (int) i + 1; ++i_out; } SEXP altrep = ffi_altrep_new_sparse_double(out); UNPROTECT(2); return altrep; } // ----------------------------------------------------------------------------- // ALTREP R_xlen_t altrep_sparse_double_Length(SEXP x) { R_xlen_t out = extract_len(x); return out; } // What gets printed when .Internal(inspect()) is used Rboolean altrep_sparse_double_Inspect( SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int) ) { Rprintf( "sparsevctrs_altrep_sparse_double (materialized=%s, length=%i)\n", R_altrep_data2(x) != R_NilValue ? "T" : "F", (int) extract_len(x) ); return TRUE; } SEXP altrep_sparse_double_Duplicate(SEXP x, Rboolean deep) { SEXP data1 = R_altrep_data1(x); SEXP data2 = R_altrep_data2(x); /* If deep or already materialized, do the default behavior */ if (deep || data2 != R_NilValue) { return NULL; } return ffi_altrep_new_sparse_double(data1); } // ----------------------------------------------------------------------------- // ALTREAL static double altrep_sparse_double_Elt(SEXP x, R_xlen_t i) { SEXP val = extract_val(x); SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t size = Rf_xlength(pos); const R_xlen_t len = extract_len(x); const double v_default_val = extract_default_double(x); if (i > len) { // OOB of vector itself return NA_REAL; } // TODO: Add `r_xlen_t_to_int()` const int needle = (int) i + 1; const R_xlen_t loc = binary_search(needle, v_pos, size); if (loc == size) { // Can't find it, must be the default value return v_default_val; } else { // Look it up in `val` return REAL_ELT(val, loc); } } int altrep_sparse_double_Is_sorted(SEXP x) { SEXP pos = extract_pos(x); const int* v_pos = INTEGER_RO(pos); const R_xlen_t pos_len = Rf_xlength(pos); SEXP val = extract_val(x); const double* v_val = REAL_RO(val); const double v_default_val = extract_default_double(x); // zero length vector are by def sorted if (pos_len == 0) { return TRUE; } // 1 length vector are by def sorted if (pos_len == 1) { if (R_IsNA(v_val[0])) { // unless equal to NA return FALSE; } else { return TRUE; } } double current_value; if (v_pos[0] == 1) { current_value = v_val[0]; } else { current_value = v_default_val; } for (R_xlen_t i = 0; i < pos_len; i++) { if (R_IsNA(v_val[i])) { return FALSE; } if (v_val[i] < current_value) { return FALSE; } current_value = v_val[i]; if (i + 1 == pos_len) { break; } // If there is a gap between values check against default if ((v_pos[i + 1] - v_pos[i]) > 1) { if (v_default_val < current_value) { return FALSE; } current_value = v_default_val; } } return TRUE; } static SEXP altrep_sparse_double_Min_method(SEXP x, Rboolean na_rm) { double min = R_PosInf; if (extract_len(x) == 0) { Rf_warning("no non-missing arguments to min; returning Inf"); return Rf_ScalarReal(min); } const SEXP val = extract_val(x); const double* v_val = REAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); const double v_default_val = extract_default_double(x); if (val_len == 0) { min = v_default_val; } if (v_default_val < min) { min = v_default_val; } for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i]) && !na_rm) { return Rf_ScalarReal(NA_REAL); } if (v_val[i] < min) { min = v_val[i]; } } return Rf_ScalarReal(min); } static SEXP altrep_sparse_double_Max_method(SEXP x, Rboolean na_rm) { double max = R_NegInf; if (extract_len(x) == 0) { Rf_warning("no non-missing arguments to max; returning -Inf"); return Rf_ScalarReal(max); } const SEXP val = extract_val(x); const double* v_val = REAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); const double v_default_val = extract_default_double(x); if (val_len == 0) { max = v_default_val; } if (v_default_val > max) { max = v_default_val; } for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i]) && !na_rm) { return Rf_ScalarReal(NA_REAL); } if (v_val[i] > max) { max = v_val[i]; } } return Rf_ScalarReal(max); } static int altrep_sparse_double_No_NA_method(SEXP x) { const SEXP val = extract_val(x); const double* v_val = REAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i])) { return FALSE; } } return TRUE; } static SEXP altrep_sparse_double_Sum_method(SEXP x, Rboolean na_rm) { const SEXP val = extract_val(x); const double* v_val = REAL_RO(val); const R_xlen_t val_len = Rf_xlength(val); const R_xlen_t len = extract_len(x); double sum = 0; if (len == 0) { return Rf_ScalarReal(sum); } for (R_xlen_t i = 0; i < val_len; i++) { if (R_IsNA(v_val[i])) { if (na_rm) { continue; } else { return Rf_ScalarReal(NA_REAL); } } sum = sum + v_val[i]; } // default can be non-zero const double v_default_val = extract_default_double(x); if (v_default_val != 0) { sum = sum + (len - val_len) * v_default_val; } return Rf_ScalarReal(sum); } // ----------------------------------------------------------------------------- void sparsevctrs_init_altrep_sparse_double(DllInfo* dll) { altrep_sparse_double_class = R_make_altreal_class("altrep_sparse_double", "sparsevctrs", dll); // ALTVEC R_set_altvec_Dataptr_method( altrep_sparse_double_class, altrep_sparse_double_Dataptr ); R_set_altvec_Dataptr_or_null_method( altrep_sparse_double_class, altrep_sparse_double_Dataptr_or_null ); R_set_altvec_Extract_subset_method( altrep_sparse_double_class, altrep_sparse_double_Extract_subset ); // ALTREP R_set_altrep_Length_method( altrep_sparse_double_class, altrep_sparse_double_Length ); R_set_altrep_Inspect_method( altrep_sparse_double_class, altrep_sparse_double_Inspect ); R_set_altrep_Duplicate_method( altrep_sparse_double_class, altrep_sparse_double_Duplicate ); // ALTREAL R_set_altreal_Elt_method( altrep_sparse_double_class, altrep_sparse_double_Elt ); R_set_altreal_Is_sorted_method( altrep_sparse_double_class, altrep_sparse_double_Is_sorted ); R_set_altreal_Min_method( altrep_sparse_double_class, altrep_sparse_double_Min_method ); R_set_altreal_Max_method( altrep_sparse_double_class, altrep_sparse_double_Max_method ); R_set_altreal_No_NA_method( altrep_sparse_double_class, altrep_sparse_double_No_NA_method ); R_set_altreal_Sum_method( altrep_sparse_double_class, altrep_sparse_double_Sum_method ); } sparsevctrs/NAMESPACE0000644000176200001440000000140614741572451014063 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(as_sparse_character) export(as_sparse_double) export(as_sparse_integer) export(as_sparse_logical) export(coerce_to_sparse_data_frame) export(coerce_to_sparse_matrix) export(coerce_to_sparse_tibble) export(has_sparse_elements) export(is_sparse_character) export(is_sparse_double) export(is_sparse_integer) export(is_sparse_logical) export(is_sparse_numeric) export(is_sparse_vector) export(sparse_character) export(sparse_default) export(sparse_double) export(sparse_dummy) export(sparse_integer) export(sparse_logical) export(sparse_mean) export(sparse_median) export(sparse_positions) export(sparse_sd) export(sparse_values) export(sparse_var) export(sparsity) import(rlang) useDynLib(sparsevctrs, .registration = TRUE) sparsevctrs/LICENSE0000644000176200001440000000006114741321215013633 0ustar liggesusersYEAR: 2023 COPYRIGHT HOLDER: sparsevctrs authors sparsevctrs/NEWS.md0000644000176200001440000000175114744254127013745 0ustar liggesusers# sparsevctrs 0.2.0 ## New Functions * `sparsity()` has been added, allows sparsity calculations of data.frames, matrices, and sparse matrices. (#82) * Utility function `has_sparse_elements()` has been added. (#70) * Helper function `sparse_dummy()` has beenn added. (#49) * Helper functions `sparse_mean()`, `sparse_var()`, `sparse_sd()`, `sparse_median()` has been added. (#49) ## Improvements * All sparse vector types now have a significant smaller base object size. (#67) * All coerce functions have received a `call` argument. (#72) * `is_sparse_vector()` has been rewritten for speed improvement. (#76) * `coerce_to_sparse_matrix()` Now turns dense zeroes into sparse zeroes. (#77) ## Bug Fixes * Fixed bug where `coerce_to_sparse_data_frame()` and `coerce_to_sparse_tibble()` didn't work with matrices with fully sparse columns. (#69) * Fixed bugs where `coerce_to_sparse_matrix()` would error for completely sparse columns. (#77) # sparsevctrs 0.1.0 * Initial CRAN submission. sparsevctrs/inst/0000755000176200001440000000000014744254164013621 5ustar liggesuserssparsevctrs/inst/doc/0000755000176200001440000000000014744254164014366 5ustar liggesuserssparsevctrs/inst/doc/design.R0000644000176200001440000000036614744254164015767 0ustar liggesusers## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(sparsevctrs) sparsevctrs/inst/doc/design.html0000644000176200001440000003361714744254164016537 0ustar liggesusers Design behind sparsevctrs

Design behind sparsevctrs

library(sparsevctrs)

The sparsevctrs package produces 3 things; ALTREP classes, matrix/data.frame converting functions, helper functions. This document outlines the rationale behind each of these and the decisions behind them.

The primary objective of this package is to provide tools to work with sparse data in data.frames/tibbles. The next highest priority is execution speed. This means that algorithms and methods in this package are written to minimize memory allocations whenever possible, once that is done, running the code as fast as we can. These choices are made because this package was written to deal with tasks that were otherwise not possible due to memory constraints.

Altrep Functions

The functions sparse_double() and its relatives are used to construct sparse vectors of the noted type. To work they all need 4 pieces of information:

  • values
  • positions
  • length
  • default (defaults to 0)

The values need to match the type of the function name or be easily coerced into the type (double -> integer). The positions should be integers or doubles that can losslessly be turned into integers. The length should be a single non-negative integer-like value.

Values and positions are paired, and will thus be expected to be the same length, furthermore, positions are expected to be sorted in increasing order with no duplicates. The ordering is done to let the various extraction methods work as efficiently as possible.

These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible.

The input of these functions mirrors the values stored in the ALTREP class that they produce.

Converting Functions

3 functions fall into this category:

  • coerce_to_sparse_data_frame()
  • coerce_to_sparse_tibble()
  • coerce_to_sparse_matrix()

the first two take a sparse matrix from the Matrix package and produce a data.frame/tibble with sparse columns. The last one takes a data.frame/tibble with sparse columns and produces a sparse matrix using the Matrix package.

These functions are expected to be inverse of each other, such that coerce_to_sparse_matrix(coerce_to_sparse_data_frame(x)) returns x back. They are made to be highly performant both in terms of speed and memory consumption, Meaning that sparsity is applied when appropriate.

These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. It is in part why data.frames with sparse vectors with different can’t be used with coerce_to_sparse_matrix() yet.

Helper Functions

There are 3 types of helper functions. First, we have the is_* family of functions. The specific is_sparse_double() and more general is_sparse_vector() can be used as a way to determine whether a vector is an ALTREP sparse vector. This is otherwise hard to tell as as.numeric() can’t tell the difference.

Secondly, we have the extraction functions. They are sparse_values() and sparse_positions(). These extract the values and positions respectively, without materializing the whole dense vector. These functions are made to work with non-sparse vectors as well to make them more ergonomic for the user. Internally they call is_sparse_vector(), so the choice to return something useful as the alternative wasn’t hard. There is no sparse_length() function as length() works with these types of

The last type of helper function is less clearly defined and is expanded as needed. The functions provide alternatives to functions that don’t have ALTREP support. Such as mean(). Calling mean() on a sparse vector will force materialization, and then calculate the mean. This is memory inefficient as it could have been calculated like so.

sum(sparse_values(x)) / length(x)

These functions, all starting with the name prefix sparse_*, are made to work with non-sparse vectors for the same reasons listed above regarding ergonomic use.

FAQ

Why aren’t the results returned as {vctrs} classes?

As it stands right now, it is viewed to be beneficial to have the users not be alerted to these vectors as they are expected to be used internally in packages and rarely by the end user. Furthermore having these sparse vectors produce the same result as dense vectors with class() is a big plus.

Will this package try to replace the {Matrix} package?

Not at all. The sparse vector types provided in this package mimic those created with Matrix::sparseVector(). They work with different types and allow for different defaults. None of the matrix operations will be reimplemented here.

sparsevctrs/inst/doc/design.Rmd0000644000176200001440000001150314741321215016270 0ustar liggesusers--- title: "Design behind sparsevctrs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Design behind sparsevctrs} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(sparsevctrs) ``` The sparsevctrs package produces 3 things; ALTREP classes, matrix/data.frame converting functions, helper functions. This document outlines the rationale behind each of these and the decisions behind them. The primary objective of this package is to provide tools to work with sparse data in data.frames/tibbles. The next highest priority is execution speed. This means that algorithms and methods in this package are written to minimize memory allocations whenever possible, once that is done, running the code as fast as we can. These choices are made because this package was written to deal with tasks that were otherwise not possible due to memory constraints. ## Altrep Functions The functions `sparse_double()` and its relatives are used to construct sparse vectors of the noted type. To work they all need 4 pieces of information: - `values` - `positions` - `length` - `default` (defaults to 0) The values need to match the type of the function name or be easily coerced into the type (double -> integer). The positions should be integers or doubles that can losslessly be turned into integers. The length should be a single non-negative integer-like value. Values and positions are paired, and will thus be expected to be the same length, furthermore, positions are expected to be sorted in increasing order with no duplicates. The ordering is done to let the various extraction methods work as efficiently as possible. These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. The input of these functions mirrors the values stored in the ALTREP class that they produce. ## Converting Functions 3 functions fall into this category: - `coerce_to_sparse_data_frame()` - `coerce_to_sparse_tibble()` - `coerce_to_sparse_matrix()` the first two take a sparse matrix from the Matrix package and produce a data.frame/tibble with sparse columns. The last one takes a data.frame/tibble with sparse columns and produces a sparse matrix using the Matrix package. These functions are expected to be inverse of each other, such that `coerce_to_sparse_matrix(coerce_to_sparse_data_frame(x))` returns `x` back. They are made to be highly performant both in terms of speed and memory consumption, Meaning that sparsity is applied when appropriate. These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. It is in part why data.frames with sparse vectors with different can't be used with `coerce_to_sparse_matrix()` yet. ## Helper Functions There are 3 types of helper functions. First, we have the `is_*` family of functions. The specific `is_sparse_double()` and more general `is_sparse_vector()` can be used as a way to determine whether a vector is an ALTREP sparse vector. This is otherwise hard to tell as `as.numeric()` can't tell the difference. Secondly, we have the extraction functions. They are `sparse_values()` and `sparse_positions()`. These extract the values and positions respectively, without materializing the whole dense vector. These functions are made to work with non-sparse vectors as well to make them more ergonomic for the user. Internally they call `is_sparse_vector()`, so the choice to return something useful as the alternative wasn't hard. There is no `sparse_length()` function as `length()` works with these types of The last type of helper function is less clearly defined and is expanded as needed. The functions provide alternatives to functions that don't have ALTREP support. Such as `mean()`. Calling `mean()` on a sparse vector will force materialization, and then calculate the mean. This is memory inefficient as it could have been calculated like so. ```r sum(sparse_values(x)) / length(x) ``` These functions, all starting with the name prefix `sparse_*`, are made to work with non-sparse vectors for the same reasons listed above regarding ergonomic use. ## FAQ > Why aren't the results returned as {vctrs} classes? As it stands right now, it is viewed to be beneficial to have the users not be alerted to these vectors as they are expected to be used internally in packages and rarely by the end user. Furthermore having these sparse vectors produce the same result as dense vectors with `class()` is a big plus. > Will this package try to replace the {Matrix} package? Not at all. The sparse vector types provided in this package mimic those created with `Matrix::sparseVector()`. They work with different types and allow for different defaults. None of the matrix operations will be reimplemented here.sparsevctrs/README.md0000644000176200001440000001032014744250027014111 0ustar liggesusers # sparsevctrs sparsevctrs website [![R-CMD-check](https://github.com/r-lib/sparsevctrs/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-lib/sparsevctrs/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/r-lib/sparsevctrs/branch/main/graph/badge.svg)](https://app.codecov.io/gh/r-lib/sparsevctrs?branch=main) The goal of sparsevctrs is to provide a sparse vector [ALTREP](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html) class. With this, you can have sparse data in the form of sparse columns in `data.frame` or [tibble](https://tibble.tidyverse.org/). Due to the nature of how ALTREP vectors work, these sparse vectors will behave like the normal dense vectors you are used you. The vectors will contain their sparseness as much as they can, and only materialize when they have to. ## Installation You can install the development version of sparsevctrs like so: ``` r remotes::install_github("r-lib/sparsevctrs") ``` ## Examples A sparse vector, here specifically a sparse double vector, will be identical to its dense counterpart, often with a smaller memory footprint. ``` r library(sparsevctrs) library(lobstr) x_sparse <- sparse_double(value = c(3, 1, 10), position = c(2, 7, 15), length = 1000) x_dense <- numeric(1000) x_dense[2] <- 3 x_dense[7] <- 1 x_dense[15] <- 10 obj_size(x_sparse) #> 936 B obj_size(x_dense) #> 8.05 kB identical(x_sparse, x_dense) #> [1] TRUE ``` The memory of a sparse vector is proportional to the number of elements plus a constant. This means that increasing the length of a sparse vector doesn’t increase how much memory it uses. Unlike dense vectors who has a much smaller constant, but increases according to the length of the values. ``` r x_sparse_0 <- sparse_double(numeric(), integer(), length = 0) x_sparse_1000 <- sparse_double(numeric(), integer(), length = 1000) x_sparse_1000000 <- sparse_double(numeric(), integer(), length = 10000000) obj_size(x_sparse_0) #> 888 B obj_size(x_sparse_1000) #> 888 B obj_size(x_sparse_1000000) #> 888 B x_dense_0 <- numeric(0) x_dense_1000 <- numeric(1000) x_dense_1000000 <- numeric(10000000) obj_size(x_dense_0) #> 48 B obj_size(x_dense_1000) #> 8.05 kB obj_size(x_dense_1000000) #> 80.00 MB ``` These sparse vectors are compatible with tibbles and data frames. ``` r library(tibble) set.seed(1234) tibble( x = sample(1:1000), y = sparse_double(1, 7, 1000) ) #> # A tibble: 1,000 × 2 #> x y #> #> 1 284 0 #> 2 848 0 #> 3 918 0 #> 4 101 0 #> 5 623 0 #> 6 905 0 #> 7 645 1 #> 8 934 0 #> 9 400 0 #> 10 900 0 #> # ℹ 990 more rows ``` ## Motivation Sparse data happens from ingestion and preprocessing calculations. text to counts, dummy variables etc etc There are computational tools for calculations using sparse matrices, specifically the Matrix package and some modeling packages (e.g., xgboost, glmnet, etc.). We want to utilize these tools as best we can without making redundant implementations. However, sparse matrices are not great for data in general, or at least not until the very end, when mathematical calculations occur. Converting everything to “numeric” is problematic for dates, factors, etc. There are good reasons why data frames were created in the first place. Matrices are efficient but primitive. The problem is that many tools, especially the tidyverse, rely on data frames since they are more expressive and accommodate different variable types. We need to merge and filter rows/columns, etc, in a flexible and user-friendly way. (joins, pivoting) Having a sparse representation of data that allows us to use modern data manipulation interfaces, keeps memory overhead low, and can be efficiently converted to a more primitive matrix format so that we can let Matrix and other packages do what they do best. This is achieved with this package, by providing sparse vectors that fit into a data frame. Along with converting tools between sparse matrices and data frames. sparsevctrs/build/0000755000176200001440000000000014744254164013743 5ustar liggesuserssparsevctrs/build/vignette.rds0000644000176200001440000000032414744254164016301 0ustar liggesusersmP0 0Q0&&|^'7\,4L^םB0vAE|-0 )S*KG$> Hn"+ Z $8LYޞgf'x#ןwH)ur[O@6]bv ЃًQ!%$ isparsevctrs/man/0000755000176200001440000000000014741572451013416 5ustar liggesuserssparsevctrs/man/sparsity.Rd0000644000176200001440000000211514741572451015562 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparsity.R \name{sparsity} \alias{sparsity} \title{Calculate sparsity of data frames, matrices, and sparse matrices} \usage{ sparsity(x, sample = NULL) } \arguments{ \item{x}{a data frame, matrix of sparse matrix.} \item{sample}{a integer or \code{NULL}. Number of rows to sample to estimate sparsity. If \code{NULL} then no sampling is performed. Will not be used when \code{x} is a sparse matrix. Defaults to \code{NULL}.} } \value{ a single number, between 0 and 1. } \description{ Turning data frame with sparse columns into sparse matrix using \code{\link[Matrix:sparseMatrix]{Matrix::sparseMatrix()}}. } \details{ Only numeric 0s are considered zeroes in this calculations. Missing values, logical vectors and then string \code{"0"} aren't counted. } \examples{ # data frame sparsity(mtcars) # Matrix set.seed(1234) mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) colnames(mat) <- letters[1:10] sparsity(mat) # Sparse matrix sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) sparsity(sparse_mat) } sparsevctrs/man/coerce_to_sparse_data_frame.Rd0000644000176200001440000000253614741321215021363 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce.R \name{coerce_to_sparse_data_frame} \alias{coerce_to_sparse_data_frame} \title{Coerce sparse matrix to data frame with sparse columns} \usage{ coerce_to_sparse_data_frame(x, call = rlang::caller_env(0)) } \arguments{ \item{x}{sparse matrix.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ data.frame with sparse columns } \description{ Turning a sparse matrix into a data frame } \details{ The only requirement from the sparse matrix is that it contains column names. } \examples{ \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} set.seed(1234) mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) colnames(mat) <- letters[1:10] sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) sparse_mat res <- coerce_to_sparse_data_frame(sparse_mat) res # All columns are sparse vapply(res, is_sparse_vector, logical(1)) \dontshow{\}) # examplesIf} } \seealso{ \code{\link[=coerce_to_sparse_tibble]{coerce_to_sparse_tibble()}} \code{\link[=coerce_to_sparse_matrix]{coerce_to_sparse_matrix()}} } sparsevctrs/man/sparse_logical.Rd0000644000176200001440000000345214741321215016666 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_logical.R \name{sparse_logical} \alias{sparse_logical} \title{Create sparse logical vector} \usage{ sparse_logical(values, positions, length, default = FALSE) } \arguments{ \item{values}{logical vector, values of non-zero entries.} \item{positions}{integer vector, indices of non-zero entries.} \item{length}{integer value, Length of vector.} \item{default}{logical value, value at indices not specified by \code{positions}. Defaults to \code{FALSE}. Cannot be \code{NA}.} } \value{ sparse logical vector } \description{ Construction of vectors where only values and positions are recorded. The Length and default values determine all other information. } \details{ \code{values} and \code{positions} are expected to be the same length, and are allowed to both have zero length. Allowed values for \code{value} are logical values. Missing values such as \code{NA} and \code{NA_real_} are allowed. Everything else is disallowed, The values are also not allowed to take the same value as \code{default}. \code{positions} should be integers or integer-like doubles. Everything else is not allowed. Positions should furthermore be positive (\code{0} not allowed), unique, and in increasing order. Lastly they should all be smaller that \code{length}. For developers: setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a message each time a sparse vector has been forced to materialize. } \examples{ sparse_logical(logical(), integer(), 10) sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10) str( sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000) ) } \seealso{ \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_integer]{sparse_integer()}} \code{\link[=sparse_character]{sparse_character()}} } sparsevctrs/man/sparse_dummy.Rd0000644000176200001440000000225714741321215016411 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_dummy.R \name{sparse_dummy} \alias{sparse_dummy} \title{Generate sparse dummy variables} \usage{ sparse_dummy(x, one_hot = TRUE) } \arguments{ \item{x}{A factor.} \item{one_hot}{A single logical value. Should the first factor level be included or not. Defaults to \code{FALSE}.} } \value{ A list of sparse integer dummy variables. } \description{ Generate sparse dummy variables } \details{ Only factor variables can be used with \code{\link[=sparse_dummy]{sparse_dummy()}}. A call to \code{as.factor()} would be required for any other type of data. If only a single level is present after \code{one_hot} takes effect. Then the vector produced won't be sparse. A missing value at the \code{i}th element will produce missing values for all dummy variables at thr \code{i}th position. } \examples{ x <- factor(c("a", "a", "b", "c", "d", "b")) sparse_dummy(x, one_hot = FALSE) x <- factor(c("a", "a", "b", "c", "d", "b")) sparse_dummy(x, one_hot = TRUE) x <- factor(c("a", NA, "b", "c", "d", NA)) sparse_dummy(x, one_hot = FALSE) x <- factor(c("a", NA, "b", "c", "d", NA)) sparse_dummy(x, one_hot = TRUE) } sparsevctrs/man/sparse_mean.Rd0000644000176200001440000000175414741321215016177 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_mean.R \name{sparse_mean} \alias{sparse_mean} \title{Calculate mean from sparse vectors} \usage{ sparse_mean(x, na_rm = FALSE) } \arguments{ \item{x}{A sparse numeric vector.} \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} } \value{ single numeric value. } \description{ Calculate mean from sparse vectors } \details{ This function, as with any of the other helper functions assumes that the input \code{x} is a sparse numeric vector. This is done for performance reasons, and it is thus the users responsibility to perform input checking. } \examples{ sparse_mean( sparse_double(1000, 1, 1000) ) sparse_mean( sparse_double(1000, 1, 1000, default = 1) ) sparse_mean( sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) ) sparse_mean( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) ) sparse_mean( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), na_rm = TRUE ) } sparsevctrs/man/sparse_integer.Rd0000644000176200001440000000363414741321215016713 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_integer.R \name{sparse_integer} \alias{sparse_integer} \title{Create sparse integer vector} \usage{ sparse_integer(values, positions, length, default = 0L) } \arguments{ \item{values}{integer vector, values of non-zero entries.} \item{positions}{integer vector, indices of non-zero entries.} \item{length}{integer value, Length of vector.} \item{default}{integer value, value at indices not specified by \code{positions}. Defaults to \code{0L}. Cannot be \code{NA}.} } \value{ sparse integer vector } \description{ Construction of vectors where only values and positions are recorded. The Length and default values determine all other information. } \details{ \code{values} and \code{positions} are expected to be the same length, and are allowed to both have zero length. Allowed values for \code{value} is integer values. This means that the double vector \code{c(1, 5, 4)} is accepted as it can be losslessly converted to the integer vector \code{c(1L, 5L, 4L)}. Missing values such as \code{NA} and \code{NA_real_} are allowed. Everything else is disallowed, This includes \code{Inf} and \code{NaN}. The values are also not allowed to take the same value as \code{default}. \code{positions} should be integers or integer-like doubles. Everything else is not allowed. Positions should furthermore be positive (\code{0} not allowed), unique, and in increasing order. Lastly they should all be smaller that \code{length}. For developers: setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a message each time a sparse vector has been forced to materialize. } \examples{ sparse_integer(integer(), integer(), 10) sparse_integer(c(4, 5, 7), c(2, 5, 10), 10) str( sparse_integer(c(4, 5, 7), c(2, 5, 10), 1000000000) ) } \seealso{ \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_character]{sparse_character()}} } sparsevctrs/man/type-predicates.Rd0000644000176200001440000000251014741321215016773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/type-predicates.R \name{type-predicates} \alias{type-predicates} \alias{is_sparse_vector} \alias{is_sparse_numeric} \alias{is_sparse_double} \alias{is_sparse_integer} \alias{is_sparse_character} \alias{is_sparse_logical} \title{Sparse vector type checkers} \usage{ is_sparse_vector(x) is_sparse_numeric(x) is_sparse_double(x) is_sparse_integer(x) is_sparse_character(x) is_sparse_logical(x) } \arguments{ \item{x}{value to be checked.} } \value{ single logical value } \description{ Helper functions to determine whether an vector is a sparse vector or not. } \details{ \code{is_sparse_vector()} is a general function that detects any type of sparse vector created with this package. \code{is_sparse_double()}, \code{is_sparse_integer()}, \code{is_sparse_character()}, and \code{is_sparse_logical()} are more specific functions that only detects the type. \code{is_sparse_numeric()} matches both sparse integers and doubles. } \examples{ x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) is_sparse_vector(x_sparse) is_sparse_vector(x_dense) is_sparse_double(x_sparse) is_sparse_double(x_dense) is_sparse_character(x_sparse) is_sparse_character(x_dense) # Forced materialization is_sparse_vector(x_sparse[]) } sparsevctrs/man/has_sparse_elements.Rd0000644000176200001440000000233014741321215017715 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/has_sparse_elements.R \name{has_sparse_elements} \alias{has_sparse_elements} \title{Check for sparse elements} \usage{ has_sparse_elements(x) } \arguments{ \item{x}{a data frame, tibble, or list.} } \value{ A single logical value. } \description{ This function checks to see if a data.frame, tibble or list contains one or more sparse vectors. } \details{ The checking in this function is done using \code{\link[=is_sparse_vector]{is_sparse_vector()}}, but is implemented using an early exit pattern to provide fast performance for wide data.frames. This function does not test whether \code{x} is a data.frame, tibble or list. It simply iterates over the elements and sees if they are sparse vectors. } \examples{ \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} set.seed(1234) n_cols <- 10000 mat <- matrix(sample(0:1, n_cols * 10, TRUE, c(0.9, 0.1)), ncol = n_cols) colnames(mat) <- as.character(seq_len(n_cols)) sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) res <- coerce_to_sparse_tibble(sparse_mat) has_sparse_elements(res) has_sparse_elements(mtcars) \dontshow{\}) # examplesIf} } sparsevctrs/man/sparsevctrs-package.Rd0000644000176200001440000000221114741321215017637 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparsevctrs-package.R \docType{package} \name{sparsevctrs-package} \alias{sparsevctrs} \alias{sparsevctrs-package} \title{sparsevctrs: Sparse Vectors for Use in Data Frames} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} Provides sparse vectors powered by ALTREP (Alternative Representations for R Objects) that behave like regular vectors, and can thus be used in data frames. Also provides tools to convert between sparse matrices and data frames with sparse columns and functions to interact with sparse vectors. } \seealso{ Useful links: \itemize{ \item \url{https://github.com/r-lib/sparsevctrs} \item \url{https://r-lib.github.io/sparsevctrs/} \item Report bugs at \url{https://github.com/r-lib/sparsevctrs/issues} } } \author{ \strong{Maintainer}: Emil Hvitfeldt \email{emil.hvitfeldt@posit.co} (\href{https://orcid.org/0000-0002-0679-1945}{ORCID}) Other contributors: \itemize{ \item Davis Vaughan \email{davis@posit.co} [contributor] \item Posit Software, PBC [copyright holder, funder] } } \keyword{internal} sparsevctrs/man/sparse_sd.Rd0000644000176200001440000000207514741321215015662 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_sd.R \name{sparse_sd} \alias{sparse_sd} \title{Calculate standard diviation from sparse vectors} \usage{ sparse_sd(x, na_rm = FALSE) } \arguments{ \item{x}{A sparse numeric vector.} \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} } \value{ single numeric value. } \description{ Calculate standard diviation from sparse vectors } \details{ This function, as with any of the other helper functions assumes that the input \code{x} is a sparse numeric vector. This is done for performance reasons, and it is thus the users responsibility to perform input checking. Much like \code{\link[=sd]{sd()}} it uses the denominator \code{n-1}. } \examples{ sparse_sd( sparse_double(1000, 1, 1000) ) sparse_sd( sparse_double(1000, 1, 1000, default = 1) ) sparse_sd( sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) ) sparse_sd( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) ) sparse_sd( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), na_rm = TRUE ) } sparsevctrs/man/extractors.Rd0000644000176200001440000000212414741321215016070 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/extractors.R \name{extractors} \alias{extractors} \alias{sparse_positions} \alias{sparse_values} \alias{sparse_default} \title{Information extraction from sparse vectors} \usage{ sparse_positions(x) sparse_values(x) sparse_default(x) } \arguments{ \item{x}{vector to be extracted from.} } \value{ vectors of requested attributes } \description{ Extract positions, values, and default from sparse vectors without the need to materialize vector. } \details{ \code{sparse_default()} returns \code{NA} when applied to non-sparse vectors. This is done to have an indicator of non-sparsity. for ease of use, these functions also works on non-sparse variables. } \examples{ x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1) sparse_positions(x_sparse) sparse_values(x_sparse) sparse_default(x_sparse) sparse_positions(x_dense) sparse_values(x_dense) sparse_default(x_dense) x_sparse_3 <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10, default = 3) sparse_default(x_sparse_3) } sparsevctrs/man/sparsevctrs_options.Rd0000644000176200001440000000216614741321215020032 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/options.R \name{sparsevctrs_options} \alias{sparsevctrs_options} \title{sparsevctrs options} \description{ These options can be set with \code{options()}. } \details{ \subsection{sparsevctrs.verbose_materialize}{ This option is meant to be used as a diagnostic tool. Materialization of sparse vectors are done silently by default. This can make it hard to determine if your code is doing what you want. Setting \code{sparsevctrs.verbose_materialize} is a way to alert when materialization occurs. Note that only the first materialization is counted for the options below, as the materialized vector is cached. Setting \code{sparsevctrs.verbose_materialize = 1} or \code{sparsevctrs.verbose_materialize = TRUE} will result in a message being emitted each time a sparse vector is materialized. Setting \code{sparsevctrs.verbose_materialize = 2} will result in a warning being thrown each time a sparse vector is materialized. Setting \code{sparsevctrs.verbose_materialize = 3} will result in an error being thrown each time a sparse vector is materialized. } } sparsevctrs/man/coerce-vector.Rd0000644000176200001440000000162114741321215016433 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce-vector.R \name{coerce-vector} \alias{coerce-vector} \alias{as_sparse_double} \alias{as_sparse_integer} \alias{as_sparse_character} \alias{as_sparse_logical} \title{Coerce numeric vector to sparse double} \usage{ as_sparse_double(x, default = 0) as_sparse_integer(x, default = 0L) as_sparse_character(x, default = "") as_sparse_logical(x, default = FALSE) } \arguments{ \item{x}{a numeric vector.} \item{default}{default value to use. Defaults to \code{0}. The values of \code{x} must be double or integer. It must not contain any \code{Inf} or \code{NaN} values.} } \value{ sparse vectors } \description{ Takes a numeric vector, integer or double, and turn it into a sparse double vector. } \examples{ x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, 0) x_sparse <- as_sparse_double(x_dense) x_sparse is_sparse_double(x_sparse) } sparsevctrs/man/sparse_character.Rd0000644000176200001440000000346114741321215017210 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_character.R \name{sparse_character} \alias{sparse_character} \title{Create sparse character vector} \usage{ sparse_character(values, positions, length, default = "") } \arguments{ \item{values}{integer vector, values of non-zero entries.} \item{positions}{integer vector, indices of non-zero entries.} \item{length}{integer value, Length of vector.} \item{default}{integer value, value at indices not specified by \code{positions}. Defaults to \code{""}. Cannot be \code{NA}.} } \value{ sparse character vector } \description{ Construction of vectors where only values and positions are recorded. The Length and default values determine all other information. } \details{ \code{values} and \code{positions} are expected to be the same length, and are allowed to both have zero length. Allowed values for \code{value} are character values. Missing values such as \code{NA} and \code{NA_real_} are allowed as they are turned into \code{NA_character_}. Everything else is disallowed. The values are also not allowed to take the same value as \code{default}. \code{positions} should be integers or integer-like doubles. Everything else is not allowed. Positions should furthermore be positive (\code{0} not allowed), unique, and in increasing order. Lastly they should all be smaller that \code{length}. For developers: setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a message each time a sparse vector has been forced to materialize. } \examples{ sparse_character(character(), integer(), 10) sparse_character(c("A", "C", "E"), c(2, 5, 10), 10) str( sparse_character(c("A", "C", "E"), c(2, 5, 10), 1000000000) ) } \seealso{ \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_integer]{sparse_integer()}} } sparsevctrs/man/figures/0000755000176200001440000000000014741321215015050 5ustar liggesuserssparsevctrs/man/figures/logo.png0000644000176200001440000010034414741321215016520 0ustar liggesusersPNG  IHDRޫh cHRMz&u0`:pQ<bKGD pHYs  ~tIME HIDATxwtu NUuD$A9gΐ89*lYm9\[Βu+Wr%gKd[&hr3CsQՍ Yk©s[PoC h衄)=*H Ҳ_}$kq1M3݀"n{z)i('Z)TkS j }X[Ҵ20\3Nљnz7"g1ݳ@m)_ÌBS~F(W% @!AJ pK~2>nj{#t<=ӏRQ$,Ţw4̄T.(MoraǓK^eK~O֟uH̓˥i}LwM9(&/F4٬/!e4rpNҒgobDu8BA݃װF !~>m oX֟Yţ[euHƢw=ѤG zBxkc`J##u,ސR}n&itV,xl1eDZ)>BU(ki0EJB(<,yfks/PJZFZ{fKJgی9VR-ň &>"~X9{^C.8s,+sɛ~ץ%f?>OKmwLw]"o#?wύ륊WPm`MDZ+sxɛJJJKԍ(^-\\V^ |{i.ǟK2M6W\?7dy3חpO0:m鮿Q$-'=ˈTPħ84lM\+ :AEI%HҔK E1b:{OWqǢH[6 :zД!~XI7cs\'o>ϣV䜛nfݞMKlW1HfP$4cGe~1#2{Bqn@q ԟrH$Eތz>ux1\`-${l%U3BӄE@ xuB dWd[e ΡQ.yRFHZOL}P\VOI4=oeod(DQ_Ad(\Ne}3э$ U{Y'q:pVH)bU.ISouDdk8Fɒw,q̚!ț3{f$kl_ iC,i}ۖ-D:puY"-e~/ x -0yw//yAO8yEsC*o1ys}KeXU강0i}vE݅0"IVyP(G9 ތ,#ȹDɛ2M-Y@t$K)d5G7F4gwXH"~J1eʦ1yw;}<yK*-+W=9t 3Ӿ?vH`nW[VŌ BU>+Xw/ rw,M/2Uys%ZZa CqO^mcQ$0P^O6qҐ5Eԯw,Mț;ZK)A’beME݅ފn#ē‘MI7Xn/ysdFM7yǼF+R/Zϴm{OaRи+ mx+YTi593&JG7w|;47"oI+}vՏ䟥e2p>1C'<~bGs&H܄n=ygwFɛgIdd4jA#Ĉt<33pmx+fܨV4SBe~)+OޏlRpcE AKcw@)iCZx+ibDtQ^}*>-yTx@=sވn2iz{cY1ț"n7QEէ%̤yG >z[q_5nV}BIɐc'C^"CLL0B(BEU4u έanP3pӣ[ז0"f*(|{rLɛ!ͱF}8aftw#JS-[CqޛFJ׬YJ՚ԀזyOE&b= :Q:&]MQ7O[a? FZjmu7Eu^n}8G“e ysܘf@8&ܷiD-<9 2g =Oy/ k& 'K5t3[IBN'y`fț CHUZϪ=5p#,x/xjGjKp%xM:ʠ&t"$;Sʢ&o^̓7YaɯٲELHa K?߃KPߖm"=Nޢn,ɛ\j [,ƒC zI FU_UpӶ>?z?:M:yg,H)AJGc)嗌pB-T-Ƚx PTőQ9"ojP33i7"͇Ѳ`2 ?`CZЋ넦~F(W,F떐pdy5H:\ůHK~Cز`ӗfy3ߍKG >)'ATE 7w ~3uuȍɛo&͊o&yuQU$dPş؍OjRzS9!DpLՏ)8țțMe~QقPӛ;OOެ7꺮w\Kc6-PZOb=#eiOUH=mzޝMn%0oe ɡ-S9`NJD0.yI *sX ެID3Yɬ.Y`cl{ň$Yyϡ⧏.s羭]0bzQ>%SvwR7"oALMlBlQyvH" /7K E'nr̄QX9!\5tDɛΨ`9O$* @Qq*=^mѳrr$Zﳗ+x뢺[K>y?+?%7{J}_M!xX>pf#L7{c1kc> }̓&񰪩|ʵϔUg--Eދ*V'*~m",sd&1f](yԠ,g]q5ETJyM?{>p߹x|L}0I:OLIS:aY1u c2&Dd4o,g,.%o RV]F*"(LX+G˖nF}:2o>oe.k@đM>MM25)NH$}3E^s}wHeB<^9V~0ax<%W zEZk6?xUHC72@:<%^Hr^?+4,tlL |$'oq&ȟNet&,*R}'f~5bi 8BTJU<>tkH[lo=0}L SB/釨X9,LV Uc*y b rɛ=7M^)!P?n~-eO2DZ5N Q MwK7?]tIK>y?r?=}xRnbI;5(unRj(JIeyU1ГM`F;|; +<Ғ_6cn4͘+,s??r%MUDZW~!>F!7C^df"yݣdMߔ $X{[Eެ)wHR__G]c k'M`k+yc8e((=˴>%R߸Q3ҟzO>dIv-mgE7y4y K{:+Y{ME޼(w<(B?ugy3׾1y3?y7C)&8ucFEnznE0'r}`D')Ϙ1ϥ 6^O;R|.aa>|ȒWG^yE~B9"w8+iMyE3Iޜ{YTUx$NgeZOKKzKWaY;=_A ʜNmVyG?ddyY)gKBU_v~ n1@274NlW}w% MP>jZ.bxu"}Q䝸Im'r[!on=l3;[ĘP5RT0=ԧu!DDP37sȋ@ SD;1Y H4ndnH;ԔEeɋ2u"GAмag6IϊINUm8/?gw e硶!%`':-`P&~qh#yG^C^fyot."-`(SXc  n5͊ &ozT 7=Frˍ'e㋚;RRf^ b9dLcoJ/G0d!Nb!lCE]ee &,`n(B|yLlV[ɛ#qțՖI'C^M7n R"y'j'/`NhSYҎM^-8A(wQDNPp# bj͐rWwhz#8]$AQj0ܾTri2%m>e++s WA^ o/E&h[PQWAx l 7/`  ߓoGk:!/'oy99nfLWjTϭ"X`k0@0ysg{H$7E޼(f./koaHgFWd5ن1O!/ȼYO-h }j=nMoҹ~GLΗ7!{^iE4R@ij!y 7 yswKyɛMלifn!MsEWdjye)w\Kߣ!#`":ak#i L&+'K^!{-""yg\z'@ޢ,pf0WPRY`0#OZ֒73:J^A쵺9]mɛ;;et?-`_%`n mB{9d#ocjz /Ye o&/No֣,yg;BC^S7ن4)ulCQ֪C$q7'Gd7\ 7];=jyix,PlRڃݚ$%`6$Vo8} u/3dެsVjzotͲTUMW⧬楯gx4m$AB}0?+:GkWN2[@^M7rɛaUs& I&H˜]$0 Θ eY19eX& so)Wf|ǏM^$2dis\ 7}̹W>r+Ifa\b%myy K(Suck7KP0MtC64C^yݻ`"v곬]J%eq]$2aY\GlUF+^`#B/bN{" $Ft9UGEQ0ǯ }Ip(B" Pм*A<OX&L σ/xϞ$HƒICP=ݏeI3˗yIh ՛{C7GYUAߩ7i*%I&c $zBi۶RES)*AOX ̄aC8BT_^K:XkEyV u$A2dK4'I\C4W`9E40yx%A-lbMj\JEi>}$Ro2vn،c F()b. (0t:8|$Ξ{x8i:x5A7cҕj^FCdv?g$IbZeĦH`'ΝAŋpl\P(_҉eZ^*+Xr;Voff^?x+9p('Oegd"e5,[m7q)vurq>A_g8a!AIY{nOs5DfMgmͣQUWM) rQ]kH闉ǧ(PPXxMu Ap?ڮr .\L2de] 떬w!z4 A>q}+|[M2EUמYT|5s;S]ZELc:o|dwn?w8wh)2~~,U(če>I^<:_hA,,< +/~w9|K V7/)[ @[_Pmg0o>'* 9Ľ %DRwV N@MsȞ'iKH4t|/ѝO։ųWG/<(On{o}.ZxVZES~NUs+w6>s_OHbXӼ\•پ_g8]Pȯ=٬6\l мW;K ,RU> |OǨ()'aIc߿'SZf `q +0L!D7y,(q?O_e# C,{mg0pGߨ^/ 6lݣ<>o^\%b5l]=vĆSQW8sP˴B5 VP,ïq9z0-J-#kwL')CDQT*+/e-sYZDQ*jg$=^pzj6.^ƕgf)2|i?<3 DwNF(/-c㲵ja4T ɁSG@@y k6GxDmenMAx=lyU/iJ}%~?ZK C!MjNy'}~ﻜv]שaulXFM$-`0slO,MjV{o>|H(C"(L\PJ 2PVÆU 극|ё8ey4?gܣ?ækc? $ɨ,,I"|~ 1 H<^O_(:\_f5,2)+-#[ #~bzXl$#0޽#ޣvt85<<Ӵ?;Nx(iXhGpY~wr}7F뜺|Ρ7bZ.r7s2ZTj*ٺ|ax(eZh^r} ~OcX_{tw.B( ƿ_#&O`cD,~)F}meDH4s֤3%W?yM (TUÄFFKn'MLĒ)J4;^DI#:Xˁxٰ ne ݶB&mKeM5+R|bJKi6HjW ?&]qے MSSYH,+kYiam6UT n깼b/L2t>?K2Js|-_2%#a;nl\Z4;ݵ|.aZl9˵􄎢TVO0w^mH,)bi[uv9kVyQn+ygY%Zmxm; u 8JJWohx$n}m:}m}ǐw#!_v-`m'K̨!"7y1yK`KO&0,@9Ux7Mnxs}q_sW73oCyc'bIy>e4Λqeu}R.]!2=ߍaԖU m]rRJ_RRt{{O{w6]_W CKL:'ZCX9&Sc4,bHj&mqZk| "XΏEWg7FȈHKlo1zaId0ӗJ?<}~>Ƕ=A8o-z:>5OD ;WV&t{h< J*>oZͯ? λ`La7Kpp nh"/S#o4%iHƓ;8@,Oy>lP f.t9~5eH)2gnOm݃GWyLn1qorGH'lL" 6tD0 !TT<vrHtq V_ƶbj X9mard,kYX rJJ쭓+-dB޾Nǣ/4o>U Go=WPP硿 nɛE^gZvySWG{_'(F6{|5sx|,|SK/%!_ #|sd6{~w3gHD=4ʕo^c[+t.q6~Yl1OQWQC=CqPN>&K!4n;Sn:YeR<>]ia@<~HioTpTO x) :}=q.+NY$]B.\IEV?)f[eS`W?x װgn~ɟNG6U g=ap4ļz,_IgG`&n7VF~-])L{{b,_s; pϠT~0Yj9ұ{?G9|G˲f-<zoCZZC,XIV_'?zg+aPSYxtT+Pd.=ewg/?*K,bϚH$yށ~+ĖhnbGfMqY/>Ҝ*TVW?,ry4yx}l^W/0Jg1t!PUZ7v̾sij'4*+X4Mr1Hrt4cl^_o^x뭭ϙd,INrm3{WmK'Hiú)т\ܚ̤I[{;'񙇞NKyb 5kA^`hâ<6,X͙ |oD@%-`#`pC˘ƚys :@`Z&BSq,) x|Bj_+zy僷->v,̽K0`Z&%~P7~D۵.iZD";TU>“{&bx5Ao.D}dQ-<%CKv>J$eNe? zE( Sg#Kɳ$ɒ$" ڮvWu܎ٵr;W 1MBhTEe(S ~tE,Fb8uO 2>8y8{4+i(3WJ?wٴbK/E(8w"'Ν= /=|8zK< 8t易@b&M^m᏿$ݝyZ]3^Y뗬_K9rm=DCy1K w\%: t)0?y*yE8d}x<$hd4A2(\iK=>y:"pͧ(Qd2I,JjG|5fWsdy~;Z2$$0:{gkh>vQ$-Iޛ)+I^ͤG$#ɴJ&E03+߃/``&Dd,I2Djɒz3~w#i;Ofy4v[׾n%IĜt)P*NhoGO.a;<^ էӾn!4ףbdԻ c+?ez 4T<ĆgJRyE^&/8^#i`HNӉ-+R)iDbJZ҉}t![)I3qF"XqĹԳ ,;׉b$dHq[1-y,;efϱr9/,BF;V{]/Kis/ȜpK)5$\{|F3Lsc|H *CP&'oVO!/cȐ50V JYy(4V$]2F3n\-0kIu07ȸ*KS3с5wYW8)kRϓz9)}ҏמq+` E 2#Lo.y"2BLf7!-o̺GRhYkcnor7׋i8k 9OqzN(͓-_PL_Fz,bMY#oy )`pC]=gMLnY`J2ʹ RYD^䝍%`*LZ~e- ȹLmCUd$ bwd2DȜ yal"yg %t^鲣 x Y &CE^2RAy4bhA |%؛%o5䝭-`>@,+Hڴ u?Li\?tވE޵HU`ku }Ua,hA;3m7c(s{$-Mut5|ﺮ"y)*ʂ40bGفn";aVf'H3_&{;&yuy|ސ2EV2*f %kN"oj)T8yOGޱV*]wO&ģr+:]/`pC<-"oMɐ7wY"yFU` _"'4y8I}'K̓; ]a,(T ׾nyeYE]Ȳ 3Y'e5yqBwZ$` Jg %R<>Pj1m$/9ެ8!9+Y%1B}!F c Ju#1װpeu5IxĞ֍EtEE g(B4-KXe)-g[9Ȭq!% |kA$ ГaIĝȴF7!E1;`_RV]ƺݫhg$"q,K ڋic&T%HĒ%#MŒb7/y[,*RݫU K|$]WI&t'av ͮD ,Y9U}$g\`kX8i1y.fLHk{7##`(gd P0d=Gʦ/x&XazZ{e&JJQ'KLŔ8u Fw3\~vr?<`x̊ R {Yg͎6Ts\޶>t /2x  cJ;VZq)~a[lwӶ6STǡ׎rt $Z"#}El ހZ$ϙ"Y]g^=Ud[-ʉk ϶ ' 4y0ep?`(z :mb&Tq{;mIJw6{鄢*˃ί&T`((F@7ױ lx`-p~(zլjnVJ+2-A<`{c=ii;P.me}ʢ"nTJyC2KfмJ+KhZ1OmeD"|(?@ Hvȧ\ {)2p 0zb#t@TD虗ٗpMI$EODGx-nd<rHq7( ,&0T͘!7$6ifg?>}K !KFM^7!QI p$b {6f}YZv<:vgZmwS3M y72'z6pm}lK/<}p些ѷ- <>niC*$2UHmÃؼg^S+GiۃrW uzSKᔻ)' 8]l|h=w˜zq |і"ȃ\$m$bIG0:a?$r۞mNH 8)cK\hyg{3@JcCᴻi=+kЫǸpC=CvI[PmO V`HwlC&K\DL8DfN/iX=p܉ȲKS3gA:I׵L"qET`+0 nYGސȹg%۞HD^eB"'͇xZNBӛ2p%cIa;}Oo6=o>n:qvhǒ.wS0*bf0  X!DNgԧp #LDT(xfTr퓜z m=qw!KzG7yC"W:! sգ~ yB":~i%enweIK0=ֲ54s赣mcd0e9Uhӥu"]FI#tM= vQ$IFs }0L$$2 !Sk _;q{u5ܲkzSK괺i$Ml~d#+.㡟9 9q:vJCGz U+0T;I |d_yï =wb$q 6pI\{==lzxC&}+Ga!N1DJPk '!`/$r% 9N}p["i>3R-&e8=mb<lWi͟ Ayi_D D|zsnqHTU{67Q==akVs蕣\?BO}@K)ER;0Ǭ0s/pô]찣9$r:ӌD=ofTϭ99;=EwӬ %`P;d\=Cސsfܻ S79{=.$r2ͤ\7$0AZD g=+2gA=_;FŎ4ًQ))8glf3[Y |g]`{жb~7r M>M2+}@lyt#wL.`0heSSlM$n %`_6!7KƇֳiz'$4|)R{B" !o &ɸp_=lڳN\ǡWp5B}eSyYq 9!۟ŠmˈB"9E_ *з|]M8y(Jo[[TWr|t&}O #`%` \~;F;$C\8t!'NvDN%͊~P8M=K߳ၵo-<9uz-Z()xd J0dDbW92ƘȩBK=x.R="C[zfnr~{q]0Tyk E(o39Uhx7Qp> I01Ȧ7j .sc_r)ƅlC-`hGOh`yxj+K7..ȩbv7'40BwK[Ⱥݫkᰓg('}Oq6yTϫ"3=*TWttHdǕ.xKǮ))?sՒ/~BC7H8e8z4-Of%~bzRq)Ҹcd #Ѽ[îCe]]~ȕ!Hf'gw&m(RQ9iKiǓ C"kP;d\'M`i9>`S;|r%,߲~ƒa^=81p0 KZ"y6pIKb$ b#qM4i0ӳpqI}{ 5Un-K4q|]p~co+3LSUBZxwE!MTpveZ4,5TU&IJ wz|pTԔ, PVUJ6·vVtP;z6Ct- AXPPLL;:p,\LÒyHpTMF /(Fpf,(ie#kwagF#NHޱ7NVDmC_MT 4k{R4pM QUWUl}|s!qcxWD1u\K8-;OmeXT&;kP\biduɚ{`ٖ{X!cKi%cѝ%tnGވw웋T&lW~uMu['v:P4k{'.A~cQ6GC2d΂zY@*]'5S4pM,YRdlj ۟܊7ࣧ~aF{ΞJz'qv$Vjz GǶ6xBjUs±b bTȪ;$,`D,A]c-ýÄBӑ \[nM$ýt_E"iXbkˉG$cIL*C~7裪Wph^D.>>iU\9q . E$::#Gkz ˡn}lwn~ _;ғ=JSY"װM;p/Z7S0ۡV.v([ 7cPG v ڋh\+)AhTY3yv;[!{_8HǥN$lzx=GG^M|9*NߓeټgNqǯ%CYu) 4ۅW73=ȁsn]8;O -P_ V`S'#GӁ=(F 9{6h*TR\wOwWVȗ%̭ۗVpv1ngzT`89@d0d(w:;c n;k{ek68QDJ6ղulڳO9:/wB }mģtn;[-bn\Lͼjq19hU{r gV goa#q'=NE8:t;rQ+}GKW$A^wS4P0-=!t ,Ԏ;;-cKX~}}x4Pfeޒl|h=15~ !S+k7*s4 R$-[݄KO8{ [5]40Y4,l\DNHp略+'w>+ VEPt] fkw7g%?W4EU \t`!Pu@`=}x^g&|%>b#=f)_Hs?wNz`)N:Kd:PZY{Vм.qUbNt?pi}QєSHе0nUh>-<ā%@ٝ1 C"EkU=N€YnJ["{X{j#1pïǩ?kFPYWƇS^[wN~!;47aQ:!>BMcz7*M )+( !3@w" kO ҼhծE{FDzmю噐+]9Y':-%2R~ulzx=aqc$1R|7HOO8.jV5IES̈́PI&1{eI~XEvBTU`u3 Kv@dDzLgv""J}* Vn[ z^"ZlLUzDZH VU4,s8pjY"S(kkشgG -%`RƐ@ZwCW>}$A'.aBN!.*̓IEEg4(wS2UN+9{ɰn+7$.$V7q3 8Y>^nrH|Ã8)zgM;kK\`gRךSX|]񪧬!XTz&M "q=dɗ%i|v}t'\Hr|0]W%U!P`%,ߺswP$wy+XJSQK҇/{9%ćCիK*>H;R!D`;NB>wS4AJJh^D(nu[ i ^NwTg+0TUW3o\;ϵS׉XViKBiSտ'd(A$#S BkFWjNuӀ nAJx. ,x4 \n+K;$򉫼tdI6!%[Ŧ=8I:vJ)H^3Uݱ0m$wן6r YGj B cg-MN}x1Mէ%Tv YQ(<ۑwEj4.o`&Ә!Q;H ˘Bb[pSedp s"OW{2e29i-qt;kP}Z۵77@($B[ىw LK곗՛l`Ӟ 5q#\9~ɧTVlv<9t\dߋ|*+&bk4}3 Gi?{Ǟ|p#8)Rm>Mףzcf\Ӿ ,I'}Og<8|N:=cf [D=p/Ba;atrkk0 K"uL%0P}Z]i-m10Zq¹u(E$ u#i0g]@J$hӰu7-lkG!:&9<7ױi,Ӳ#fP}-ky8率j!t-+A{Z0-[ae|u7IX(Jo[?C=CW`u3C(؈]4S)e;$rcoqݚiê&޷0dd~ϥ)32(ܖ6p]<; ţE;BR-N L3LOK/BK=եN=yE ܵ>yEnelgk* +/Q#-V̯o0I̘֎zET<."4P-<1{F"Tϫbj;}eR{B",gٙ Sd!EETΜAJiIK'+iQkKw0A< R#[4_BteӃ|{ZD[^Y H#i( %AV6!KDvMS5l~%` \2? )_{$mI8βիEefLUabE"O \={G7ձpM3afv^_H+$r pv*qy\Kob \2IO%+iD# &?x˞w2((7N#\ng-W SP%)!8&D#t%u}m}l}|3+[I(,k{ -gZ c8uwoY9Ei̫sd]5q$/J){=~3m[SDA8TG5^o EgES S;òC":7,b%H$m91/v02FO跭v-`|ίE(>;o4p82X˖nHj5c9oL7`<p )߶tHRʫysg.mG9yMԊr,mU,Ұ>*4(" ic b<vZ*.m 9{]K"0[ҧddŶeN-K׵nNsh(F]sM[&KW`X<{\0H)/4T~_NۇE-tQN!WRjcBSl3KѝTԔs>.By[IT3k0ga=a!}hVOXJ;F4Uūu#JgSu>փ)MR-N yC"v@*$9"2ec޶~A;}ςUM^bF"OiǺ+0TY}JlK\>zeŒٲe~?k35MhPK7`)ZЛP1+i$n"3ț%r"ĽbD祖H[cf:}{HēicԤ$1{1^C}%\GE]'ͩ0K%izw-Jz#t94%JMeVxP߭h,q~޵y󻘷p.϶w>!WH"!;}&G'0`d }SZ-o׊sCF8ISD{fk)j7޻ES̤c81xUȗ%2veȖ1B"z/RJ-Ǣ5 -'I;4/`8C/4#{ө51Ӊ;)Z{TV4ڢaHrY"=^Ҫx/G6NHX{BQzZ PU_iWX2iY[7oZDV +/g#`p"Pl\ si$314~و;~{Y]UjW[32+e gϱ g?.V4q!gc{D,9n"o{2jf"=M)Cî[2.pdCIoy%+aDC1;]#wA*Q4_IxԽb~WSkcT!}C6S{zBаx4SVSr7YcRVizuΫd,#i/#U}Z_2@:B]IFqyRNX9bC!s Vа{G6"-8Dv\uY"ݔ Gi%<zgޢ9n2ML9NG<ǣ?\BHkjjWZO;%jBqVH\a?dl'uY"!*+}x^gf~ZDܶRa\ZcYַe?0DA), 4l[Pk:O w!9ھOmzN϶C\;uv}UJ4@mC5kvbӞUvCʉk H:"<>Ъ+^| N"'By3pF:b=fxU(⨐LH=l'Pgx{~-9Qv~Pd\/GQ״R#@%1 $8UQSH(v4a5~%P_](<'uNnHdZx<@i.rô_5o5Q$?rg`2c-Z7G,CP*nP{sc5n}|Y08 u޶>Q8.} /5ff'Jm(Q0K1oJ/W:Ϡ_8B<~eե,ZOoc&8aPh˚]W%x^3io w$He$+*B@UڹO!ζB"' }HYZ_*QN[nBᎈ;wEk衸)R4 ހZ7呍h{q#t]&bcT{YZHO4:[w ]Ө-/ V|~룛V"q~Xf8JK4(5b& >,so sd(_V%m}kxRCC"!\%&-O4YYd6(fH PX#$HU2.?;.wZXr tg͘NH)/Tx_8;Elphwסȝ)%_ԈJ*ŧGxu| |Dv9u)͘fܠ}_Upו)<PO\Bu/kL)%ϓxOaI)Bs~ Ke?%񚢩zNJ-gԀ7*~ɑ-ο݆,ю+M7_k5?蹙" \25GqPo!tǃr%cG ͸A[P$p#iuߪ])嗬f, E4^-V;_g|pgR2?ţXIOqHv5247c!Hpv#_JS*u+q*y82dް+.yώ'[G#i/{u׋Y"g)Raf(ޱeRZҒ_KOg/Ũ]@mMY3(s8E7\2L ի X_sgQ"-z+$c)HEMiY_VΙ_9IDAT'bE" 4޳ŧaV}X_hoh^J-FP)*ݳIENDB`sparsevctrs/man/sparse_median.Rd0000644000176200001440000000200214741321215016477 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_median.R \name{sparse_median} \alias{sparse_median} \title{Calculate median from sparse vectors} \usage{ sparse_median(x, na_rm = FALSE) } \arguments{ \item{x}{A sparse numeric vector.} \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} } \value{ single numeric value. } \description{ Calculate median from sparse vectors } \details{ This function, as with any of the other helper functions assumes that the input \code{x} is a sparse numeric vector. This is done for performance reasons, and it is thus the users responsibility to perform input checking. } \examples{ sparse_median( sparse_double(1000, 1, 1000) ) sparse_median( sparse_double(1000, 1, 1000, default = 1) ) sparse_median( sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) ) sparse_median( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) ) sparse_median( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), na_rm = TRUE ) } sparsevctrs/man/sparse_double.Rd0000644000176200001440000000347714741321215016535 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_double.R \name{sparse_double} \alias{sparse_double} \title{Create sparse double vector} \usage{ sparse_double(values, positions, length, default = 0) } \arguments{ \item{values}{double vector, values of non-zero entries.} \item{positions}{integer vector, indices of non-zero entries.} \item{length}{integer value, Length of vector.} \item{default}{double value, value at indices not specified by \code{positions}. Defaults to \code{0}. Cannot be \code{NA}.} } \value{ sparse double vector } \description{ Construction of vectors where only values and positions are recorded. The Length and default values determine all other information. } \details{ \code{values} and \code{positions} are expected to be the same length, and are allowed to both have zero length. Allowed values for \code{value} is double and integer values. integer values will be coerced to doubles. Missing values such as \code{NA} and \code{NA_real_} are allowed. Everything else is disallowed, This includes \code{Inf} and \code{NaN}. The values are also not allowed to take the same value as \code{default}. \code{positions} should be integers or integer-like doubles. Everything else is not allowed. Positions should furthermore be positive (\code{0} not allowed), unique, and in increasing order. Lastly they should all be smaller that \code{length}. For developers: setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a message each time a sparse vector has been forced to materialize. } \examples{ sparse_double(numeric(), integer(), 10) sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10) str( sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000) ) } \seealso{ \code{\link[=sparse_integer]{sparse_integer()}} \code{\link[=sparse_character]{sparse_character()}} } sparsevctrs/man/coerce_to_sparse_matrix.Rd0000644000176200001440000000274014741321215020601 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce.R \name{coerce_to_sparse_matrix} \alias{coerce_to_sparse_matrix} \title{Coerce sparse data frame to sparse matrix} \usage{ coerce_to_sparse_matrix(x, call = rlang::caller_env(0)) } \arguments{ \item{x}{a data frame or tibble with sparse columns.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ sparse matrix } \description{ Turning data frame with sparse columns into sparse matrix using \code{\link[Matrix:sparseMatrix]{Matrix::sparseMatrix()}}. } \details{ No checking is currently do to \code{x} to determine whether it contains sparse columns or not. Thus it works with any data frame. Needless to say, creating a sparse matrix out of a dense data frame is not ideal. } \examples{ \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10)) names(sparse_tbl) <- letters[1:10] sparse_tbl <- as.data.frame(sparse_tbl) sparse_tbl res <- coerce_to_sparse_matrix(sparse_tbl) res \dontshow{\}) # examplesIf} } \seealso{ \code{\link[=coerce_to_sparse_data_frame]{coerce_to_sparse_data_frame()}} \code{\link[=coerce_to_sparse_tibble]{coerce_to_sparse_tibble()}} } sparsevctrs/man/sparse_var.Rd0000644000176200001440000000206414741321215016042 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sparse_var.R \name{sparse_var} \alias{sparse_var} \title{Calculate variance from sparse vectors} \usage{ sparse_var(x, na_rm = FALSE) } \arguments{ \item{x}{A sparse numeric vector.} \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.} } \value{ single numeric value. } \description{ Calculate variance from sparse vectors } \details{ This function, as with any of the other helper functions assumes that the input \code{x} is a sparse numeric vector. This is done for performance reasons, and it is thus the users responsibility to perform input checking. Much like \code{\link[=var]{var()}} it uses the denominator \code{n-1}. } \examples{ sparse_var( sparse_double(1000, 1, 1000) ) sparse_var( sparse_double(1000, 1, 1000, default = 1) ) sparse_var( sparse_double(c(10, 50, 11), c(1, 50, 111), 1000) ) sparse_var( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000) ) sparse_var( sparse_double(c(10, NA, 11), c(1, 50, 111), 1000), na_rm = TRUE ) } sparsevctrs/man/coerce_to_sparse_tibble.Rd0000644000176200001440000000251314741321215020534 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/coerce.R \name{coerce_to_sparse_tibble} \alias{coerce_to_sparse_tibble} \title{Coerce sparse matrix to tibble with sparse columns} \usage{ coerce_to_sparse_tibble(x, call = rlang::caller_env(0)) } \arguments{ \item{x}{sparse matrix.} \item{call}{The execution environment of a currently running function, e.g. \code{caller_env()}. The function will be mentioned in error messages as the source of the error. See the \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \value{ tibble with sparse columns } \description{ Turning a sparse matrix into a tibble. } \details{ The only requirement from the sparse matrix is that it contains column names. } \examples{ \dontshow{if (rlang::is_installed("tibble")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} set.seed(1234) mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10) colnames(mat) <- letters[1:10] sparse_mat <- Matrix::Matrix(mat, sparse = TRUE) sparse_mat res <- coerce_to_sparse_tibble(sparse_mat) res # All columns are sparse vapply(res, is_sparse_vector, logical(1)) \dontshow{\}) # examplesIf} } \seealso{ \code{\link[=coerce_to_sparse_data_frame]{coerce_to_sparse_data_frame()}} \code{\link[=coerce_to_sparse_matrix]{coerce_to_sparse_matrix()}} } sparsevctrs/DESCRIPTION0000644000176200001440000000276714744256121014361 0ustar liggesusersPackage: sparsevctrs Title: Sparse Vectors for Use in Data Frames Version: 0.2.0 Authors@R: c( person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")), person("Davis", "Vaughan", , "davis@posit.co", role = "ctb"), person("Posit Software, PBC", role = c("cph", "fnd")) ) Description: Provides sparse vectors powered by ALTREP (Alternative Representations for R Objects) that behave like regular vectors, and can thus be used in data frames. Also provides tools to convert between sparse matrices and data frames with sparse columns and functions to interact with sparse vectors. License: MIT + file LICENSE URL: https://github.com/r-lib/sparsevctrs, https://r-lib.github.io/sparsevctrs/ BugReports: https://github.com/r-lib/sparsevctrs/issues Depends: R (>= 4.0.0) Imports: cli (>= 3.4.0), rlang (>= 1.1.0), vctrs Suggests: knitr, Matrix, methods, rmarkdown, testthat (>= 3.0.0), tibble, withr VignetteBuilder: knitr Config/Needs/website: tidyverse/tidytemplate, rmarkdown, lobstr, ggplot2, bench, tidyr, ggbeeswarm Config/testthat/edition: 3 Encoding: UTF-8 RoxygenNote: 7.3.2 NeedsCompilation: yes Packaged: 2025-01-22 20:43:32 UTC; emilhvitfeldt Author: Emil Hvitfeldt [aut, cre] (), Davis Vaughan [ctb], Posit Software, PBC [cph, fnd] Maintainer: Emil Hvitfeldt Repository: CRAN Date/Publication: 2025-01-22 21:00:01 UTC