distributional/0000755000176200001440000000000015131672333013312 5ustar liggesusersdistributional/tests/0000755000176200001440000000000015112047012014441 5ustar liggesusersdistributional/tests/testthat/0000755000176200001440000000000015131672333016314 5ustar liggesusersdistributional/tests/testthat/test-issues.R0000644000176200001440000000034615112047012020717 0ustar liggesuserstest_that("is.na() on [[1]] (#29)", { x <- c(dist_normal(0,1), NA) expect_equal( is.na(x), c(FALSE, TRUE) ) expect_equal( is.na(x[[1]]), FALSE ) expect_equal( is.na(x[[2]]), TRUE ) }) distributional/tests/testthat/setup-tests.R0000644000176200001440000000007715112047012020730 0ustar liggesuserscontext("Configure test options") options(cli.unicode = FALSE) distributional/tests/testthat/test-dist-gamma.R0000644000176200001440000000125415112047012021426 0ustar liggesuserstest_that("Gamma distribution", { dist <- dist_gamma(7.5, 2) expect_equal(format(dist), "Gamma(7.5, 2)") # quantiles expect_equal(quantile(dist, 0.1), stats::qgamma(0.1, 7.5, 2)) expect_equal(quantile(dist, 0.5), stats::qgamma(0.5, 7.5, 2)) # pdf expect_equal(density(dist, 0), stats::dgamma(0, 7.5, 2)) expect_equal(density(dist, 3), stats::dgamma(3, 7.5, 2)) # cdf expect_equal(cdf(dist, 0), stats::pgamma(0, 7.5, 2)) expect_equal(cdf(dist, 3), stats::pgamma(3, 7.5, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 7.5/2) expect_equal(variance(dist), 7.5/2^2) }) distributional/tests/testthat/test-dist-multivariate-t.R0000644000176200001440000001076115131656325023334 0ustar liggesuserstest_that("Multivariate t distribution", { df <- 5 mu <- c(1, 2) sigma <- matrix(c(4, 2, 2, 3), ncol = 2) dist <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) dimnames(dist) <- c("a", "b") expect_equal(format(dist), "MVT[2](5)") # stats expect_equal( mean(dist), matrix(c(1, 2), nrow = 1, dimnames = list(NULL, c("a", "b"))) ) # Covariance for multivariate t is df/(df-2) * sigma expected_cov <- df / (df - 2) * sigma expect_equal(covariance(dist), list(`colnames<-`(expected_cov, dimnames(dist)))) # quantiles - marginal # For marginal t distribution: scale = sqrt(diag(sigma)) expect_equal( quantile(dist, 0.1, kind = "marginal"), matrix(c( qt(0.1, df) * sqrt(sigma[1, 1]) + mu[1], qt(0.1, df) * sqrt(sigma[2, 2]) + mu[2] ), nrow = 1, dimnames = list(NULL, c("a", "b"))) ) skip_if_not_installed("mvtnorm") # quantiles - equicoordinate expect_equivalent( quantile(dist, 0.1, kind = "equicoordinate"), rep(mvtnorm::qmvt(0.1, delta = mu, sigma = sigma, df = df)$quantile, 2), tolerance = 1e-3 ) # pdf expect_equal( density(dist, cbind(1, 2)), mvtnorm::dmvt(c(1, 2), delta = mu, sigma = sigma, df = df, log = FALSE) ) expect_equal( density(dist, cbind(-3, 4)), mvtnorm::dmvt(c(-3, 4), delta = mu, sigma = sigma, df = df, log = FALSE) ) # cdf expect_equivalent( cdf(dist, cbind(1, 2)), as.numeric(mvtnorm::pmvt(upper = c(1, 2), delta = mu, sigma = sigma, df = df)), tolerance = 1e-3 ) expect_equivalent( cdf(dist, cbind(-3, 4)), as.numeric(mvtnorm::pmvt(upper = c(-3, 4), delta = mu, sigma = sigma, df = df)), tolerance = 1e-3 ) }) test_that("Multivariate t with df = 1 (Cauchy-like)", { df <- 1 mu <- c(0, 0) sigma <- diag(2) dist <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) # Mean is undefined for df <= 1 expect_warning(mean(dist), "undefined") expect_true(all(is.na(mean(dist)))) }) test_that("Multivariate t with df = 2", { df <- 2 mu <- c(1, 2) sigma <- matrix(c(1, 0.5, 0.5, 1), ncol = 2) dist <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) # Mean exists for df > 1 expect_equal(mean(dist), matrix(mu, nrow = 1)) # Covariance is undefined for df <= 2 expect_warning(covariance(dist), "undefined") expect_true(all(is.na(covariance(dist)[[1]]))) }) test_that("Multivariate t with larger df approaches normal", { skip_if_not_installed("mvtnorm") df <- 100 # Large df mu <- c(1, 2) sigma <- matrix(c(4, 2, 2, 3), ncol = 2) dist_t <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) dist_normal <- dist_multivariate_normal(mu = list(mu), sigma = list(sigma)) # Densities should be similar for large df test_point <- cbind(1, 2) expect_equal( density(dist_t, test_point), density(dist_normal, test_point), tolerance = 0.01 ) }) test_that("Multivariate t parameter validation", { expect_error( dist_multivariate_t(df = 0, mu = list(c(0, 0)), sigma = list(diag(2))), "positive" ) expect_error( dist_multivariate_t(df = -5, mu = list(c(0, 0)), sigma = list(diag(2))), "positive" ) # Valid inputs should not error expect_silent( dist_multivariate_t(df = 5, mu = list(c(0, 0)), sigma = list(diag(2))) ) }) test_that("Multivariate t generation", { skip_if_not_installed("mvtnorm") df <- 5 mu <- c(1, 2) sigma <- matrix(c(4, 2, 2, 3), ncol = 2) dist <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) set.seed(123) samples <- generate(dist, 100) # Check output structure - generate returns a list expect_true(is.list(samples)) expect_equal(length(samples), 1) expect_true(is.matrix(samples[[1]])) expect_equal(ncol(samples[[1]]), 2) expect_equal(nrow(samples[[1]]), 100) # Sample mean should be close to theoretical mean (for df > 1) expect_equal(colMeans(samples[[1]]), mu, tolerance = 0.5) }) test_that("Multivariate t with different dimensions", { # 3-dimensional df <- 10 mu <- c(1, 2, 3) sigma <- diag(3) dist <- dist_multivariate_t(df = df, mu = list(mu), sigma = list(sigma)) # Test the dim method on the unwrapped element expect_equal(dim.dist_mvt(vctrs::vec_data(dist)[[1]]), 3) expect_equal(format(dist), "MVT[3](10)") # 1-dimensional (should still work) dist1d <- dist_multivariate_t(df = 5, mu = list(0), sigma = list(matrix(1))) expect_equal(dim.dist_mvt(vctrs::vec_data(dist1d)[[1]]), 1) expect_equal(format(dist1d), "MVT[1](5)") }) distributional/tests/testthat/test-dist-negative-binomial.R0000644000176200001440000000134315112047012023735 0ustar liggesuserstest_that("Negative Binomial distribution", { dist <- dist_negative_binomial(10, 0.4) expect_equal(format(dist), "NB(10, 0.4)") # quantiles expect_equal(quantile(dist, 0.6), stats::qnbinom(0.6, 10, 0.4)) expect_equal(quantile(dist, 0.61), stats::qnbinom(0.61, 10, 0.4)) # pdf expect_equal(density(dist, 0), stats::dnbinom(0, 10, 0.4)) expect_equal(density(dist, 1), stats::dnbinom(1, 10, 0.4)) # cdf expect_equal(cdf(dist, 0), stats::pnbinom(0, 10, 0.4)) expect_equal(cdf(dist, 1), stats::pnbinom(1, 10, 0.4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.6358)), 0.6358, tolerance = 1e-3) # stats expect_equal(mean(dist), 0.6*10/(1-0.6)) expect_equal(variance(dist), 0.6*10/(1-0.6)^2) }) distributional/tests/testthat/test-dist-sample.R0000644000176200001440000000131415112047012021622 0ustar liggesuserstest_that("Emperical/sample distribution", { x <- generate(dist_normal(0, 1), 100) dist <- dist_sample(x) expect_equal(format(dist), "sample[100]") # quantiles expect_equal(quantile(dist, 0.6), unname(quantile(x[[1]], 0.6))) expect_equal(quantile(dist, 0.24), unname(quantile(x[[1]], 0.24))) # pdf # cdf # F(Finv(a)) ~= a # stats expect_equal(mean(dist), mean(x[[1]])) expect_equal(median(dist), median(x[[1]])) expect_equal(median(dist[[1]]), median(x[[1]])) expect_equal(variance(dist), var(x[[1]])) # transform expect_equal( dist, dist + 1 - 1 ) }) test_that("CDF of degenerate dist_sample() is correct", { expect_equal(cdf(dist_sample(list(2)), 2)[[1]], 1) }) distributional/tests/testthat/test-hilo.R0000644000176200001440000000112215112047012020330 0ustar liggesuserstest_that("hilo", { # defaults hl <- new_hilo() expect_length(hl, 0) # display expect_output(print(hl), "") expect_output(print(new_hilo(0,1,95)), "\\[0, 1\\]95") dist <- dist_normal() # hilo.distribution hl <- hilo(dist, 95) expect_length(hl, 1) expect_equal(hl, new_hilo(qnorm(0.025), qnorm(0.975), 95)) # vec_math.hilo expect_equal(is.na(hl), FALSE) expect_equal(is.nan(hl), FALSE) # vec_arith.hilo expect_equal( hl*3+1, new_hilo(qnorm(0.025)*3+1, qnorm(0.975)*3+1, 95) ) expect_equal(-hl, hl) expect_equal(-3*hl, hl/(1/3)) }) distributional/tests/testthat/test-dist-gh.R0000644000176200001440000000167615112047012020752 0ustar liggesuserstest_that("g-and-h distribution", { # Define g-and-h distribution parameters A <- 0 B <- 1 g <- 0 h <- 0.5 c <- 0.8 dist <- dist_gh(A, B, g, h, c) # Check formatting expect_equal(format(dist), "gh(A = 0, B = 1, g = 0, h = 0.5)") # Require package installed skip_if_not_installed("gk", "0.1.0") # quantiles expect_equal(quantile(dist, 0.1), gk::qgh(0.1, A, B, g, h, c)) expect_equal(quantile(dist, 0.5), gk::qgh(0.5, A, B, g, h, c)) # pdf expect_equal(density(dist, 0), gk::dgh(0, A, B, g, h, c)) expect_equal(density(dist, 3), gk::dgh(3, A, B, g, h, c)) # cdf expect_equal(cdf(dist, 0), gk::pgh(0, A, B, g, h, c)) expect_equal(cdf(dist, 3), gk::pgh(3, A, B, g, h, c)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # Generate random samples set.seed(123) samples <- generate(dist, 10) set.seed(123) expect_equal(samples[[1L]], gk::rgh(10, A, B, g, h, c)) }) distributional/tests/testthat/test-apply.R0000644000176200001440000001334515131656306020551 0ustar liggesuserstest_that("Recycling rules and output for applying multiple inputs over multiple univariate distributions", { # is a distribution vector of length 10 dist <- dist_named <- dist_normal(1:10, 1:10) dimnames(dist_named) <- "name" # p = 0.5: apply p across all elements of (recycling p onto ) # Returns a vector of length 10 expect_equal( quantile(dist, 0.5), qnorm(0.5, 1:10, 1:10) ) expect_equal( quantile(dist_named, 0.5), qnorm(0.5, 1:10, 1:10) ) # p = c(0.5, 0.9): Cannot recycle p (length 2) onto (length 10) # Returns a list containing values for each p. expect_equal( quantile(dist, c(0.5, 0.9)), mapply({function(mean, sd) qnorm(c(0.5, 0.9), mean, sd)}, mean = 1:10, sd = 1:10, SIMPLIFY = FALSE) ) expect_equal( quantile(dist_named, c(0.5, 0.9)), mapply({function(mean, sd) qnorm(c(0.5, 0.9), mean, sd)}, mean = 1:10, sd = 1:10, SIMPLIFY = FALSE) ) # p = ppoints(10): apply each p to each element of (no recycling) # Returns a list for each distribution with the 10 quantiles. expect_equal( quantile(dist, ppoints(10)), mapply({function(mean, sd) qnorm(ppoints(10), mean, sd)}, mean = 1:10, sd = 1:10, SIMPLIFY = FALSE) ) expect_equal( quantile(dist_named, ppoints(10)), mapply({function(mean, sd) qnorm(ppoints(10), mean, sd)}, mean = 1:10, sd = 1:10, SIMPLIFY = FALSE) ) # p = list(0.5): equivalent to p = 0.5, but returns a list output # Returns a tibble with 1 vector column of length 10 expect_equal( quantile(dist, list(a = 0.5)), new_data_frame(list(a = quantile(dist, 0.5))) ) expect_equal( quantile(dist_named, list(a = 0.5)), new_data_frame(list(a = quantile(dist, 0.5))) ) # p = list(c(0.5, 0.9)): # Cannot recycle p[[1]] (length 2) onto (length 10) # Returns an error. expect_error( quantile(dist, list(a = c(0.5, 0.9))), "Cannot recycle input" ) expect_error( quantile(dist_named, list(a = c(0.5, 0.9))), "Cannot recycle input" ) # p = list(p1, 0.5): equivalent to df(quantile(, p1), quantile(, 0.5)). # Returns a tibble with 2 vector columns of length 10 # Names of p are used in output. expect_equal( quantile(dist, list(a=ppoints(10), b=0.5)), new_data_frame(list(a = qnorm(ppoints(10), 1:10, 1:10), b = quantile(dist, 0.5))) ) expect_equal( quantile(dist_named, list(a=ppoints(10), b=0.5)), new_data_frame(list(a = qnorm(ppoints(10), 1:10, 1:10), b = quantile(dist, 0.5))) ) }) test_that("Recycling rules and output for applying multiple inputs over multiple multivariate distributions", { # is a bivariate distribution vector of length 2 dist <- dist_named <- dist_multivariate_normal(mu = list(c(1,2), c(3,5)), sigma = list(matrix(c(4,2,2,3), ncol=2), matrix(c(5,1,1,4), ncol=2))) dimnames(dist_named) <- c("a", "b") expect_equal( quantile(dist, 0.5, kind = "marginal"), matrix(c(1,3,2,5), nrow = 2) ) expect_equal( quantile(dist_named, 0.5, kind = "marginal"), matrix(c(1,3,2,5), nrow = 2, dimnames = list(NULL, c("a", "b"))) ) expect_equal( quantile(dist, c(0.5, 0.9), kind = "marginal"), list(matrix(c(1, 3.5631031310892, 2, 4.21971242404268), ncol = 2), matrix(c(3, 5.86563641722901, 5, 7.5631031310892), ncol = 2)) ) expect_equal( quantile(dist_named, c(0.5, 0.9), kind = "marginal"), list(matrix(c(1, 3.5631031310892, 2, 4.21971242404268), ncol = 2, dimnames = list(NULL, c("a", "b"))), matrix(c(3, 5.86563641722901, 5, 7.5631031310892), ncol = 2, dimnames = list(NULL, c("a", "b")))) ) expect_equal( quantile(dist, c(0.5, 0.9, 0.95), kind = "marginal"), list( matrix(c(1, 3.5631031310892, 4.28970725390294, 2, 4.21971242404268, 4.84897005289389), ncol = 2), matrix(c(3, 5.86563641722901, 6.67800452290057, 5, 7.5631031310892, 8.28970725390294), ncol = 2) ) ) expect_equal( quantile(dist_named, c(0.5, 0.9, 0.95), kind = "marginal"), list( matrix(c(1, 3.5631031310892, 4.28970725390294, 2, 4.21971242404268, 4.84897005289389), ncol = 2, dimnames = list(NULL, c("a","b"))), matrix(c(3, 5.86563641722901, 6.67800452290057, 5, 7.5631031310892, 8.28970725390294), ncol = 2, dimnames = list(NULL, c("a", "b"))) ) ) expect_equal( quantile(dist, list(single = 0.5, varied = c(0.8, 0.3))), new_data_frame( list(single = quantile(dist, 0.5), varied = rbind(quantile(dist[1], 0.8), quantile(dist[2], 0.3))) ) ) expect_equal( quantile(dist_named, list(single = 0.5, varied = c(0.8, 0.3))), new_data_frame( list(single = quantile(dist_named, 0.5), varied = rbind(quantile(dist_named[1], 0.8), quantile(dist_named[2], 0.3))) ) ) skip_if_not_installed("mvtnorm") expect_equal( density(dist, cbind(2, 3)), c(0.046649277604197, 0.0215708514518913) ) expect_equal( density(dist_named, cbind(2, 3)), c(0.046649277604197, 0.0215708514518913) ) expect_equal( mean(dist), matrix(c(1,3,2,5), nrow = 2) ) expect_equal( mean(dist_named), matrix(c(1,3,2,5), nrow = 2, dimnames = list(NULL, c("a", "b"))) ) expect_equal( covariance(dist), list( matrix(c(4,2,2,3), nrow = 2), matrix(c(5,1,1,4), nrow = 2) ) ) expect_equal( covariance(dist_named), list( matrix(c(4,2,2,3), nrow = 2, dimnames = list(NULL, c("a", "b"))), matrix(c(5,1,1,4), nrow = 2, dimnames = list(NULL, c("a", "b"))) ) ) }) distributional/tests/testthat/test-dist-gumbel.R0000644000176200001440000000146015112047012021616 0ustar liggesuserstest_that("Gumbel distribution", { dist <- dist_gumbel(1, 2) expect_equal(format(dist), "Gumbel(1, 2)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qgumbel(0.1, 1, 2)) expect_equal(quantile(dist, 0.5), actuar::qgumbel(0.5, 1, 2)) # pdf expect_equal(density(dist, 0), actuar::dgumbel(0, 1, 2)) expect_equal(density(dist, 3), actuar::dgumbel(3, 1, 2)) # cdf expect_equal(cdf(dist, 0), actuar::pgumbel(0, 1, 2)) expect_equal(cdf(dist, 3), actuar::pgumbel(3, 1, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), actuar::mgumbel(1, 1, 2)) expect_equal(variance(dist), actuar::mgumbel(2, 1, 2) - actuar::mgumbel(1, 1, 2)^2) }) distributional/tests/testthat/test-dist-laplace.R0000644000176200001440000000670215131033753021760 0ustar liggesuserstest_that("Laplace distribution", { # defaults - testing with explicit parameters dist <- dist_laplace(0, 1) expect_equal(mean(dist), 0) expect_equal(variance(dist), 2) # display expect_s3_class(dist, "distribution") expect_output(print(dist), "Laplace\\(0, 1\\)") expect_output(print(dist_laplace(numeric(), numeric())), "") # error checking expect_error( dist_laplace(0, -1), "positive" ) expect_error( dist_laplace(0, 0), "positive" ) expect_silent( dist_laplace(mu = 0L, sigma = 1L) ) set.seed(123) mu <- c(-0.560475646552213, -0.23017748948328, 1.55870831414912, 0.070508391424576, 0.129287735160946, 1.71506498688328, 0.460916205989202, -1.26506123460653, -0.686852851893526, -0.445661970099958) sigma <- c(1.22408179743946, 0.359813827057364, 0.400771450594052, 0.11068271594512, 0.555841134754075, 1.78691313680308, 0.497850478229239, 1.96661715662964, 0.701355901563686, 0.472791407727934) dist <- dist_laplace(mu, sigma) # summary statistics expect_equal(mean(dist), c(-0.560475646552213, -0.23017748948328, 1.55870831414912, 0.070508391424576, 0.129287735160946, 1.71506498688328, 0.460916205989202, -1.26506123460653, -0.686852851893526, -0.445661970099958), tolerance = 1e-6) expect_equal(median(dist), c(-0.560475646552213, -0.23017748948328, 1.55870831414912, 0.070508391424576, 0.129287735160946, 1.71506498688328, 0.460916205989202, -1.26506123460653, -0.686852851893526, -0.445661970099958)) expect_equal(variance(dist), c(2.99675249364525, 0.258931980283333, 0.321235511222522, 0.0245013272179761, 0.617918734169395, 6.38611711695883, 0.495710197346165, 7.73516608150009, 0.98380020131642, 0.447063430442723)) expect_equal(skewness(dist), c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)) expect_equal(kurtosis(dist), c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3)) # symmetry expect_equal(has_symmetry(dist), c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)) # density expect_equal( density(dist, 0), c(0.25840877941931, 0.732940031399069, 0.0255261195667388, 2.38908099437143, 0.712858180290539, 0.107160364045725, 0.397920035658436, 0.133623419562148, 0.267742715233142, 0.412027664803725) ) # log density expect_equal( density(dist, 0, log = TRUE), c(-1.35321253156994, -0.310691392994745, -3.66805305431685, 0.87090877081269, -0.338472783975555, -2.23342883709496, -0.921504209314974, -2.0127297371183, -1.31772877737028, -0.886664784305052) ) # cdf expect_equal( cdf(dist, 5), c(0.994676987466257, 0.999999756691372, 0.999906711705461, 1, 0.99992177527756, 0.920458518481099, 0.999945128184019, 0.979325448598997, 0.999849495376496, 0.999995025840115) ) # quantile expect_equal( quantile(dist, 0.7), c(0.0648167011653952, -0.0463753668370739, 1.76343264038643, 0.127047958837356, 0.413225629536493, 2.62786600460636, 0.71523098707285, -0.2604627988623, -0.328582285995298, -0.204148004336135) ) # Test that cdf and quantile are inverses probs <- c(0.1, 0.25, 0.5, 0.75, 0.9) expect_equal( cdf(dist[1L], quantile(dist[1L], probs)[[1L]])[[1L]], probs, tolerance = 1e-10 ) # generate set.seed(123) generated <- generate(dist, 10) expect_length(generated, length(dist)) expect_true(all(sapply(generated, length) == 10)) large_sample <- generate(dist[[1]], 10000)[[1]] expect_equal(mean(large_sample), mu[[1]], tolerance = 0.1) expect_equal(var(large_sample), 2 * sigma[[1]]^2, tolerance = 0.2) })distributional/tests/testthat/test-dist-missing.R0000755000176200001440000000064315112047012022021 0ustar liggesuserstest_that("Missing distribution", { dist <- dist_missing() expect_equal(format(dist), "NA") # quantiles expect_equal(quantile(dist, c(0.1, 0.9)), list(c(NA_real_, NA_real_))) # pdf expect_equal(density(dist, 1:2), list(c(NA_real_, NA_real_))) # cdf expect_equal(cdf(dist, 1:2), list(c(NA_real_, NA_real_))) # stats expect_equal(mean(dist), NA_real_) expect_equal(variance(dist), NA_real_) }) distributional/tests/testthat/test-dist-exponential.R0000644000176200001440000000117615112047012022675 0ustar liggesuserstest_that("Exponential distribution", { dist <- dist_exponential(2) expect_equal(format(dist), "Exp(2)") # quantiles expect_equal(quantile(dist, 0.1), stats::qexp(0.1, 2)) expect_equal(quantile(dist, 0.5), stats::qexp(0.5, 2)) # pdf expect_equal(density(dist, 0), stats::dexp(0, 2)) expect_equal(density(dist, 3), stats::dexp(3, 2)) # cdf expect_equal(cdf(dist, 0), stats::pexp(0, 2)) expect_equal(cdf(dist, 3), stats::pexp(3, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 1/2) expect_equal(variance(dist), 1/2^2) }) distributional/tests/testthat/test-dist-inverse-gamma.R0000644000176200001440000000164215112047012023100 0ustar liggesuserstest_that("Inverse Gamma distribution", { dist <- dist_inverse_gamma(3, 2) expect_equal(format(dist), "InvGamma(3, 0.5)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qinvgamma(0.1, 3, 2)) expect_equal(quantile(dist, 0.5), actuar::qinvgamma(0.5, 3, 2)) # pdf expect_equal(density(dist, 0), actuar::dinvgamma(0, 3, 2)) expect_equal(density(dist, 3), actuar::dinvgamma(3, 3, 2)) # cdf expect_equal(cdf(dist, 0), actuar::pinvgamma(0, 3, 2)) expect_equal(cdf(dist, 3), actuar::pinvgamma(3, 3, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), (1/2) / (3 - 1)) expect_equal(median(dist), actuar::qinvgamma(0.5, 3, 2)) expect_equal(median(dist[[1]]), actuar::qinvgamma(0.5, 3, 2)) expect_equal(variance(dist), (1/2)^2/((3-1)^2*(3-2))) }) distributional/tests/testthat/test-distribution.R0000644000176200001440000000116715112047012022125 0ustar liggesuserstest_that("is_distribution", { expect_false(is_distribution(iris)) expect_true(is_distribution(dist_normal())) expect_false(is_distribution(NULL)) expect_false(is_distribution(0)) df <- data.frame(a = 1:10, b = dist_poisson(1:10), c = dist_normal(1:10)) expect_true(all(sapply(df, is_distribution) == c(FALSE, TRUE, TRUE))) }) test_that("variance() works correctly on vectors/matrices of different dimension", { x = 1:8 expect_equal(variance(x), 6) expect_equal(variance(matrix(x, nrow = 2)), rep(0.5, 4)) }) test_that("variance() throws an error on non-numeric objects", { expect_error(variance("foo")) }) distributional/tests/testthat/test-dist-categorical.R0000644000176200001440000000145015112047012022617 0ustar liggesuserstest_that("Categorical distribution", { dist <- dist_categorical(list(c(0.4, 0.2, 0.3, 0.1))) expect_equal(format(dist), "Categorical[4]") # quantiles expect_true(all(is.na(quantile(dist, 0.5)))) expect_true(all(is.na(quantile(dist, 0.2)))) expect_equal(quantile(dist, c(0.1, 0.9)), list(c(NA_real_, NA_real_))) # pdf expect_equal(density(dist, -1), NA_real_) expect_equal(density(dist, 0), NA_real_) expect_equal(density(dist, 1), 0.4) expect_equal(density(dist, 2), 0.2) expect_equal(density(dist, 5), NA_real_) expect_equal(density(dist, 3:5), list(c(0.3, 0.1, NA_real_))) # cdf expect_true(all(is.na(cdf(dist, 1)))) expect_equal(cdf(dist, 1:2), list(c(NA_real_, NA_real_))) # stats expect_true(all(is.na(mean(dist)))) expect_true(all(is.na(variance(dist)))) }) distributional/tests/testthat/test-dist-lognormal.R0000644000176200001440000000165515112047012022343 0ustar liggesuserstest_that("Log-normal distribution", { # defaults dist <- dist_lognormal() expect_equal(mean(dist), exp(1/2)) expect_equal(variance(dist), (exp(1)-1)*exp(1)) # display expect_s3_class(dist, "distribution") expect_output(print(dist), "lN\\(0, 1\\)") expect_output(print(dist_lognormal(numeric())), "") # error checking expect_error( dist_lognormal(0, -1), "non-negative" ) expect_silent( dist_lognormal(mu = 0, sigma = 1) ) # density expect_equal( density(dist, 0), dlnorm(0, mean = 0, sd = 1) ) # cdf expect_equal( cdf(dist, 5), plnorm(5, mean = 0, sd = 1) ) # quantile expect_equal( quantile(dist, 0.1), qlnorm(0.1, mean = 0, sd = 1) ) # generate expect_equal( { set.seed(0) generate(dist, 10) }, { set.seed(0) mapply(function(m, s) rlnorm(10, m, s), m = 0, s = 1, SIMPLIFY = FALSE) } ) }) distributional/tests/testthat/test-dist-student-t.R0000644000176200001440000000550315112047012022274 0ustar liggesuserstest_that("Student T distribution", { dist <- dist_student_t(5) expect_equal(format(dist), "t(5, 0, 1)") # quantiles expect_equal(quantile(dist, 0.1), stats::qt(0.1, 5)) expect_equal(quantile(dist, 0.5), stats::qt(0.5, 5)) # pdf expect_equal(density(dist, 0), stats::dt(0, 5)) expect_equal(density(dist, 3), stats::dt(3, 5)) # cdf expect_equal(cdf(dist, 0), stats::pt(0, 5)) expect_equal(cdf(dist, 3), stats::pt(3, 5)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 0) expect_equal(variance(dist), 5 / (5-2)) }) test_that("Noncentral Student t distribution", { dist <- dist_student_t(8, ncp = 6) expect_equal(format(dist), "t(8, 0, 1, 6)") # quantiles expect_equal(quantile(dist, 0.1), stats::qt(0.1, 8, ncp = 6)) expect_equal(quantile(dist, 0.5), stats::qt(0.5, 8, ncp = 6)) # pdf expect_equal(density(dist, 0), stats::dt(0, 8, ncp = 6)) expect_equal(density(dist, 3), stats::dt(3, 8, ncp = 6)) # cdf expect_equal(cdf(dist, 0), stats::pt(0, 8, ncp = 6)) expect_equal(cdf(dist, 3), stats::pt(3, 8, ncp = 6)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 2 * gamma(7/2)) expect_equal(variance(dist), 148/3 - 4 * gamma(7/2)^2) }) test_that("Location-scale Student t distribution", { dist <- dist_student_t(5, 2, 3) expect_equal(format(dist), "t(5, 2, 3)") # quantiles expect_equal(quantile(dist, 0.1), stats::qt(0.1, 5)*3 + 2) expect_equal(quantile(dist, 0.5), stats::qt(0.5, 5)*3 + 2) # pdf expect_equal(density(dist, 0), stats::dt(-2/3, 5)/3) expect_equal(density(dist, 3), stats::dt((3-2)/3, 5)/3) # cdf expect_equal(cdf(dist, 0), stats::pt(-2/3, 5)) expect_equal(cdf(dist, 3), stats::pt((3-2)/3, 5)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 2) expect_equal(variance(dist), 5 / (5-2) * 3^2) }) test_that("Noncentral location-scale Student t distribution", { dist <- dist_student_t(5, 2, 3, ncp = 6) expect_equal(format(dist), "t(5, 2, 3, 6)") # quantiles expect_equal(quantile(dist, 0.1), stats::qt(0.1, 5, 6)*3 + 2) expect_equal(quantile(dist, 0.5), stats::qt(0.5, 5, 6)*3 + 2) # pdf expect_equal(density(dist, 0), stats::dt(-2/3, 5, 6)/3) expect_equal(density(dist, 3), stats::dt((3-2)/3, 5, 6)/3) # cdf expect_equal(cdf(dist, 0), stats::pt(-2/3, 5, 6)) expect_equal(cdf(dist, 3), stats::pt((3-2)/3, 5, 6)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 2 + 6 * sqrt(5/2) * gamma(2)/gamma(5/2) * 3) expect_equal(variance(dist), ((5*(1+6^2))/(5-2) - (6 * sqrt(5/2) * (gamma((5-1)/2)/gamma(5/2)))^2)*3^2) }) distributional/tests/testthat/test_gev.R0000644000176200001440000000366215112047012020253 0ustar liggesuserstest_that("GEV", { dist <- dist_gev(location = c(0, .5, 0), scale = c(1, 2, 3), shape = c(0, 0.1, 1.1)) euler <- 0.57721566490153286 # Euler's constant # Mean expect_equal(mean(dist), c( euler, 0.5 + 2 * (gamma(0.9) - 1) / 0.1, # location + scale*(gamma(1 - shape) - 1)/shape Inf # Since shape >= 1 ), tolerance = 0.0001) # Median expect_equal(median(dist), c( -log(log(2)), 0.5 + 2 * (log(2)^(-0.1) - 1) / 0.1, # location + scale*(log(2)^(-shape) - 1)/shape 3 * (log(2)^(-1.1) - 1) / 1.1 # location + scale*(log(2)^(-shape) - 1)/shape ), tolerance = 0.0001) expect_equal(median(dist), quantile(dist, 0.5)) # Variance expect_equal(distributional::variance(dist), c( pi^2 / 6, 2^2 * (gamma(1 - 2 * 0.1) - gamma(1 - 0.1)^2) / 0.1^2, # scale^2 * (g2 - g1^2)/shape^2 Inf # since shape >= 0.5 ), tolerance = 0.0001) # Density at <- (0:20) / 10 expect_equal(density(dist, at), list( evd::dgev(at, loc = 0, scale = 1, shape = 0), evd::dgev(at, loc = 0.5, scale = 2, shape = 0.1), evd::dgev(at, loc = 0, scale = 3, shape = 1.1) )) # CDF expect_equal(distributional::cdf(dist, at), list( evd::pgev(at, loc = 0, scale = 1, shape = 0), evd::pgev(at, loc = 0.5, scale = 2, shape = 0.1), evd::pgev(at, loc = 0, scale = 3, shape = 1.1) )) # Quantiles p <- (1:19) / 20 expect_equal(quantile(dist, p = p), list( evd::qgev(p = p, loc = 0, scale = 1, shape = 0), evd::qgev(p = p, loc = 0.5, scale = 2, shape = 0.1), evd::qgev(p = p, loc = 0, scale = 3, shape = 1.1) )) # Generate set.seed(123) rand_dist <- distributional::generate(dist, times = 1e6) expect_equal(unlist(lapply(rand_dist[1:2], mean)), mean(dist)[1:2], tolerance = 0.01 ) expect_equal(unlist(lapply(rand_dist[1:2], var)), distributional::variance(dist)[1:2], tolerance = 0.01 ) expect_equal(unlist(lapply(rand_dist, median)), median(dist), tolerance = 0.01 ) }) distributional/tests/testthat/test-mixture.R0000644000176200001440000001065715112047012021107 0ustar liggesuserstest_that("Mixture of Normals", { dist <- dist_mixture(dist_normal(0, 1), dist_normal(10, 4), weights = c(0.5, 0.5)) # format expect_equal(format(dist), "mixture(0.5*N(0, 1), 0.5*N(10, 16))") # quantiles expect_equal(quantile(dist, 0.5), 2, tolerance = 1e-5) expect_equal(quantile(dist, 0.1), -0.854, tolerance = 1e-3) # pdf expect_equal(density(dist, 0), 0.5 * dnorm(0) + 0.5 * dnorm(0, 10, 4)) expect_equal(density(dist, 3), 0.5 * dnorm(3) + 0.5 * dnorm(3, 10, 4)) # cdf expect_equal(cdf(dist, 0), 0.5 * pnorm(0) + 0.5 * pnorm(0, 10, 4)) expect_equal(cdf(dist, 3), 0.5 * pnorm(3) + 0.5 * pnorm(3, 10, 4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.5)), 0.5, tolerance = 1e-6) expect_equal(mean(dist), 5) expect_equal(variance(dist), 33.5) }) test_that("Mixture of different distributions", { dist <- dist_mixture(dist_normal(0, 1), dist_student_t(10), weights = c(0.3, 0.7)) # format expect_equal(format(dist), "mixture(0.3*N(0, 1), 0.7*t(10, 0, 1))") # quantiles expect_equal(quantile(dist, 0.5), 0, tolerance = 1e-5) expect_equal(quantile(dist, 0.1), -1.343, tolerance = 1e-3) # pdf expect_equal(density(dist, 0), 0.3 * dnorm(0) + 0.7 * dt(0, 10)) expect_equal(density(dist, 3), 0.3 * dnorm(3) + 0.7 * dt(3, 10)) # cdf expect_equal(cdf(dist, 0), 0.3 * pnorm(0) + 0.7 * pt(0, 10)) expect_equal(cdf(dist, 3), 0.3 * pnorm(3) + 0.7 * pt(3, 10)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.5)), 0.5, tolerance = 1e-6) expect_equal(mean(dist), 0) expect_equal(variance(dist), 1.175) }) test_that("Mixture of point masses", { dist <- dist_mixture(dist_degenerate(1), dist_degenerate(2), dist_degenerate(3), weights = c(0.1, 0.2, 0.7)) # format expect_equal(format(dist, width = 10), "mixture(n=3)") expect_equal(format(dist), "mixture(0.1*1, 0.2*2, 0.7*3)") # quantiles expect_equal(quantile(dist, c(0, 0.1, 0.3, 1))[[1]], c(1, 1:3), tolerance = .Machine$double.eps^0.25) # pmf expect_equal(density(dist, 1:3)[[1]], c(0.1, 0.2, 0.7)) # cdf expect_equal(cdf(dist, 1:3)[[1]], c(0.1, 0.3, 1)) # mean expect_equal(mean(dist), 2.6) }) test_that("Mixture of multivariate distributions", { mu1 <- c(0, 0) mu2 <- c(1, 2) mu3 <- c(-10, -10) mu4 <- c(10, 10) sigma1 <- diag(2) sigma2 <- matrix(c(4, 2, 2, 3), 2, 2) w1 <- 0.3 w2 <- 1 - w1 w3 <- 0.5 w4 <- 1 - w3 # Two mixtures of bivariate normals dist <- c( dist1 = dist_mixture( dist_multivariate_normal(mu = list(mu1), sigma = list(sigma1)), dist_multivariate_normal(mu = list(mu2), sigma = list(sigma2)), weights = c(w1, w2) ), dist2 = dist_mixture( dist_multivariate_normal(mu = list(mu3), sigma = list(sigma1)), dist_multivariate_normal(mu = list(mu4), sigma = list(sigma1)), weights = c(w3, w4) ) ) # Mean expect_equal(mean(dist), rbind(w1 * mu1 + w2 * mu2, w3 * mu3 + w4 * mu4)) # Quantile expect_error(quantile(dist, 0.5)) # CDF at <- matrix(rnorm(4), 2, 2) cdf_dist <- cdf(dist, q = at) expect_equal(length(cdf_dist), 2L) expect_equal(lengths(cdf_dist), c(2L, 2L)) # Density skip_if_not_installed("mvtnorm") expect_equal( density(dist, at), list( c(w1 * mvtnorm::dmvnorm(at, mean = mu1, sigma = sigma1) + w2 * mvtnorm::dmvnorm(at, mean = mu2, sigma = sigma2)), c(w3 * mvtnorm::dmvnorm(at, mean = mu3, sigma = sigma1) + w4 * mvtnorm::dmvnorm(at, mean = mu4, sigma = sigma1)) ) ) # Mixture equivalent to multivariate normal dist <- dist_multivariate_normal(mu = list(mu2), sigma = list(sigma2)) mdist <- dist_mixture(dist, dist, weights = c(w1, w2)) expect_equal(mean(dist), mean(mdist)) expect_equal(covariance(dist), covariance(mdist)) expect_equal(unname(density(dist, rbind(mu1))), density(mdist, rbind(mu1))) set.seed(1) expect_equal(cdf(dist, rbind(c(0, 0))), cdf(mdist, rbind(c(0, 0))), tolerance = 0.001 ) }) test_that("Mixture with bad weights",{ expect_error( dist_mixture(dist_normal(0, 1), dist_normal(10, 4), weights = c(0.6, 0.5)) ) expect_error( dist_mixture(dist_normal(0, 1), dist_normal(10, 4), weights = c(0.4, 0.5)) ) dist <- dist_mixture(dist_normal(0, 1), dist_normal(10, 4), dist_normal(5, 2), weights = c(0.2177682569661155698, 0.0491619687405063024, 0.7330697742933780514)) expect_equal( class(dist)[1], "distribution") }) distributional/tests/testthat/test-dist-multinomial.R0000644000176200001440000000353115131573347022716 0ustar liggesuserstest_that("Multinomial distribution", { p <- c(0.3, 0.5, 0.2) dist <- dist_multinomial(size = 4, prob = list(p)) dimnames(dist) <- c("a", "b", "c") expect_equal(format(dist), "Multinomial(4)[3]") # quantiles # pdf expect_equal(density(dist, cbind(1, 2, 1)), dmultinom(c(1, 2, 1), 4, p)) # cdf # For multinomial, CDF is the probability that each category count is <= given values # This requires summing probabilities over all valid outcomes within the bounds # CDF should be 1 when bounds allow all possible outcomes expect_equal(cdf(dist, cbind(4, 4, 4)), 1) # CDF at a specific point should be >= PDF at that point test_point <- cbind(1, 2, 1) expect_gte(cdf(dist, test_point), density(dist, test_point)) # CDF should be monotonically increasing expect_gte(cdf(dist, cbind(2, 2, 2)), cdf(dist, cbind(1, 1, 1))) # CDF at tight bounds should match the single point probability if only one outcome is possible # When bounds are (0, 0, 4), only the outcome (0, 0, 4) is valid expect_equal(cdf(dist, cbind(0, 0, 4)), dmultinom(c(0, 0, 4), 4, p)) # CDF at bounds (1, 1, 4) should include multiple valid outcomes: # (0,0,4), (0,1,3), (1,0,3), (1,1,2) valid_outcomes <- rbind(c(0,0,4), c(0,1,3), c(1,0,3), c(1,1,2)) expected_cdf <- sum(apply(valid_outcomes, 1, function(x) dmultinom(x, 4, p))) expect_equal(cdf(dist, cbind(1, 1, 4)), expected_cdf) # CDF at impossible values (where sum of bounds < size) should be 0 expect_equal(cdf(dist, cbind(0, 0, 0)), 0) # F(Finv(a)) ~= a # stats expect_equal( mean(dist), matrix(c(1.2, 2, 0.8), nrow = 1, dimnames = list(NULL, c("a", "b", "c"))) ) expect_equal( covariance(dist), list( matrix( c(0.84, -0.6, -0.24, -0.6, 1, -0.4, -0.24, -0.4, 0.64), nrow = 3, dimnames = list(NULL, c("a", "b", "c")) ) ) ) }) distributional/tests/testthat/test-dist-percentile.R0000644000176200001440000000123315112047012022473 0ustar liggesuserstest_that("Negative Binomial distribution", { dist <- dist_normal(0, 1) percentiles <- seq(0.01, 0.99, by = 0.01) x <- vapply(percentiles, quantile, double(1L), x = dist) dist <- dist_percentile(list(x), list(percentiles*100)) expect_equal(format(dist), "percentile[99]") # quantiles expect_equal(quantile(dist, 0.6), stats::qnorm(0.6, 0, 1)) expect_equal(quantile(dist, 0.61), stats::qnorm(0.61, 0, 1)) # pdf # cdf expect_equal(cdf(dist, 0), stats::pnorm(0, 0, 1)) expect_equal(cdf(dist, 1), stats::pnorm(1, 0, 1), tolerance = 1e-3) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.6)), 0.6, tolerance = 1e-3) # stats }) distributional/tests/testthat/test-truncated.R0000644000176200001440000000410215112047012021367 0ustar liggesuserstest_that("Truncated Normal distributions", { dist <- dist_truncated(dist_normal(0, 1), -5, 5) # format expect_equal(format(dist), sprintf("%s[-5,5]", format(dist_normal(0,1)))) # quantiles expect_equal( quantile(dist, 0.1), -1.28155025885944 #dput(extraDistr::qtnorm(0.1, 0, 1, -5, 5)) ) expect_equal( quantile(dist, 0.5), -1.39145821233588e-16 #dput(extraDistr::qtnorm(0.5, 0, 1, -5, 5)) ) # pdf expect_equal( density(dist, 0), 0.398942509116427, #dput(extraDistr::dtnorm(0, 0, 1, -5, 5)) ) expect_equal( density(dist, 3), 0.00443185095273209, #dput(extraDistr::dtnorm(3, 0, 1, -5, 5)) ) # cdf expect_equal(cdf(dist, 0), 0.5) expect_equal( cdf(dist, 3), 0.998650387846205, #dput(extraDistr::ptnorm(3, 0, 1, -5, 5)) ) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.3)), 0.3, tolerance = 1e-6) # stats # expect_equal(mean(dist), ???) # expect_equal(variance(dist), ???) }) test_that("Truncated Binomial distributions", { dist <- dist_truncated(dist_binomial(100, 0.83), 76, 86) # format expect_equal(format(dist), sprintf("%s[76,86]", format(dist_binomial(100,0.83)))) # quantiles expect_equal( quantile(dist, 0.1), 79 #dput(extraDistr::qtbinom(0.1, 100, 0.83, 76, 86)) ) expect_equal( quantile(dist, 0.5), 82 #dput(extraDistr::qtbinom(0.5, 100, 0.83, 76, 86)) ) # pdf expect_equal(density(dist, 75), 0) expect_equal(density(dist, 87), 0) expect_equal( density(dist, 80), 0.094154977726162, #dput(extraDistr::dtbinom(80, 100, 0.83, 76, 86)) ) expect_equal( density(dist, 85), 0.123463609708811, #dput(extraDistr::dtbinom(85, 100, 0.83, 76, 86)) ) # cdf expect_equal(cdf(dist, 0), 0) expect_equal(cdf(dist, 76), 0) expect_equal(cdf(dist, 86), 1) expect_equal( cdf(dist, 80), 0.259185477677455, #dput(extraDistr::ptbinom(80, 100, 0.83, 76, 86)) ) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.372)), 0.372, tolerance = 1e-3) # stats # expect_equal(mean(dist), ???) # expect_equal(variance(dist), ???) }) distributional/tests/testthat/test-transformations.R0000644000176200001440000001535515112047012022643 0ustar liggesuserstest_that("hilo of transformed distributions", { expect_identical( hilo(exp(dist_poisson(3))), exp(hilo((dist_poisson(3)))) ) }) test_that("chains of transformations", { expect_identical( hilo(dist_student_t(5)), hilo(log(exp(dist_student_t(5)))) ) expect_output( print(exp(dist_student_t(5))-1), "t\\(t\\(5, 0, 1\\)\\)" ) }) test_that("handling of transformation arguments", { expect_identical( hilo(logb(dist_uniform(0, 100), base = 10)), logb(hilo(dist_uniform(0, 100)), base = 10) ) expect_identical( hilo(10^logb(dist_uniform(0, 100), base = 10)), 10^logb(hilo(dist_uniform(0, 100)), base = 10) ) }) test_that("LogNormal distributions", { dist <- dist_transformed(dist_normal(0, 0.5), exp, log) ln_dist <- dist_lognormal(0, 0.5) # Test exp() shortcut expect_identical( exp(dist_normal(0, 0.5)), ln_dist ) expect_identical( log(ln_dist), dist_normal(0, 0.5) ) # Test log() shortcut with different bases expect_equal(log(dist_lognormal(0, log(3)), base = 3), dist_normal(0, 1)) expect_equal(log2(dist_lognormal(0, log(2))), dist_normal(0, 1)) expect_equal(log10(dist_lognormal(0, log(10))), dist_normal(0, 1)) # format expect_equal(format(dist), sprintf("t(%s)", format(dist_normal(0, 0.5)))) # quantiles expect_equal( quantile(dist, c(0.1, 0.5)), quantile(ln_dist, c(0.1, 0.5)) ) # pdf expect_equal( density(dist, c(1, 20)), density(ln_dist, c(1, 20)) ) # cdf expect_equal( cdf(dist, c(4, 90)), cdf(ln_dist, c(4, 90)) ) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.372)), 0.372, tolerance = 1e-3) # stats (approximate due to bias adjustment method) expect_equal(mean(dist), exp(0.25/2), tolerance = 0.01) expect_equal(variance(dist), (exp(0.25) - 1)*exp(0.25), tolerance = 0.1) }) test_that("inverses are applied automatically", { dist <- dist_gamma(1,1) log2dist <- log(dist, base = 2) log2dist_t <- dist_transformed(dist, log2, function(x) 2 ^ x) expect_equal(density(log2dist, 0.5), density(log2dist_t, 0.5)) expect_equal(cdf(log2dist, 0.5), cdf(log2dist_t, 0.5)) expect_equal(quantile(log2dist, 0.5), quantile(log2dist_t, 0.5)) # test multiple transformations that get stacked together by dist_transformed explogdist <- exp(log(dist)) expect_equal(density(dist, 0.5), density(explogdist, 0.5)) expect_equal(cdf(dist, 0.5), cdf(explogdist, 0.5)) expect_equal(quantile(dist, 0.5), quantile(explogdist, 0.5)) # test multiple transformations created by operators (via Ops) explog2dist <- 2 ^ log2dist expect_equal(density(dist, 0.5), density(explog2dist, 0.5)) expect_equal(cdf(dist, 0.5), cdf(explog2dist, 0.5)) expect_equal(quantile(dist, 0.5), quantile(explog2dist, 0.5)) # basic set of inverses expect_equal(density(sqrt(dist^2), 0.5), density(dist, 0.5)) expect_equal(density(exp(log(dist)), 0.5), density(dist, 0.5)) expect_equal(density(10^(log10(dist)), 0.5), density(dist, 0.5)) expect_equal(density(expm1(log1p(dist)), 0.5), density(dist, 0.5)) expect_equal(density(cos(acos(dist)), 0.5), density(dist, 0.5)) expect_equal(density(sin(asin(dist)), 0.5), density(dist, 0.5)) expect_equal(density(tan(atan(dist)), 0.5), density(dist, 0.5)) expect_equal(density(cosh(acosh(dist + 1)) - 1, 0.5), density(dist, 0.5)) expect_equal(density(sinh(asinh(dist)), 0.5), density(dist, 0.5)) expect_equal(density(tanh(atanh(dist)), 0.5), density(dist, 0.5)) expect_equal(density(dist + 1 - 1, 0.5), density(dist, 0.5)) expect_equal(density(dist * 2 / 2, 0.5), density(dist, 0.5)) # inverting a gamma distribution skip_if_not_installed("actuar") expect_equal(density(1/dist_gamma(4, 3), 0.5), density(dist_inverse_gamma(4, 1/3), 0.5)) expect_equal(density(1/(1/dist_gamma(4, 3)), 0.5), density(dist_gamma(4, 3), 0.5)) }) test_that("transformed distributions' density is 0 outside of the support region", { dist <- dist_wrap('norm') expect_equal(density(exp(dist), 0)[[1]], 0) expect_equal(density(exp(dist), -1)[[1]], 0) dist <- dist_wrap('gamma', shape = 1, rate = 1) expect_equal(density(exp(dist), 0)[[1]], 0) expect_equal(density(exp(dist), 1)[[1]], 1) }) test_that("transformed distributions' cdf is 0/1 outside of the support region", { dist <- dist_wrap('norm') expect_equal(cdf(exp(dist), 0)[[1]], 0) expect_equal(cdf(exp(dist), -1)[[1]], 0) expect_equal(cdf(-1*exp(dist), 0)[[1]], 1) expect_equal(cdf(-1*exp(dist), 2)[[1]], 1) }) test_that("unary negation operator works", { dist <- dist_normal(1,1) expect_equal(density(-dist, 0.5), density(dist, -0.5)) dist <- dist_wrap('norm', mean = 1) expect_equal(density(-dist, 0.5), density(dist, -0.5)) dist <- dist_student_t(3, mu = 1) expect_equal(density(-dist, 0.5), density(dist, -0.5)) }) test_that("transformed distributions pdf integrates to 1", { dist_names <- c('norm', 'gamma', 'beta', 'chisq', 'exp', 'logis', 't', 'unif', 'weibull') dist_args <- list(list(mean = 1, sd = 1), list(shape = 2, rate = 1), list(shape1 = 3, shape2 = 5), list(df = 5), list(rate = 1), list(location = 1.5, scale = 1), list(df = 10), list(min = 0, max = 1), list(shape = 3, scale = 1)) names(dist_args) <- dist_names dist <- lapply(dist_names, function(x) do.call(dist_wrap, c(x, dist_args[[x]]))) dist <- do.call(c, dist) dfun <- function(x, id, transform) density(get(transform)(dist[id]), x)[[1]] twoexp <- function(x) 2^x square <- function(x) x^2 mult2 <- function(x) 2*x identity <- function(x) x tol <- 1e-5 for (i in 1:length(dist)) { expect_equal(integrate(dfun, -Inf, Inf, id = i, transform = 'identity')$value, 1, tolerance = tol) expect_equal(integrate(dfun, -Inf, Inf, id = i, transform = 'exp')$value, 1, tolerance = tol) expect_equal(integrate(dfun, -Inf, Inf, id = i, transform = 'twoexp')$value, 1, tolerance = tol) expect_equal(integrate(dfun, -Inf, Inf, id = i, transform = 'mult2')$value, 1, tolerance = tol) lower_bound <- field(support(dist[[i]]), "lim")[[1]][1] if (near(lower_bound, 0)) { expect_equal(integrate(dfun, -Inf, 5, id = i, transform = 'log')$value, 1, tolerance = tol) expect_equal(integrate(dfun, -Inf, Inf, id = i, transform = 'square')$value, 1, tolerance = tol) } } }) test_that("monotonically decreasing transformations (#100)", { dist <- dist_lognormal() expect_equal( quantile(-dist, 0.2), -quantile(dist, 1 - 0.2) ) expect_equal( quantile(1/dist, 0.2), 1/quantile(dist, 1 - 0.2) ) expect_equal( quantile(-1/dist, 0.7), -1/quantile(dist, 0.7) ) expect_equal( cdf(-dist, -2), 1 - cdf(dist, 2) ) expect_equal( cdf(1/dist, 2), 1 - cdf(dist, 1/2) ) expect_equal( cdf(-1/dist, -2), cdf(dist, 1/2) ) }) distributional/tests/testthat/test-dist-weibull.R0000644000176200001440000000136315112047012022010 0ustar liggesuserstest_that("Weibull distribution", { dist <- dist_weibull(1.5, 1) expect_equal(format(dist), "Weibull(1.5, 1)") # quantiles expect_equal(quantile(dist, 0.1), stats::qweibull(0.1, 1.5, 1)) expect_equal(quantile(dist, 0.5), stats::qweibull(0.5, 1.5, 1)) # pdf expect_equal(density(dist, 0), stats::dweibull(0, 1.5, 1)) expect_equal(density(dist, 3), stats::dweibull(3, 1.5, 1)) # cdf expect_equal(cdf(dist, 0), stats::pweibull(0, 1.5, 1)) expect_equal(cdf(dist, 3), stats::pweibull(3, 1.5, 1)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 1 * gamma(1 + 1/1.5)) expect_equal(variance(dist), 1^2 * (gamma(1 + 2/1.5) - gamma(1 + 1/1.5)^2)) }) distributional/tests/testthat/test-support.R0000644000176200001440000000101615112047012021113 0ustar liggesuserstest_that("support gives the correct bounds", { s <- support(c(dist_normal(), dist_gamma(1, 1), dist_gamma(2, 1), dist_lognormal(), dist_beta(1, 1), dist_beta(1, 2), dist_beta(2, 1), dist_beta(2, 2), exp(dist_wrap('norm')), 2*atan(dist_normal()))) out <- unname(format(s)) expect_equal(out, c("R","[0,Inf)","(0,Inf)","(0,Inf)","[0,1]","[0,1)","(0,1]","(0,1)","(0,Inf)","(-pi,pi)")) }) distributional/tests/testthat/test-dist-burr.R0000644000176200001440000000143615112047012021320 0ustar liggesuserstest_that("Burr distribution", { dist <- dist_burr(2, 3) expect_equal(format(dist), "Burr12(2, 3, 1)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qburr(0.1, 2, 3)) expect_equal(quantile(dist, 0.5), actuar::qburr(0.5, 2, 3)) # pdf expect_equal(density(dist, 0), actuar::dburr(0, 2, 3)) expect_equal(density(dist, 3), actuar::dburr(3, 2, 3)) # cdf expect_equal(cdf(dist, 0), actuar::pburr(0, 2, 3)) expect_equal(cdf(dist, 3), actuar::pburr(3, 2, 3)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), actuar::mburr(1, 2, 3)) expect_equal(variance(dist), actuar::mburr(2, 2, 3) - actuar::mburr(1, 2, 3)^2) }) distributional/tests/testthat/test-dist-multivariate-normal.R0000644000176200001440000000253715131656316024363 0ustar liggesuserstest_that("Multivariate normal distribution", { mu <- c(1,2) sigma <- matrix(c(4,2,2,3), ncol=2) dist <- dist_multivariate_normal(mu = list(mu), sigma = list(sigma)) dimnames(dist) <- c("a", "b") expect_equal(format(dist), "MVN[2]") # stats expect_equal( mean(dist), matrix(c(1,2), nrow = 1, dimnames = list(NULL, c("a", "b"))) ) expect_equal(covariance(dist), list(`colnames<-`(sigma, dimnames(dist)))) # quantiles expect_equal( quantile(dist, 0.1, kind = "marginal"), matrix(c(qnorm(0.1, mu[1], sqrt(sigma[1,1])), qnorm(0.1, mu[2], sqrt(sigma[2,2]))), nrow = 1, dimnames = list(NULL, c("a", "b"))) ) skip_if_not_installed("mvtnorm") expect_equivalent( quantile(dist, 0.1, kind = "equicoordinate"), rep(mvtnorm::qmvnorm(0.1, mean = mu, sigma = sigma)$quantile, 2) ) # pdf expect_equal(density(dist, cbind(1, 2)), mvtnorm::dmvnorm(c(1, 2), mean = mu, sigma = sigma)) expect_equal(density(dist, cbind(-3, 4)), mvtnorm::dmvnorm(c(-3, 4), mean = mu, sigma = sigma)) # cdf expect_equivalent(cdf(dist, cbind(1, 2)), mvtnorm::pmvnorm(upper = c(1,2), mean = mu, sigma = sigma)) expect_equivalent(cdf(dist, cbind(-3, 4)), mvtnorm::pmvnorm(upper = c(-3, 4), mean = mu, sigma = sigma)) # F(Finv(a)) ~= a # expect_equal(cdf(dist, list(as.numeric(quantile(dist, 0.53)))), 0.53, tolerance = 1e-3) }) distributional/tests/testthat/test-dist-poisson-inverse-gaussian.R0000644000176200001440000000145615112047012025323 0ustar liggesuserstest_that("Poisson Inverse Gaussian distribution", { dist <- dist_poisson_inverse_gaussian(0.1, 0.8) expect_equal(format(dist), "PIG(0.1, 0.8)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qpig(0.1, 0.1, 0.8)) expect_equal(quantile(dist, 0.5), actuar::qpig(0.5, 0.1, 0.8)) # pdf expect_equal(density(dist, 0), actuar::dpig(0, 0.1, 0.8)) expect_equal(density(dist, 3), actuar::dpig(3, 0.1, 0.8)) # cdf expect_equal(cdf(dist, 0), actuar::ppig(0, 0.1, 0.8)) expect_equal(cdf(dist, 3), actuar::ppig(3, 0.1, 0.8)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.994)), 0.994, tolerance = 1e-3) # stats expect_equal(mean(dist), 0.1) expect_equal(variance(dist), 0.1/0.8*(0.1^2 + 0.8)) }) distributional/tests/testthat/test-dist-logarithmic.R0000644000176200001440000000156515112047012022653 0ustar liggesuserstest_that("Logarithmic distribution", { dist <- dist_logarithmic(0.66) expect_equal(format(dist), "Logarithmic(0.66)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.5), actuar::qlogarithmic(0.5, 0.66)) expect_equal(quantile(dist, 0.99), actuar::qlogarithmic(0.99, 0.66)) # pdf expect_equal(density(dist, 1), actuar::dlogarithmic(1, 0.66)) expect_equal(density(dist, 9), actuar::dlogarithmic(9, 0.66)) # cdf expect_equal(cdf(dist, 3), actuar::plogarithmic(3, 0.66)) expect_equal(cdf(dist, 12), actuar::plogarithmic(12, 0.66)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.9963064)), 0.9963064, tolerance = 1e-3) # stats expect_equal(mean(dist), -1/log(1-0.66)*(0.66/(1-0.66))) expect_equal(variance(dist), -(0.66^2 + 0.66*log(1-0.66))/((1-0.66)^2*log(1-0.66)^2)) }) distributional/tests/testthat/test-dist-gk.R0000644000176200001440000000166615112047012020754 0ustar liggesuserstest_that("g-k distribution", { # Define g-k distribution parameters A <- 0 B <- 1 g <- 0 k <- 0.5 c <- 0.8 dist <- dist_gk(A, B, g, k, c) # Check formatting expect_equal(format(dist), "gk(A = 0, B = 1, g = 0, k = 0.5)") # Require package installed skip_if_not_installed("gk", "0.1.0") # quantiles expect_equal(quantile(dist, 0.1), gk::qgk(0.1, A, B, g, k, c)) expect_equal(quantile(dist, 0.5), gk::qgk(0.5, A, B, g, k, c)) # pdf expect_equal(density(dist, 0), gk::dgk(0, A, B, g, k, c)) expect_equal(density(dist, 3), gk::dgk(3, A, B, g, k, c)) # cdf expect_equal(cdf(dist, 0), gk::pgk(0, A, B, g, k, c)) expect_equal(cdf(dist, 3), gk::pgk(3, A, B, g, k, c)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # Generate random samples set.seed(123) samples <- generate(dist, 10) set.seed(123) expect_equal(samples[[1L]], gk::rgk(10, A, B, g, k, c)) }) distributional/tests/testthat/test-dist-normal.R0000644000176200001440000000265015112047012021635 0ustar liggesuserstest_that("Normal distribution", { # defaults dist <- dist_normal() expect_equal(mean(dist), 0) expect_equal(variance(dist), 1) # display expect_s3_class(dist, "distribution") expect_output(print(dist), "N\\(0, 1\\)") expect_output(print(dist_normal(numeric())), "") # error checking expect_error( dist_normal(0, -1), "non-negative" ) expect_silent( dist_normal(mu = 0L, sigma = 1L) ) mu <- rnorm(10) sigma <- abs(rnorm(10)) dist <- dist_normal(mu, sigma) # summary statistics expect_equal(mean(dist), mu) expect_equal(median(dist), mu) expect_equal(median(dist[[1]]), mu[[1]]) expect_equal(variance(dist), sigma^2) # math expect_equal(mean(dist*3+1), mu*3+1) expect_equal(variance(dist*3+1), (sigma*3)^2) expect_equal(mean(dist + dist), mean(dist) + mean(dist)) expect_equal(variance(dist + dist), variance(dist) + variance(dist)) expect_equal(dist_normal() * 2, dist_normal(0, 2)) # density expect_equal( density(dist, 0), dnorm(0, mean = mu, sd = sigma) ) # cdf expect_equal( cdf(dist, 5), pnorm(5, mean = mu, sd = sigma) ) # quantile expect_equal( quantile(dist, 0.1), qnorm(0.1, mean = mu, sd = sigma) ) # generate expect_equal( { set.seed(0) generate(dist, 10) }, { set.seed(0) mapply(function(m, s) rnorm(10, m, s), m = mu, s = sigma, SIMPLIFY = FALSE) } ) }) distributional/tests/testthat/test-dist-bernoulli.R0000644000176200001440000000120415112047012022332 0ustar liggesuserstest_that("Bernoulli distribution", { dist <- dist_bernoulli(0.4) expect_equal(format(dist), "Bernoulli(0.4)") # quantiles expect_equal(quantile(dist, 0.6), FALSE) expect_equal(quantile(dist, 0.61), TRUE) # pdf expect_equal(density(dist, 0), stats::dbinom(0, 1, 0.4)) expect_equal(density(dist, 1), stats::dbinom(1, 1, 0.4)) # cdf expect_equal(cdf(dist, 0), stats::pbinom(0, 1, 0.4)) expect_equal(cdf(dist, 1), stats::pbinom(1, 1, 0.4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.6)), 0.6, tolerance = 1e-3) # stats expect_equal(mean(dist), 0.4) expect_equal(variance(dist), 0.4*(1-0.4)) }) distributional/tests/testthat/test-dist-geometric.R0000644000176200001440000000124415112047012022321 0ustar liggesuserstest_that("Geometric distribution", { dist <- dist_geometric(0.4) expect_equal(format(dist), "Geometric(0.4)") # quantiles expect_equal(quantile(dist, 0.6), stats::qgeom(0.6, 0.4)) expect_equal(quantile(dist, 0.9), stats::qgeom(0.9, 0.4)) # pdf expect_equal(density(dist, 0), stats::dgeom(0, 0.4)) expect_equal(density(dist, 5), stats::dgeom(5, 0.4)) # cdf expect_equal(cdf(dist, 0), stats::pgeom(0, 0.4)) expect_equal(cdf(dist, 10), stats::pgeom(10, 0.4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.64)), 0.64, tolerance = 1e-3) # stats expect_equal(mean(dist), 1/0.4 - 1) expect_equal(variance(dist), 1/0.4^2 - 1/0.4) }) distributional/tests/testthat/test-dist-beta.R0000644000176200001440000000116315112047012021256 0ustar liggesuserstest_that("Beta distribution", { dist <- dist_beta(3, 4) expect_equal(format(dist), "Beta(3, 4)") # quantiles expect_equal(quantile(dist, 0.1), qbeta(0.1, 3, 4)) expect_equal(quantile(dist, 0.5), qbeta(0.5, 3, 4)) # pdf expect_equal(density(dist, 0), dbeta(0, 3, 4)) expect_equal(density(dist, 3), dbeta(3, 3, 4)) # cdf expect_equal(cdf(dist, 0), pbeta(0, 3, 4)) expect_equal(cdf(dist, 3), pbeta(3, 3, 4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), 3/(3+4)) expect_equal(variance(dist), 3*4/((3+4)^2*(3+4+1))) }) distributional/tests/testthat/test-dist-studentised-range.R0000644000176200001440000000101215112047012023761 0ustar liggesuserstest_that("Studentized Range distribution", { dist <- dist_studentized_range(6, 5, 1) expect_equal(format(dist), "StudentizedRange(6, 5, 1)") # quantiles expect_equal(quantile(dist, 0.1), stats::qtukey(0.1, 6, 5, 1)) expect_equal(quantile(dist, 0.5), stats::qtukey(0.5, 6, 5, 1)) # pdf # cdf expect_equal(cdf(dist, 0), stats::ptukey(0, 6, 5, 1)) expect_equal(cdf(dist, 3), stats::ptukey(3, 6, 5, 1)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) }) distributional/tests/testthat/test-dist-f.R0000644000176200001440000000136715112047012020576 0ustar liggesuserstest_that("F distribution", { dist <- dist_f(5, 2) expect_equal(format(dist), "F(5, 2)") # quantiles expect_equal(quantile(dist, 0.1), stats::qf(0.1, 5, 2)) expect_equal(quantile(dist, 0.5), stats::qf(0.5, 5, 2)) # pdf expect_equal(density(dist, 0), stats::df(0, 5, 2)) expect_equal(density(dist, 3), stats::df(3, 5, 2)) # cdf expect_equal(cdf(dist, 0), stats::pf(0, 5, 2)) expect_equal(cdf(dist, 3), stats::pf(3, 5, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), NA_real_) expect_equal(variance(dist), NA_real_) dist <- dist_f(5, 5) expect_equal(mean(dist), 5/(5-2)) expect_equal(variance(dist), 2*5^2*(5+5-2)/(5*(5-2)^2*(5-4))) }) distributional/tests/testthat/test-inflated.R0000644000176200001440000000255615112047012021177 0ustar liggesuserstest_that("Check zero inflation", { dist <- dist_inflated(dist_poisson(6), 0.33) expect_equal(format(dist), "0+Pois(6)") # quantiles expect_equal(quantile(dist, 0.1), 0) expect_equal(quantile(dist, 0.5), 4) # pdf expect_equal(density(dist, 0), 0.33 + 0.67*dpois(0, 6)) expect_equal(density(dist, 3), 0.67*dpois(3, 6)) # cdf expect_equal(cdf(dist, 0), 0.33 + 0.67*ppois(0, 6)) expect_equal(cdf(dist, 3), 0.33 + 0.67*ppois(3, 6)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.52)), 0.52, tolerance = 1e-3) # stats expect_equal(mean(dist), 0.67*6) expect_equal(variance(dist), 0.67*6 + (0.33/0.67)*(0.67*6)^2) }) test_that("Check non-zero inflation", { dist <- dist_inflated(dist_poisson(6), 0.33, 2) expect_equal(format(dist), "2+Pois(6)") # quantiles expect_equal(quantile(dist, 0), 0) expect_equal(quantile(dist, 0.1), 2) expect_equal(quantile(dist, 0.33), 2) expect_equal(quantile(dist, 0.5), 4) # pdf expect_equal(density(dist, 0), 0.67*dpois(0, 6)) expect_equal(density(dist, 2), 0.33 + 0.67*dpois(2, 6)) expect_equal(density(dist, 3), 0.67*dpois(3, 6)) # cdf expect_equal(cdf(dist, 0), 0.67*ppois(0, 6)) expect_equal(cdf(dist, 2), 0.33 + 0.67*ppois(2, 6)) expect_equal(cdf(dist, 3), 0.33 + 0.67*ppois(3, 6)) # stats expect_equal(mean(dist), 0.33*2 + 0.67*6) # expect_equal(variance(d), ???) }) distributional/tests/testthat/test-dist-degenerate.R0000644000176200001440000000136415112047012022451 0ustar liggesuserstest_that("Degenerate distribution", { dist <- dist_degenerate(1) expect_equal( dist, vec_cast(1, new_dist()) ) expect_equal(format(dist), "1") # quantiles expect_equal(quantile(dist, 0), 1) expect_equal(quantile(dist, 0.5), 1) expect_equal(quantile(dist, 1), 1) # pdf expect_equal(density(dist, 1), 1) expect_equal(density(dist, 0.5), 0) expect_equal(density(dist, 0.99999), 0) # cdf expect_equal(cdf(dist, 0), 0) expect_equal(cdf(dist, 1), 1) expect_equal(cdf(dist, 0.9999), 0) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 1)), 1, tolerance = 1e-3) expect_equal(cdf(dist, quantile(dist, 0)), 1, tolerance = 1e-3) # stats expect_equal(mean(dist), 1) expect_equal(variance(dist), 0) }) distributional/tests/testthat/test-dist-uniform.R0000644000176200001440000000125015112047012022017 0ustar liggesuserstest_that("Uniform distribution", { dist <- dist_uniform(-2, 4) expect_equal(format(dist), "U(-2, 4)") # quantiles expect_equal(quantile(dist, 0.1), stats::qunif(0.1, -2, 4)) expect_equal(quantile(dist, 0.5), stats::qunif(0.5, -2, 4)) # pdf expect_equal(density(dist, 0), stats::dunif(0, -2, 4)) expect_equal(density(dist, 3), stats::dunif(3, -2, 4)) # cdf expect_equal(cdf(dist, 0), stats::punif(0, -2, 4)) expect_equal(cdf(dist, 3), stats::punif(3, -2, 4)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 0.5*(-2 + 4)) expect_equal(variance(dist), (4+2)^2/12) }) distributional/tests/testthat/test-dist-inverse-gaussian.R0000644000176200001440000000153615112047012023632 0ustar liggesuserstest_that("Inverse Gaussian distribution", { dist <- dist_inverse_gaussian(3, .2) expect_equal(format(dist), "IG(3, 0.2)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qinvgauss(0.1, 3, .2)) expect_equal(quantile(dist, 0.5), actuar::qinvgauss(0.5, 3, .2)) # pdf expect_equal(density(dist, 0), actuar::dinvgauss(0, 3, .2)) expect_equal(density(dist, 3), actuar::dinvgauss(3, 3, .2)) # cdf expect_equal(cdf(dist, 0), actuar::pinvgauss(0, 3, .2)) expect_equal(cdf(dist, 3), actuar::pinvgauss(3, 3, .2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), actuar::minvgauss(1, 3, .2)) expect_equal(variance(dist), actuar::minvgauss(2, 3, .2) - actuar::minvgauss(1, 3, .2)^2) }) distributional/tests/testthat/test-dist-pareto.R0000644000176200001440000000147315112047012021641 0ustar liggesuserstest_that("Pareto distribution", { dist <- dist_pareto(10, 1) expect_equal(format(dist), "Pareto(10, 1)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qpareto(0.1, 10, 1)) expect_equal(quantile(dist, 0.5), actuar::qpareto(0.5, 10, 1)) # pdf expect_equal(density(dist, 0), actuar::dpareto(0, 10, 1)) expect_equal(density(dist, 3), actuar::dpareto(3, 10, 1)) # cdf expect_equal(cdf(dist, 0), actuar::ppareto(0, 10, 1)) expect_equal(cdf(dist, 3), actuar::ppareto(3, 10, 1)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), actuar::mpareto(1, 10, 1)) expect_equal(variance(dist), actuar::mpareto(2, 10, 1) - actuar::mpareto(1, 10, 1)^2) }) distributional/tests/testthat/test-dist-hypergeometric.R0000644000176200001440000000144715112047012023376 0ustar liggesuserstest_that("Hypergeometric distribution", { dist <- dist_hypergeometric(500, 50, 100) expect_equal(format(dist), "Hypergeometric(500, 50, 100)") # quantiles expect_equal(quantile(dist, 0.1), stats::qhyper(0.1, 500, 50, 100)) expect_equal(quantile(dist, 0.5), stats::qhyper(0.5, 500, 50, 100)) # pdf expect_equal(density(dist, 0), stats::dhyper(0, 500, 50, 100)) expect_equal(density(dist, 3), stats::dhyper(3, 500, 50, 100)) # cdf expect_equal(cdf(dist, 0), stats::phyper(0, 500, 50, 100)) expect_equal(cdf(dist, 3), stats::phyper(3, 500, 50, 100)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats p <- 500/(500+50) expect_equal(mean(dist), 100*p) expect_equal(variance(dist), 100*p*(1-p)*(500+50-100)/(500+50-1)) }) distributional/tests/testthat/test-dist-inverse-exponential.R0000644000176200001440000000151315112047012024341 0ustar liggesuserstest_that("Inverse Exponential distribution", { dist <- dist_inverse_exponential(5) expect_equal(format(dist), "InvExp(5)") # Require package installed skip_if_not_installed("actuar", "2.0.0") # quantiles expect_equal(quantile(dist, 0.1), actuar::qinvexp(0.1, 5)) expect_equal(quantile(dist, 0.5), actuar::qinvexp(0.5, 5)) # pdf expect_equal(density(dist, 0), actuar::dinvexp(0, 5)) expect_equal(density(dist, 3), actuar::dinvexp(3, 5)) # cdf expect_equal(cdf(dist, 0), actuar::pinvexp(0, 5)) expect_equal(cdf(dist, 3), actuar::pinvexp(3, 5)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4)), 0.4, tolerance = 1e-3) # stats expect_equal(mean(dist), NA_real_) # dput(actuar::minvexp(1, 5)) expect_equal(variance(dist), NA_real_) # dput(actuar::minvexp(2, 5) - actuar::minvexp(1, 5)^2) }) distributional/tests/testthat/test-dist-cauchy.R0000644000176200001440000000126115112047012021616 0ustar liggesuserstest_that("Cauchy distribution", { dist <- dist_cauchy(-2, 1) expect_equal(format(dist), "Cauchy(-2, 1)") # quantiles expect_equal(quantile(dist, 0.1), stats::qcauchy(0.1, -2, 1)) expect_equal(quantile(dist, 0.5), stats::qcauchy(0.5, -2, 1)) # pdf expect_equal(density(dist, 0), stats::dcauchy(0, -2, 1)) expect_equal(density(dist, 3), stats::dcauchy(3, -2, 1)) # cdf expect_equal(cdf(dist, 0), stats::pcauchy(0, -2, 1)) expect_equal(cdf(dist, 3), stats::pcauchy(3, -2, 1)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), NA_real_) expect_equal(variance(dist), NA_real_) }) distributional/tests/testthat/test_gpd.R0000644000176200001440000000355515112047012020245 0ustar liggesuserstest_that("GPD", { dist <- dist_gpd(location = c(0, .5, 0), scale = c(1, 2, 3), shape = c(0, 0.1, 1.1)) # Mean expect_equal(mean(dist), c( 1, 0.5 + 2 / 0.9, # location + scale/(1 - shape) Inf # Since shape >= 1 ), tolerance = 0.0001) # Median expect_equal(median(dist), c( -log(0.5), 0.5 + 2 * (2^0.1 - 1) / 0.1, # location + scale * (2^shape - 1) / shape 3 * (2^1.1 - 1) / 1.1 # location + scale * (2^shape - 1) / shape ), tolerance = 0.0001) expect_equal(median(dist), quantile(dist, 0.5)) # Variance expect_equal(distributional::variance(dist), c( 1, 2^2 / 0.9^2 / (1 - 2 * 0.1), # scale^2 / (1 - shape)^2 / (1 - 2 * shape) Inf # since shape >= 0.5 ), tolerance = 0.0001) # Density at <- (0:20) / 10 + 1e-8 # Avoiding being on the boundary where evd gives wrong result expect_equal(density(dist, at), list( evd::dgpd(at, loc = 0, scale = 1, shape = 0), evd::dgpd(at, loc = 0.5, scale = 2, shape = 0.1), evd::dgpd(at, loc = 0, scale = 3, shape = 1.1) )) # CDF expect_equal(distributional::cdf(dist, at), list( evd::pgpd(at, loc = 0, scale = 1, shape = 0), evd::pgpd(at, loc = 0.5, scale = 2, shape = 0.1), evd::pgpd(at, loc = 0, scale = 3, shape = 1.1) )) # Quantiles p <- (1:19) / 20 expect_equal(quantile(dist, p = p), list( evd::qgpd(p = p, loc = 0, scale = 1, shape = 0), evd::qgpd(p = p, loc = 0.5, scale = 2, shape = 0.1), evd::qgpd(p = p, loc = 0, scale = 3, shape = 1.1) )) # Generate set.seed(123) rand_dist <- distributional::generate(dist, times = 1e6) expect_equal(unlist(lapply(rand_dist[1:2], mean)), mean(dist)[1:2], tolerance = 0.01 ) expect_equal(unlist(lapply(rand_dist[1:2], var)), distributional::variance(dist)[1:2], tolerance = 0.01 ) expect_equal(unlist(lapply(rand_dist, median)), median(dist), tolerance = 0.01 ) }) distributional/tests/testthat/test-dist-logistic.R0000644000176200001440000000124415112047012022160 0ustar liggesuserstest_that("Logistic distribution", { dist <- dist_logistic(5, 2) expect_equal(format(dist), "Logistic(5, 2)") # quantiles expect_equal(quantile(dist, 0.1), stats::qlogis(0.1, 5, 2)) expect_equal(quantile(dist, 0.5), stats::qlogis(0.5, 5, 2)) # pdf expect_equal(density(dist, 0), stats::dlogis(0, 5, 2)) expect_equal(density(dist, 3), stats::dlogis(3, 5, 2)) # cdf expect_equal(cdf(dist, 0), stats::plogis(0, 5, 2)) expect_equal(cdf(dist, 3), stats::plogis(3, 5, 2)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 5) expect_equal(variance(dist), (2*pi)^2/3) }) distributional/tests/testthat/test-dist-chisq.R0000644000176200001440000000117115112047012021451 0ustar liggesuserstest_that("Chisq distribution", { dist <- dist_chisq(9) expect_equal(format(dist), "x2(9)") # quantiles expect_equal(quantile(dist, 0.1), stats::qchisq(0.1, 9)) expect_equal(quantile(dist, 0.5), stats::qchisq(0.5, 9)) # pdf expect_equal(density(dist, 0), stats::dchisq(0, 9)) expect_equal(density(dist, 3), stats::dchisq(3, 9)) # cdf expect_equal(cdf(dist, 0), stats::pchisq(0, 9)) expect_equal(cdf(dist, 3), stats::pchisq(3, 9)) # F(Finv(a)) ~= a expect_equal(cdf(dist, quantile(dist, 0.4246)), 0.4246, tolerance = 1e-3) # stats expect_equal(mean(dist), 9) expect_equal(variance(dist), 2*9) }) distributional/tests/testthat.R0000644000176200001440000000011015112047012016414 0ustar liggesuserslibrary(testthat) library(distributional) test_check("distributional") distributional/MD50000644000176200001440000002576715131672333013643 0ustar liggesusersd1b73d68c6369b6ef6278788e046c78e *DESCRIPTION 181e9456bbafdaa8833fb57407eaef9f *NAMESPACE 520fba42275810bb48a6cb627a9a117c *NEWS.md 87d3c13f1eeff7544722bdc2420be081 *R/default.R 6c1ad28b78fcd0c080f8d34a05433460 *R/dist_bernoulli.R dc43681fa6e29be1aafb25d54e2c7183 *R/dist_beta.R a5a9d7ed647e61fb349a70b30634626f *R/dist_binomial.R 66942ac584555a44ce84f74bbf3c3513 *R/dist_burr.R e553e5de7e579141df9dd65a99102e8f *R/dist_categorical.R 53cb3790bb9795a7064f42186fcd05c7 *R/dist_cauchy.R 1b7658b0a5e7a9f506ad03e57deb686b *R/dist_chisq.R 6c78897fd1ada1945b96343fcdb76112 *R/dist_degenerate.R 38b9536e1e4757972d5c53e623bc9f10 *R/dist_exponential.R 8b809d6c869199eff99b1473c329761d *R/dist_f.R 3a4620b9047b9329f2d5f6b0906a3c6e *R/dist_gamma.R 0dbe291bdcf9857264963fd0c9f2268e *R/dist_geometric.R 1c26fb43a854b8436fa97c008adcdfd6 *R/dist_gev.R 58f5107d5b219583d88cf1ebc25ba0e6 *R/dist_gh.R bf135f178469eb3454a62252d80755e9 *R/dist_gk.R 098e0503ae29c7722509a65b6749b19e *R/dist_gpd.R dc76df793b9ac313ddf8e11dc8e56b84 *R/dist_gumbel.R e44ec5f5f34812b8d02236bd5a6de641 *R/dist_hypergeometric.R f3582064d19f827653ed0f532857185e *R/dist_inflated.R 492dc5e9b3fe4a6dbb94ea9b7cd9fa13 *R/dist_inverse_exponential.R 93958c95e2e798cfb6bc0233e57f165d *R/dist_inverse_gamma.R 02cd68cfd5ae78860238146c41bcddb3 *R/dist_inverse_gaussian.R 3619a8abea89cdab3d38cafc81a07fb7 *R/dist_laplace.R bfe9442538556f60e5b92b03c4f98a7f *R/dist_logarithmic.R 1509968893e901612bfc323fd35f3eff *R/dist_logistic.R 7167b0bc067bf8bbf12f7560d4003df7 *R/dist_lognormal.R b717e18f3f302804854a656f73dd8e81 *R/dist_missing.R 5864484934ec3f1252e781b617c30f3e *R/dist_mixture.R ff7dd42c3208eccf562f6238c86338d3 *R/dist_multinomial.R b456daa319606a25b23353649fdc2fef *R/dist_multivariate_normal.R fa05dd5aee84eef3240adfb9b40e1ee8 *R/dist_multivariate_t.R bb8315831c75db335eb1b3be4b7eb686 *R/dist_negative_binomial.R 844e525afa0886e791a4e17a0ef155e8 *R/dist_normal.R 3dd0da03ebb7384268ddf05928f45d70 *R/dist_pareto.R 71aa10276fba4fc016a3b3a2c0c22f3f *R/dist_percentile.R 8f9eceeb07408e8385e44d743b400116 *R/dist_poisson.R ce817212c8ccc3ef19261f7552324fd3 *R/dist_poisson_inverse_gaussian.R 1ea5a1dc653e85554b39ccf9f987c4b1 *R/dist_sample.R 2bb609cffc51aaf56d8ae832cc2d4518 *R/dist_student_t.R 3dc1cd3f28fce7d0f31426f898f0cbcc *R/dist_studentized_range.R 489bff8d1b4c6cdecb3ecc3bcd34ad16 *R/dist_transformed.R 92cbdfa92a3e4f333875c4627dcb1b9d *R/dist_truncated.R 1b9e3e56b8b99878e680b0c77707b804 *R/dist_uniform.R cdf6dd70ee68586584bc592cb4e5d58d *R/dist_weibull.R 3a742753b06e39a47446a6885c39f33f *R/dist_wrap.R 4bd9374ae082686e1372ddc4fa28168b *R/distribution.R ffb71983be3681c86021ae7d15341027 *R/distributional-package.R c1256209192fb4e9f8de412047c8946e *R/hdr.R ee9e87cbb170b0c8965763f42accf30c *R/hilo.R c0ea658b5cab1732627259bc8885988a *R/reexports.R 4d1c5b0ec4898a63218919e293999951 *R/support.R 68a296b724f91b4a7b8a07080bc6aac1 *R/utils.R de6e6606a975b15f1b3d7c82a0a80adc *README.md 7f4496b2c3ab1c9dfe9f421cced55827 *man/cdf.Rd e3c345c3d92b4a371a98683cf42691ae *man/covariance.Rd b5682bb0e93de8c91011b23a3d18f010 *man/covariance.distribution.Rd 693f7c772c36c0005443597983f32908 *man/density.distribution.Rd 24c0131c706c146bdb4e906d47246e62 *man/dist_bernoulli.Rd da7ceb52c9935adef3815ba3ab282364 *man/dist_beta.Rd 2f0b01234a29e00daf0f3a28573c33a8 *man/dist_binomial.Rd 44c7aa4b51aea8b2397c25a8e280a5b4 *man/dist_burr.Rd ab82dc66e159206531bfe27f235a9117 *man/dist_categorical.Rd 42eb2f1918da66e331a25b648965247f *man/dist_cauchy.Rd 1a90cf102d9b471844d47a11b2d272a7 *man/dist_chisq.Rd 8cb644ed1ee6716b0c5bf3cc69976830 *man/dist_degenerate.Rd 68184b765b5859ea26e12d3eeb840f58 *man/dist_exponential.Rd 87f7d447796be5c7f9c22cbfa66efbff *man/dist_f.Rd f336b3a2015aad5cc15e0174edeb9ef7 *man/dist_gamma.Rd c354bff8cebc0fec14adf05596aa12f3 *man/dist_geometric.Rd fe4946ea7f00b041a6a433fef057605c *man/dist_gev.Rd bd26cf3b4ad0581053add69edce01032 *man/dist_gh.Rd ca40b309521f46bffa4b16c310c1b5f7 *man/dist_gk.Rd e861fb9fa48dc833a45ff58dbf0c7ad0 *man/dist_gpd.Rd be30743520ee252a1b97caa1eb3ed71a *man/dist_gumbel.Rd f89a348a55a8687bc7e3165cdd1acc20 *man/dist_hypergeometric.Rd cdc481ad870b8c78f8ada89cc31bf18e *man/dist_inflated.Rd aa653096f6b1c0c5d7c44c4bd10e83ee *man/dist_inverse_exponential.Rd 0a1ab9e995b65d62ea443d1b800dcb53 *man/dist_inverse_gamma.Rd d21b3dc346bcc95bae1f0b73487a0543 *man/dist_inverse_gaussian.Rd 56bf9e55a77c6e63c14227704734062b *man/dist_laplace.Rd ee30c6d5fc805e39b8fe468c8dbad44d *man/dist_logarithmic.Rd 3163dc2e4e6415a64d0c5ea3f24b7aa2 *man/dist_logistic.Rd 54f63beff5f01b22e83020ec87271728 *man/dist_lognormal.Rd 8d37c832a060d227077659cb3ec22fb0 *man/dist_missing.Rd 4a44ac2bf3c10149c89fa80fc43443b9 *man/dist_mixture.Rd 0bf83c7f52ff91b2c9c3a138283540f4 *man/dist_multinomial.Rd a487fe4256bec04bdbf4bb255c2fbcde *man/dist_multivariate_normal.Rd 8d0a5ccf109b631713bbaf4a2f391485 *man/dist_multivariate_t.Rd 0c7acfb7a8bed8e924e7d650501e455a *man/dist_negative_binomial.Rd c6b7089d8f3b12ff7e2b4634bf677e57 *man/dist_normal.Rd b664d2bd9ce2aaf9d0ffe5e2c0106933 *man/dist_pareto.Rd e5bb650d86fe753023916735fbb67347 *man/dist_percentile.Rd 6ea65078a9ac47ff7947e257952a2eac *man/dist_poisson.Rd ab594463c8940a724006e47e0ca296d8 *man/dist_poisson_inverse_gaussian.Rd daa5ccc2dc423af11a0739b91311dd64 *man/dist_sample.Rd d73b81db24eb44490002569db22dc1bd *man/dist_student_t.Rd 4623c4ef5e96ec7ba8b86a70940b0b1a *man/dist_studentized_range.Rd 05053ca04f8b65c43f482b63a9100665 *man/dist_transformed.Rd 978730a96c46f82b867eb77daa0b01ec *man/dist_truncated.Rd be08142af1ae06a850d182e2dbf04ecc *man/dist_uniform.Rd c4d29461e05ee593ce7cf8ba133e1eab *man/dist_weibull.Rd b0351f0ac9127d3f9bcb94eae9dafbc4 *man/dist_wrap.Rd 19a58490ca339df3fe626059ce24b7a2 *man/distributional-package.Rd 20025842953d1cd5981027989f5cd274 *man/family.distribution.Rd d8bfd7225d4236c2eb773a282275d515 *man/figures/README-plot-1.png cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 46de21252239c5a23d400eae83ec6b2d *man/figures/lifecycle-retired.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 04c48ad61c1d116d5b02cc6c4146e945 *man/generate.distribution.Rd 0863a0275061c3bbeec96f42340a87f1 *man/has_symmetry.Rd 1b79fdc35b5a03963705a461f71621a6 *man/hdr.Rd 3d4855a002b050cb30200a5a376ca0f0 *man/hdr.distribution.Rd af538fafed85b884cb83e28cb184b32e *man/hilo.Rd 4c48389ab6ee0995ada5eaad7d79896b *man/hilo.distribution.Rd c31ec75190a32487c2b2941bb81a53b0 *man/is-distribution.Rd 202225fd45b8291ef9a0e6e296955913 *man/is_hdr.Rd e7315a386366939fde82a760fce20ddc *man/is_hilo.Rd 3bc682bf2bcf9ed6541ac81c0bb88784 *man/kurtosis.Rd d15439b0a1ed4eebea6941cd0bf40d9a *man/likelihood.Rd 5a0459a4187ec01636c60f53b8616996 *man/mean.distribution.Rd 60b7e7a40f8891df167be49c94ceeac7 *man/median.distribution.Rd 3c583b85f522031681fc10a92a2a2487 *man/new_dist.Rd a7d0b9e27c3fcde27d461e01f6b479a9 *man/new_hdr.Rd 6e6912255884abfcbddee2c8d8e554de *man/new_hilo.Rd b4f17fb8e52f77a7536392e34c62e2ad *man/new_support_region.Rd b0e14f9396753070ec6164a8f68c152c *man/parameters.Rd f2ac97537a13995bad697d8c76dcb233 *man/quantile.distribution.Rd be73e31803dd103145a71b28ad94d6a3 *man/reexports.Rd 41ebfdf74e6a185be163f35ed69b97d3 *man/skewness.Rd 4f7f739114cb00764a6e78a519fc3e9e *man/support.Rd 7133556c6920f36b9a42712915018e1a *man/variance.Rd f26ee827eae085802defdb357c2788af *man/variance.distribution.Rd 0eed614527f4bcdb7b5d80cef3a90163 *tests/testthat.R c9d07a0c2baafa1d864c61cae8939b68 *tests/testthat/setup-tests.R 667c66fed7b0769332b9b164f608efd4 *tests/testthat/test-apply.R 34d59205418fbb439986220b21e6a2e1 *tests/testthat/test-dist-bernoulli.R 98c6f80f397d5be23582ad8374af5837 *tests/testthat/test-dist-beta.R f9e4e4131783f7237d4f97dfe72fbe3a *tests/testthat/test-dist-burr.R 6de6f030c39f2525d1de82dafcc7b912 *tests/testthat/test-dist-categorical.R c0267778b901e4a3691a6be6e1461687 *tests/testthat/test-dist-cauchy.R 46065b755f643e6d67e14b3dbbab2f43 *tests/testthat/test-dist-chisq.R c2bc05c26f197ef3e2d532f76e6dcd16 *tests/testthat/test-dist-degenerate.R 1a43b2c7b3a8d43de284e9b6ffc01fd8 *tests/testthat/test-dist-exponential.R 028b728b08ac8f88871d0b8b5f4881d2 *tests/testthat/test-dist-f.R d682147ddb5e7d006cd3b6f33a1bdb55 *tests/testthat/test-dist-gamma.R ffdfbf31992e228107c0d64534549edd *tests/testthat/test-dist-geometric.R 5c622b05e2e1828aa98c78f655689ee1 *tests/testthat/test-dist-gh.R ce777d0eb221c8ce4708c11f792bb142 *tests/testthat/test-dist-gk.R c857ddebb4f3ef4436cb234c87b2abc0 *tests/testthat/test-dist-gumbel.R 3c203e6e7e503d1d611abe46e84064a1 *tests/testthat/test-dist-hypergeometric.R 50b33bef9f581dddf82060fc0e9a84f9 *tests/testthat/test-dist-inverse-exponential.R 661e60cc3d1cce040072848183fa3a94 *tests/testthat/test-dist-inverse-gamma.R fb99988132630fcc5055dd320050d692 *tests/testthat/test-dist-inverse-gaussian.R 5b76396ede5b7dd4b27ab2038c482eb6 *tests/testthat/test-dist-laplace.R 3788908d3a0ebdd2437f987b2f80fd75 *tests/testthat/test-dist-logarithmic.R 3b8646feb5b8006e2ad931d47a3e2f86 *tests/testthat/test-dist-logistic.R 121c000ccb65141a721f4157398a9def *tests/testthat/test-dist-lognormal.R 1320dbcb946cd401b212cb0dbad5f0bc *tests/testthat/test-dist-missing.R c2e594717d7fae730dce9f4476b9900a *tests/testthat/test-dist-multinomial.R 429cb89d50d8a6c545a550b559d2951a *tests/testthat/test-dist-multivariate-normal.R 8e6814ce9ae649221379b8fa1d4c65b2 *tests/testthat/test-dist-multivariate-t.R 8e6d6d1d5e21ce78f6efdef37b2bf132 *tests/testthat/test-dist-negative-binomial.R ed995c49bec61a4f37b19fefdf8bb77f *tests/testthat/test-dist-normal.R dbbd61cb750f4ee5c164b377c2a5f3e0 *tests/testthat/test-dist-pareto.R b1d1c4055d8c137124c31b00aeb013f7 *tests/testthat/test-dist-percentile.R 8dbd79f036ef97752d1c226d4d5aee40 *tests/testthat/test-dist-poisson-inverse-gaussian.R 0b090de31a10083ef1cc2b63586aa81b *tests/testthat/test-dist-sample.R d6eb44eeb209211d414093e9965b899b *tests/testthat/test-dist-student-t.R a76bf887960e6fe5d8e74b7eb7841b85 *tests/testthat/test-dist-studentised-range.R 54e2974b72000400caf3aafcf7aee453 *tests/testthat/test-dist-uniform.R c534bea9f61ff207a0250a4f9c291e62 *tests/testthat/test-dist-weibull.R a6cba40c30d644db4a3d7757be58ce25 *tests/testthat/test-distribution.R a275086e34a17feb80675c2036a3a6e7 *tests/testthat/test-hilo.R 3dfad230c0483029636f11da9836bf2f *tests/testthat/test-inflated.R 8dd171e53c24bf91d7910cfb4a27bd47 *tests/testthat/test-issues.R eb99ea17d014e7cb4eabca616571a890 *tests/testthat/test-mixture.R 7e1219625486c6b68fb3d78cef3b1ca9 *tests/testthat/test-support.R d8c1fabe28c13779a48066aac1e6e77a *tests/testthat/test-transformations.R 538487050b2556bbc9691abefdfab6f8 *tests/testthat/test-truncated.R 0503cfbe17c71034486a63bfbec17f91 *tests/testthat/test_gev.R 9997677dd7c463d60af1775f9627028a *tests/testthat/test_gpd.R distributional/R/0000755000176200001440000000000015131633677013523 5ustar liggesusersdistributional/R/dist_uniform.R0000644000176200001440000000557115131565225016351 0ustar liggesusers#' The Uniform distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' A distribution with constant density on an interval. #' #' @inheritParams stats::dunif #' #' @details #' #' `r pkgdown_doc_link("dist_uniform")` #' #' In the following, let \eqn{X} be a Uniform random variable with parameters #' `min` = \eqn{a} and `max` = \eqn{b}. #' #' **Support**: \eqn{[a, b]} #' #' **Mean**: \eqn{\frac{a + b}{2}} #' #' **Variance**: \eqn{\frac{(b - a)^2}{12}} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{b - a} #' }{ #' f(x) = 1 / (b - a) #' } #' #' for \eqn{x \in [a, b]}{x in [a, b]}, and \eqn{f(x) = 0} otherwise. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \frac{x - a}{b - a} #' }{ #' F(x) = (x - a) / (b - a) #' } #' #' for \eqn{x \in [a, b]}{x in [a, b]}, with \eqn{F(x) = 0} for \eqn{x < a} #' and \eqn{F(x) = 1} for \eqn{x > b}. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \frac{e^{tb} - e^{ta}}{t(b - a)} #' }{ #' E(e^(tX)) = (e^(tb) - e^(ta)) / (t(b - a)) #' } #' #' for \eqn{t \neq 0}{t != 0}, and \eqn{E(e^{tX}) = 1} for \eqn{t = 0}. #' #' **Skewness**: \eqn{0} #' #' **Excess Kurtosis**: \eqn{-\frac{6}{5}}{-6/5} #' #' @seealso [stats::Uniform] #' #' @examples #' dist <- dist_uniform(min = c(3, -2), max = c(5, 4)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_uniform #' @export dist_uniform <- function(min, max){ min <- vec_cast(min, double()) max <- vec_cast(max, double()) if(any(min > max)){ abort("The min of a Uniform distribution must be less than max.") } new_dist(l = min, u = max, class = "dist_uniform") } #' @export format.dist_uniform <- function(x, digits = 2, ...){ sprintf( "U(%s, %s)", format(x[["l"]], digits = digits, ...), format(x[["u"]], digits = digits, ...) ) } #' @export density.dist_uniform <- function(x, at, ...){ stats::dunif(at, x[["l"]], x[["u"]]) } #' @export log_density.dist_uniform <- function(x, at, ...){ stats::dunif(at, x[["l"]], x[["u"]], log = TRUE) } #' @export quantile.dist_uniform <- function(x, p, ...){ stats::qunif(p, x[["l"]], x[["u"]]) } #' @export cdf.dist_uniform <- function(x, q, ...){ stats::punif(q, x[["l"]], x[["u"]]) } #' @export generate.dist_uniform <- function(x, times, ...){ stats::runif(times, x[["l"]], x[["u"]]) } #' @export mean.dist_uniform <- function(x, ...){ (x[["u"]]+x[["l"]])/2 } #' @export covariance.dist_uniform <- function(x, ...){ (x[["u"]]-x[["l"]])^2/12 } #' @export skewness.dist_uniform <- function(x, ...) 0 #' @export kurtosis.dist_uniform <- function(x, ...) -6/5 #' @export has_symmetry.dist_uniform <- function(x, ...) TRUEdistributional/R/dist_negative_binomial.R0000644000176200001440000000717215131607203020336 0ustar liggesusers#' The Negative Binomial distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' A generalization of the geometric distribution. It is the number #' of failures in a sequence of i.i.d. Bernoulli trials before #' a specified number of successes (`size`) occur. The probability of success in #' each trial is given by `prob`. #' #' @param size The number of successful trials (target number of successes). #' Must be a positive number. Also called the dispersion parameter. #' @param prob The probability of success in each trial. Must be between 0 and 1. #' #' @details #' #' `r pkgdown_doc_link("dist_negative_binomial")` #' #' In the following, let \eqn{X} be a Negative Binomial random variable with #' success probability `prob` = \eqn{p} and the number of successes `size` = #' \eqn{r}. #' #' #' **Support**: \eqn{\{0, 1, 2, 3, ...\}} #' #' **Mean**: \eqn{\frac{r(1-p)}{p}} #' #' **Variance**: \eqn{\frac{r(1-p)}{p^2}} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = k) = \binom{k + r - 1}{k} (1-p)^r p^k #' }{ #' P(X = k) = choose(k+r-1, k) * (1-p)^r * p^k #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(k) = \sum_{i=0}^{\lfloor k \rfloor} \binom{i + r - 1}{i} (1-p)^r p^i #' }{ #' F(k) = sum_{i=0}^floor(k) choose(i+r-1, i) * (1-p)^r * p^i #' } #' #' This can also be expressed in terms of the regularized incomplete beta #' function, and is computed numerically. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \left(\frac{1-p}{1-pe^t}\right)^r, \quad t < -\log p #' }{ #' E(e^(tX)) = ((1-p)/(1-p*e^t))^r, t < -log p #' } #' #' **Skewness**: #' #' \deqn{ #' \gamma_1 = \frac{2-p}{\sqrt{r(1-p)}} #' }{ #' \gamma_1 = (2-p) / sqrt(r(1-p)) #' } #' #' **Excess Kurtosis**: #' #' \deqn{ #' \gamma_2 = \frac{6}{r} + \frac{p^2}{r(1-p)} #' }{ #' \gamma_2 = 6/r + p^2/(r(1-p)) #' } #' #' @seealso [stats::NegBinomial] #' #' @examples #' dist <- dist_negative_binomial(size = 10, prob = 0.5) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' support(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @export dist_negative_binomial <- function(size, prob){ size <- vec_cast(size, double()) prob <- vec_cast(prob, double()) if(any(prob < 0 | prob > 1)){ abort("Probability of success must be between 0 and 1.") } new_dist(n = size, p = prob, class = "dist_negbin") } #' @export format.dist_negbin <- function(x, digits = 2, ...){ sprintf( "NB(%s, %s)", format(x[["n"]], digits = digits, ...), format(x[["p"]], digits = digits, ...) ) } #' @export density.dist_negbin <- function(x, at, ...){ stats::dnbinom(at, x[["n"]], x[["p"]]) } #' @export log_density.dist_negbin <- function(x, at, ...){ stats::dnbinom(at, x[["n"]], x[["p"]], log = TRUE) } #' @export quantile.dist_negbin <- function(x, p, ...){ stats::qnbinom(p, x[["n"]], x[["p"]]) } #' @export cdf.dist_negbin <- function(x, q, ...){ stats::pnbinom(q, x[["n"]], x[["p"]]) } #' @export generate.dist_negbin <- function(x, times, ...){ stats::rnbinom(times, x[["n"]], x[["p"]]) } #' @export mean.dist_negbin <- function(x, ...){ x[["n"]] * (1 - x[["p"]]) / x[["p"]] } #' @export covariance.dist_negbin <- function(x, ...){ x[["n"]] * (1 - x[["p"]]) / x[["p"]]^2 } #' @export skewness.dist_negbin <- function(x, ...) { (2 - x[["p"]]) / sqrt(x[["n"]] * (1 - x[["p"]])) } #' @export kurtosis.dist_negbin <- function(x, ...) { 6 / x[["n"]] + x[["p"]]^2 / (x[["n"]] * (1 - x[["p"]])) } distributional/R/dist_inflated.R0000644000176200001440000001072415131577114016454 0ustar liggesusers#' Inflate a value of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Inflated distributions add extra probability mass at a specific value, #' most commonly zero (zero-inflation). These distributions are useful for #' modeling data with excess observations at a particular value compared to #' what the base distribution would predict. Common applications include #' zero-inflated Poisson or negative binomial models for count data with #' many zeros. #' #' @param dist The distribution(s) to inflate. #' @param prob The added probability of observing `x`. #' @param x The value to inflate. The default of `x = 0` is for zero-inflation. #' #' @details #' #' `r pkgdown_doc_link("dist_inflated")` #' #' In the following, let \eqn{Y} be an inflated random variable based on #' a base distribution \eqn{X}, with inflation value `x` = \eqn{c} and #' inflation probability `prob` = \eqn{p}. #' #' **Support**: Same as the base distribution, but with additional #' probability mass at \eqn{c} #' #' **Mean**: (when `x` is numeric) #' #' \deqn{ #' E(Y) = p \cdot c + (1-p) \cdot E(X) #' }{ #' E(Y) = p * c + (1-p) * E(X) #' } #' #' **Variance**: (when `x = 0`) #' #' \deqn{ #' \text{Var}(Y) = (1-p) \cdot \text{Var}(X) + p(1-p) \cdot [E(X)]^2 #' }{ #' Var(Y) = (1-p) * Var(X) + p(1-p) * [E(X)]^2 #' } #' #' For non-zero inflation values, the variance is not computed in closed form. #' #' **Probability mass/density function (p.m.f/p.d.f)**: #' #' For discrete distributions: #' \deqn{ #' f_Y(y) = \begin{cases} #' p + (1-p) \cdot f_X(c) & \text{if } y = c \\ #' (1-p) \cdot f_X(y) & \text{if } y \neq c #' \end{cases} #' }{ #' f_Y(y) = p + (1-p) * f_X(c) if y = c, (1-p) * f_X(y) if y != c #' } #' #' For continuous distributions: #' \deqn{ #' f_Y(y) = \begin{cases} #' p & \text{if } y = c \\ #' (1-p) \cdot f_X(y) & \text{if } y \neq c #' \end{cases} #' }{ #' f_Y(y) = p if y = c, (1-p) * f_X(y) if y != c #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F_Y(q) = \begin{cases} #' (1-p) \cdot F_X(q) & \text{if } q < c \\ #' p + (1-p) \cdot F_X(q) & \text{if } q \geq c #' \end{cases} #' }{ #' F_Y(q) = (1-p) * F_X(q) if q < c, p + (1-p) * F_X(q) if q >= c #' } #' #' **Quantile function**: #' #' The quantile function is computed numerically by inverting the #' inflated CDF, accounting for the jump in probability at the #' inflation point. #' #' @examples #' # Zero-inflated Poisson #' dist <- dist_inflated(dist_poisson(lambda = 2), prob = 0.3, x = 0) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, 0) #' density(dist, 1) #' #' cdf(dist, 2) #' #' quantile(dist, 0.5) #' #' @name dist_inflated #' @export dist_inflated <- function(dist, prob, x = 0){ vec_is(dist, new_dist()) if(prob < 0 || prob > 1){ abort("The inflation probability must be between 0 and 1.") } new_dist(dist = dist, x = x, p = prob, dimnames = dimnames(dist), class = "dist_inflated") } #' @export format.dist_inflated <- function(x, ...){ sprintf( "%s+%s", format(x[["x"]]), format(x[["dist"]]) ) } #' @export density.dist_inflated <- function(x, at, ...){ x[["p"]]*(at==x[["x"]]) + (1-x[["p"]])*density(x[["dist"]], at, ...) } #' @export quantile.dist_inflated <- function(x, p, ...){ qt <- quantile(x[["dist"]], pmax(0, (p - x[["p"]]) / (1-x[["p"]])), ...) if(qt >= x[["x"]]) return(qt) qt <- quantile(x[["dist"]], p, ...) if(qt < x[["x"]]) qt else x[["x"]] } #' @export cdf.dist_inflated <- function(x, q, ...){ x[["p"]]*(q>=x[["x"]]) + (1-x[["p"]])*cdf(x[["dist"]], q, ...) } #' @export generate.dist_inflated <- function(x, times, ...){ p <- x[["p"]] inf <- stats::runif(times) < p r <- vec_init(x[["x"]], times) r[inf] <- x[["x"]] r[!inf] <- generate(x[["dist"]], sum(!inf)) r } #' @export mean.dist_inflated <- function(x, ...){ # Can't compute if inflation value is not numeric if(!vec_is(x[["x"]], numeric())) return(NA_real_) p <- x[["p"]] p*x[["x"]] + (1-p)*mean(x[["dist"]]) } #' @export covariance.dist_inflated <- function(x, ...){ # Can't compute if inflation value is not numeric if(!vec_is(x[["x"]], numeric())) return(NA_real_) # Can't (easily) compute if inflation value is not zero if(x[["x"]] != 0) return(NA_real_) m1 <- mean(x[["dist"]]) v <- variance(x[["dist"]]) m2 <- v + m1^2 p <- x[["p"]] (1-p)*v + p*(1-p)*m1^2 } distributional/R/dist_sample.R0000644000176200001440000001544015131633677016156 0ustar liggesusers#' Sampling distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The sampling distribution represents an empirical distribution based on #' observed samples. It is useful for bootstrapping, representing posterior #' distributions from Markov Chain Monte Carlo (MCMC) algorithms, or working #' with any empirical data where the parametric form is unknown. Unlike #' parametric distributions, the sampling distribution makes no assumptions #' about the underlying data-generating process and instead uses the sample #' itself to estimate distributional properties. The distribution can handle #' both univariate and multivariate samples. #' #' @param x A list of sampled values. For univariate distributions, each #' element should be a numeric vector. For multivariate distributions, each #' element should be a matrix where columns represent variables and rows #' represent observations. #' #' @details #' #' `r pkgdown_doc_link("dist_sample")` #' #' In the following, let \eqn{X} be a random variable with sample #' \eqn{x_1, x_2, \ldots, x_n} of size \eqn{n}. #' #' **Support**: The observed range of the sample #' #' **Mean** (univariate): #' #' \deqn{ #' \bar{x} = \frac{1}{n} \sum_{i=1}^{n} x_i #' }{ #' mean(x) = (1/n) sum(x_i) #' } #' #' **Mean** (multivariate): Computed independently for each variable. #' #' **Variance** (univariate): #' #' \deqn{ #' s^2 = \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \bar{x})^2 #' }{ #' s^2 = (1/(n-1)) sum((x_i - mean(x))^2) #' } #' #' **Covariance** (multivariate): The sample covariance matrix. #' #' **Skewness** (univariate): #' #' \deqn{ #' g_1 = \frac{\sqrt{n} \sum_{i=1}^{n} (x_i - \bar{x})^3}{\left(\sum_{i=1}^{n} (x_i - \bar{x})^2\right)^{3/2}} \left(1 - \frac{1}{n}\right)^{3/2} #' }{ #' g_1 = (sqrt(n) sum((x_i - mean(x))^3) / (sum((x_i - mean(x))^2)^(3/2))) * (1 - 1/n)^(3/2) #' } #' #' **Probability density function**: Approximated numerically using #' kernel density estimation. #' #' **Cumulative distribution function** (univariate): #' #' \deqn{ #' F(q) = \frac{1}{n} \sum_{i=1}^{n} I(x_i \leq q) #' }{ #' F(q) = (1/n) sum(I(x_i <= q)) #' } #' #' where \eqn{I(\cdot)} is the indicator function. #' #' **Cumulative distribution function** (multivariate): #' #' \deqn{ #' F(\mathbf{q}) = \frac{1}{n} \sum_{i=1}^{n} I(\mathbf{x}_i \leq \mathbf{q}) #' }{ #' F(q) = (1/n) sum(I(x_i <= q)) #' } #' #' where the inequality is applied element-wise. #' #' **Quantile function** (univariate): The sample quantile, computed using #' the specified quantile type (see [stats::quantile()]). #' #' **Quantile function** (multivariate): Marginal quantiles are computed #' independently for each variable. #' #' **Random generation**: Bootstrap sampling with replacement from the #' empirical sample. #' #' @seealso [stats::density()], [stats::quantile()], [stats::cov()] #' #' @examples #' # Univariate numeric samples #' dist <- dist_sample(x = list(rnorm(100), rnorm(100, 10))) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' generate(dist, 10) #' #' density(dist, 1) #' #' # Multivariate numeric samples #' dist <- dist_sample(x = list(cbind(rnorm(100), rnorm(100, 10)))) #' dimnames(dist) <- c("x", "y") #' #' dist #' mean(dist) #' variance(dist) #' generate(dist, 10) #' quantile(dist, 0.4) # Returns the marginal quantiles #' cdf(dist, matrix(c(0.3,9), nrow = 1)) #' #' @name dist_sample #' @export dist_sample <- function(x){ vec_assert(x, list()) x <- as_list_of(x, .ptype = vec_ptype(x[[1]])) new_dist(x = x, class = "dist_sample") } #' @export format.dist_sample <- function(x, ...){ sprintf( "sample[%s]", vapply(x, vec_size, integer(1L)) ) } #' @export density.dist_sample <- function(x, at, ..., na.rm = TRUE){ # Apply independently over sample variates if(is.matrix(x$x)) { abort("Multivariate sample density is not yet implemented.") } z <- numeric(length(at)) zi <- is.finite(at) at <- at[zi] zl <- vec_size(at) # Shortcut if only one point in density is needed if(zl == 1){ z[zi] <- density(x[["x"]], from = at, to = at, n = 1)$y } else if (zl > 1) { d <- density(x[["x"]], from = min(at), to = max(at), ..., na.rm=na.rm) z[zi] <- stats::approx(d$x, d$y, xout = at)$y } z } #' @export quantile.dist_sample <- function(x, p, type = "marginal", ..., na.rm = TRUE){ type <- match.arg(type) # Apply independently over sample variates if(is.matrix(x$x)) { # Marginal quantiles return( matrix(apply(x$x, 2, quantile, p = p, ..., na.rm = na.rm), nrow = length(p)) ) } quantile(x$x, probs = p, ..., na.rm = na.rm, names = FALSE) } #' @export cdf.dist_sample <- function(x, q, ..., na.rm = TRUE){ if(vec_size(q) > 1) return(vapply(q, cdf, numeric(1L), x = x, ...)) if(is.matrix(x$x)) { return(mean(x$x <= vec_recycle(q, vec_size(x$x)))) } mean(x <= q, ..., na.rm = na.rm) # vapply(x, function(x, q) mean(x <= q, ..., na.rm = na.rm), numeric(1L), q = q) } #' @export generate.dist_sample <- function(x, times, ...){ i <- sample.int(vec_size(x[["x"]]), size = times, replace = TRUE) if(is.matrix(x$x)) x$x[i,,drop = FALSE] else x$x[i] } #' @export mean.dist_sample <- function(x, ...){ if(is.matrix(x$x)) { matrix(colMeans(x$x, ...), nrow = 1L) } else { mean(x$x, ...) } } #' @export median.dist_sample <- function(x, na.rm = FALSE, ...){ if(is.matrix(x$x)) matrix(apply(x$x, 2, median, na.rm = na.rm, ...), nrow = 1L) else median(x$x, na.rm = na.rm, ...) } #' @export covariance.dist_sample <- function(x, ...){ if(is.matrix(x$x)) stats::cov(x$x, ...) else stats::var(x$x, ...) } #' @export skewness.dist_sample <- function(x, ..., na.rm = FALSE) { if(is.matrix(x$x)) {abort("Multivariate sample skewness is not yet implemented.")} n <- lengths(x, use.names = FALSE) x <- lapply(x, function(.) . - mean(., na.rm = na.rm)) sum_x2 <- vapply(x, function(.) sum(.^2, na.rm = na.rm), numeric(1L), USE.NAMES = FALSE) sum_x3 <- vapply(x, function(.) sum(.^3, na.rm = na.rm), numeric(1L), USE.NAMES = FALSE) y <- sqrt(n) * sum_x3/(sum_x2^(3/2)) y * ((1 - 1/n))^(3/2) } #' @export support.dist_sample <- function(x, ...) { new_support_region( list(vctrs::vec_init(x$x, n = 0L)), list(range(x$x)), list(rep(TRUE, 2)) ) } #' @method Math dist_sample #' @export Math.dist_sample <- function(x, ...) { x <- .mapply(.Generic, list(parameters(x)$x, ...), MoreArgs = NULL) names(x) <- "x" enclass_dist(x, "dist_sample") } #' @method Ops dist_sample #' @export Ops.dist_sample <- function(e1, e2) { is_dist <- c(inherits(e1, "dist_sample"), inherits(e2, "dist_sample")) if(is_dist[1]) { e1 <- parameters(e1)$x } if(is_dist[2]) { e2 <- parameters(e2)$x } x <- .mapply(.Generic, list(e1, e2), MoreArgs = NULL) names(x) <- "x" enclass_dist(x, "dist_sample") } distributional/R/hilo.R0000644000176200001440000001065515131620153014571 0ustar liggesusers#' Construct hilo intervals #' #' @description #' `r lifecycle::badge('stable')` #' #' Class constructor function to help with manually creating hilo interval #' objects. #' #' @param lower,upper A numeric vector of values for lower and upper limits. #' @param size Size of the interval between \[0, 100\]. #' #' @return A "hilo" vector #' #' @author Earo Wang & Mitchell O'Hara-Wild #' #' @examples #' new_hilo(lower = rnorm(10), upper = rnorm(10) + 5, size = 95) #' #' @export new_hilo <- function(lower = double(), upper = double(), size = double()) { vec_assert(size, double()) if (any(size < 0 | size > 100, na.rm = TRUE)) abort("'size' must be between [0, 100].") out <- vec_recycle_common(lower = lower, upper = upper) if(vec_is(lower, double()) && vec_is(upper, double())) { if (any(out[["upper"]] < out[["lower"]], na.rm = TRUE)) { abort("`upper` can't be lower than `lower`.") } } len <- vec_size(out[[1]]) out[["level"]] <- vctrs::vec_recycle(size, len) vctrs::new_rcrd(out, class = "hilo") } #' Compute intervals #' #' @description #' `r lifecycle::badge('stable')` #' #' Used to extract a specified prediction interval at a particular confidence #' level from a distribution. #' #' The numeric lower and upper bounds can be extracted from the interval using #' `$lower` and `$upper` as shown in the examples below. #' #' @param x Object to create hilo from. #' @param ... Additional arguments used by methods. #' #' @examples #' # 95% interval from a standard normal distribution #' interval <- hilo(dist_normal(0, 1), 95) #' interval #' #' # Extract the individual quantities with `$lower`, `$upper`, and `$level` #' interval$lower #' interval$upper #' interval$level #' @export hilo <- function(x, ...){ UseMethod("hilo") } #' @export hilo.default <- function(x, ...){ abort(sprintf( "Objects of type `%s` are not supported by `hilo()`, you can create a custom `hilo` with `new_hilo()`", class(x) )) } #' Is the object a hilo #' #' @param x An object. #' #' @export is_hilo <- function(x) { inherits(x, "hilo") } #' @export format.hilo <- function(x, justify = "right", ...) { lwr <- field(x, "lower") upr <- field(x, "upper") if(is.matrix(lwr)) { lwr <- if(ncol(lwr) > 1) vctrs::vec_ptype_abbr(lwr) else drop(lwr) } if(is.matrix(upr)) { upr <- if(ncol(upr) > 1) vctrs::vec_ptype_abbr(upr) else drop(upr) } limit <- paste( format(lwr, justify = justify, ...), format(upr, justify = justify, ...), sep = ", " ) paste0("[", limit, "]", field(x, "level")) } #' @export is.na.hilo <- function(x) { # both lower and upper are NA's x <- vec_data(x) is.na(x$lower) & is.na(x$upper) } #' @export vec_ptype2.hilo.hilo <- function(x, y, ...){ x } #' @export vec_cast.character.hilo <- function(x, to, ...){ sprintf( "[%s, %s]%s", as.character(x$lower), as.character(x$upper), as.character(x$level) ) } #' @method vec_math hilo #' @export vec_math.hilo <- function(.fn, .x, ...){ out <- vec_data(.x) if(.fn == "mean") abort("Cannot compute the mean of hilo intervals.") out[["lower"]] <- get(.fn)(out[["lower"]], ...) out[["upper"]] <- get(.fn)(out[["upper"]], ...) if(.fn %in% c("is.nan", "is.finite", "is.infinite")) return(out[["lower"]] | out[["upper"]]) vec_restore(out, .x) } #' @method vec_arith hilo #' @export vec_arith.hilo <- function(op, x, y, ...){ out <- dt_x <- vec_data(x) if(is_hilo(y)){ abort("Intervals should not be added to other intervals, the sum of intervals is not the interval from a sum of distributions.") } else if(is_empty(y)){ if(op == "-"){ out[["upper"]] <- get(op)(dt_x[["lower"]]) out[["lower"]] <- get(op)(dt_x[["upper"]]) } } else{ out[["lower"]] <- get(op)(dt_x[["lower"]], y) out[["upper"]] <- get(op)(dt_x[["upper"]], y) } vec_restore(out, x) } #' @method vec_arith.numeric hilo #' @export vec_arith.numeric.hilo <- function(op, x, y, ...){ out <- hl <- vec_data(y) out[["lower"]] <- get(op)(x, hl[["lower"]]) out[["upper"]] <- get(op)(x, hl[["upper"]]) if(x < 0 && op %in% c("*", "/")){ out[c("lower", "upper")] <- out[c("upper", "lower")] } vec_restore(out, y) } #' @importFrom utils .DollarNames #' @export .DollarNames.hilo <- function(x, pattern){ utils::.DollarNames(vec_data(x), pattern) } #' @export `$.hilo` <- function(x, name){ field(x, name) } #' @export `names<-.hilo` <- function(x, value) { # abort("A object cannot be named.") x } distributional/R/dist_truncated.R0000644000176200001440000001105315131601120016634 0ustar liggesusers#' Truncate a distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Note that the samples are generated using inverse transform sampling, and the #' means and variances are estimated from samples. #' #' @param dist The distribution(s) to truncate. #' @param lower,upper The range of values to keep from a distribution. #' #' @details #' #' `r pkgdown_doc_link("dist_truncated")` #' #' In the following, let \eqn{X} be a truncated random variable with #' underlying distribution \eqn{Y}, truncation bounds `lower` = \eqn{a} and #' `upper` = \eqn{b}, where \eqn{F_Y(x)} is the c.d.f. of \eqn{Y} and #' \eqn{f_Y(x)} is the p.d.f. of \eqn{Y}. #' #' **Support**: \eqn{[a, b]} #' #' **Mean**: For the general case, the mean is approximated numerically. #' For a truncated Normal distribution with underlying mean \eqn{\mu} and #' standard deviation \eqn{\sigma}, the mean is: #' #' \deqn{ #' E(X) = \mu + \frac{\phi(\alpha) - \phi(\beta)}{\Phi(\beta) - \Phi(\alpha)} \sigma #' }{ #' E(X) = mu + (phi(alpha) - phi(beta)) / (Phi(beta) - Phi(alpha)) * sigma #' } #' #' where \eqn{\alpha = (a - \mu)/\sigma}, \eqn{\beta = (b - \mu)/\sigma}, #' \eqn{\phi} is the standard Normal p.d.f., and \eqn{\Phi} is the #' standard Normal c.d.f. #' #' **Variance**: Approximated numerically for all distributions. #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \begin{cases} #' \frac{f_Y(x)}{F_Y(b) - F_Y(a)} & \text{if } a \le x \le b \\ #' 0 & \text{otherwise} #' \end{cases} #' }{ #' f(x) = f_Y(x) / (F_Y(b) - F_Y(a)) if a <= x <= b, 0 otherwise #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \begin{cases} #' 0 & \text{if } x < a \\ #' \frac{F_Y(x) - F_Y(a)}{F_Y(b) - F_Y(a)} & \text{if } a \le x \le b \\ #' 1 & \text{if } x > b #' \end{cases} #' }{ #' F(x) = 0 if x < a, (F_Y(x) - F_Y(a)) / (F_Y(b) - F_Y(a)) if a <= x <= b, 1 if x > b #' } #' #' **Quantile function**: #' #' \deqn{ #' Q(p) = F_Y^{-1}(F_Y(a) + p(F_Y(b) - F_Y(a))) #' }{ #' Q(p) = F_Y^(-1)(F_Y(a) + p(F_Y(b) - F_Y(a))) #' } #' #' clamped to the interval \eqn{[a, b]}. #' #' @name dist_truncated #' #' @examples #' dist <- dist_truncated(dist_normal(2,1), lower = 0) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' if(requireNamespace("ggdist")) { #' library(ggplot2) #' ggplot() + #' ggdist::stat_dist_halfeye( #' aes(y = c("Normal", "Truncated"), #' dist = c(dist_normal(2,1), dist_truncated(dist_normal(2,1), lower = 0))) #' ) #' } #' #' @export dist_truncated <- function(dist, lower = -Inf, upper = Inf){ vec_is(dist, new_dist()) vec_is(lower, numeric()) vec_is(upper, numeric()) if(any(lower >= upper)){ abort("The `lower` truncation bound must be lower than the `upper` bound.") } new_dist(dist = dist, lower = lower, upper = upper, dimnames = dimnames(dist), class = "dist_truncated") } #' @export format.dist_truncated <- function(x, ...){ sprintf( "%s[%s,%s]", format(x[["dist"]]), x[["lower"]], x[["upper"]] ) } #' @export density.dist_truncated <- function(x, at, ...){ in_lim <- at >= x[["lower"]] & at <= x[["upper"]] cdf_upr <- cdf(x[["dist"]], x[["upper"]]) cdf_lwr <- cdf(x[["dist"]], x[["lower"]]) out <- numeric(length(at)) out[in_lim] <- density(x[["dist"]], at = at[in_lim], ...)/(cdf_upr - cdf_lwr) out } #' @export quantile.dist_truncated <- function(x, p, ...){ F_lwr <- cdf(x[["dist"]], x[["lower"]]) F_upr <- cdf(x[["dist"]], x[["upper"]]) qt <- quantile(x[["dist"]], F_lwr + p * (F_upr - F_lwr), ...) pmin(pmax(x[["lower"]], qt), x[["upper"]]) } #' @export cdf.dist_truncated <- function(x, q, ...){ cdf_upr <- cdf(x[["dist"]], x[["upper"]]) cdf_lwr <- cdf(x[["dist"]], x[["lower"]]) out <- numeric(length(q)) q_lwr <- q < x[["lower"]] # out[q_lwr <- q < x[["lower"]]] <- 0 out[q_upr <- q > x[["upper"]]] <- 1 q_mid <- !(q_lwr|q_upr) out[q_mid] <- (cdf(x[["dist"]], q = q[q_mid], ...) - cdf_lwr)/(cdf_upr - cdf_lwr) out } #' @export mean.dist_truncated <- function(x, ...) { if(inherits(x$dist, "dist_sample")) { y <- x$dist[[1]] mean(y[y >= x$lower & y <= x$upper]) } else if(inherits(x$dist, "dist_normal")) { mu <- x$dist$mu s <- x$dist$sigma a <- (x$lower - mu) / s b <- (x$upper - mu) / s mu + (stats::dnorm(a) - stats::dnorm(b))/(stats::pnorm(b) - stats::pnorm(a))*s } else { NextMethod() } } distributional/R/default.R0000755000176200001440000002172515131633665015301 0ustar liggesusers#' @export density.dist_default <- function(x, ...){ abort( sprintf("The distribution class `%s` does not support `density()`", class(x)[1]) ) } #' @export log_density.dist_default <- function(x, ...){ log(density(x, ...)) } #' @export quantile.dist_default <- function(x, p, ...){ # abort( # sprintf("The distribution class `%s` does not support `quantile()`", # class(x)[1]) # ) stats::optim(0, function(pos){ (p - cdf(x, pos, ...))^2 })$par } #' @export log_quantile.dist_default <- function(x, p, ...){ quantile(x, exp(p), ...) } #' @export cdf.dist_default <- function(x, q, times = 1e5,...){ # Use Monte Carlo integration r <- generate(x, times = times) if(is.list(q)) { # Turn into matrix q <- do.call(rbind, q) } out <- numeric(NROW(q)) for(i in seq_along(out)) { out[i] <- mean(apply(sweep(r, 2, q[i,]) < 0, 1, all)) } return(out) } #' @export log_cdf.dist_default <- function(x, q, ...){ log(cdf(x, q, ...)) } #' @export generate.dist_default <- function(x, times, ...){ vapply(stats::runif(times,0,1), quantile, numeric(1L), x = x, ...) } #' @export likelihood.dist_default <- function(x, sample, ...){ prod(vapply(sample, density, numeric(1L), x = x)) } #' @export log_likelihood.dist_default <- function(x, sample, ...){ sum(vapply(sample, log_density, numeric(1L), x = x)) } #' @export parameters.dist_default <- function(x, ...) { # Reduce parameter structures to length 1 list if needed. lapply(unclass(x), function(z) { if(inherits(z, "dist_default")) wrap_dist(list(z)) else if (tryCatch(vec_size(z), error = function(e) Inf) > 1) list(z) else z }) } #' @export family.dist_default <- function(object, ...) { substring(class(object)[1], first = 6) } #' @export support.dist_default <- function(x, ...) { lims <- quantile(x, c(0, 1)) closed <- if(any(is.na(lims))) { c(FALSE, FALSE) } else { # Default to open limits on error lim_dens <- tryCatch( suppressWarnings(density(x, lims)), error = function(e) c(0,0) ) !near(lim_dens, 0) } new_support_region( list(vctrs::vec_init(generate(x, 1), n = 0L)), list(lims), list(closed) ) } #' @export has_symmetry.dist_default <- function(x, ...){ # Crude test that works in *most* cases. # Should be replaced with a more robust check using moments. tryCatch( isTRUE(skewness(x) == 0), error = function(e) FALSE ) } #' @export mean.dist_default <- function(x, ...){ x_sup <- support(x) dist_type <- field(x_sup, "x")[[1]] if (!is.numeric(dist_type)) return(NA_real_) if (is.double(dist_type)) { limits <- field(x_sup, "lim")[[1]] tryCatch( stats::integrate(function(at) density(x, at) * at, limits[1], limits[2])$value, error = function(e) NA_real_ ) } else { mean(quantile(x, stats::ppoints(1000)), na.rm = TRUE) } } #' @export variance.dist_default <- function(x, ...){ x <- covariance(x, ...) if(is.matrix(x[[1]]) && ncol(x[[1]]) > 1){ matrix(diag(x[[1]]), nrow = 1) } else x } #' @export covariance.dist_default <- function(x, ...){ x_sup <- support(x) dist_type <- field(x_sup, "x")[[1]] if (!is.numeric(dist_type)) return(NA_real_) else if (is.matrix(dist_type)) stats::cov(generate(x, times = 1000)) else if (is.double(dist_type)) { limits <- field(x_sup, "lim")[[1]] tryCatch( stats::integrate(function(at) density(x, at) * at^2, limits[1], limits[2])$value, error = function(e) NA_real_ ) - mean(x)^2 } else { stats::var(quantile(x, stats::ppoints(1000)), na.rm = TRUE) } } #' @export median.dist_default <- function(x, na.rm = FALSE, ...){ quantile(x, p = 0.5, ...) } #' @export hilo.dist_default <- function(x, size = 95, ...){ lower <- quantile(x, 0.5-size/200, ...) upper <- quantile(x, 0.5+size/200, ...) if(is.matrix(lower) && is.matrix(upper)) { return( vctrs::new_data_frame(split( new_hilo(drop(lower), drop(upper), size = rep_len(size, length(lower))), seq_along(lower))) ) } new_hilo(lower, upper, size) } #' @export hdr.dist_default <- function(x, size = 95, n = 4096, ...){ # Use exact quantiles for symmetric distributions if (has_symmetry(x)) { return( new_hdr( lower = list(quantile(x, 0.5 - size/200)), upper = list(quantile(x, 0.5 + size/200)), size = size ) ) } dist_x <- quantile(x, seq(0.5/n, 1 - 0.5/n, length.out = n)) # Remove duplicate values of dist_x from less continuous distributions dist_x <- unique(dist_x) dist_y <- density(x, dist_x) alpha <- quantile(dist_y, probs = 1-size/100) crossing_alpha <- function(alpha, x, y){ it <- seq_len(length(y) - 1) dd <- y - alpha dd <- dd[it + 1] * dd[it] index <- it[dd <= 0] # unique() removes possible duplicates if sequential dd has same value. # More robust approach is required. out <- unique( vapply( index, function(.x) stats::approx(y[.x + c(0,1)], x[.x + c(0,1)], xout = alpha)$y, numeric(1L) ) ) # Add boundary values which may exceed the crossing point. c(x[1][y[1]>alpha], out, x[length(x)][y[length(y)]>alpha]) } # purrr::map(alpha, crossing_alpha, dist_x, dist_y) hdr <- crossing_alpha(alpha, dist_x, dist_y) lower_hdr <- seq_along(hdr)%%2==1 new_hdr(lower = list(hdr[lower_hdr]), upper = list(hdr[!lower_hdr]), size = size) } #' @export format.dist_default <- function(x, ...){ "?" } #' @export print.dist_default <- function(x, ...){ cat(format(x, ...)) } #' @export dim.dist_default <- function(x){ # Quick and dirty dimension calculation NCOL(generate(x, times = 1)) } invert_fail <- function(...) stop("Inverting transformations for distributions is not yet supported.") #' Attempt to get the inverse of f(x) by name. Returns invert_fail #' (a function that raises an error if called) if there is no known inverse. #' @param f string. Name of a function. #' @noRd get_unary_inverse <- function(f) { switch(f, sqrt = function(x) x^2, exp = log, log = function(x, base = exp(1)) base ^ x, log2 = function(x) 2^x, log10 = function(x) 10^x, expm1 = log1p, log1p = expm1, cos = acos, sin = asin, tan = atan, acos = cos, asin = sin, atan = tan, cosh = acosh, sinh = asinh, tanh = atanh, acosh = cosh, asinh = sinh, atanh = tanh, invert_fail ) } #' Attempt to get the inverse of f(x, constant) by name. Returns invert_fail #' (a function that raises an error if called) if there is no known inverse. #' @param f string. Name of a function. #' @param constant a constant value #' @noRd get_binary_inverse_1 <- function(f, constant) { force(constant) switch(f, `+` = function(x) x - constant, `-` = function(x) x + constant, `*` = function(x) x / constant, `/` = function(x) x * constant, `^` = function(x) x ^ (1/constant), invert_fail ) } #' Attempt to get the inverse of f(constant, x) by name. Returns invert_fail #' (a function that raises an error if called) if there is no known inverse. #' @param f string. Name of a function. #' @param constant a constant value #' @noRd get_binary_inverse_2 <- function(f, constant) { force(constant) switch(f, `+` = function(x) x - constant, `-` = function(x) constant - x, `*` = function(x) x / constant, `/` = function(x) constant / x, `^` = function(x) log(x, base = constant), invert_fail ) } #' @method Math dist_default #' @export Math.dist_default <- function(x, ...) { if(dim(x) > 1) stop("Transformations of multivariate distributions are not yet supported.") trans <- new_function(exprs(x = ), body = expr((!!sym(.Generic))(x, !!!dots_list(...)))) inverse_fun <- get_unary_inverse(.Generic) inverse <- new_function(exprs(x = ), body = expr((!!inverse_fun)(x, !!!dots_list(...)))) vec_data(dist_transformed(wrap_dist(list(x)), trans, inverse))[[1]] } #' @method Ops dist_default #' @export Ops.dist_default <- function(e1, e2) { if(.Generic %in% c("-", "+") && missing(e2)){ e2 <- e1 e1 <- if(.Generic == "+") 1 else -1 .Generic <- "*" } is_dist <- c(inherits(e1, "dist_default"), inherits(e2, "dist_default")) if(any(vapply(list(e1, e2)[is_dist], dim, numeric(1L)) > 1)){ stop("Transformations of multivariate distributions are not yet supported.") } trans <- if(all(is_dist)) { if(identical(e1$dist, e2$dist)){ new_function(exprs(x = ), expr((!!sym(.Generic))((!!e1$transform)(x), (!!e2$transform)(x)))) } else { stop(sprintf("The %s operation is not supported for <%s> and <%s>", .Generic, class(e1)[1], class(e2)[1])) } } else if(is_dist[1]){ new_function(exprs(x = ), body = expr((!!sym(.Generic))(x, !!e2))) } else { new_function(exprs(x = ), body = expr((!!sym(.Generic))(!!e1, x))) } inverse <- if(all(is_dist)) { invert_fail } else if(is_dist[1]){ get_binary_inverse_1(.Generic, e2) } else { get_binary_inverse_2(.Generic, e1) } vec_data(dist_transformed(wrap_dist(list(e1,e2)[which(is_dist)]), trans, inverse))[[1]] } distributional/R/dist_wrap.R0000644000176200001440000000743715131577031015644 0ustar liggesusers#' Create a distribution from p/d/q/r style functions #' #' @description #' `r lifecycle::badge('maturing')` #' #' If a distribution is not yet supported, you can vectorise p/d/q/r functions #' using this function. `dist_wrap()` stores the distributions parameters, and #' provides wrappers which call the appropriate p/d/q/r functions. #' #' Using this function to wrap a distribution should only be done if the #' distribution is not yet available in this package. If you need a distribution #' which isn't in the package yet, consider making a request at #' https://github.com/mitchelloharawild/distributional/issues. #' #' @param dist The name of the distribution used in the functions (name that is #' prefixed by p/d/q/r) #' @param ... Named arguments used to parameterise the distribution. #' @param package The package from which the distribution is provided. If NULL, #' the calling environment's search path is used to find the distribution #' functions. Alternatively, an arbitrary environment can also be provided here. # #' @param p,d,q,r The functions used to compute the p/d/q/r # #' (pdf/cdf/quantile/generate) #' #' @details #' #' The `dist_wrap()` function provides a generic interface to create distribution #' objects from any set of p/d/q/r style functions. The statistical properties #' depend on the specific distribution being wrapped. #' #' @examples #' dist <- dist_wrap("norm", mean = 1:3, sd = c(3, 9, 2)) #' #' density(dist, 1) # dnorm() #' cdf(dist, 4) # pnorm() #' quantile(dist, 0.975) # qnorm() #' generate(dist, 10) # rnorm() #' #' library(actuar) #' dist <- dist_wrap("invparalogis", package = "actuar", shape = 2, rate = 2) #' density(dist, 1) # actuar::dinvparalogis() #' cdf(dist, 4) # actuar::pinvparalogis() #' quantile(dist, 0.975) # actuar::qinvparalogis() #' generate(dist, 10) # actuar::rinvparalogis() #' #' @export dist_wrap <- function(dist, ..., package = NULL){ vec_assert(dist, character(), 1L) if(is.null(package)) { env <- rlang::caller_env() } else if (is.character(package)) { env <- rlang::pkg_env(package) } else { env <- as.environment(package) } par <- vec_recycle_common(dist = dist, env = list(env), ...) new_dist(!!!par, class = "dist_wrap") } #' @export format.dist_wrap <- function(x, ...){ sprintf( "%s(%s)", x[["dist"]], paste0(x[-(1:2)], collapse = ", ") ) } #' @export density.dist_wrap <- function(x, at, ...){ fn <- get(paste0("d", x[["dist"]][[1]]), envir = x$env, mode = "function") # Remove distribution name and environment from parameters par <- x[-(1:2)] do.call(fn, c(list(at), par)) } #' @export log_density.dist_wrap <- function(x, at, ...){ fn <- get(paste0("d", x[["dist"]][[1]]), envir = x$env, mode = "function") # Remove distribution name and environment from parameters par <- x[-(1:2)] # Use density(log = TRUE) if supported if(is.null(formals(fn)$log)){ log(do.call(fn, c(list(at), par))) } else { do.call(fn, c(list(at), par, log = TRUE)) } } #' @export cdf.dist_wrap <- function(x, q, ...){ fn <- get(paste0("p", x[["dist"]][[1]]), envir = x$env, mode = "function") # Remove distribution name and environment from parameters par <- x[-(1:2)] do.call(fn, c(list(q), par)) } #' @export quantile.dist_wrap <- function(x, p, ...){ fn <- get(paste0("q", x[["dist"]][[1]]), envir = x$env, mode = "function") # Remove distribution name and environment from parameters par <- x[-(1:2)] do.call(fn, c(list(p), par)) } #' @export generate.dist_wrap <- function(x, times, ...){ fn <- get(paste0("r", x[["dist"]][[1]]), envir = x$env, mode = "function") # Remove distribution name and environment from parameters par <- x[-(1:2)] do.call(fn, c(list(times), par)) } #' @export parameters.dist_wrap <- function(x, ...) { # All parameters except distribution environment x[-2L] } distributional/R/dist_gumbel.R0000644000176200001440000001140215131557055016135 0ustar liggesusers#' The Gumbel distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Gumbel distribution is a special case of the Generalized Extreme Value #' distribution, obtained when the GEV shape parameter \eqn{\xi} is equal to 0. #' It may be referred to as a type I extreme value distribution. #' #' @inheritParams actuar::dgumbel #' #' @details #' #' `r pkgdown_doc_link("dist_gumbel")` #' #' In the following, let \eqn{X} be a Gumbel random variable with location #' parameter `alpha` = \eqn{\alpha} and scale parameter `scale` = \eqn{\sigma}. #' #' **Support**: \eqn{R}, the set of all real numbers. #' #' **Mean**: #' #' \deqn{ #' E(X) = \alpha + \sigma\gamma #' }{ #' E(X) = alpha + sigma*gamma #' } #' #' where \eqn{\gamma} is the Euler-Mascheroni constant, #' approximately equal to 0.5772157. #' #' **Variance**: #' #' \deqn{ #' \textrm{Var}(X) = \frac{\pi^2 \sigma^2}{6} #' }{ #' Var(X) = (pi^2 * sigma^2) / 6 #' } #' #' **Skewness**: #' #' \deqn{ #' \textrm{Skew}(X) = \frac{12\sqrt{6}\zeta(3)}{\pi^3} \approx 1.1395 #' }{ #' Skew(X) = (12*sqrt(6)*zeta(3)) / pi^3 #' } #' #' where \eqn{\zeta(3)} is Apery's constant, #' approximately equal to 1.2020569. Note that skewness is independent #' of the distribution parameters. #' #' **Kurtosis (excess)**: #' #' \deqn{ #' \textrm{Kurt}(X) = \frac{12}{5} = 2.4 #' }{ #' Kurt(X) = 12/5 #' } #' #' Note that excess kurtosis is independent of the distribution parameters. #' #' **Median**: #' #' \deqn{ #' \textrm{Median}(X) = \alpha - \sigma\ln(\ln 2) #' }{ #' Median(X) = alpha - sigma*ln(ln(2)) #' } #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{\sigma} \exp\left[-\frac{x - \alpha}{\sigma}\right] #' \exp\left\{-\exp\left[-\frac{x - \alpha}{\sigma}\right]\right\} #' }{ #' f(x) = (1/sigma) * exp[-(x - alpha)/sigma] * exp(-exp[-(x - alpha)/sigma]) #' } #' #' for \eqn{x} in \eqn{R}, the set of all real numbers. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \exp\left\{-\exp\left[-\frac{x - \alpha}{\sigma}\right]\right\} #' }{ #' F(x) = exp(-exp[-(x - alpha)/sigma]) #' } #' #' for \eqn{x} in \eqn{R}, the set of all real numbers. #' #' **Quantile function (inverse c.d.f)**: #' #' \deqn{ #' F^{-1}(p) = \alpha - \sigma \ln(-\ln p) #' }{ #' F^(-1)(p) = alpha - sigma * ln(-ln(p)) #' } #' #' for \eqn{p} in (0, 1). #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \Gamma(1 - \sigma t) e^{\alpha t} #' }{ #' E(e^(tX)) = Gamma(1 - sigma*t) * e^(alpha*t) #' } #' #' for \eqn{\sigma t < 1}, where \eqn{\Gamma} is the gamma function. #' #' @seealso [actuar::Gumbel], [actuar::dgumbel()], [actuar::pgumbel()], #' [actuar::qgumbel()], [actuar::rgumbel()], [actuar::mgumbel()] #' #' @examples #' dist <- dist_gumbel(alpha = c(0.5, 1, 1.5, 3), scale = c(2, 2, 3, 4)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_gumbel #' @export dist_gumbel <- function(alpha, scale){ alpha <- vec_cast(alpha, double()) scale <- vec_cast(scale, double()) if(any(scale <= 0)){ abort("The scale parameter of a Gumbel distribution must be strictly positive.") } new_dist(a = alpha, s = scale, class = "dist_gumbel") } #' @export format.dist_gumbel <- function(x, digits = 2, ...){ sprintf( "Gumbel(%s, %s)", format(x[["a"]], digits = digits, ...), format(x[["s"]], digits = digits, ...) ) } #' @export density.dist_gumbel <- function(x, at, ...){ require_package("actuar") actuar::dgumbel(at, x[["a"]], x[["s"]]) } #' @export log_density.dist_gumbel <- function(x, at, ...){ require_package("actuar") actuar::dgumbel(at, x[["a"]], x[["s"]], log = TRUE) } #' @export quantile.dist_gumbel <- function(x, p, ...){ require_package("actuar") actuar::qgumbel(p, x[["a"]], x[["s"]]) } #' @export cdf.dist_gumbel <- function(x, q, ...){ require_package("actuar") actuar::pgumbel(q, x[["a"]], x[["s"]]) } #' @export generate.dist_gumbel <- function(x, times, ...){ require_package("actuar") actuar::rgumbel(times, x[["a"]], x[["s"]]) } #' @export mean.dist_gumbel <- function(x, ...){ actuar::mgumbel(1, x[["a"]], x[["s"]]) } #' @export covariance.dist_gumbel <- function(x, ...){ (pi*x[["s"]])^2/6 } #' @export skewness.dist_gumbel <- function(x, ...) { 1.13954709940465 # zeta3 <- 1.20205690315959401459612 # (12 * sqrt(6) * zeta3) / pi^3 } #' @export kurtosis.dist_gumbel <- function(x, ...) 12/5 distributional/R/dist_student_t.R0000644000176200001440000001553615131564466016713 0ustar liggesusers#' The (non-central) location-scale Student t Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Student's T distribution is closely related to the [Normal()] #' distribution, but has heavier tails. As \eqn{\nu} increases to \eqn{\infty}, #' the Student's T converges to a Normal. The T distribution appears #' repeatedly throughout classic frequentist hypothesis testing when #' comparing group means. #' #' @inheritParams stats::dt #' @param mu The location parameter of the distribution. #' If `ncp == 0` (or `NULL`), this is the median. #' @param sigma The scale parameter of the distribution. #' #' @details #' #' `r pkgdown_doc_link("dist_student_t")` #' #' In the following, let \eqn{X} be a location-scale Student's T random variable with #' `df` = \eqn{\nu}, `mu` = \eqn{\mu}, `sigma` = \eqn{\sigma}, and #' `ncp` = \eqn{\delta} (non-centrality parameter). #' #' If \eqn{Z} follows a standard Student's T distribution (with `df` = \eqn{\nu} #' and `ncp` = \eqn{\delta}), then \eqn{X = \mu + \sigma Z}. #' #' #' **Support**: \eqn{R}, the set of all real numbers #' #' **Mean**: #' #' For the central distribution (`ncp` = 0 or `NULL`): #' #' \deqn{ #' E(X) = \mu #' }{ #' E(X) = \mu #' } #' #' for \eqn{\nu > 1}, and undefined otherwise. #' #' For the non-central distribution (`ncp` \eqn{\neq} 0): #' #' \deqn{ #' E(X) = \mu + \delta \sqrt{\frac{\nu}{2}} \frac{\Gamma((\nu-1)/2)}{\Gamma(\nu/2)} \sigma #' }{ #' E(X) = \mu + \delta \sqrt(\nu / 2) \Gamma((\nu-1)/2) / \Gamma(\nu/2) \sigma #' } #' #' for \eqn{\nu > 1}, and undefined otherwise. #' #' **Variance**: #' #' For the central distribution (`ncp` = 0 or `NULL`): #' #' \deqn{ #' \mathrm{Var}(X) = \frac{\nu}{\nu - 2} \sigma^2 #' }{ #' Var(X) = \nu / (\nu - 2) \sigma^2 #' } #' #' for \eqn{\nu > 2}. Undefined if \eqn{\nu \le 1}, infinite when \eqn{1 < \nu \le 2}. #' #' For the non-central distribution (`ncp` \eqn{\neq} 0): #' #' \deqn{ #' \mathrm{Var}(X) = \left[\frac{\nu(1+\delta^2)}{\nu-2} - \left(\delta \sqrt{\frac{\nu}{2}} \frac{\Gamma((\nu-1)/2)}{\Gamma(\nu/2)}\right)^2\right] \sigma^2 #' }{ #' Var(X) = [(\nu(1+\delta^2))/(\nu-2) - (\delta \sqrt(\nu / 2) \Gamma((\nu-1)/2) / \Gamma(\nu/2))^2] \sigma^2 #' } #' #' for \eqn{\nu > 2}. Undefined if \eqn{\nu \le 1}, infinite when \eqn{1 < \nu \le 2}. #' #' **Probability density function (p.d.f)**: #' #' For the central distribution (`ncp` = 0 or `NULL`), the standard #' t distribution with `df` = \eqn{\nu} has density: #' #' \deqn{ #' f_Z(z) = \frac{\Gamma((\nu + 1)/2)}{\sqrt{\pi \nu} \Gamma(\nu/2)} \left(1 + \frac{z^2}{\nu} \right)^{- (\nu + 1)/2} #' }{ #' f_Z(z) = \Gamma((\nu + 1) / 2) / (\sqrt(\pi \nu) \Gamma(\nu / 2)) (1 + z^2 / \nu)^(- (\nu + 1) / 2) #' } #' #' The location-scale version with `mu` = \eqn{\mu} and `sigma` = \eqn{\sigma} #' has density: #' #' \deqn{ #' f(x) = \frac{1}{\sigma} f_Z\left(\frac{x - \mu}{\sigma}\right) #' }{ #' f(x) = (1/\sigma) f_Z((x - \mu) / \sigma) #' } #' #' For the non-central distribution (`ncp` \eqn{\neq} 0), the density is #' computed numerically via [stats::dt()]. #' #' **Cumulative distribution function (c.d.f)**: #' #' For the central distribution (`ncp` = 0 or `NULL`), the cumulative #' distribution function is computed numerically via [stats::pt()], which #' uses the relationship to the incomplete beta function: #' #' \deqn{ #' F_\nu(t) = \frac{1}{2} I_x\left(\frac{\nu}{2}, \frac{1}{2}\right) #' }{ #' F_\nu(t) = (1/2) I_x(\nu/2, 1/2) #' } #' #' for \eqn{t \le 0}, where \eqn{x = \nu/(\nu + t^2)} and \eqn{I_x(a,b)} is #' the incomplete beta function ([stats::pbeta()]). For \eqn{t \ge 0}: #' #' \deqn{ #' F_\nu(t) = 1 - \frac{1}{2} I_x\left(\frac{\nu}{2}, \frac{1}{2}\right) #' }{ #' F_\nu(t) = 1 - (1/2) I_x(\nu/2, 1/2) #' } #' #' The location-scale version is: \eqn{F(x) = F_\nu((x - \mu)/\sigma)}. #' #' For the non-central distribution (`ncp` \eqn{\neq} 0), the cumulative #' distribution function is computed numerically via [stats::pt()]. #' #' **Moment generating function (m.g.f)**: #' #' Does not exist in closed form. Moments are computed using the formulas #' for mean and variance above where available. #' #' @seealso [stats::TDist] #' #' @examples #' dist <- dist_student_t(df = c(1,2,5), mu = c(0,1,2), sigma = c(1,2,3)) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_student_t #' @export dist_student_t <- function(df, mu = 0, sigma = 1, ncp = NULL){ df <- vec_cast(df, numeric()) if(any(df <= 0)){ abort("The degrees of freedom parameter of a Student t distribution must be strictly positive.") } mu <- vec_cast(mu, double()) sigma <- vec_cast(sigma, double()) if(any(sigma[!is.na(sigma)] <= 0)){ abort("The scale (sigma) parameter of a Student t distribution must be strictly positive.") } new_dist(df = df, mu = mu, sigma = sigma, ncp = ncp, class = "dist_student_t") } #' @export format.dist_student_t <- function(x, digits = 2, ...){ out <- sprintf( "t(%s, %s, %s%s)", format(x[["df"]], digits = digits, ...), format(x[["mu"]], digits = digits, ...), format(x[["sigma"]], digits = digits, ...), if(is.null(x[["ncp"]])) "" else paste(",", format(x[["ncp"]], digits = digits, ...)) ) } #' @export density.dist_student_t <- function(x, at, ...){ ncp <- x[["ncp"]] %||% missing_arg() sigma <- x[["sigma"]] stats::dt((at - x[["mu"]])/sigma, x[["df"]], ncp) / sigma } #' @export log_density.dist_student_t <- function(x, at, ...){ ncp <- x[["ncp"]] %||% missing_arg() sigma <- x[["sigma"]] stats::dt((at - x[["mu"]])/sigma, x[["df"]], ncp, log = TRUE) - log(sigma) } #' @export quantile.dist_student_t <- function(x, p, ...){ ncp <- x[["ncp"]] %||% missing_arg() stats::qt(p, x[["df"]], ncp) * x[["sigma"]] + x[["mu"]] } #' @export cdf.dist_student_t <- function(x, q, ...){ ncp <- x[["ncp"]] %||% missing_arg() stats::pt((q - x[["mu"]])/x[["sigma"]], x[["df"]], ncp) } #' @export generate.dist_student_t <- function(x, times, ...){ ncp <- x[["ncp"]] %||% missing_arg() stats::rt(times, x[["df"]], ncp) * x[["sigma"]] + x[["mu"]] } #' @export mean.dist_student_t <- function(x, ...){ df <- x[["df"]] if(df <= 1) return(NA_real_) if(is.null(x[["ncp"]])){ x[["mu"]] } else { x[["mu"]] + x[["ncp"]] * sqrt(df/2) * (gamma((df-1)/2)/gamma(df/2)) * x[["sigma"]] } } #' @export covariance.dist_student_t <- function(x, ...){ df <- x[["df"]] ncp <- x[["ncp"]] if(df <= 1) return(NA_real_) if(df <= 2) return(Inf) if(is.null(ncp)){ df / (df - 2) * x[["sigma"]]^2 } else { ((df*(1+ncp^2))/(df-2) - (ncp * sqrt(df/2) * (gamma((df-1)/2)/gamma(df/2)))^2) * x[["sigma"]]^2 } } #' @export has_symmetry.dist_student_t <- function(x, ...) TRUEdistributional/R/hdr.R0000644000176200001440000000420515131027365014413 0ustar liggesusers#' Construct hdr intervals #' #' @param lower,upper A list of numeric vectors specifying the region's lower #' and upper bounds. #' @param size A numeric vector specifying the coverage size of the region. #' #' @return A "hdr" vector #' #' @author Mitchell O'Hara-Wild #' #' @examples #' #' new_hdr(lower = list(1, c(3,6)), upper = list(10, c(5, 8)), size = c(80, 95)) #' #' @export new_hdr <- function(lower = list_of(.ptype = double()), upper = list_of(.ptype = double()), size = double()) { lower <- as_list_of(lower) upper <- as_list_of(upper) vec_assert(lower, list_of(.ptype = double())) vec_assert(upper, list_of(.ptype = double())) vec_assert(size, double()) if (any(size < 0 | size > 100, na.rm = TRUE)) abort("'size' must be between [0, 100].") out <- vec_recycle_common(lower = lower, upper = upper) .mapply( function(l,u) if (any(u 1) { out <- paste(out, ncol(z), sep = "^") } } out }, FUN.VALUE = character(1L)) brackets <- list(c("(","["), c(")","]")) mapply(function(type, z, closed) { br1 <- brackets[[1]][closed[1] + 1L] br2 <- brackets[[2]][closed[2] + 1L] fz <- sapply(z, function(x) format(x, digits = digits)) fz <- gsub("3.14", "pi", fz, fixed = TRUE) if (any(is.na(z)) || all(is.infinite(z))) type else if (type == "Z") { if (identical(z, c(0L, Inf))) "N0" else if (identical(z, c(1L, Inf))) "N+" else paste0("{", z[1], ",", z[1]+1L, ",...,", z[2], "}") } else if (type == "R") paste0(br1, fz[1], ",", fz[2], br2) else type }, type, field(x, "lim"), field(x, "closed")) } #' @export vec_ptype_abbr.support_region <- function(x, ...){ "support" } distributional/R/dist_inverse_exponential.R0000644000176200001440000000534015131555400020737 0ustar liggesusers#' The Inverse Exponential distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Inverse Exponential distribution is used to model the reciprocal of #' exponentially distributed variables. #' #' @inheritParams actuar::dinvexp #' #' @details #' #' `r pkgdown_doc_link("dist_inverse_exponential")` #' #' In the following, let \eqn{X} be an Inverse Exponential random variable #' with parameter `rate` = \eqn{\lambda}. #' #' **Support**: \eqn{x > 0} #' #' **Mean**: Does not exist, returns NA #' #' **Variance**: Does not exist, returns NA #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{\lambda}{x^2} e^{-\lambda/x} #' }{ #' f(x) = \lambda / x^2 * e^(-\lambda/x) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = e^{-\lambda/x} #' }{ #' F(x) = e^(-\lambda/x) #' } #' #' **Quantile function (inverse c.d.f)**: #' #' \deqn{ #' F^{-1}(p) = -\frac{\lambda}{\log(p)} #' }{ #' F^(-1)(p) = -\lambda / log(p) #' } #' #' **Moment generating function (m.g.f)**: #' #' Does not exist (divergent integral). #' #' @seealso [actuar::InverseExponential] #' #' @examples #' dist <- dist_inverse_exponential(rate = 1:5) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_inverse_exponential #' @export dist_inverse_exponential <- function(rate){ rate <- vec_cast(rate, double()) if(any(rate <= 0)){ abort("The rate parameter of a Inverse Exponential distribution must be strictly positive.") } new_dist(r = rate, class = "dist_inverse_exponential") } #' @export format.dist_inverse_exponential <- function(x, digits = 2, ...){ sprintf( "InvExp(%s)", format(x[["r"]], digits = digits, ...) ) } #' @export density.dist_inverse_exponential <- function(x, at, ...){ require_package("actuar") actuar::dinvexp(at, x[["r"]]) } #' @export log_density.dist_inverse_exponential <- function(x, at, ...){ require_package("actuar") actuar::dinvexp(at, x[["r"]], log = TRUE) } #' @export quantile.dist_inverse_exponential <- function(x, p, ...){ require_package("actuar") actuar::qinvexp(p, x[["r"]]) } #' @export cdf.dist_inverse_exponential <- function(x, q, ...){ require_package("actuar") actuar::pinvexp(q, x[["r"]]) } #' @export generate.dist_inverse_exponential <- function(x, times, ...){ require_package("actuar") actuar::rinvexp(times, x[["r"]]) } #' @export mean.dist_inverse_exponential <- function(x, ...){ NA_real_ } #' @export covariance.dist_inverse_exponential <- function(x, ...){ NA_real_ } distributional/R/dist_mixture.R0000644000176200001440000001602215131577624016366 0ustar liggesusers#' Create a mixture of distributions #' #' @description #' `r lifecycle::badge('maturing')` #' #' A mixture distribution combines multiple component distributions with #' specified weights. The resulting distribution can model complex, #' multimodal data by representing it as a weighted sum of simpler #' distributions. #' #' @param ... Distributions to be used in the mixture. Can be any #' distributional objects. #' @param weights A numeric vector of non-negative weights that sum to 1. #' The length must match the number of distributions passed to `...`. #' Each weight \eqn{w_i} represents the probability that a random draw #' comes from the \eqn{i}-th component distribution. #' #' @details #' #' In the following, let \eqn{X} be a mixture random variable composed #' of \eqn{K} component distributions \eqn{F_1, F_2, \ldots, F_K} with #' corresponding weights \eqn{w_1, w_2, \ldots, w_K} where #' \eqn{\sum_{i=1}^K w_i = 1} and \eqn{w_i \geq 0} for all \eqn{i}. #' #' **Support**: The union of the supports of all component distributions #' #' **Mean**: #' #' For univariate mixtures: #' \deqn{ #' E(X) = \sum_{i=1}^K w_i \mu_i #' }{ #' E(X) = sum_{i=1}^K w_i * mu_i #' } #' #' where \eqn{\mu_i} is the mean of the \eqn{i}-th component distribution. #' #' For multivariate mixtures: #' \deqn{ #' E(\mathbf{X}) = \sum_{i=1}^K w_i \boldsymbol{\mu}_i #' }{ #' E(X) = sum_{i=1}^K w_i * mu_i #' } #' #' where \eqn{\boldsymbol{\mu}_i} is the mean vector of the \eqn{i}-th #' component distribution. #' #' **Variance**: #' #' For univariate mixtures: #' \deqn{ #' \text{Var}(X) = \sum_{i=1}^K w_i (\mu_i^2 + \sigma_i^2) - \left(\sum_{i=1}^K w_i \mu_i\right)^2 #' }{ #' Var(X) = sum_{i=1}^K w_i * (mu_i^2 + sigma_i^2) - (sum_{i=1}^K w_i * mu_i)^2 #' } #' #' where \eqn{\sigma_i^2} is the variance of the \eqn{i}-th component #' distribution. #' #' **Covariance**: #' #' For multivariate mixtures: #' \deqn{ #' \text{Cov}(\mathbf{X}) = \sum_{i=1}^K w_i \left[ (\boldsymbol{\mu}_i - \bar{\boldsymbol{\mu}})(\boldsymbol{\mu}_i - \bar{\boldsymbol{\mu}})^T + \boldsymbol{\Sigma}_i \right] #' }{ #' Cov(X) = sum_{i=1}^K w_i * [ (mu_i - mu_bar)(mu_i - mu_bar)^T + Sigma_i ] #' } #' #' where \eqn{\bar{\boldsymbol{\mu}} = \sum_{i=1}^K w_i \boldsymbol{\mu}_i} #' is the overall mean vector and \eqn{\boldsymbol{\Sigma}_i} is the #' covariance matrix of the \eqn{i}-th component distribution. #' #' **Probability density/mass function (p.d.f/p.m.f)**: #' #' \deqn{ #' f(x) = \sum_{i=1}^K w_i f_i(x) #' }{ #' f(x) = sum_{i=1}^K w_i * f_i(x) #' } #' #' where \eqn{f_i(x)} is the density or mass function of the \eqn{i}-th #' component distribution. #' #' **Cumulative distribution function (c.d.f)**: #' #' For univariate mixtures: #' \deqn{ #' F(x) = \sum_{i=1}^K w_i F_i(x) #' }{ #' F(x) = sum_{i=1}^K w_i * F_i(x) #' } #' #' where \eqn{F_i(x)} is the c.d.f. of the \eqn{i}-th component #' distribution. #' #' For multivariate mixtures, the c.d.f. is approximated numerically. #' #' **Quantile function**: #' #' For univariate mixtures, the quantile function has no closed form #' and is computed numerically by inverting the c.d.f. using root-finding #' ([stats::uniroot()]). #' #' For multivariate mixtures, quantiles are not yet implemented. #' #' @seealso [stats::uniroot()], [vctrs::vec_unique_count()] #' #' @examples #' # Univariate mixture of two normal distributions #' dist <- dist_mixture(dist_normal(0, 1), dist_normal(5, 2), weights = c(0.3, 0.7)) #' dist #' #' mean(dist) #' variance(dist) #' #' density(dist, 2) #' cdf(dist, 2) #' quantile(dist, 0.5) #' #' generate(dist, 10) #' #' @name dist_mixture #' @export dist_mixture <- function(..., weights = numeric()){ dist <- dots_list(...) dn <- unique(lapply(dist, dimnames)) dn <- if(length(dn) == 1) dn[[1]] else NULL vec_is(weights, numeric(), length(dist)) if(!abs(sum(weights)-1) < sqrt(.Machine$double.eps)){ abort("Weights of a mixture model must sum to 1.") } if(any(weights < 0)){ abort("All weights in a mixtue model must be non-negative.") } new_dist(dist = transpose(dist), w = list(weights), class = "dist_mixture", dimnames = dn) } #' @export format.dist_mixture <- function(x, width = getOption("width"), ...){ dists <- unlist(lapply(x[["dist"]], format)) dist_info <- paste0(x[["w"]], "*", dists, collapse = ", ") long_dist <- paste0("mixture(", dist_info, ")") short_dist <- paste0("mixture(n=", length(dists), ")") ifelse(nchar(long_dist) <= width, long_dist, short_dist) } #' @export density.dist_mixture <- function(x, at, ...){ if(NROW(at) > 1) return(vapply(at, density, numeric(1L), x = x, ...)) sum(x[["w"]]*vapply(x[["dist"]], density, numeric(1L), at = at, ...)) } #' @export quantile.dist_mixture <- function(x, p, ...){ d <- dim(x) if(d > 1) stop("quantile is not implemented for multivariate mixtures.") if(length(p) > 1) return(vapply(p, quantile, numeric(1L), x = x, ...)) # Find bounds for optimisation based on range of each quantile dist_q <- vapply(x[["dist"]], quantile, numeric(1L), p, ..., USE.NAMES = FALSE) if(vctrs::vec_unique_count(dist_q) == 1) return(dist_q[1]) if(p == 0) return(min(dist_q)) if(p == 1) return(max(dist_q)) # Search the cdf() for appropriate quantile stats::uniroot( function(pos) p - cdf(x, pos, ...), interval = c(min(dist_q), max(dist_q)), extendInt = "yes" )$root } #' @export cdf.dist_mixture <- function(x, q, times = 1e5, ...){ d <- dim(x) if(d == 1L) { if(length(q) > 1) return(vapply(q, cdf, numeric(1L), x = x, ...)) sum(x[["w"]]*vapply(x[["dist"]], cdf, numeric(1L), q = q, ...)) } else { NextMethod() } } #' @export generate.dist_mixture <- function(x, times, ...){ dist_idx <- .bincode(stats::runif(times), breaks = c(0, cumsum(x[["w"]]))) r <- matrix(nrow = times, ncol = dim(x)) for(i in seq_along(x[["dist"]])){ r_pos <- dist_idx == i if(any(r_pos)) { r[r_pos,] <- generate(x[["dist"]][[i]], sum(r_pos), ...) } } r[,seq(NCOL(r)), drop = TRUE] } #' @export mean.dist_mixture <- function(x, ...){ d <- dim(x) m <- vapply(x[["dist"]], mean, numeric(d), ...) if(d == 1L) { sum(x[["w"]] * m) } else { matrix(x[["w"]], ncol = d, nrow = 1) %*% t(m) } } #' @export covariance.dist_mixture <- function(x, ...){ d <- dim(x) if(d == 1L) { m <- vapply(x[["dist"]], mean, numeric(1L), ...) v <- vapply(x[["dist"]], variance, numeric(1L), ...) m1 <- sum(x[["w"]]*m) m2 <- sum(x[["w"]]*(m^2 + v)) m2 - m1^2 } else { m <- lapply(x[["dist"]], mean) w <- as.list(x[["w"]]) mbar <- mapply("*", m, w, SIMPLIFY = FALSE) mbar <- do.call("+", mbar) m <- lapply(m, function(u){u - mbar}) v <- lapply(x[["dist"]], function(u){covariance(u)[[1]]}) cov <- mapply(function(m,v,w) {w * ( t(m) %*% m + v ) }, m, v, w, SIMPLIFY = FALSE) list(do.call("+", cov)) } } #' @export dim.dist_mixture <- function(x){ dim(x[["dist"]][[1]]) } distributional/R/dist_burr.R0000644000176200001440000000733615131601265015640 0ustar liggesusers#' The Burr distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Burr distribution (Type XII) is a flexible continuous probability #' distribution often used for modeling income distributions, reliability #' data, and failure times. #' #' @inheritParams actuar::dburr #' #' @details #' #' `r pkgdown_doc_link("dist_burr")` #' #' In the following, let \eqn{X} be a Burr random variable with parameters #' `shape1` = \eqn{\alpha}, `shape2` = \eqn{\gamma}, and `rate` = \eqn{\lambda}. #' #' **Support**: \eqn{x \in (0, \infty)}{x > 0} #' #' **Mean**: \eqn{\frac{\lambda^{-1/\alpha} \gamma B(\gamma - 1/\alpha, 1 + 1/\alpha)}{\gamma}} (for \eqn{\alpha \gamma > 1}) #' #' **Variance**: \eqn{\frac{\lambda^{-2/\alpha} \gamma B(\gamma - 2/\alpha, 1 + 2/\alpha)}{\gamma} - \mu^2} (for \eqn{\alpha \gamma > 2}) #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \alpha \gamma \lambda x^{\alpha - 1} (1 + \lambda x^\alpha)^{-\gamma - 1} #' }{ #' f(x) = \alpha \gamma \lambda x^(\alpha - 1) (1 + \lambda x^\alpha)^(-\gamma - 1) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = 1 - (1 + \lambda x^\alpha)^{-\gamma} #' }{ #' F(x) = 1 - (1 + \lambda x^\alpha)^(-\gamma) #' } #' #' **Quantile function**: #' #' \deqn{ #' F^{-1}(p) = \lambda^{-1/\alpha} ((1 - p)^{-1/\gamma} - 1)^{1/\alpha} #' }{ #' F^(-1)(p) = \lambda^(-1/\alpha) ((1 - p)^(-1/\gamma) - 1)^(1/\alpha) #' } #' #' **Moment generating function (m.g.f)**: #' #' Does not exist in closed form. #' #' @seealso [actuar::Burr] #' #' @examples #' dist <- dist_burr(shape1 = c(1,1,1,2,3,0.5), shape2 = c(1,2,3,1,1,2)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_burr #' @export dist_burr <- function(shape1, shape2, rate = 1, scale = 1/rate){ shape1 <- vec_cast(shape1, double()) shape2 <- vec_cast(shape2, double()) if(any(shape1 <= 0)){ abort("The shape1 parameter of a Burr distribution must be strictly positive.") } if(any(shape2 <= 0)){ abort("The shape2 parameter of a Burr distribution must be strictly positive.") } if(any(rate <= 0)){ abort("The rate parameter of a Burr distribution must be strictly positive.") } new_dist(s1 = shape1, s2 = shape2, r = 1/scale, class = "dist_burr") } #' @export format.dist_burr <- function(x, digits = 2, ...){ sprintf( "Burr12(%s, %s, %s)", format(x[["s1"]], digits = digits, ...), format(x[["s2"]], x[["r"]], digits = digits, ...), format(x[["r"]], digits = digits, ...) ) } #' @export density.dist_burr <- function(x, at, ...){ require_package("actuar") actuar::dburr(at, x[["s1"]], x[["s2"]], x[["r"]]) } #' @export log_density.dist_burr <- function(x, at, ...){ require_package("actuar") actuar::dburr(at, x[["s1"]], x[["s2"]], x[["r"]], log = TRUE) } #' @export quantile.dist_burr <- function(x, p, ...){ require_package("actuar") actuar::qburr(p, x[["s1"]], x[["s2"]], x[["r"]]) } #' @export cdf.dist_burr <- function(x, q, ...){ require_package("actuar") actuar::pburr(q, x[["s1"]], x[["s2"]], x[["r"]]) } #' @export generate.dist_burr <- function(x, times, ...){ require_package("actuar") actuar::rburr(times, x[["s1"]], x[["s2"]], x[["r"]]) } #' @export mean.dist_burr <- function(x, ...){ require_package("actuar") actuar::mburr(1, x[["s1"]], x[["s2"]], x[["r"]]) } #' @export covariance.dist_burr <- function(x, ...){ require_package("actuar") m1 <- actuar::mburr(1, x[["s1"]], x[["s2"]], x[["r"]]) m2 <- actuar::mburr(2, x[["s1"]], x[["s2"]], x[["r"]]) -m1^2 + m2 } distributional/R/dist_laplace.R0000644000176200001440000000704015131601346016257 0ustar liggesusers#' The Laplace distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Laplace distribution, also known as the double exponential distribution, #' is a continuous probability distribution that is symmetric around its location #' parameter. #' #' @param mu The location parameter (mean) of the Laplace distribution. #' @param sigma The positive scale parameter of the Laplace distribution. #' #' @details #' #' `r pkgdown_doc_link("dist_laplace")` #' #' In the following, let \eqn{X} be a Laplace random variable with location #' parameter `mu` = \eqn{\mu} and scale parameter `sigma` = \eqn{\sigma}. #' #' **Support**: \eqn{R}, the set of all real numbers #' #' **Mean**: \eqn{\mu} #' #' **Variance**: \eqn{2\sigma^2} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{2\sigma} \exp\left(-\frac{|x - \mu|}{\sigma}\right) #' }{ #' f(x) = 1 / (2 * sigma) * exp(-|x - mu| / sigma) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \begin{cases} #' \frac{1}{2} \exp\left(\frac{x - \mu}{\sigma}\right) & \text{if } x < \mu \\ #' 1 - \frac{1}{2} \exp\left(-\frac{x - \mu}{\sigma}\right) & \text{if } x \geq \mu #' \end{cases} #' }{ #' F(x) = 1/2 * exp((x - mu) / sigma) if x < mu, #' 1 - 1/2 * exp(-(x - mu) / sigma) if x >= mu #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \frac{\exp(\mu t)}{1 - \sigma^2 t^2} \text{ for } |t| < \frac{1}{\sigma} #' }{ #' E(e^(tX)) = exp(mu * t) / (1 - sigma^2 * t^2) for |t| < 1/sigma #' } #' #' @seealso [extraDistr::Laplace] #' #' @examples #' dist <- dist_laplace(mu = c(0, 2, -1), sigma = c(1, 2, 0.5)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 0) #' density(dist, 0, log = TRUE) #' #' cdf(dist, 1) #' #' quantile(dist, 0.7) #' #' @name dist_laplace #' @export dist_laplace <- function(mu, sigma) { mu <- vec_cast(mu, double()) sigma <- vec_cast(sigma, double()) if (any(sigma <= 0)) { abort("The scale parameter of a Laplace distribution must be positive.") } new_dist(mu = mu, sigma = sigma, class = "dist_laplace") } #' @export format.dist_laplace <- function(x, digits = 2, ...) { sprintf( "Laplace(%s, %s)", format(x[["mu"]], digits = digits, ...), format(x[["sigma"]], digits = digits, ...) ) } #' @export density.dist_laplace <- function(x, at, ...) { mu <- x[["mu"]] sigma <- x[["sigma"]] (1 / (2 * sigma)) * exp(-abs(at - mu) / sigma) } #' @export log_density.dist_laplace <- function(x, at, ...) { mu <- x[["mu"]] sigma <- x[["sigma"]] -log(2 * sigma) - abs(at - mu) / sigma } #' @export quantile.dist_laplace <- function(x, p, ...) { mu <- x[["mu"]] sigma <- x[["sigma"]] mu - sigma * sign(p - 0.5) * log(1 - 2 * abs(p - 0.5)) } #' @export cdf.dist_laplace <- function(x, q, ...) { mu <- x[["mu"]] sigma <- x[["sigma"]] ifelse(q < mu, 0.5 * exp((q - mu) / sigma), 1 - 0.5 * exp(-(q - mu) / sigma) ) } #' @export generate.dist_laplace <- function(x, times, ...) { mu <- x[["mu"]] sigma <- x[["sigma"]] u <- stats::runif(times) - 0.5 mu - sigma * sign(u) * log(1 - 2 * abs(u)) } #' @export mean.dist_laplace <- function(x, ...) { x[["mu"]] } #' @export variance.dist_laplace <- function(x, ...) { 2 * x[["sigma"]]^2 } #' @export skewness.dist_laplace <- function(x, ...) { 0 } #' @export kurtosis.dist_laplace <- function(x, ...) { 3 } #' @export has_symmetry.dist_laplace <- function(x, ...) { TRUE }distributional/R/dist_percentile.R0000644000176200001440000001011215131575133017006 0ustar liggesusers#' Percentile distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Percentile distribution is a non-parametric distribution defined by #' a set of quantiles at specified percentile values. This distribution is #' useful for representing empirical distributions or elicited expert #' knowledge when only percentile information is available. The distribution #' uses linear interpolation between percentiles and can be used to #' approximate complex distributions that may not have simple parametric forms. #' #' @param x A list of values #' @param percentile A list of percentiles #' #' @details #' #' `r pkgdown_doc_link("dist_percentile")` #' #' In the following, let \eqn{X} be a Percentile random variable defined by #' values \eqn{x_1, x_2, \ldots, x_n} at percentiles #' \eqn{p_1, p_2, \ldots, p_n} where \eqn{0 \le p_i \le 100}. #' #' **Support**: \eqn{[\min(x_i), \max(x_i)]} if \eqn{\min(p_i) > 0} or #' \eqn{\max(p_i) < 100}, otherwise support is approximated from the #' specified percentiles. #' #' **Mean**: Approximated numerically using spline interpolation and #' numerical integration: #' #' \deqn{ #' E(X) \approx \int_0^1 Q(u) du #' }{ #' E(X) ≈ integral_0^1 Q(u) du #' } #' #' where \eqn{Q(u)} is a spline function interpolating the percentile values. #' #' **Variance**: Approximated numerically. #' #' **Probability density function (p.d.f)**: Approximated numerically using #' kernel density estimation from generated samples. #' #' **Cumulative distribution function (c.d.f)**: Defined by linear #' interpolation: #' #' \deqn{ #' F(t) = \begin{cases} #' p_1/100 & \text{if } t < x_1 \\ #' p_i/100 + \frac{(t - x_i)(p_{i+1} - p_i)}{100(x_{i+1} - x_i)} & \text{if } x_i \le t < x_{i+1} \\ #' p_n/100 & \text{if } t \ge x_n #' \end{cases} #' }{ #' F(t) = p_i/100 + (t - x_i)(p_{i+1} - p_i) / (100(x_{i+1} - x_i)) for x_i ≤ t < x_{i+1} #' } #' #' **Quantile function**: Defined by linear interpolation: #' #' \deqn{ #' Q(u) = x_i + \frac{(100u - p_i)(x_{i+1} - x_i)}{p_{i+1} - p_i} #' }{ #' Q(u) = x_i + (100u - p_i)(x_{i+1} - x_i) / (p_{i+1} - p_i) #' } #' #' for \eqn{p_i/100 \le u \le p_{i+1}/100}. #' #' @examples #' dist <- dist_normal() #' percentiles <- seq(0.01, 0.99, by = 0.01) #' x <- vapply(percentiles, quantile, double(1L), x = dist) #' dist_percentile(list(x), list(percentiles*100)) #' #' @export dist_percentile <- function(x, percentile){ x <- as_list_of(x, .ptype = double()) percentile <- as_list_of(percentile, .ptype = double()) new_dist(x = x, percentile = percentile, class = "dist_percentile") } #' @export format.dist_percentile <- function(x, ...){ sprintf( "percentile[%s]", length(x[["x"]]) ) } #' @export density.dist_percentile <- function(x, at, ...){ d <- density(generate(x, 1000), from = min(at), to = max(at), ..., na.rm=TRUE) stats::approx(d$x, d$y, xout = at)$y } #' @export quantile.dist_percentile <- function(x, p, ...){ out <- x[["x"]][match(p, x[["percentile"]])] out[is.na(out)] <- stats::approx(x = x[["percentile"]]/100, y = x[["x"]], xout = p[is.na(out)])$y out } #' @export cdf.dist_percentile <- function(x, q, ...){ stats::approx(x = x[["x"]], y = x[["percentile"]]/100, xout = q)$y } #' @export generate.dist_percentile <- function(x, times, ...){ stats::approx(x[["percentile"]], x[["x"]], xout=stats::runif(times,min(x[["percentile"]]),max(x[["percentile"]])))$y } #' @export mean.dist_percentile <- function(x, ...) { # assumes percentile is sorted # probs <- x[["percentile"]]/100 # i <- seq_along(probs) # # weights <- (probs[pmin(i+1, length(probs))] - probs[pmax(i-1, 1)]) / 2 # sum(x[["x"]] * weights) # Fit a spline to the percentiles spline_fit <- stats::splinefun(x[["percentile"]], x[["x"]]) # Use numerical integration to estimate the mean stats::integrate(spline_fit, lower = 0, upper = 1)$value } #' @export support.dist_percentile <- function(x, ...) { new_support_region( list(vctrs::vec_init(x[["x"]], n = 0L)), list(range(x[["x"]])), list(!near(range(x[["percentile"]]), 0)) ) } distributional/R/dist_inverse_gaussian.R0000644000176200001440000000746315131556521020240 0ustar liggesusers#' The Inverse Gaussian distribution #' #' The inverse Gaussian distribution (also known as the Wald distribution) is #' commonly used to model positive-valued data, particularly in contexts #' involving first passage times and reliability analysis. #' #' @description #' `r lifecycle::badge('stable')` #' #' @inheritParams actuar::dinvgauss #' #' @details #' #' `r pkgdown_doc_link("dist_inverse_gaussian")` #' #' In the following, let \eqn{X} be an Inverse Gaussian random variable with #' parameters `mean` = \eqn{\mu} and `shape` = \eqn{\lambda}. #' #' **Support**: \eqn{(0, \infty)} #' #' **Mean**: \eqn{\mu} #' #' **Variance**: \eqn{\frac{\mu^3}{\lambda}} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \sqrt{\frac{\lambda}{2\pi x^3}} #' \exp\left(-\frac{\lambda(x - \mu)^2}{2\mu^2 x}\right) #' }{ #' f(x) = sqrt(\lambda / (2 \pi x^3)) #' exp(-\lambda (x - \mu)^2 / (2 \mu^2 x)) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \Phi\left(\sqrt{\frac{\lambda}{x}} #' \left(\frac{x}{\mu} - 1\right)\right) + #' \exp\left(\frac{2\lambda}{\mu}\right) #' \Phi\left(-\sqrt{\frac{\lambda}{x}} #' \left(\frac{x}{\mu} + 1\right)\right) #' }{ #' F(x) = \Phi(sqrt(\lambda/x) (x/\mu - 1)) + #' exp(2\lambda/\mu) \Phi(-sqrt(\lambda/x) (x/\mu + 1)) #' } #' #' where \eqn{\Phi} is the standard normal c.d.f. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \exp\left(\frac{\lambda}{\mu} #' \left(1 - \sqrt{1 - \frac{2\mu^2 t}{\lambda}}\right)\right) #' }{ #' E(e^(tX)) = exp((\lambda/\mu) (1 - sqrt(1 - 2\mu^2 t/\lambda))) #' } #' #' for \eqn{t < \frac{\lambda}{2\mu^2}}. #' #' **Skewness**: \eqn{3\sqrt{\frac{\mu}{\lambda}}} #' #' **Excess Kurtosis**: \eqn{\frac{15\mu}{\lambda}} #' #' **Quantiles**: No closed-form expression, approximated numerically. #' #' @seealso [actuar::InverseGaussian] #' #' @examples #' dist <- dist_inverse_gaussian(mean = c(1,1,1,3,3), shape = c(0.2, 1, 3, 0.2, 1)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_inverse_gaussian #' @export dist_inverse_gaussian <- function(mean, shape){ mean <- vec_cast(mean, double()) shape <- vec_cast(shape, double()) if(any(mean[!is.na(mean)] <= 0)){ abort("The mean parameter of a Inverse Gaussian distribution must be strictly positive.") } if(any(shape[!is.na(shape)] <= 0)){ abort("The shape parameter of a Inverse Gaussian distribution must be strictly positive.") } new_dist(m = mean, s = shape, class = "dist_inverse_gaussian") } #' @export format.dist_inverse_gaussian <- function(x, digits = 2, ...){ sprintf( "IG(%s, %s)", format(x[["m"]], digits = digits, ...), format(x[["s"]], digits = digits, ...) ) } #' @export density.dist_inverse_gaussian <- function(x, at, ...){ require_package("actuar") actuar::dinvgauss(at, x[["m"]], x[["s"]]) } #' @export log_density.dist_inverse_gaussian <- function(x, at, ...){ require_package("actuar") actuar::dinvgauss(at, x[["m"]], x[["s"]], log = TRUE) } #' @export quantile.dist_inverse_gaussian <- function(x, p, ...){ require_package("actuar") actuar::qinvgauss(p, x[["m"]], x[["s"]]) } #' @export cdf.dist_inverse_gaussian <- function(x, q, ...){ require_package("actuar") actuar::pinvgauss(q, x[["m"]], x[["s"]]) } #' @export generate.dist_inverse_gaussian <- function(x, times, ...){ require_package("actuar") actuar::rinvgauss(times, x[["m"]], x[["s"]]) } #' @export mean.dist_inverse_gaussian <- function(x, ...){ x[["m"]] } #' @export covariance.dist_inverse_gaussian <- function(x, ...){ x[["m"]]^3/x[["s"]] } distributional/R/dist_multinomial.R0000644000176200001440000001376515131607416017227 0ustar liggesusers#' The Multinomial distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The multinomial distribution is a generalization of the binomial #' distribution to multiple categories. It is perhaps easiest to think #' that we first extend a [dist_bernoulli()] distribution to include more #' than two categories, resulting in a [dist_categorical()] distribution. #' We then extend repeat the Categorical experiment several (\eqn{n}) #' times. #' #' @param size The number of draws from the Categorical distribution. #' @param prob The probability of an event occurring from each draw. #' #' @details #' #' `r pkgdown_doc_link("dist_multinomial")` #' #' In the following, let \eqn{X = (X_1, ..., X_k)} be a Multinomial #' random variable with success probability `prob` = \eqn{p}. Note that #' \eqn{p} is vector with \eqn{k} elements that sum to one. Assume #' that we repeat the Categorical experiment `size` = \eqn{n} times. #' #' **Support**: Each \eqn{X_i} is in \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}}. #' #' **Mean**: The mean of \eqn{X_i} is \eqn{n p_i}. #' #' **Variance**: The variance of \eqn{X_i} is \eqn{n p_i (1 - p_i)}. #' For \eqn{i \neq j}, the covariance of \eqn{X_i} and \eqn{X_j} #' is \eqn{-n p_i p_j}. #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X_1 = x_1, ..., X_k = x_k) = \frac{n!}{x_1! x_2! \cdots x_k!} p_1^{x_1} \cdot p_2^{x_2} \cdot \ldots \cdot p_k^{x_k} #' }{ #' P(X_1 = x_1, ..., X_k = x_k) = n! / (x_1! x_2! ... x_k!) p_1^x_1 p_2^x_2 ... p_k^x_k #' } #' #' where \eqn{\sum_{i=1}^k x_i = n} and \eqn{\sum_{i=1}^k p_i = 1}. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X_1 \le q_1, ..., X_k \le q_k) = \sum_{\substack{x_1, \ldots, x_k \ge 0 \\ x_i \le q_i \text{ for all } i \\ \sum_{i=1}^k x_i = n}} \frac{n!}{x_1! x_2! \cdots x_k!} p_1^{x_1} \cdot p_2^{x_2} \cdot \ldots \cdot p_k^{x_k} #' }{ #' P(X_1 <= q_1, ..., X_k <= q_k) = sum over all (x_1, ..., x_k) with x_i <= q_i and sum x_i = n of n! / (x_1! ... x_k!) p_1^x_1 ... p_k^x_k #' } #' #' The c.d.f. is computed as a finite sum of the p.m.f. over all integer vectors #' in the support that satisfy the componentwise inequalities. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{t'X}) = \left(\sum_{i=1}^k p_i e^{t_i}\right)^n #' }{ #' E(e^(t'X)) = (p_1 e^t_1 + p_2 e^t_2 + ... + p_k e^t_k)^n #' } #' #' where \eqn{t = (t_1, ..., t_k)} is a vector of the same dimension as \eqn{X}. #' #' **Skewness**: The skewness of \eqn{X_i} is #' #' \deqn{ #' \frac{1 - 2p_i}{\sqrt{n p_i (1 - p_i)}} #' }{ #' (1 - 2p_i) / sqrt(n p_i (1 - p_i)) #' } #' #' **Excess Kurtosis**: The excess kurtosis of \eqn{X_i} is #' #' \deqn{ #' \frac{1 - 6p_i(1 - p_i)}{n p_i (1 - p_i)} #' }{ #' (1 - 6p_i(1 - p_i)) / (n p_i (1 - p_i)) #' } #' #' @seealso [stats::dmultinom()], [stats::rmultinom()] #' #' @examples #' dist <- dist_multinomial(size = c(4, 3), prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, list(d = rbind(cbind(1,2,1), cbind(0,2,1)))) #' density(dist, list(d = rbind(cbind(1,2,1), cbind(0,2,1))), log = TRUE) #' #' cdf(dist, cbind(1,2,1)) #' #' @name dist_multinomial #' @export dist_multinomial <- function(size, prob){ size <- vec_cast(size, double()) prob <- lapply(prob, function(x) x/sum(x)) prob <- as_list_of(prob, .ptype = double()) new_dist(s = size, p = prob, class = "dist_multinomial") } #' @export format.dist_multinomial <- function(x, digits = 2, ...){ sprintf( "Multinomial(%s)[%s]", format(x[["s"]], digits = digits, ...), format(length(x[["p"]]), digits = digits, ...) ) } #' @export density.dist_multinomial <- function(x, at, ...){ if(is.list(at)) return(vapply(at, density, numeric(1L), x = x, ...)) stats::dmultinom(at, x[["s"]], x[["p"]]) } #' @export log_density.dist_multinomial <- function(x, at, ...){ if(is.list(at)) return(vapply(at, log_density, numeric(1L), x = x, ...)) stats::dmultinom(at, x[["s"]], x[["p"]], log = TRUE) } #' @export generate.dist_multinomial <- function(x, times, ...){ t(stats::rmultinom(times, x[["s"]], x[["p"]])) } #' @export mean.dist_multinomial <- function(x, ...){ matrix(x[["s"]]*x[["p"]], nrow = 1) } #' @export covariance.dist_multinomial <- function(x, ...){ s <- x[["s"]] p <- x[["p"]] v <- numeric(length(p)^2) for(i in seq_along(p)){ for(j in seq_along(p)){ v[(i-1)*length(p) + j] <- if(i == j) s*p[i]*(1-p[j]) else -s*p[i]*p[j] } } list(matrix(v, nrow = length(p))) } #' @export dim.dist_multinomial <- function(x){ length(x[["p"]]) } #' @export cdf.dist_multinomial <- function(x, q, ...) { # Extract parameters size <- x$s prob <- x$p k <- length(prob) # Validate dimensions if (ncol(q) != k) { stop("Number of columns in q must match the number of categories in the multinomial distribution") } n_queries <- nrow(q) result <- numeric(n_queries) for (i in seq_len(n_queries)) { q_i <- q[i, ] # Any negative quantile => CDF = 0 if (any(q_i < 0)) { result[i] <- 0 next } # Discrete CDF: floor input values q_i <- floor(q_i) # Max counts for the first (k-1) categories max_vec <- pmin(q_i[1:(k - 1)], size) # Build ranges for first (k-1) categories ranges <- lapply(max_vec, function(m) 0:m) # Enumerate only the first (k-1) coordinates partial <- expand.grid(ranges, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) # Compute the last category as the remainder sums <- rowSums(partial) xk <- size - sums # Valid if 0 <= xk <= q_k valid_idx <- which(xk >= 0 & xk <= q_i[k]) if (!length(valid_idx)) { result[i] <- 0 next } # Construct full count vectors counts <- cbind(as.matrix(partial[valid_idx, , drop = FALSE]), xk[valid_idx]) # Sum multinomial pmf over valid outcomes probs <- apply(counts, 1L, stats::dmultinom, size = size, prob = prob) result[i] <- sum(probs) } result }distributional/R/dist_lognormal.R0000644000176200001440000001101315131606443016646 0ustar liggesusers#' The log-normal distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The log-normal distribution is a commonly used transformation of the Normal #' distribution. If \eqn{X} follows a log-normal distribution, then \eqn{\ln{X}} #' would be characterised by a Normal distribution. #' #' @param mu The mean (location parameter) of the distribution, which is the #' mean of the associated Normal distribution. Can be any real number. #' @param sigma The standard deviation (scale parameter) of the distribution. #' Can be any positive number. #' #' @details #' #' `r pkgdown_doc_link("dist_lognormal")` #' #' In the following, let \eqn{X} be a log-normal random variable with #' `mu` = \eqn{\mu} and `sigma` = \eqn{\sigma}. #' #' **Support**: \eqn{R^+}, the set of positive real numbers. #' #' **Mean**: \eqn{e^{\mu + \sigma^2/2}}{exp(\mu + \sigma^2/2)} #' #' **Variance**: \eqn{(e^{\sigma^2} - 1) e^{2\mu + \sigma^2}}{(exp(\sigma^2) - 1) exp(2\mu + \sigma^2)} #' #' **Skewness**: \eqn{(e^{\sigma^2} + 2) \sqrt{e^{\sigma^2} - 1}}{(exp(\sigma^2) + 2) sqrt(exp(\sigma^2) - 1)} #' #' **Excess Kurtosis**: \eqn{e^{4\sigma^2} + 2 e^{3\sigma^2} + 3 e^{2\sigma^2} - 6}{exp(4\sigma^2) + 2 exp(3\sigma^2) + 3 exp(2\sigma^2) - 6} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{x\sqrt{2 \pi \sigma^2}} e^{-(\ln{x} - \mu)^2 / (2 \sigma^2)} #' }{ #' f(x) = 1 / (x sqrt(2 \pi \sigma^2)) exp(-(\log(x) - \mu)^2 / (2 \sigma^2)) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \Phi\left(\frac{\ln{x} - \mu}{\sigma}\right) #' }{ #' F(x) = \Phi((\log(x) - \mu) / \sigma) #' } #' #' where \eqn{\Phi} is the c.d.f. of the standard Normal distribution. #' #' **Moment generating function (m.g.f)**: #' #' Does not exist in closed form. #' #' @seealso [stats::Lognormal] #' #' @examples #' dist <- dist_lognormal(mu = 1:5, sigma = 0.1) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' # A log-normal distribution X is exp(Y), where Y is a Normal distribution of #' # the same parameters. So log(X) will produce the Normal distribution Y. #' log(dist) #' @name dist_lognormal #' @export dist_lognormal <- function(mu = 0, sigma = 1){ mu <- vec_cast(mu, double()) sigma <- vec_cast(sigma, double()) if(any(sigma[!is.na(sigma)] < 0)){ abort("Standard deviation of a log-normal distribution must be non-negative") } new_dist(mu = mu, sigma = sigma, class = "dist_lognormal") } #' @export format.dist_lognormal <- function(x, digits = 2, ...){ sprintf( "lN(%s, %s)", format(x[["mu"]], digits = digits, ...), format(x[["sigma"]]^2, digits = digits, ...) ) } #' @export density.dist_lognormal <- function(x, at, ...){ stats::dlnorm(at, x[["mu"]], x[["sigma"]]) } #' @export log_density.dist_lognormal <- function(x, at, ...){ stats::dlnorm(at, x[["mu"]], x[["sigma"]], log = TRUE) } #' @export quantile.dist_lognormal <- function(x, p, ...){ stats::qlnorm(p, x[["mu"]], x[["sigma"]]) } #' @export log_quantile.dist_lognormal <- function(x, p, ...){ stats::qlnorm(p, x[["mu"]], x[["sigma"]], log.p = TRUE) } #' @export cdf.dist_lognormal <- function(x, q, ...){ stats::plnorm(q, x[["mu"]], x[["sigma"]]) } #' @export log_cdf.dist_lognormal <- function(x, q, ...){ stats::plnorm(q, x[["mu"]], x[["sigma"]], log.p = TRUE) } #' @export generate.dist_lognormal <- function(x, times, ...){ stats::rlnorm(times, x[["mu"]], x[["sigma"]]) } #' @export mean.dist_lognormal <- function(x, ...){ exp(x[["mu"]] + x[["sigma"]]^2/2) } #' @export covariance.dist_lognormal <- function(x, ...){ s2 <- x[["sigma"]]^2 (exp(s2)-1)*exp(2*x[["mu"]] + s2) } #' @export skewness.dist_lognormal <- function(x, ...) { es2 <- exp(x[["sigma"]]^2) (es2+2)*sqrt(es2-1) } #' @export kurtosis.dist_lognormal <- function(x, ...) { s2 <- x[["sigma"]]^2 exp(4*s2) + 2*exp(3*s2) + 3*exp(2*s2) - 6 } # make a normal distribution from a lognormal distribution using the # specified base normal_dist_with_base <- function(x, base = exp(1)) { vec_data(dist_normal(x[["mu"]], x[["sigma"]]) / log(base))[[1]] } #' @method Math dist_lognormal #' @export Math.dist_lognormal <- function(x, ...) { switch(.Generic, # Shortcuts to get Normal distribution from log-normal. log = normal_dist_with_base(x, ...), log2 = normal_dist_with_base(x, 2), log10 = normal_dist_with_base(x, 10), NextMethod() ) } distributional/R/dist_binomial.R0000644000176200001440000001100615131570030016440 0ustar liggesusers#' The Binomial distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Binomial distributions are used to represent situations can that can #' be thought as the result of \eqn{n} Bernoulli experiments (here the #' \eqn{n} is defined as the `size` of the experiment). The classical #' example is \eqn{n} independent coin flips, where each coin flip has #' probability `p` of success. In this case, the individual probability of #' flipping heads or tails is given by the Bernoulli(p) distribution, #' and the probability of having \eqn{x} equal results (\eqn{x} heads, #' for example), in \eqn{n} trials is given by the Binomial(n, p) distribution. #' The equation of the Binomial distribution is directly derived from #' the equation of the Bernoulli distribution. #' #' @param size The number of trials. Must be an integer greater than or equal #' to one. When `size = 1L`, the Binomial distribution reduces to the #' Bernoulli distribution. Often called `n` in textbooks. #' @param prob The probability of success on each trial, `prob` can be any #' value in `[0, 1]`. #' #' @details #' #' `r pkgdown_doc_link("dist_binomial")` #' #' The Binomial distribution comes up when you are interested in the portion #' of people who do a thing. The Binomial distribution #' also comes up in the sign test, sometimes called the Binomial test #' (see [stats::binom.test()]), where you may need the Binomial C.D.F. to #' compute p-values. #' #' In the following, let \eqn{X} be a Binomial random variable with parameter #' `size` = \eqn{n} and `p` = \eqn{p}. Some textbooks define \eqn{q = 1 - p}, #' or called \eqn{\pi} instead of \eqn{p}. #' #' **Support**: \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}} #' #' **Mean**: \eqn{np} #' #' **Variance**: \eqn{np \cdot (1 - p) = np \cdot q}{np (1 - p)} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = k) = {n \choose k} p^k (1 - p)^{n-k} #' }{ #' P(X = k) = choose(n, k) p^k (1 - p)^(n - k) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X \le k) = \sum_{i=0}^{\lfloor k \rfloor} {n \choose i} p^i (1 - p)^{n-i} #' }{ #' P(X \le k) = \sum_{i=0}^k choose(n, i) p^i (1 - p)^(n-i) #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = (1 - p + p e^t)^n #' }{ #' E(e^(tX)) = (1 - p + p e^t)^n #' } #' #' **Skewness**: #' #' \deqn{ #' \frac{1 - 2p}{\sqrt{np(1-p)}} #' }{ #' (1 - 2p) / sqrt(np(1-p)) #' } #' #' **Excess kurtosis**: #' #' \deqn{ #' \frac{1 - 6p(1-p)}{np(1-p)} #' }{ #' (1 - 6p(1-p)) / (np(1-p)) #' } #' #' @seealso [stats::Binomial] #' #' @examples #' dist <- dist_binomial(size = 1:5, prob = c(0.05, 0.5, 0.3, 0.9, 0.1)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_binomial #' @export dist_binomial <- function(size, prob){ size <- vec_cast(size, integer()) prob <- vec_cast(prob, double()) if(any(size < 0)){ abort("The number of observations cannot be negative.") } if(any((prob < 0) | (prob > 1))){ abort("The probability of success must be between 0 and 1.") } new_dist(n = size, p = prob, class = "dist_binomial") } #' @export format.dist_binomial <- function(x, digits = 2, ...){ sprintf( "B(%s, %s)", format(x[["n"]], digits = digits, ...), format(x[["p"]], digits = digits, ...) ) } #' @export density.dist_binomial <- function(x, at, ...){ stats::dbinom(at, x[["n"]], x[["p"]]) } #' @export log_density.dist_binomial <- function(x, at, ...){ stats::dbinom(at, x[["n"]], x[["p"]], log = TRUE) } #' @export quantile.dist_binomial <- function(x, p, ...){ as.integer(stats::qbinom(p, x[["n"]], x[["p"]])) } #' @export cdf.dist_binomial <- function(x, q, ...){ stats::pbinom(q, x[["n"]], x[["p"]]) } #' @export generate.dist_binomial <- function(x, times, ...){ as.integer(stats::rbinom(times, x[["n"]], x[["p"]])) } #' @export mean.dist_binomial <- function(x, ...){ x[["n"]]*x[["p"]] } #' @export covariance.dist_binomial <- function(x, ...){ x[["n"]]*x[["p"]]*(1-x[["p"]]) } #' @export skewness.dist_binomial <- function(x, ...) { n <- x[["n"]] p <- x[["p"]] q <- 1 - p (1 - (2 * p)) / sqrt(n * p * q) } #' @export kurtosis.dist_binomial <- function(x, ...) { n <- x[["n"]] p <- x[["p"]] q <- 1 - p (1 - (6 * p * q)) / (n * p * q) } #' @export has_symmetry.dist_binomial <- function(x, ...) x[["p"]] == 0.5distributional/R/dist_pareto.R0000644000176200001440000000762015131564154016161 0ustar liggesusers#' The Pareto Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Pareto distribution is a power-law probability distribution commonly used #' in actuarial science to model loss severity and in economics to model income #' distributions and firm sizes. #' #' @inheritParams actuar::dpareto #' #' @details #' #' `r pkgdown_doc_link("dist_pareto")` #' #' In the following, let \eqn{X} be a Pareto random variable with parameters #' `shape` = \eqn{\alpha} and `scale` = \eqn{\theta}. #' #' **Support**: \eqn{(0, \infty)}{(0, Inf)} #' #' **Mean**: \eqn{\frac{\theta}{\alpha - 1}}{\theta / (\alpha - 1)} for \eqn{\alpha > 1}, #' undefined otherwise #' #' **Variance**: \eqn{\frac{\alpha\theta^2}{(\alpha - 1)^2(\alpha - 2)}}{\alpha\theta^2 / ((\alpha - 1)^2(\alpha - 2))} #' for \eqn{\alpha > 2}, undefined otherwise #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{\alpha\theta^\alpha}{(x + \theta)^{\alpha + 1}} #' }{ #' f(x) = \alpha\theta^\alpha / (x + \theta)^(\alpha + 1) #' } #' #' for \eqn{x > 0}, \eqn{\alpha > 0} and \eqn{\theta > 0}. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = 1 - \left(\frac{\theta}{x + \theta}\right)^\alpha #' }{ #' F(x) = 1 - (\theta / (x + \theta))^\alpha #' } #' #' for \eqn{x > 0}. #' #' **Moment generating function (m.g.f)**: #' #' Does not exist in closed form, but the \eqn{k}th raw moment \eqn{E[X^k]} exists #' for \eqn{-1 < k < \alpha}. #' #' @note #' There are many different definitions of the Pareto distribution in the #' literature; see Arnold (2015) or Kleiber and Kotz (2003). This implementation #' uses the Pareto distribution without a location parameter as described in [actuar::Pareto]. #' #' @references #' Kleiber, C. and Kotz, S. (2003), *Statistical Size Distributions in Economics #' and Actuarial Sciences*, Wiley. #' #' Klugman, S. A., Panjer, H. H. and Willmot, G. E. (2012), *Loss Models, From #' Data to Decisions, Fourth Edition*, Wiley. #' #' @seealso [actuar::Pareto] #' #' @examples #' dist <- dist_pareto(shape = c(10, 3, 2, 1), scale = rep(1, 4)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_pareto #' @export dist_pareto <- function(shape, scale){ shape <- vec_cast(shape, double()) scale <- vec_cast(scale, double()) if(any(shape < 0)){ abort("The shape parameter of a Pareto distribution must be non-negative.") } if(any(scale <= 0)){ abort("The scale parameter of a Pareto distribution must be strictly positive.") } new_dist(shape = shape, scale = scale, class = "dist_pareto") } #' @export format.dist_pareto <- function(x, digits = 2, ...){ sprintf( "Pareto(%s, %s)", format(x[["shape"]], digits = digits, ...), format(x[["scale"]], digits = digits, ...) ) } #' @export density.dist_pareto <- function(x, at, ...){ require_package("actuar") actuar::dpareto(at, x[["shape"]], x[["scale"]]) } #' @export log_density.dist_pareto <- function(x, at, ...){ require_package("actuar") actuar::dpareto(at, x[["shape"]], x[["scale"]], log = TRUE) } #' @export quantile.dist_pareto <- function(x, p, ...){ require_package("actuar") actuar::qpareto(p, x[["shape"]], x[["scale"]]) } #' @export cdf.dist_pareto <- function(x, q, ...){ require_package("actuar") actuar::ppareto(q, x[["shape"]], x[["scale"]]) } #' @export generate.dist_pareto <- function(x, times, ...){ require_package("actuar") actuar::rpareto(times, x[["shape"]], x[["scale"]]) } #' @export mean.dist_pareto <- function(x, ...){ actuar::mpareto(1, x[["shape"]], x[["scale"]]) } #' @export covariance.dist_pareto <- function(x, ...){ actuar::mpareto(2, x[["shape"]], x[["scale"]]) - actuar::mpareto(1, x[["shape"]], x[["scale"]])^2 } distributional/R/dist_categorical.R0000644000176200001440000001171515131570033017135 0ustar liggesusers#' The Categorical distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Categorical distributions are used to represent events with multiple #' outcomes, such as what number appears on the roll of a dice. This is also #' referred to as the 'generalised Bernoulli' or 'multinoulli' distribution. #' The Categorical distribution is a special case of the [Multinomial()] #' distribution with `n = 1`. #' #' @param prob A list of probabilities of observing each outcome category. #' @param outcomes The list of vectors where each value represents each outcome. #' #' @details #' #' `r pkgdown_doc_link("dist_categorical")` #' #' In the following, let \eqn{X} be a Categorical random variable with #' probability parameters `prob` = \eqn{\{p_1, p_2, \ldots, p_k\}}. #' #' The Categorical probability distribution is widely used to model the #' occurance of multiple events. A simple example is the roll of a dice, where #' \eqn{p = \{1/6, 1/6, 1/6, 1/6, 1/6, 1/6\}} giving equal chance of observing #' each number on a 6 sided dice. #' #' **Support**: \eqn{\{1, \ldots, k\}}{{1, ..., k}} #' #' **Mean**: Not defined for unordered categories. For ordered categories with #' integer outcomes \eqn{\{1, 2, \ldots, k\}}, the mean is: #' #' \deqn{ #' E(X) = \sum_{i=1}^{k} i \cdot p_i #' }{ #' E(X) = sum(i * p_i) for i = 1 to k #' } #' #' **Variance**: Not defined for unordered categories. For ordered categories #' with integer outcomes \eqn{\{1, 2, \ldots, k\}}, the variance is: #' #' \deqn{ #' \text{Var}(X) = \sum_{i=1}^{k} i^2 \cdot p_i - \left(\sum_{i=1}^{k} i \cdot p_i\right)^2 #' }{ #' Var(X) = sum(i^2 * p_i) - [sum(i * p_i)]^2 #' } #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = i) = p_i #' }{ #' P(X = i) = p_i #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' The c.d.f is undefined for unordered categories. For ordered categories #' with outcomes \eqn{x_1 < x_2 < \ldots < x_k}, the c.d.f is: #' #' \deqn{ #' P(X \le x_j) = \sum_{i=1}^{j} p_i #' }{ #' P(X <= x_j) = sum(p_i) for i = 1 to j #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \sum_{i=1}^{k} e^{tx_i} \cdot p_i #' }{ #' E(e^(tX)) = sum(e^(t * x_i) * p_i) for i = 1 to k #' } #' #' **Skewness**: Approximated numerically for ordered categories. #' #' **Kurtosis**: Approximated numerically for ordered categories. #' #' @seealso [stats::Multinomial] #' #' @examples #' dist <- dist_categorical(prob = list(c(0.05, 0.5, 0.15, 0.2, 0.1), c(0.3, 0.1, 0.6))) #' #' dist #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' # The outcomes aren't ordered, so many statistics are not applicable. #' cdf(dist, 0.6) #' quantile(dist, 0.7) #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' # Some of these statistics are meaningful for ordered outcomes #' dist <- dist_categorical(list(rpois(26, 3)), list(ordered(letters))) #' dist #' cdf(dist, "m") #' quantile(dist, 0.5) #' #' dist <- dist_categorical( #' prob = list(c(0.05, 0.5, 0.15, 0.2, 0.1), c(0.3, 0.1, 0.6)), #' outcomes = list(letters[1:5], letters[24:26]) #' ) #' #' generate(dist, 10) #' #' density(dist, "a") #' density(dist, "z", log = TRUE) #' #' @name dist_categorical #' @export dist_categorical <- function(prob, outcomes = NULL){ prob <- lapply(prob, function(x) x/sum(x)) prob <- as_list_of(prob, .ptype = double()) if(is.null(outcomes)) { new_dist(p = prob, class = "dist_categorical") } else { new_dist(p = prob, x = outcomes, class = "dist_categorical") } } #' @export format.dist_categorical <- function(x, digits = 2, ...){ sprintf( "Categorical[%s]", format(length(x[["p"]]), digits = digits, ...) ) } #' @export density.dist_categorical <- function(x, at, ...){ if(!is.null(x[["x"]])) at <- match(at, x[["x"]]) at[at <= 0] <- NA_real_ x[["p"]][at] } #' @export quantile.dist_categorical <- function(x, p, ...){ if(is.ordered(x[["x"]])) { return(x[["x"]][findInterval(p, cumsum(x[["p"]]))]) } rep_len(NA_real_, length(p)) } #' @export cdf.dist_categorical <- function(x, q, ...){ if(is.ordered(x[["x"]])) { return(cumsum(x[["p"]])[match(q, x[["x"]])]) } rep_len(NA_real_, length(q)) } #' @export generate.dist_categorical <- function(x, times, ...){ z <- sample( x = seq_along(x[["p"]]), size = times, prob = x[["p"]], replace = TRUE ) if(is.null(x[["x"]])) return(z) x[["x"]][z] } #' @export support.dist_categorical <- function(x, ...) { region <- if(is.null(x[["p"]])) seq_along(x[["p"]]) else x[["x"]] new_support_region( list(vctrs::vec_init(region, n = 0L)), list(region), list(c(TRUE, TRUE)) ) } #' @export mean.dist_categorical <- function(x, ...){ NA_real_ } #' @export covariance.dist_categorical <- function(x, ...){ NA_real_ } #' @export skewness.dist_categorical <- function(x, ...) { NA_real_ } #' @export kurtosis.dist_categorical <- function(x, ...) { NA_real_ }distributional/R/dist_multivariate_t.R0000644000176200001440000001427615131656210017720 0ustar liggesusers#' The multivariate t-distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The multivariate t-distribution is a generalization of the univariate #' Student's t-distribution to multiple dimensions. It is commonly used for #' modeling heavy-tailed multivariate data and in robust statistics. #' #' @param df A numeric vector of degrees of freedom (must be positive). #' @param mu A list of numeric vectors for the distribution location parameter. #' @param sigma A list of matrices for the distribution scale matrix. #' #' @details #' #' `r pkgdown_doc_link("dist_multivariate_t")` #' #' In the following, let \eqn{\mathbf{X}} be a multivariate t random vector #' with degrees of freedom `df` = \eqn{\nu}, location parameter #' `mu` = \eqn{\boldsymbol{\mu}}, and scale matrix #' `sigma` = \eqn{\boldsymbol{\Sigma}}. #' #' **Support**: \eqn{\mathbf{x} \in \mathbb{R}^k}, where \eqn{k} is the #' dimension of the distribution #' #' **Mean**: \eqn{\boldsymbol{\mu}} for \eqn{\nu > 1}, undefined otherwise #' #' **Covariance matrix**: #' #' \deqn{ #' \text{Cov}(\mathbf{X}) = \frac{\nu}{\nu - 2} \boldsymbol{\Sigma} #' }{ #' Cov(X) = \nu / (\nu - 2) \Sigma #' } #' #' for \eqn{\nu > 2}, undefined otherwise #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(\mathbf{x}) = \frac{\Gamma\left(\frac{\nu + k}{2}\right)} #' {\Gamma\left(\frac{\nu}{2}\right) \nu^{k/2} \pi^{k/2} #' |\boldsymbol{\Sigma}|^{1/2}} #' \left[1 + \frac{1}{\nu}(\mathbf{x} - \boldsymbol{\mu})^T #' \boldsymbol{\Sigma}^{-1} (\mathbf{x} - \boldsymbol{\mu})\right]^{-\frac{\nu + k}{2}} #' }{ #' f(x) = \Gamma((\nu + k)/2) / (\Gamma(\nu/2) \nu^(k/2) \pi^(k/2) |\Sigma|^(1/2)) #' [1 + 1/\nu (x - \mu)^T \Sigma^(-1) (x - \mu)]^(-(\nu + k)/2) #' } #' #' where \eqn{k} is the dimension of the distribution and \eqn{\Gamma(\cdot)} is #' the gamma function. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(\mathbf{t}) = \int_{-\infty}^{t_1} \cdots \int_{-\infty}^{t_k} f(\mathbf{x}) \, d\mathbf{x} #' }{ #' F(t) = integral_{-\infty}^{t_1} ... integral_{-\infty}^{t_k} f(x) dx #' } #' #' This integral does not have a closed form solution and is approximated numerically. #' #' **Quantile function**: #' #' The equicoordinate quantile function finds \eqn{q} such that: #' #' \deqn{ #' P(X_1 \leq q, \ldots, X_k \leq q) = p #' }{ #' P(X_1 <= q, ..., X_k <= q) = p #' } #' #' This does not have a closed form solution and is approximated numerically. #' #' The marginal quantile function for each dimension \eqn{i} is: #' #' \deqn{ #' Q_i(p) = \mu_i + \sqrt{\Sigma_{ii}} \cdot t_{\nu}^{-1}(p) #' }{ #' Q_i(p) = \mu_i + sqrt(\Sigma_{ii}) * t_\nu^(-1)(p) #' } #' #' where \eqn{t_{\nu}^{-1}(p)} is the quantile function of the univariate #' Student's t-distribution with \eqn{\nu} degrees of freedom, and #' \eqn{\Sigma_{ii}} is the \eqn{i}-th diagonal element of `sigma`. #' #' @seealso [mvtnorm::dmvt], [mvtnorm::pmvt], [mvtnorm::qmvt], [mvtnorm::rmvt] #' #' @examples #' dist <- dist_multivariate_t( #' df = 5, #' mu = list(c(1, 2)), #' sigma = list(matrix(c(4, 2, 2, 3), ncol = 2)) #' ) #' dimnames(dist) <- c("x", "y") #' dist #' #' @examplesIf requireNamespace("mvtnorm", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, cbind(2, 1)) #' density(dist, cbind(2, 1), log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' quantile(dist, 0.7, kind = "marginal") #' #' @name dist_multivariate_t #' @export dist_multivariate_t <- function(df = 1, mu = 0, sigma = diag(1)) { df <- vec_cast(df, double()) if (any(df <= 0, na.rm = TRUE)) { abort("Degrees of freedom must be positive.") } new_dist( df = df, mu = mu, sigma = sigma, dimnames = colnames(sigma[[1]]), class = "dist_mvt" ) } #' @export format.dist_mvt <- function(x, digits = 2, ...) { sprintf( "MVT[%i](%s)", length(x[["mu"]]), format(x[["df"]], digits = digits, ...) ) } #' @export density.dist_mvt <- function(x, at, ..., na.rm = FALSE) { require_package("mvtnorm") if (is.list(at)) return(vapply(at, density, numeric(1L), x = x, ...)) mvtnorm::dmvt(at, delta = x[["mu"]], sigma = x[["sigma"]], df = x[["df"]], log = FALSE, ...) } #' @export log_density.dist_mvt <- function(x, at, ..., na.rm = FALSE) { require_package("mvtnorm") if (is.list(at)) return(vapply(at, log_density, numeric(1L), x = x, ...)) mvtnorm::dmvt(at, delta = x[["mu"]], sigma = x[["sigma"]], df = x[["df"]], log = TRUE, ...) } #' @export quantile.dist_mvt <- function(x, p, kind = c("equicoordinate", "marginal"), ..., na.rm = FALSE) { kind <- match.arg(kind) q <- if (kind == "marginal") { scale <- sqrt(diag(x[["sigma"]])) stats::qt(p, df = rep(x[["df"]], each = length(p))) * rep(scale, each = length(p)) + rep(x[["mu"]], each = length(p)) } else { require_package("mvtnorm") vapply(p, function(p, ...) { if (p == 0) return(-Inf) else if (p == 1) return(Inf) mvtnorm::qmvt(p, ...)$quantile }, numeric(1L), delta = x[["mu"]], sigma = x[["sigma"]], df = x[["df"]], ...) } matrix(q, nrow = length(p), ncol = dim(x)) } #' @export cdf.dist_mvt <- function(x, q, ..., na.rm = FALSE) { if (is.list(q)) return(vapply(q, cdf, numeric(1L), x = x, ...)) require_package("mvtnorm") mvtnorm::pmvt(upper = as.numeric(q), delta = x[["mu"]], sigma = x[["sigma"]], df = x[["df"]], ...)[1] } #' @export generate.dist_mvt <- function(x, times, ..., na.rm = FALSE) { require_package("mvtnorm") mvtnorm::rmvt(times, delta = x[["mu"]], sigma = x[["sigma"]], df = x[["df"]], ...) } #' @export mean.dist_mvt <- function(x, ...) { if (x[["df"]] <= 1) { warning("Mean is undefined for df <= 1") return(matrix(NA_real_, nrow = 1, ncol = length(x[["mu"]]))) } matrix(x[["mu"]], nrow = 1) } #' @export covariance.dist_mvt <- function(x, ...) { if (x[["df"]] <= 2) { warning("Covariance is undefined for df <= 2") return(list(matrix(NA_real_, nrow = length(x[["mu"]]), ncol = length(x[["mu"]])))) } list(x[["df"]] / (x[["df"]] - 2) * x[["sigma"]]) } #' @export dim.dist_mvt <- function(x) { length(x[["mu"]]) } distributional/R/dist_weibull.R0000644000176200001440000001100415131565727016330 0ustar liggesusers#' The Weibull distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Generalization of the gamma distribution. Often used in survival and #' time-to-event analyses. #' #' @inheritParams stats::dweibull #' #' @details #' #' `r pkgdown_doc_link("dist_weibull")` #' #' In the following, let \eqn{X} be a Weibull random variable with #' shape parameter `shape` = \eqn{k} and scale parameter `scale` = \eqn{\lambda}. #' #' **Support**: \eqn{[0, \infty)} #' #' **Mean**: #' #' \deqn{ #' E(X) = \lambda \Gamma\left(1 + \frac{1}{k}\right) #' }{ #' E(X) = \lambda \Gamma(1 + 1/k) #' } #' #' where \eqn{\Gamma} is the gamma function. #' #' **Variance**: #' #' \deqn{ #' \text{Var}(X) = \lambda^2 \left[\Gamma\left(1 + \frac{2}{k}\right) - \left(\Gamma\left(1 + \frac{1}{k}\right)\right)^2\right] #' }{ #' Var(X) = \lambda^2 [\Gamma(1 + 2/k) - (\Gamma(1 + 1/k))^2] #' } #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{k}{\lambda}\left(\frac{x}{\lambda}\right)^{k-1}e^{-(x/\lambda)^k}, \quad x \ge 0 #' }{ #' f(x) = (k/\lambda)(x/\lambda)^(k-1) e^(-(x/\lambda)^k), x >= 0 #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = 1 - e^{-(x/\lambda)^k}, \quad x \ge 0 #' }{ #' F(x) = 1 - e^(-(x/\lambda)^k), x >= 0 #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \sum_{n=0}^\infty \frac{t^n\lambda^n}{n!} \Gamma\left(1+\frac{n}{k}\right) #' }{ #' E(e^(tX)) = \sum_(n=0)^\infty (t^n \lambda^n / n!) \Gamma(1 + n/k) #' } #' #' **Skewness**: #' #' \deqn{ #' \gamma_1 = \frac{\mu^3 - 3\mu\sigma^2 - \mu^3}{\sigma^3} #' }{ #' \gamma_1 = (\mu_3 - 3\mu\sigma^2 - \mu^3) / \sigma^3 #' } #' #' where \eqn{\mu = E(X)}, \eqn{\sigma^2 = \text{Var}(X)}, and the third #' raw moment is #' #' \deqn{ #' \mu^3 = \lambda^3 \Gamma\left(1 + \frac{3}{k}\right) #' }{ #' \mu_3 = \lambda^3 \Gamma(1 + 3/k) #' } #' #' **Excess Kurtosis**: #' #' \deqn{ #' \gamma_2 = \frac{\mu^4 - 4\gamma_1\mu\sigma^3 - 6\mu^2\sigma^2 - \mu^4}{\sigma^4} - 3 #' }{ #' \gamma_2 = (\mu_4 - 4\gamma_1\mu\sigma^3 - 6\mu^2\sigma^2 - \mu^4) / \sigma^4 - 3 #' } #' #' where the fourth raw moment is #' #' \deqn{ #' \mu^4 = \lambda^4 \Gamma\left(1 + \frac{4}{k}\right) #' }{ #' \mu_4 = \lambda^4 \Gamma(1 + 4/k) #' } #' #' @seealso [stats::Weibull] #' #' @examples #' dist <- dist_weibull(shape = c(0.5, 1, 1.5, 5), scale = rep(1, 4)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_weibull #' @export dist_weibull <- function(shape, scale){ shape <- vec_cast(shape, double()) scale <- vec_cast(scale, double()) if(any(shape[!is.na(shape)] < 0)){ abort("The shape parameter of a Weibull distribution must be non-negative.") } if(any(scale[!is.na(scale)] <= 0)){ abort("The scale parameter of a Weibull distribution must be strictly positive.") } new_dist(shape = shape, scale = scale, class = "dist_weibull") } #' @export format.dist_weibull <- function(x, digits = 2, ...){ sprintf( "Weibull(%s, %s)", format(x[["shape"]], digits = digits, ...), format(x[["scale"]], digits = digits, ...) ) } #' @export density.dist_weibull <- function(x, at, ...){ stats::dweibull(at, x[["shape"]], x[["scale"]]) } #' @export log_density.dist_weibull <- function(x, at, ...){ stats::dweibull(at, x[["shape"]], x[["scale"]], log = TRUE) } #' @export quantile.dist_weibull <- function(x, p, ...){ stats::qweibull(p, x[["shape"]], x[["scale"]]) } #' @export cdf.dist_weibull <- function(x, q, ...){ stats::pweibull(q, x[["shape"]], x[["scale"]]) } #' @export generate.dist_weibull <- function(x, times, ...){ stats::rweibull(times, x[["shape"]], x[["scale"]]) } #' @export mean.dist_weibull <- function(x, ...){ x[["scale"]] * gamma(1 + 1/x[["shape"]]) } #' @export covariance.dist_weibull <- function(x, ...){ x[["scale"]]^2 * (gamma(1 + 2/x[["shape"]]) - gamma(1 + 1/x[["shape"]])^2) } #' @export skewness.dist_weibull <- function(x, ...) { mu <- mean(x) sigma <- sqrt(variance(x)) r <- mu / sigma gamma(1 + 3/x[["shape"]]) * (x[["scale"]]/sigma)^3 - 3*r - r^3 } #' @export kurtosis.dist_weibull <- function(x, ...) { mu <- mean(x) sigma <- sqrt(variance(x)) gamma <- skewness(x) r <- mu / sigma (x[["scale"]]/sigma)^4 * gamma(1 + 4/x[["shape"]]) - 4*gamma*r -6*r^2 - r^4 - 3 } distributional/R/dist_poisson.R0000644000176200001440000000643615131607222016357 0ustar liggesusers#' The Poisson Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Poisson distributions are frequently used to model counts. The Poisson #' distribution is commonly used to model the number of events occurring in #' a fixed interval of time or space when these events occur with a known #' constant mean rate and independently of the time since the last event. #' Examples include the number of emails received per hour, the number of #' decay events per second from a radioactive source, or the number of #' customers arriving at a store per day. #' #' @param lambda The rate parameter (mean and variance) of the distribution. #' Can be any positive number. This represents the expected number of events #' in the given interval. #' #' @details #' #' `r pkgdown_doc_link("dist_poisson")` #' #' #' In the following, let \eqn{X} be a Poisson random variable with parameter #' `lambda` = \eqn{\lambda}. #' #' **Support**: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} #' #' **Mean**: \eqn{\lambda} #' #' **Variance**: \eqn{\lambda} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!} #' }{ #' P(X = k) = \lambda^k e^(-\lambda) / k! #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X \le k) = e^{-\lambda} #' \sum_{i = 0}^{\lfloor k \rfloor} \frac{\lambda^i}{i!} #' }{ #' P(X \le k) = e^(-\lambda) #' \sum_{i = 0}^k \lambda^i / i! #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = e^{\lambda (e^t - 1)} #' }{ #' E(e^(tX)) = e^(\lambda (e^t - 1)) #' } #' #' **Skewness**: #' #' \deqn{ #' \gamma_1 = \frac{1}{\sqrt{\lambda}} #' }{ #' \gamma_1 = 1 / sqrt(\lambda) #' } #' #' **Excess kurtosis**: #' #' \deqn{ #' \gamma_2 = \frac{1}{\lambda} #' }{ #' \gamma_2 = 1 / \lambda #' } #' #' @seealso [stats::Poisson] #' #' @examples #' dist <- dist_poisson(lambda = c(1, 4, 10)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_poisson #' @export dist_poisson <- function(lambda){ lambda <- vec_cast(lambda, double()) if(any(lambda < 0)){ abort("The lambda parameter of an Poisson distribution must be non-negative.") } new_dist(l = lambda, class = "dist_poisson") } #' @export format.dist_poisson <- function(x, digits = 2, ...){ sprintf( "Pois(%s)", format(x[["l"]], digits = digits, ...) ) } #' @export density.dist_poisson <- function(x, at, ...){ stats::dpois(at, x[["l"]]) } #' @export log_density.dist_poisson <- function(x, at, ...){ stats::dpois(at, x[["l"]], log = TRUE) } #' @export quantile.dist_poisson <- function(x, p, ...){ stats::qpois(p, x[["l"]]) } #' @export cdf.dist_poisson <- function(x, q, ...){ stats::ppois(q, x[["l"]]) } #' @export generate.dist_poisson <- function(x, times, ...){ as.integer(stats::rpois(times, x[["l"]])) } #' @export mean.dist_poisson <- function(x, ...){ x[["l"]] } #' @export covariance.dist_poisson <- function(x, ...){ x[["l"]] } #' @export skewness.dist_poisson <- function(x, ...) 1 / sqrt(x[["l"]]) #' @export kurtosis.dist_poisson <- function(x, ...) 1 / x[["l"]] distributional/R/dist_beta.R0000644000176200001440000000755615131546063015611 0ustar liggesusers#' The Beta distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Beta distribution is a continuous probability distribution defined on the #' interval \[0, 1\], commonly used to model probabilities and proportions. #' #' @param shape1,shape2 The non-negative shape parameters of the Beta distribution. #' #' @details #' #' `r pkgdown_doc_link("dist_beta")` #' #' In the following, let \eqn{X} be a Beta random variable with parameters #' `shape1` = \eqn{\alpha} and `shape2` = \eqn{\beta}. #' #' **Support**: \eqn{x \in [0, 1]} #' #' **Mean**: \eqn{\frac{\alpha}{\alpha + \beta}} #' #' **Variance**: \eqn{\frac{\alpha\beta}{(\alpha + \beta)^2(\alpha + \beta + 1)}} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{x^{\alpha - 1}(1-x)^{\beta - 1}}{B(\alpha, \beta)} = #' \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha)\Gamma(\beta)} x^{\alpha - 1}(1-x)^{\beta - 1} #' }{ #' f(x) = x^(alpha - 1) * (1-x)^(beta - 1) / B(alpha, beta) #' } #' #' where \eqn{B(\alpha, \beta) = \frac{\Gamma(\alpha)\Gamma(\beta)}{\Gamma(\alpha + \beta)}} #' is the Beta function. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = I_x(alpha, beta) = \frac{B(x; \alpha, \beta)}{B(\alpha, \beta)} #' }{ #' F(x) = I_x(\alpha, \beta) #' } #' #' where \eqn{I_x(\alpha, \beta)} is the regularized incomplete beta function and #' \eqn{B(x; \alpha, \beta) = \int_0^x t^{\alpha-1}(1-t)^{\beta-1} dt}. #' #' **Moment generating function (m.g.f)**: #' #' The moment generating function does not have a simple closed form, but the #' moments can be calculated as: #' #' \deqn{ #' E(X^k) = \prod_{r=0}^{k-1} \frac{\alpha + r}{\alpha + \beta + r} #' }{ #' E(X^k) = prod((alpha + r) / (alpha + beta + r)) for r = 0, ..., k-1 #' } #' #' @seealso [stats::Beta] #' #' @examples #' dist <- dist_beta(shape1 = c(0.5, 5, 1, 2, 2), shape2 = c(0.5, 1, 3, 2, 5)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_beta #' @export dist_beta <- function(shape1, shape2){ shape1 <- vec_cast(shape1, double()) shape2 <- vec_cast(shape2, double()) if(any((shape1 < 0) | shape2 < 0)){ abort("Shape parameters of a Beta distribution must be non-negative.") } new_dist(shape1 = shape1, shape2 = shape2, class = "dist_beta") } #' @export format.dist_beta <- function(x, digits = 2, ...){ sprintf( "Beta(%s, %s)", format(x[["shape1"]], digits = digits, ...), format(x[["shape2"]], digits = digits, ...) ) } #' @export density.dist_beta <- function(x, at, ...){ stats::dbeta(at, x[["shape1"]], x[["shape2"]]) } #' @export log_density.dist_beta <- function(x, at, ...){ stats::dbeta(at, x[["shape1"]], x[["shape2"]], log = TRUE) } #' @export quantile.dist_beta <- function(x, p, ...){ stats::qbeta(p, x[["shape1"]], x[["shape2"]]) } #' @export cdf.dist_beta <- function(x, q, ...){ stats::pbeta(q, x[["shape1"]], x[["shape2"]]) } #' @export generate.dist_beta <- function(x, times, ...){ stats::rbeta(times, x[["shape1"]], x[["shape2"]]) } #' @export mean.dist_beta <- function(x, ...){ x[["shape1"]]/(x[["shape1"]] + x[["shape2"]]) } #' @export covariance.dist_beta <- function(x, ...){ a <- x[["shape1"]] b <- x[["shape2"]] a*b/((a+b)^2*(a+b+1)) } #' @export skewness.dist_beta <- function(x, ...) { a <- x[["shape1"]] b <- x[["shape2"]] 2 * (b - a) * sqrt(a + b + 1) / (a + b + 2) * sqrt(a * b) } #' @export kurtosis.dist_beta <- function(x, ...) { a <- x[["shape1"]] b <- x[["shape2"]] num <- 6 * ((a - b)^2 * (a + b + 1) - (a * b) * (a + b + 2)) denom <- a * b * (a + b + 2) * (a + b + 3) num / denom } #' @export has_symmetry.dist_beta <- function(x, ...) { a <- x[["shape1"]] b <- x[["shape2"]] a == b }distributional/R/dist_cauchy.R0000644000176200001440000000566315131601420016134 0ustar liggesusers#' The Cauchy distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Cauchy distribution is the student's t distribution with one degree of #' freedom. The Cauchy distribution does not have a well defined mean or #' variance. Cauchy distributions often appear as priors in Bayesian contexts #' due to their heavy tails. #' #' @inheritParams stats::dcauchy #' #' @details #' #' `r pkgdown_doc_link("dist_cauchy")` #' #' In the following, let \eqn{X} be a Cauchy variable with mean #' `location =` \eqn{x_0} and `scale` = \eqn{\gamma}. #' #' **Support**: \eqn{R}, the set of all real numbers #' #' **Mean**: Undefined. #' #' **Variance**: Undefined. #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{\pi \gamma \left[1 + \left(\frac{x - x_0}{\gamma} \right)^2 \right]} #' }{ #' f(x) = 1 / (\pi \gamma (1 + ((x - x_0) / \gamma)^2) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(t) = \frac{1}{\pi} \arctan \left( \frac{t - x_0}{\gamma} \right) + #' \frac{1}{2} #' }{ #' F(t) = arctan((t - x_0) / \gamma) / \pi + 1/2 #' } #' #' **Moment generating function (m.g.f)**: #' #' Does not exist. #' #' @seealso [stats::Cauchy] #' #' @examples #' dist <- dist_cauchy(location = c(0, 0, 0, -2), scale = c(0.5, 1, 2, 1)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_cauchy #' @export dist_cauchy <- function(location, scale){ location <- vec_cast(location, double()) scale <- vec_cast(scale, double()) if(any(scale[!is.na(scale)] <= 0)){ abort("The scale parameter of a Cauchy distribution must strictly positive.") } new_dist(location = location, scale = scale, class = "dist_cauchy") } #' @export format.dist_cauchy <- function(x, digits = 2, ...){ sprintf( "Cauchy(%s, %s)", format(x[["location"]], digits = digits, ...), format(x[["scale"]], digits = digits, ...) ) } #' @export density.dist_cauchy <- function(x, at, ...){ stats::dcauchy(at, x[["location"]], x[["scale"]]) } #' @export log_density.dist_cauchy <- function(x, at, ...){ stats::dcauchy(at, x[["location"]], x[["scale"]], log = TRUE) } #' @export quantile.dist_cauchy <- function(x, p, ...){ stats::qcauchy(p, x[["location"]], x[["scale"]]) } #' @export cdf.dist_cauchy <- function(x, q, ...){ stats::pcauchy(q, x[["location"]], x[["scale"]]) } #' @export generate.dist_cauchy <- function(x, times, ...){ stats::rcauchy(times, x[["location"]], x[["scale"]]) } #' @export mean.dist_cauchy <- function(x, ...){ NA_real_ } #' @export covariance.dist_cauchy <- function(x, ...){ NA_real_ } #' @export skewness.dist_cauchy <- function(x, ...){ NA_real_ } #' @export kurtosis.dist_cauchy <- function(x, ...){ NA_real_ } #' @export has_symmetry.dist_cauchy <- function(x, ...) TRUEdistributional/R/dist_poisson_inverse_gaussian.R0000644000176200001440000000772215131565724022015 0ustar liggesusers#' The Poisson-Inverse Gaussian distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Poisson-Inverse Gaussian distribution is a compound Poisson distribution #' where the rate parameter follows an Inverse Gaussian distribution. It is #' useful for modeling overdispersed count data. #' #' @inheritParams actuar::dpoisinvgauss #' #' @details #' #' `r pkgdown_doc_link("dist_poisson_inverse_gaussian")` #' #' #' In the following, let \eqn{X} be a Poisson-Inverse Gaussian random variable #' with parameters `mean` = \eqn{\mu} and `shape` = \eqn{\phi}. #' #' **Support**: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} #' #' **Mean**: \eqn{\mu} #' #' **Variance**: \eqn{\frac{\mu}{\phi}(\mu^2 + \phi)}{\mu/\phi (\mu^2 + \phi)} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = x) = \frac{e^{\phi}}{\sqrt{2\pi}} #' \left(\frac{\phi}{\mu^2}\right)^{x/2} #' \frac{1}{x!} #' \int_0^\infty u^{x-1/2} #' \exp\left(-\frac{\phi u}{2} - \frac{\phi}{2\mu^2 u}\right) du #' }{ #' P(X = x) = (e^\phi / sqrt(2\pi)) (\phi/\mu^2)^(x/2) (1/x!) #' \int_0^\infty u^(x-1/2) exp(-\phi u/2 - \phi/(2\mu^2 u)) du #' } #' #' for \eqn{x = 0, 1, 2, \ldots}{x = 0, 1, 2, ...} #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X \le x) = \sum_{k=0}^{\lfloor x \rfloor} P(X = k) #' }{ #' P(X <= x) = sum_{k=0}^x P(X = k) #' } #' #' The c.d.f does not have a closed form and is approximated numerically. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \exp\left\{\phi\left[1 - \sqrt{1 - \frac{2\mu^2}{\phi}(e^t - 1)}\right]\right\} #' }{ #' E(e^(tX)) = exp{\phi [1 - sqrt(1 - 2\mu^2/\phi (e^t - 1))]} #' } #' #' for \eqn{t < -\log(1 + \phi/(2\mu^2))}{t < -log(1 + \phi/(2\mu^2))} #' #' @seealso [actuar::PoissonInverseGaussian], [actuar::dpoisinvgauss()], #' [actuar::ppoisinvgauss()], [actuar::qpoisinvgauss()], [actuar::rpoisinvgauss()] #' #' @examples #' dist <- dist_poisson_inverse_gaussian(mean = rep(0.1, 3), shape = c(0.4, 0.8, 1)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_poisson_inverse_gaussian #' @export dist_poisson_inverse_gaussian <- function(mean, shape){ mean <- vec_cast(mean, double()) shape <- vec_cast(shape, double()) if(any(mean[!is.na(mean)] <= 0)){ abort("The mean parameter of a Poisson-Inverse Gaussian distribution must be strictly positive.") } if(any(shape[!is.na(shape)] <= 0)){ abort("The shape parameter of a Poisson-Inverse Gaussian distribution must be strictly positive.") } new_dist(m = mean, s = shape, class = "dist_poisson_inverse_gaussian") } #' @export format.dist_poisson_inverse_gaussian <- function(x, digits = 2, ...){ sprintf( "PIG(%s, %s)", format(x[["m"]], digits = digits, ...), format(x[["s"]], digits = digits, ...) ) } #' @export density.dist_poisson_inverse_gaussian <- function(x, at, ...){ require_package("actuar") actuar::dpoisinvgauss(at, x[["m"]], x[["s"]]) } #' @export log_density.dist_poisson_inverse_gaussian <- function(x, at, ...){ require_package("actuar") actuar::dpoisinvgauss(at, x[["m"]], x[["s"]], log = TRUE) } #' @export quantile.dist_poisson_inverse_gaussian <- function(x, p, ...){ require_package("actuar") actuar::qpoisinvgauss(p, x[["m"]], x[["s"]]) } #' @export cdf.dist_poisson_inverse_gaussian <- function(x, q, ...){ require_package("actuar") actuar::ppoisinvgauss(q, x[["m"]], x[["s"]]) } #' @export generate.dist_poisson_inverse_gaussian <- function(x, times, ...){ require_package("actuar") actuar::rpoisinvgauss(times, x[["m"]], x[["s"]]) } #' @export mean.dist_poisson_inverse_gaussian <- function(x, ...){ x[["m"]] } #' @export covariance.dist_poisson_inverse_gaussian <- function(x, ...){ x[["m"]]/x[["s"]] * (x[["m"]]^2 + x[["s"]]) } distributional/R/dist_exponential.R0000644000176200001440000000505215131547231017207 0ustar liggesusers#' The Exponential Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Exponential distributions are frequently used to model waiting times and the #' time between events in a Poisson process. #' #' @inheritParams stats::dexp #' #' #' @details #' #' `r pkgdown_doc_link("dist_exponential")` #' #' #' In the following, let \eqn{X} be an Exponential random variable with #' parameter `rate` = \eqn{\lambda}. #' #' **Support**: \eqn{x \in [0, \infty)}{x >= 0} #' #' **Mean**: \eqn{\frac{1}{\lambda}}{1 / \lambda} #' #' **Variance**: \eqn{\frac{1}{\lambda^2}}{1 / \lambda^2} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \lambda e^{-\lambda x} #' }{ #' f(x) = \lambda e^(-\lambda x) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = 1 - e^{-\lambda x} #' }{ #' F(x) = 1 - e^(-\lambda x) #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \frac{\lambda}{\lambda - t}, \quad t < \lambda #' }{ #' E(e^(tX)) = \lambda / (\lambda - t), for t < \lambda #' } #' #' @seealso [stats::Exponential] #' #' @examples #' dist <- dist_exponential(rate = c(2, 1, 2/3)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_exponential #' @export dist_exponential <- function(rate){ rate <- vec_cast(rate, double()) if(any(rate < 0)){ abort("The rate parameter of an Exponential distribution must be non-negative.") } new_dist(rate = rate, class = "dist_exponential") } #' @export format.dist_exponential <- function(x, digits = 2, ...){ sprintf( "Exp(%s)", format(x[["rate"]], digits = digits, ...) ) } #' @export density.dist_exponential <- function(x, at, ...){ stats::dexp(at, x[["rate"]]) } #' @export log_density.dist_exponential <- function(x, at, ...){ stats::dexp(at, x[["rate"]], log = TRUE) } #' @export quantile.dist_exponential <- function(x, p, ...){ stats::qexp(p, x[["rate"]]) } #' @export cdf.dist_exponential <- function(x, q, ...){ stats::pexp(q, x[["rate"]]) } #' @export generate.dist_exponential <- function(x, times, ...){ stats::rexp(times, x[["rate"]]) } #' @export mean.dist_exponential <- function(x, ...){ 1/x[["rate"]] } #' @export covariance.dist_exponential <- function(x, ...){ 1/x[["rate"]]^2 } #' @export skewness.dist_exponential <- function(x, ...) 2 #' @export kurtosis.dist_exponential <- function(x, ...) 6 distributional/R/dist_chisq.R0000644000176200001440000001143715131607167016001 0ustar liggesusers#' The (non-central) Chi-Squared Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Chi-square distributions show up often in frequentist settings #' as the sampling distribution of test statistics, especially #' in maximum likelihood estimation settings. #' #' @param df Degrees of freedom (non-centrality parameter). Can be any #' positive real number. #' @param ncp Non-centrality parameter. Can be any non-negative real number. #' Defaults to 0 (central chi-squared distribution). #' #' @details #' #' #' `r pkgdown_doc_link("dist_chisq")` #' #' In the following, let \eqn{X} be a \eqn{\chi^2} random variable with #' `df` = \eqn{k} and `ncp` = \eqn{\lambda}. #' #' **Support**: \eqn{R^+}, the set of positive real numbers #' #' **Mean**: \eqn{k + \lambda} #' #' **Variance**: \eqn{2(k + 2\lambda)} #' #' **Probability density function (p.d.f)**: #' #' For the central chi-squared distribution (\eqn{\lambda = 0}): #' #' \deqn{ #' f(x) = \frac{1}{2^{k/2} \Gamma(k/2)} x^{k/2 - 1} e^{-x/2} #' }{ #' f(x) = 1 / (2^(k/2) \Gamma(k/2)) x^(k/2 - 1) exp(-x/2) #' } #' #' For the non-central chi-squared distribution (\eqn{\lambda > 0}): #' #' \deqn{ #' f(x) = \frac{1}{2} e^{-(x+\lambda)/2} \left(\frac{x}{\lambda}\right)^{k/4-1/2} I_{k/2-1}\left(\sqrt{\lambda x}\right) #' }{ #' f(x) = (1/2) exp(-(x+\lambda)/2) (x/\lambda)^(k/4-1/2) I_(k/2-1)(sqrt(\lambda x)) #' } #' #' where \eqn{I_\nu(z)} is the modified Bessel function of the first kind. #' #' **Cumulative distribution function (c.d.f)**: #' #' For the central chi-squared distribution (\eqn{\lambda = 0}): #' #' \deqn{ #' F(x) = \frac{\gamma(k/2, x/2)}{\Gamma(k/2)} = P(k/2, x/2) #' }{ #' F(x) = \gamma(k/2, x/2) / \Gamma(k/2) = P(k/2, x/2) #' } #' #' where \eqn{\gamma(s, x)} is the lower incomplete gamma function and #' \eqn{P(s, x)} is the regularized gamma function. #' #' For the non-central chi-squared distribution (\eqn{\lambda > 0}): #' #' \deqn{ #' F(x) = \sum_{j=0}^{\infty} \frac{e^{-\lambda/2} (\lambda/2)^j}{j!} P(k/2 + j, x/2) #' }{ #' F(x) = sum_(j=0)^(\infty) exp(-\lambda/2) (\lambda/2)^j / j! P(k/2 + j, x/2) #' } #' #' This is approximated numerically. #' #' **Moment generating function (m.g.f)**: #' #' For the central chi-squared distribution (\eqn{\lambda = 0}): #' #' \deqn{ #' E(e^{tX}) = (1 - 2t)^{-k/2}, \quad t < 1/2 #' }{ #' E(e^(tX)) = (1 - 2t)^(-k/2), t < 1/2 #' } #' #' For the non-central chi-squared distribution (\eqn{\lambda > 0}): #' #' \deqn{ #' E(e^{tX}) = \frac{e^{\lambda t / (1 - 2t)}}{(1 - 2t)^{k/2}}, \quad t < 1/2 #' }{ #' E(e^(tX)) = exp(\lambda t / (1 - 2t)) / (1 - 2t)^(k/2), t < 1/2 #' } #' #' **Skewness**: #' #' \deqn{ #' \gamma_1 = \frac{2^{3/2}(k + 3\lambda)}{(k + 2\lambda)^{3/2}} #' }{ #' \gamma_1 = 2^(3/2) (k + 3\lambda) / (k + 2\lambda)^(3/2) #' } #' #' For the central case (\eqn{\lambda = 0}), this simplifies to #' \eqn{\sqrt{8/k}}. #' #' **Excess Kurtosis**: #' #' \deqn{ #' \gamma_2 = \frac{12(k + 4\lambda)}{(k + 2\lambda)^2} #' }{ #' \gamma_2 = 12(k + 4\lambda) / (k + 2\lambda)^2 #' } #' #' For the central case (\eqn{\lambda = 0}), this simplifies to #' \eqn{12/k}. #' #' #' @seealso [stats::Chisquare] #' #' @examples #' dist <- dist_chisq(df = c(1,2,3,4,6,9)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_chisq #' @export dist_chisq <- function(df, ncp = 0){ df <- vec_cast(df, double()) ncp <- vec_cast(ncp, double()) if(any(df < 0)){ abort("The degrees of freedom parameter of a Chi-Squared distribution must be non-negative.") } new_dist(df = df, ncp = ncp, class = "dist_chisq") } #' @export format.dist_chisq <- function(x, digits = 2, ...){ sprintf( if (is_utf8_output()) "\u1d6a\u00b2(%s)" else "x2(%s)", format(x[["df"]], digits = digits, ...) ) } #' @export density.dist_chisq <- function(x, at, ...){ stats::dchisq(at, x[["df"]], x[["ncp"]]) } #' @export log_density.dist_chisq <- function(x, at, ...){ stats::dchisq(at, x[["df"]], x[["ncp"]], log = TRUE) } #' @export quantile.dist_chisq <- function(x, p, ...){ stats::qchisq(p, x[["df"]], x[["ncp"]]) } #' @export cdf.dist_chisq <- function(x, q, ...){ stats::pchisq(q, x[["df"]], x[["ncp"]]) } #' @export generate.dist_chisq <- function(x, times, ...){ stats::rchisq(times, x[["df"]], x[["ncp"]]) } #' @export mean.dist_chisq <- function(x, ...){ x[["df"]] + x[["ncp"]] } #' @export covariance.dist_chisq <- function(x, ...){ 2*(x[["df"]] + 2*x[["ncp"]]) } #' @export skewness.dist_chisq <- function(x, ...) sqrt(8 / x[["df"]]) #' @export kurtosis.dist_chisq <- function(x, ...) 12 / x[["df"]] distributional/R/distributional-package.R0000644000176200001440000000061015112047012020245 0ustar liggesusers#' @keywords internal "_PACKAGE" # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start #' @importFrom lifecycle deprecate_soft #' @importFrom lifecycle deprecated ## usethis namespace: end #' @import vctrs #' @import rlang NULL # Used for generating transformation function expressions globalVariables("x") distributional/R/dist_f.R0000644000176200001440000001623615131607175015120 0ustar liggesusers#' The F Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The F distribution is commonly used in statistical inference, particularly #' in the analysis of variance (ANOVA), testing the equality of variances, #' and in regression analysis. It arises as the ratio of two scaled #' chi-squared distributions divided by their respective degrees of freedom. #' #' @param df1 Degrees of freedom for the numerator. Can be any positive number. #' @param df2 Degrees of freedom for the denominator. Can be any positive number. #' @param ncp Non-centrality parameter. If `NULL` (default), the central F #' distribution is used. If specified, must be non-negative. #' #' @details #' #' `r pkgdown_doc_link("dist_f")` #' #' In the following, let \eqn{X} be an F random variable with numerator #' degrees of freedom `df1` = \eqn{d_1} and denominator degrees of freedom #' `df2` = \eqn{d_2}. #' #' **Support**: \eqn{x \in (0, \infty)} #' #' **Mean**: #' #' For the central F distribution (\code{ncp = NULL}): #' #' \deqn{ #' E(X) = \frac{d_2}{d_2 - 2} #' }{ #' E(X) = d_2 / (d_2 - 2) #' } #' #' for \eqn{d_2 > 2}, otherwise undefined. #' #' For the non-central F distribution with non-centrality parameter #' `ncp` = \eqn{\lambda}: #' #' \deqn{ #' E(X) = \frac{d_2 (d_1 + \lambda)}{d_1 (d_2 - 2)} #' }{ #' E(X) = d_2 (d_1 + \lambda) / (d_1 (d_2 - 2)) #' } #' #' for \eqn{d_2 > 2}, otherwise undefined. #' #' **Variance**: #' #' For the central F distribution (\code{ncp = NULL}): #' #' \deqn{ #' \text{Var}(X) = \frac{2 d_2^2 (d_1 + d_2 - 2)}{d_1 (d_2 - 2)^2 (d_2 - 4)} #' }{ #' Var(X) = 2 d_2^2 (d_1 + d_2 - 2) / (d_1 (d_2 - 2)^2 (d_2 - 4)) #' } #' #' for \eqn{d_2 > 4}, otherwise undefined. #' #' For the non-central F distribution with non-centrality parameter #' `ncp` = \eqn{\lambda}: #' #' \deqn{ #' \text{Var}(X) = \frac{2 d_2^2}{d_1^2} \cdot \frac{(d_1 + \lambda)^2 + (d_1 + 2\lambda)(d_2 - 2)}{(d_2 - 2)^2 (d_2 - 4)} #' }{ #' Var(X) = 2 d_2^2 / d_1^2 * ((d_1 + lambda)^2 + (d_1 + 2*lambda)(d_2 - 2)) / ((d_2 - 2)^2 (d_2 - 4)) #' } #' #' for \eqn{d_2 > 4}, otherwise undefined. #' #' **Skewness**: #' #' For the central F distribution (\code{ncp = NULL}): #' #' \deqn{ #' \text{Skew}(X) = \frac{(2 d_1 + d_2 - 2) \sqrt{8 (d_2 - 4)}}{(d_2 - 6) \sqrt{d_1 (d_1 + d_2 - 2)}} #' }{ #' Skew(X) = (2 d_1 + d_2 - 2) sqrt(8 (d_2 - 4)) / ((d_2 - 6) sqrt(d_1 (d_1 + d_2 - 2))) #' } #' #' for \eqn{d_2 > 6}, otherwise undefined. #' #' For the non-central F distribution, skewness has no simple closed form #' and is not computed. #' #' **Excess Kurtosis**: #' #' For the central F distribution (\code{ncp = NULL}): #' #' \deqn{ #' \text{Kurt}(X) = \frac{12[d_1 (5 d_2 - 22)(d_1 + d_2 - 2) + (d_2 - 4)(d_2 - 2)^2]}{d_1 (d_2 - 6)(d_2 - 8)(d_1 + d_2 - 2)} #' }{ #' Kurt(X) = 12[d_1 (5 d_2 - 22)(d_1 + d_2 - 2) + (d_2 - 4)(d_2 - 2)^2] / (d_1 (d_2 - 6)(d_2 - 8)(d_1 + d_2 - 2)) #' } #' #' for \eqn{d_2 > 8}, otherwise undefined. #' #' For the non-central F distribution, kurtosis has no simple closed form #' and is not computed. #' #' **Probability density function (p.d.f)**: #' #' For the central F distribution (\code{ncp = NULL}): #' #' \deqn{ #' f(x) = \frac{\sqrt{\frac{(d_1 x)^{d_1} d_2^{d_2}}{(d_1 x + d_2)^{d_1 + d_2}}}}{x \, B(d_1/2, d_2/2)} #' }{ #' f(x) = sqrt((d_1 x)^d_1 d_2^d_2 / (d_1 x + d_2)^(d_1 + d_2)) / (x B(d_1/2, d_2/2)) #' } #' #' where \eqn{B(\cdot, \cdot)} is the beta function. #' #' For the non-central F distribution, the density involves an infinite #' series and is approximated numerically. #' #' **Cumulative distribution function (c.d.f)**: #' #' The c.d.f. does not have a simple closed form expression and is #' approximated numerically using regularized incomplete beta functions #' and related special functions. #' #' **Moment generating function (m.g.f)**: #' #' The moment generating function for the F distribution does not exist #' in general (it diverges for \eqn{t > 0}). #' #' @seealso [stats::FDist] #' #' @examples #' dist <- dist_f(df1 = c(1,2,5,10,100), df2 = c(1,1,2,1,100)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_f #' @export dist_f <- function(df1, df2, ncp = NULL){ df1 <- vec_cast(df1, double()) df2 <- vec_cast(df2, double()) ncp <- vec_cast(ncp, double()) if(any((df1 < 0) | (df2 < 0))){ abort("The degrees of freedom parameters of the F distribution must be non-negative.") } if(is.null(ncp)){ new_dist(df1 = df1, df2 = df2, class = "dist_f") } else { new_dist(df1 = df1, df2 = df2, ncp = ncp, class = "dist_f") } } #' @export format.dist_f <- function(x, digits = 2, ...){ sprintf( "F(%s, %s)", format(x[["df1"]], digits = digits, ...), format(x[["df2"]], digits = digits, ...) ) } #' @export density.dist_f <- function(x, at, ...){ if(is.null(x[["ncp"]])) { stats::df(at, x[["df1"]], x[["df2"]]) } else { stats::df(at, x[["df1"]], x[["df2"]], x[["ncp"]]) } } #' @export log_density.dist_f <- function(x, at, ...){ if(is.null(x[["ncp"]])) { stats::df(at, x[["df1"]], x[["df2"]], log = TRUE) } else { stats::df(at, x[["df1"]], x[["df2"]], x[["ncp"]], log = TRUE) } } #' @export quantile.dist_f <- function(x, p, ...){ if(is.null(x[["ncp"]])) { stats::qf(p, x[["df1"]], x[["df2"]]) } else { stats::qf(p, x[["df1"]], x[["df2"]], x[["ncp"]]) } } #' @export cdf.dist_f <- function(x, q, ...){ if(is.null(x[["ncp"]])) { stats::pf(q, x[["df1"]], x[["df2"]]) } else { stats::pf(q, x[["df1"]], x[["df2"]], x[["ncp"]]) } } #' @export generate.dist_f <- function(x, times, ...){ if(is.null(x[["ncp"]])) { stats::rf(times, x[["df1"]], x[["df2"]]) } else { stats::rf(times, x[["df1"]], x[["df2"]], x[["ncp"]]) } } #' @export mean.dist_f <- function(x, ...){ df1 <- x[["df1"]] df2 <- x[["df2"]] if(df2 > 2) { if(is.null(x[["ncp"]])){ df2 / (df2 - 2) } else { (df2 * (df1 + x[["ncp"]])) / (df1 * (df2 - 2)) } } else { NA_real_ } } #' @export covariance.dist_f <- function(x, ...){ df1 <- x[["df1"]] df2 <- x[["df2"]] if(df2 > 4) { if(is.null(x[["ncp"]])){ (2 * df2^2 * (df1 + df2 - 2))/(df1*(df2-2)^2*(df2-4)) } else { 2*((df1 + x[["ncp"]])^2 + (df1 + 2*x[["ncp"]])*(df2 - 2))/((df2-2)^2*(df2-4)) * (df2^2/df1^2) } } else { NA_real_ } } #' @export skewness.dist_f <- function(x, ...) { df1 <- x[["df1"]] df2 <- x[["df2"]] if(!is.null(x[["ncp"]])) return(NA_real_) if (df2 > 6) { a <- (2 * df1 + df2 - 2) * sqrt(8 * (df2 - 4)) b <- (df2 - 6) * sqrt(df1 * (df1 + df2 - 2)) a / b } else { NA_real_ } } #' @export kurtosis.dist_f <- function(x, ...) { df1 <- x[["df1"]] df2 <- x[["df2"]] if(!is.null(x[["ncp"]])) return(NA_real_) if (df2 > 8) { a <- df1 * (5 * df2 - 22) * (df1 + df2 - 2) + (df2 - 4) * (df2 - 2)^2 b <- df1 * (df2 - 6) * (df2 - 8) * (df1 + df2 - 2) 12 * a / b } else { NA_real_ } } distributional/R/dist_degenerate.R0000644000176200001440000000466615131575021016774 0ustar liggesusers#' The degenerate distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The degenerate distribution takes a single value which is certain to be #' observed. It takes a single parameter, which is the value that is observed #' by the distribution. #' #' @param x The value of the distribution (location parameter). Can be any real number. #' #' @details #' #' `r pkgdown_doc_link("dist_degenerate")` #' #' In the following, let \eqn{X} be a degenerate random variable with value #' `x` = \eqn{k_0}. #' #' **Support**: \eqn{\{k_0\}}, a single point #' #' **Mean**: \eqn{\mu = k_0} #' #' **Variance**: \eqn{\sigma^2 = 0} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = 1 \textrm{ for } x = k_0 #' }{ #' f(x) = 1 for x = k_0 #' } #' \deqn{ #' f(x) = 0 \textrm{ for } x \neq k_0 #' }{ #' f(x) = 0 for x != k_0 #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(t) = 0 \textrm{ for } t < k_0 #' }{ #' F(t) = 0 for t < k_0 #' } #' \deqn{ #' F(t) = 1 \textrm{ for } t \ge k_0 #' }{ #' F(t) = 1 for t >= k_0 #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = e^{k_0 t} #' }{ #' E(e^(tX)) = e^(k_0 t) #' } #' #' **Skewness**: Undefined (NA) #' #' **Excess Kurtosis**: Undefined (NA) #' #' @seealso [stats::Distributions] #' #' @examples #' dist <- dist_degenerate(x = 1:5) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @export dist_degenerate <- function(x){ vec_is(x, numeric()) new_dist(x = x, class = "dist_degenerate") } #' @export format.dist_degenerate <- function(x, ...){ format(x[["x"]], ...) } #' @export density.dist_degenerate <- function(x, at, ...){ ifelse(at == x[["x"]], 1, 0) } #' @export quantile.dist_degenerate <- function(x, p, ...){ ifelse(p < 0 | p > 1, NaN, x[["x"]]) } #' @export cdf.dist_degenerate <- function(x, q, ...){ ifelse(q >= x[["x"]], 1, 0) } #' @export generate.dist_degenerate <- function(x, times, ...){ rep(x[["x"]], times) } #' @export mean.dist_degenerate <- function(x, ...){ x[["x"]] } #' @export covariance.dist_degenerate <- function(x, ...){ 0 } #' @export skewness.dist_degenerate <- function(x, ...) NA_real_ #' @export kurtosis.dist_degenerate <- function(x, ...) NA_real_ distributional/R/dist_normal.R0000644000176200001440000001351415131607214016151 0ustar liggesusers#' The Normal distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Normal distribution is ubiquitous in statistics, partially because #' of the central limit theorem, which states that sums of i.i.d. random #' variables eventually become Normal. Linear transformations of Normal #' random variables result in new random variables that are also Normal. If #' you are taking an intro stats course, you'll likely use the Normal #' distribution for Z-tests and in simple linear regression. Under #' regularity conditions, maximum likelihood estimators are #' asymptotically Normal. The Normal distribution is also called the #' gaussian distribution. #' #' @param mu,mean The mean (location parameter) of the distribution, which is also #' the mean of the distribution. Can be any real number. #' @param sigma,sd The standard deviation (scale parameter) of the distribution. #' Can be any positive number. If you would like a Normal distribution with #' **variance** \eqn{\sigma^2}, be sure to take the square root, as this is a #' common source of errors. #' #' @details #' #' `r pkgdown_doc_link("dist_normal")` #' #' #' In the following, let \eqn{X} be a Normal random variable with mean #' `mu` = \eqn{\mu} and standard deviation `sigma` = \eqn{\sigma}. #' #' **Support**: \eqn{R}, the set of all real numbers #' #' **Mean**: \eqn{\mu} #' #' **Variance**: \eqn{\sigma^2} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{1}{\sqrt{2 \pi \sigma^2}} e^{-(x - \mu)^2 / 2 \sigma^2} #' }{ #' f(x) = 1 / sqrt(2 \pi \sigma^2) exp(-(x - \mu)^2 / (2 \sigma^2)) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(t) = \int_{-\infty}^t \frac{1}{\sqrt{2 \pi \sigma^2}} e^{-(x - \mu)^2 / 2 \sigma^2} dx #' }{ #' F(t) = integral_{-\infty}^t 1 / sqrt(2 \pi \sigma^2) exp(-(x - \mu)^2 / (2 \sigma^2)) dx #' } #' #' This integral does not have a closed form solution and is #' approximated numerically. The c.d.f. of a standard Normal is sometimes #' called the "error function". The notation \eqn{\Phi(t)} also stands #' for the c.d.f. of a standard Normal evaluated at \eqn{t}. Z-tables #' list the value of \eqn{\Phi(t)} for various \eqn{t}. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = e^{\mu t + \sigma^2 t^2 / 2} #' }{ #' E(e^(tX)) = e^(\mu t + \sigma^2 t^2 / 2) #' } #' #' @seealso [stats::Normal] #' #' @examples #' dist <- dist_normal(mu = 1:5, sigma = 3) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_normal #' @export dist_normal <- function(mu = 0, sigma = 1, mean = mu, sd = sigma){ mean <- vec_cast(mean, double()) sd <- vec_cast(sd, double()) if(any(sd[!is.na(sd)] < 0)){ abort("Standard deviation of a normal distribution must be non-negative") } new_dist(mu = mean, sigma = sd, class = "dist_normal") } #' @export format.dist_normal <- function(x, digits = 2, ...){ sprintf( "N(%s, %s)", format(x[["mu"]], digits = digits, ...), format(x[["sigma"]]^2, digits = digits, ...) ) } #' @export density.dist_normal <- function(x, at, ...){ stats::dnorm(at, x[["mu"]], x[["sigma"]]) } #' @export log_density.dist_normal <- function(x, at, ...){ stats::dnorm(at, x[["mu"]], x[["sigma"]], log = TRUE) } #' @export quantile.dist_normal <- function(x, p, ...){ stats::qnorm(p, x[["mu"]], x[["sigma"]]) } #' @export log_quantile.dist_normal <- function(x, p, ...){ stats::qnorm(p, x[["mu"]], x[["sigma"]], log.p = TRUE) } #' @export cdf.dist_normal <- function(x, q, ...){ stats::pnorm(q, x[["mu"]], x[["sigma"]]) } #' @export log_cdf.dist_normal <- function(x, q, ...){ stats::pnorm(q, x[["mu"]], x[["sigma"]], log.p = TRUE) } #' @export generate.dist_normal <- function(x, times, ...){ stats::rnorm(times, x[["mu"]], x[["sigma"]]) } #' @export mean.dist_normal <- function(x, ...){ x[["mu"]] } #' @export covariance.dist_normal <- function(x, ...){ x[["sigma"]]^2 } #' @export skewness.dist_normal <- function(x, ...) 0 #' @export kurtosis.dist_normal <- function(x, ...) 0 #' @export has_symmetry.dist_normal <- function(x, ...) TRUE #' @export Ops.dist_normal <- function(e1, e2){ ok <- switch(.Generic, `+` = , `-` = , `*` = , `/` = TRUE, FALSE) if (!ok) { return(NextMethod()) } if(.Generic == "/" && inherits(e2, "dist_normal")){ abort(sprintf("Cannot divide by a normal distribution")) } if(.Generic %in% c("-", "+") && missing(e2)){ e2 <- e1 e1 <- if(.Generic == "+") 1 else -1 .Generic <- "*" } if(.Generic == "-"){ .Generic <- "+" e2 <- -e2 } else if(.Generic == "/"){ .Generic <- "*" e2 <- 1/e2 } # Ops between two normals if(inherits(e1, "dist_normal") && inherits(e2, "dist_normal")){ if(.Generic == "*"){ abort(sprintf("Multiplying two normal distributions is not supported.")) } e1$mu <- e1$mu + e2$mu e1$sigma <- sqrt(e1$sigma^2 + e2$sigma^2) return(e1) } # Ops between a normal and scalar if(inherits(e1, "dist_normal")){ dist <- e1 scalar <- e2 } else { dist <- e2 scalar <- e1 } if(!is.numeric(scalar)){ abort(sprintf("Cannot %s a `%s` with a normal distribution", switch(.Generic, `+` = "add", `-` = "subtract", `*` = "multiply", `/` = "divide"), class(scalar))) } if(.Generic == "+"){ dist$mu <- dist$mu + scalar } else if(.Generic == "*"){ dist$mu <- dist$mu * scalar dist$sigma <- dist$sigma * abs(scalar) } dist } #' @method Math dist_normal #' @export Math.dist_normal <- function(x, ...) { # Shortcut to get log-normal distribution from Normal. if(.Generic == "exp") return(vec_data(dist_lognormal(x[["mu"]], x[["sigma"]]))[[1]]) NextMethod() } distributional/R/dist_inverse_gamma.R0000644000176200001440000000744215131556517017512 0ustar liggesusers#' The Inverse Gamma distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Inverse Gamma distribution is commonly used as a prior distribution #' in Bayesian statistics, particularly for variance parameters. #' #' @inheritParams actuar::dinvgamma #' #' @details #' #' `r pkgdown_doc_link("dist_inverse_gamma")` #' #' In the following, let \eqn{X} be an Inverse Gamma random variable with #' shape parameter `shape` = \eqn{\alpha} and rate parameter #' `rate` = \eqn{\beta} (equivalently, scale = \eqn{1/\beta}). #' #' **Support**: \eqn{x \in (0, \infty)} #' #' **Mean**: \eqn{\frac{\beta}{\alpha - 1}} for \eqn{\alpha > 1}, #' otherwise undefined #' #' **Variance**: \eqn{\frac{\beta^2}{(\alpha - 1)^2 (\alpha - 2)}} #' for \eqn{\alpha > 2}, otherwise undefined #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{\beta^\alpha}{\Gamma(\alpha)} x^{-\alpha - 1} #' e^{-\beta/x} #' }{ #' f(x) = \beta^\alpha / \Gamma(\alpha) x^(-\alpha - 1) e^(-\beta/x) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \frac{\Gamma(\alpha, \beta/x)}{\Gamma(\alpha)} = #' Q(\alpha, \beta/x) #' }{ #' F(x) = \Gamma(\alpha, \beta/x) / \Gamma(\alpha) = Q(\alpha, \beta/x) #' } #' #' where \eqn{\Gamma(\alpha, z)} is the upper incomplete gamma function and #' \eqn{Q} is the regularized incomplete gamma function. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' M_X(t) = \frac{2 (-\beta t)^{\alpha/2}}{\Gamma(\alpha)} #' K_\alpha\left(\sqrt{-4\beta t}\right) #' }{ #' M_X(t) = 2 (-\beta t)^(\alpha/2) / \Gamma(\alpha) K_\alpha(sqrt(-4\beta t)) #' } #' #' for \eqn{t < 0}, where \eqn{K_\alpha} is the modified Bessel function #' of the second kind. The MGF does not exist for \eqn{t \ge 0}. #' #' @seealso [actuar::InverseGamma] #' #' @examples #' dist <- dist_inverse_gamma(shape = c(1,2,3,3), rate = c(1,1,1,2)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_inverse_gamma #' @export dist_inverse_gamma <- function(shape, rate = 1/scale, scale){ shape <- vec_cast(shape, double()) rate <- vec_cast(rate, double()) if(any(shape <= 0)){ abort("The shape parameter of a Inverse Gamma distribution must be strictly positive.") } if(any(rate <= 0)){ abort("The rate/scale parameter of a Inverse Gamma distribution must be strictly positive.") } new_dist(s = shape, r = rate, class = "dist_inverse_gamma") } #' @export format.dist_inverse_gamma <- function(x, digits = 2, ...){ sprintf( "InvGamma(%s, %s)", format(x[["s"]], digits = digits, ...), format(1/x[["r"]], digits = digits, ...) ) } #' @export density.dist_inverse_gamma <- function(x, at, ...){ require_package("actuar") actuar::dinvgamma(at, x[["s"]], x[["r"]]) } #' @export log_density.dist_inverse_gamma <- function(x, at, ...){ require_package("actuar") actuar::dinvgamma(at, x[["s"]], x[["r"]], log = TRUE) } #' @export quantile.dist_inverse_gamma <- function(x, p, ...){ require_package("actuar") actuar::qinvgamma(p, x[["s"]], x[["r"]]) } #' @export cdf.dist_inverse_gamma <- function(x, q, ...){ require_package("actuar") actuar::pinvgamma(q, x[["s"]], x[["r"]]) } #' @export generate.dist_inverse_gamma <- function(x, times, ...){ require_package("actuar") actuar::rinvgamma(times, x[["s"]], x[["r"]]) } #' @export mean.dist_inverse_gamma <- function(x, ...){ if(x[["s"]] <= 1) return(NA_real_) 1/(x[["r"]]*(x[["s"]]-1)) } #' @export covariance.dist_inverse_gamma <- function(x, ...){ if(x[["s"]] <= 2) return(NA_real_) 1/(x[["r"]]^2*(x[["s"]]-1)^2*(x[["s"]]-2)) } distributional/R/dist_gpd.R0000644000176200001440000001374615131551670015446 0ustar liggesusers#' The Generalized Pareto Distribution #' #' @description #' The GPD distribution is commonly used to model the tails of distributions, #' particularly in extreme value theory. #' #' The Pickands–Balkema–De Haan theorem states that for a large class of #' distributions, the tail (above some threshold) can be approximated by a GPD. #' #' @param location the location parameter \eqn{a} of the GPD distribution. #' @param scale the scale parameter \eqn{b} of the GPD distribution. #' @param shape the shape parameter \eqn{s} of the GPD distribution. #' #' @details #' #' `r pkgdown_doc_link("dist_gpd")` #' #' In the following, let \eqn{X} be a Generalized Pareto random variable with #' parameters `location` = \eqn{a}, `scale` = \eqn{b > 0}, and #' `shape` = \eqn{s}. #' #' **Support**: #' \eqn{x \ge a} if \eqn{s \ge 0}, #' \eqn{a \le x \le a - b/s} if \eqn{s < 0} #' #' **Mean**: #' \deqn{ #' E(X) = a + \frac{b}{1 - s} \quad \textrm{for } s < 1 #' }{ #' E(X) = a + b/(1 - s) for s < 1 #' } #' \eqn{E(X) = \infty} for \eqn{s \ge 1} #' #' **Variance**: #' \deqn{ #' \textrm{Var}(X) = \frac{b^2}{(1-s)^2(1-2s)} \quad \textrm{for } s < 0.5 #' }{ #' Var(X) = b^2 / ((1-s)^2 (1-2s)) for s < 0.5 #' } #' \eqn{\textrm{Var}(X) = \infty} for \eqn{s \ge 0.5} #' #' **Probability density function (p.d.f)**: #' #' For \eqn{s = 0}: #' \deqn{ #' f(x) = \frac{1}{b}\exp\left(-\frac{x-a}{b}\right) \quad \textrm{for } x \ge a #' }{ #' f(x) = (1/b) exp(-(x-a)/b) for x >= a #' } #' #' For \eqn{s \ne 0}: #' \deqn{ #' f(x) = \frac{1}{b}\left(1 + s\frac{x-a}{b}\right)^{-1/s - 1} #' }{ #' f(x) = (1/b)(1 + s(x-a)/b)^(-1/s - 1) #' } #' where \eqn{1 + s(x-a)/b > 0} #' #' **Cumulative distribution function (c.d.f)**: #' #' For \eqn{s = 0}: #' \deqn{ #' F(x) = 1 - \exp\left(-\frac{x-a}{b}\right) \quad \textrm{for } x \ge a #' }{ #' F(x) = 1 - exp(-(x-a)/b) for x >= a #' } #' #' For \eqn{s \ne 0}: #' \deqn{ #' F(x) = 1 - \left(1 + s\frac{x-a}{b}\right)^{-1/s} #' }{ #' F(x) = 1 - (1 + s(x-a)/b)^(-1/s) #' } #' where \eqn{1 + s(x-a)/b > 0} #' #' **Quantile function**: #' #' For \eqn{s = 0}: #' \deqn{ #' Q(p) = a - b\log(1-p) #' }{ #' Q(p) = a - b log(1-p) #' } #' #' For \eqn{s \ne 0}: #' \deqn{ #' Q(p) = a + \frac{b}{s}\left[(1-p)^{-s} - 1\right] #' }{ #' Q(p) = a + (b/s)[(1-p)^(-s) - 1] #' } #' #' **Median**: #' #' For \eqn{s = 0}: #' \deqn{ #' \textrm{Median}(X) = a + b\log(2) #' }{ #' Median(X) = a + b log(2) #' } #' #' For \eqn{s \ne 0}: #' \deqn{ #' \textrm{Median}(X) = a + \frac{b}{s}\left(2^s - 1\right) #' }{ #' Median(X) = a + (b/s)(2^s - 1) #' } #' #' **Skewness and Kurtosis**: No closed-form expressions; approximated numerically. #' #' @seealso [evd::dgpd()] #' @examples #' dist <- dist_gpd(location = 0, scale = 1, shape = 0) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' @export dist_gpd <- function(location, scale, shape) { location <- vctrs::vec_cast(unname(location), double()) shape <- vctrs::vec_cast(unname(shape), double()) scale <- vctrs::vec_cast(unname(scale), double()) if (any(scale <= 0)) { stop("The scale parameter of a GPD distribution must be strictly positive") } distributional::new_dist(location = location, scale = scale, shape = shape, class = "dist_gpd") } #' @export format.dist_gpd <- function(x, digits = 2, ...) { sprintf( "GPD(%s, %s, %s)", format(x[["location"]], digits = digits, ...), format(x[["scale"]], digits = digits, ...), format(x[["shape"]], digits = digits, ...) ) } #' @exportS3Method distributional::log_density log_density.dist_gpd <- function(x, at, ...) { z <- (at - x[["location"]]) / x[["scale"]] if (x[["shape"]] == 0) { pdf <- -z } else { xx <- 1 + x[["shape"]] * z xx[xx <= 0] <- NA_real_ pdf <- -(1 / x[["shape"]] + 1) * log(xx) pdf[is.na(pdf)] <- -Inf } if (x[["shape"]] >= 0) { pdf[z < 0] <- -Inf } else { pdf[z < 0 | z > -1 / x[["shape"]]] <- -Inf } pdf - log(x[["scale"]]) } #' @exportS3Method stats::density density.dist_gpd <- function(x, at, ...) { exp(log_density.dist_gpd(x, at, ...)) } #' @exportS3Method distributional::cdf cdf.dist_gpd <- function(x, q, ...) { z <- pmax(q - x[["location"]], 0) / x[["scale"]] if (x[["shape"]] == 0) { 1 - exp(-z) } else { 1 - pmax(1 + x[["shape"]] * z, 0)^(-1 / x[["shape"]]) } } #' @exportS3Method stats::quantile quantile.dist_gpd <- function(x, p, ...) { if (x[["shape"]] == 0) { x[["location"]] - x[["scale"]] * log(1 - p) } else { x[["location"]] + x[["scale"]] * ((1 - p)^(-x[["shape"]]) - 1) / x[["shape"]] } } #' @exportS3Method distributional::generate generate.dist_gpd <- function(x, times, ...) { if (x[["shape"]] == 0) { x[["location"]] + x[["scale"]] * stats::rexp(times) } else { quantile(x, stats::runif(times)) } } #' @export mean.dist_gpd <- function(x, ...) { if (x[["shape"]] < 1) { x[["location"]] + x[["scale"]] / (1 - x[["shape"]]) } else { Inf } } #' @exportS3Method stats::median median.dist_gpd <- function(x, ...) { if (x[["shape"]] == 0) { x[["location"]] - x[["scale"]] * log(0.5) } else { x[["location"]] + x[["scale"]] * (2^x[["shape"]] - 1) / x[["shape"]] } } #' @exportS3Method distributional::covariance covariance.dist_gpd <- function(x, ...) { if (x[["shape"]] < 0.5) { x[["scale"]]^2 / (1 - x[["shape"]])^2 / (1 - 2 * x[["shape"]]) } else { Inf } } #' @export support.dist_gpd <- function(x, ...) { shape <- x[["shape"]] location <- x[["location"]] scale <- x[["scale"]] # Determine limits based on shape parameter if (shape >= 0) { # Support: [location, Inf) lims <- list(c(location, Inf)) closed <- list(c(TRUE, FALSE)) } else { # Support: [location, location - scale/shape] upper <- location - scale / shape lims <- list(c(location, upper)) closed <- list(c(TRUE, TRUE)) } x_proto <- list(vctrs::vec_init(numeric(), n = 0L)) new_support_region(x_proto, lims, closed) }distributional/R/dist_multivariate_normal.R0000644000176200001440000001200015131642036020725 0ustar liggesusers#' The multivariate normal distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The multivariate normal distribution is a generalization of the univariate #' normal distribution to higher dimensions. It is widely used in multivariate #' statistics and describes the joint distribution of multiple correlated #' continuous random variables. #' #' @param mu A list of numeric vectors for the distribution's mean. #' @param sigma A list of matrices for the distribution's variance-covariance matrix. #' #' @details #' #' `r pkgdown_doc_link("dist_multivariate_normal")` #' #' In the following, let \eqn{\mathbf{X}} be a \eqn{k}-dimensional multivariate #' normal random variable with mean vector `mu` = \eqn{\boldsymbol{\mu}} and #' variance-covariance matrix `sigma` = \eqn{\boldsymbol{\Sigma}}. #' #' **Support**: \eqn{\mathbf{x} \in \mathbb{R}^k} #' #' **Mean**: \eqn{\boldsymbol{\mu}} #' #' **Variance-covariance matrix**: \eqn{\boldsymbol{\Sigma}} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(\mathbf{x}) = \frac{1}{(2\pi)^{k/2} |\boldsymbol{\Sigma}|^{1/2}} #' \exp\left(-\frac{1}{2}(\mathbf{x} - \boldsymbol{\mu})^T #' \boldsymbol{\Sigma}^{-1}(\mathbf{x} - \boldsymbol{\mu})\right) #' }{ #' f(x) = 1 / ((2\pi)^(k/2) |\Sigma|^(1/2)) #' exp(-1/2 (x - \mu)^T \Sigma^(-1) (x - \mu)) #' } #' #' where \eqn{|\boldsymbol{\Sigma}|}{|\Sigma|} is the determinant of #' \eqn{\boldsymbol{\Sigma}}{\Sigma}. #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(\mathbf{X} \le \mathbf{q}) = P(X_1 \le q_1, \ldots, X_k \le q_k) #' }{ #' P(X \le q) = P(X_1 \le q_1, ..., X_k \le q_k) #' } #' #' The c.d.f. does not have a closed-form expression and is computed numerically. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' M(\mathbf{t}) = E(e^{\mathbf{t}^T \mathbf{X}}) = #' \exp\left(\mathbf{t}^T \boldsymbol{\mu} + \frac{1}{2}\mathbf{t}^T #' \boldsymbol{\Sigma} \mathbf{t}\right) #' }{ #' M(t) = E(e^(t^T X)) = exp(t^T \mu + 1/2 t^T \Sigma t) #' } #' #' @seealso [mvtnorm::dmvnorm()], [mvtnorm::pmvnorm()], [mvtnorm::qmvnorm()], #' [mvtnorm::rmvnorm()] #' #' @examples #' dist <- dist_multivariate_normal(mu = list(c(1,2)), sigma = list(matrix(c(4,2,2,3), ncol=2))) #' dimnames(dist) <- c("x", "y") #' dist #' #' @examplesIf requireNamespace("mvtnorm", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, cbind(2, 1)) #' density(dist, cbind(2, 1), log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7, kind = "equicoordinate") #' quantile(dist, 0.7, kind = "marginal") #' #' @export dist_multivariate_normal <- function(mu = 0, sigma = diag(1)){ new_dist(mu = mu, sigma = sigma, dimnames = colnames(sigma[[1]]), class = "dist_mvnorm") } #' @export format.dist_mvnorm <- function(x, digits = 2, ...){ sprintf( "MVN[%i]", length(x[["mu"]]) ) } #' @export density.dist_mvnorm <- function(x, at, ..., na.rm = FALSE){ require_package("mvtnorm") if(is.list(at)) return(vapply(at, density, numeric(1L), x = x, ...)) mvtnorm::dmvnorm(at, x[["mu"]], x[["sigma"]]) } #' @export log_density.dist_mvnorm <- function(x, at, ..., na.rm = FALSE){ require_package("mvtnorm") if(is.list(at)) return(vapply(at, log_density, numeric(1L), x = x, ...)) mvtnorm::dmvnorm(at, x[["mu"]], x[["sigma"]], log = TRUE) } #' @export quantile.dist_mvnorm <- function(x, p, kind = c("marginal", "equicoordinate"), ..., type = lifecycle::deprecated(), na.rm = FALSE){ if (lifecycle::is_present(type)) { lifecycle::deprecate_warn( "0.6.0", "quantile.dist_mvnorm(type)", "quantile.dist_mvnorm(kind)", # Force removal of the 'contact author' footer since it doesn't work here. user_env = .GlobalEnv ) kind <- type } kind <- match.arg(kind) q <- if (kind == "marginal") { stats::qnorm(p, mean = rep(x[["mu"]], each = length(p)), sd = rep(sqrt(diag(x[["sigma"]])), each = length(p)), ...) } else { require_package("mvtnorm") vapply(p, function(p, ...) { if (p == 0) return(-Inf) else if (p == 1) return(Inf) mvtnorm::qmvnorm(p, ...)$quantile }, numeric(1L), mean = x[["mu"]], sigma = x[["sigma"]], ...) } matrix(q, nrow = length(p), ncol = dim(x)) } #' @export cdf.dist_mvnorm <- function(x, q, ..., na.rm = FALSE){ if(is.list(q)) return(vapply(q, cdf, numeric(1L), x = x, ...)) require_package("mvtnorm") mvtnorm::pmvnorm(upper = as.numeric(q), mean = x[["mu"]], sigma = x[["sigma"]], ...)[1] } #' @export generate.dist_mvnorm <- function(x, times, ..., na.rm = FALSE){ require_package("mvtnorm") mvtnorm::rmvnorm(times, x[["mu"]], x[["sigma"]], ...) } #' @export mean.dist_mvnorm <- function(x, ...){ matrix(x[["mu"]], nrow = 1) } #' @export median.dist_mvnorm <- mean.dist_mvnorm #' @export covariance.dist_mvnorm <- function(x, ...){ # Wrap in list to preserve matrix structure list(x[["sigma"]]) } #' @export dim.dist_mvnorm <- function(x){ length(x[["mu"]]) } distributional/R/utils.R0000644000176200001440000001212015131641660014771 0ustar liggesuserstranspose <- function(.l) { if(is_empty(.l)) return(.l) inner_names <- names(.l[[1L]]) result <- lapply(seq_along(.l[[1L]]), function(i) { lapply(.l, .subset2, i) }) set_names(result, inner_names) } transpose_c <- function(.l) { stopifnot(is_list_of(.l)) .ptype <- vec_init(attr(.l, "ptype"), 1L) if(is_empty(.l)) return(.l) inner_names <- names(.l[[1L]]) .l <- vec_recycle_common(!!!.l) result <- lapply(seq_along(.l[[1L]]), function(i) { unname(vec_c(!!!lapply(.l, vec_slice, i), .ptype = .ptype)) }) set_names(result, inner_names) } split_matrix_rows <- function(x) { lapply(seq_len(nrow(x)), function(i) x[i,,drop=FALSE]) } # Declare a function's argument as allowing list inputs for mapping values arg_listable <- function(x, .ptype) { if(is.list(x)) { x <- as_list_of(as.list(x), .ptype) if(is.matrix(attr(x, "ptype"))) { x <- lapply(x, split_matrix_rows) x <- as_list_of(x, .ptype) } if(is.null(names(x))) { names(x) <- vec_as_names(character(vec_size(x)), repair = "unique") } } else if(is.matrix(x)) { x <- split_matrix_rows(x) } else { vec_assert(x, .ptype) } # Declares list arguments to be unpacked for dist_apply() class(x) <- c("arg_listable", class(x)) x } validate_recycling <- function(x, arg) { if(is_list_of(arg)) return(lapply(arg, validate_recycling, x = x)) if(!any(vec_size(arg) == c(1, vec_size(x)))) { abort( sprintf("Cannot recycle input of size %i to match the distributions (size %i).", vec_size(arg), vec_size(x) ) ) } } dist_apply <- function(x, .f, ...){ dn <- dimnames(x) x <- vec_data(x) dist_is_na <- vapply(x, is.null, logical(1L)) x[dist_is_na] <- list(structure(list(), class = c("dist_na", "dist_default"))) args <- dots_list(...) is_arg_listable <- vapply(args, inherits, FUN.VALUE = logical(1L), "arg_listable") unpack_listable <- multi_arg <- FALSE if(any(is_arg_listable)) { if(sum(is_arg_listable) > 1) abort("Only distribution argument can be unpacked at a time.\nThis shouldn't happen, please report a bug at https://github.com/mitchelloharawild/distributional/issues/") arg_pos <- which(is_arg_listable) if(unpack_listable <- is_list_of(args[[arg_pos]])) { validate_recycling(x, args[[arg_pos]]) .unpack_names <- names(args[[arg_pos]]) args[[arg_pos]] <- transpose_c(args[[arg_pos]]) } else if (multi_arg <- (length(args[[arg_pos]]) > 1)){ args[[arg_pos]] <- list(unclass(args[[arg_pos]])) } } out <- do.call(mapply, c(.f, list(x), args, SIMPLIFY = FALSE, USE.NAMES = FALSE)) # out <- mapply(.f, x, ..., SIMPLIFY = FALSE, USE.NAMES = FALSE) if(unpack_listable) { # TODO - update and repair multivariate distribution i/o with unpacking out <- as_list_of(out) if (rbind_mat <- is.matrix(attr(out, "ptype"))) { out <- as_list_of(lapply(out, split_matrix_rows)) } out <- transpose_c(out) if(rbind_mat) { out <- lapply(out, function(x) `colnames<-`(do.call(rbind, x), dn)) } names(out) <- .unpack_names out <- new_data_frame(out, n = vec_size(x)) # } else if(length(out[[1]]) > 1) { # out <- suppressMessages(vctrs::vec_rbind(!!!out)) } else if(multi_arg) { if(length(dn) > 1) out <- lapply(out, `colnames<-`, dn) } else { out <- vec_c(!!!out) out <- if(vec_is_list(out)) lapply(out, set_matrix_dimnames, dn = dn) else set_matrix_dimnames(out, dn) } out } set_matrix_dimnames <- function(x, dn) { if((is.matrix(x) || is.data.frame(x)) && !is.null(dn)){ # Set dimension names colnames(x) <- dn } x } # inlined from https://github.com/r-lib/cli/blob/master/R/utf8.R is_utf8_output <- function() { opt <- getOption("cli.unicode", NULL) if (!is_null(opt)) { isTRUE(opt) } else { l10n_info()$`UTF-8` && !is_latex_output() } } is_latex_output <- function() { if (!("knitr" %in% loadedNamespaces())) { return(FALSE) } get("is_latex_output", asNamespace("knitr"))() } require_package <- function(pkg){ if(!requireNamespace(pkg, quietly = TRUE)){ abort( sprintf('The `%s` package must be installed to use this functionality. It can be installed with install.packages("%s")', pkg, pkg) ) } } #' @keywords internal pkgdown_doc_link <- function(name) { require_package("pkgdown") if (pkgdown::in_pkgdown()) { # When pkgdown builds, return nothing return("") } url <- sprintf( "https://pkg.mitchelloharawild.com/distributional/reference/%s.html", name ) # Plain text + markdown-style link; roxygen will convert this to Rd. # Example rendered text: # We recommend reading this documentation on [pkgdown](https://...) # https://... paste0( "We recommend reading this documentation on ", "[pkgdown](", url, ") which renders math nicely.\n\\url{", url, "}\n" ) } restore_rng <- function(expr, seed = NULL) { old_seed <- .GlobalEnv$.Random.seed # Set new temporary seed set.seed(seed) # Restore previous seed on.exit(.GlobalEnv$.Random.seed <- old_seed) expr } near <- function(x, y) { tol <- .Machine$double.eps^0.5 abs(x - y) < tol } distributional/R/dist_logistic.R0000644000176200001440000000547215131557563016515 0ustar liggesusers#' The Logistic distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' A continuous distribution on the real line. For binary outcomes #' the model given by \eqn{P(Y = 1 | X) = F(X \beta)} where #' \eqn{F} is the Logistic [cdf()] is called *logistic regression*. #' #' @inheritParams stats::dlogis #' #' #' @details #' #' `r pkgdown_doc_link("dist_logistic")` #' #' In the following, let \eqn{X} be a Logistic random variable with #' `location` = \eqn{\mu} and `scale` = \eqn{s}. #' #' **Support**: \eqn{R}, the set of all real numbers #' #' **Mean**: \eqn{\mu} #' #' **Variance**: \eqn{s^2 \pi^2 / 3} #' #' **Probability density function (p.d.f)**: #' #' \deqn{ #' f(x) = \frac{e^{-\frac{x - \mu}{s}}}{s \left[1 + e^{-\frac{x - \mu}{s}}\right]^2} #' }{ #' f(x) = e^(-(x - mu) / s) / (s (1 + e^(-(x - \mu) / s))^2) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' F(x) = \frac{1}{1 + e^{-\frac{x - \mu}{s}}} #' }{ #' F(x) = 1 / (1 + e^(-(x - mu) / s)) #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = e^{\mu t} B(1 - st, 1 + st) #' }{ #' E(e^(tX)) = e^(mu t) B(1 - st, 1 + st) #' } #' #' for \eqn{-1 < st < 1}, where \eqn{B(a, b)} is the Beta function. #' #' @seealso [stats::Logistic] #' #' @examples #' dist <- dist_logistic(location = c(5,9,9,6,2), scale = c(2,3,4,2,1)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_logistic #' @export dist_logistic <- function(location, scale){ location <- vec_cast(location, double()) scale <- vec_cast(scale, double()) new_dist(l = location, s = scale, class = "dist_logistic") } #' @export format.dist_logistic <- function(x, digits = 2, ...){ sprintf( "Logistic(%s, %s)", format(x[["l"]], digits = digits, ...), format(x[["s"]], digits = digits, ...) ) } #' @export density.dist_logistic <- function(x, at, ...){ stats::dlogis(at, x[["l"]], x[["s"]]) } #' @export log_density.dist_logistic <- function(x, at, ...){ stats::dlogis(at, x[["l"]], x[["s"]], log = TRUE) } #' @export quantile.dist_logistic <- function(x, p, ...){ stats::qlogis(p, x[["l"]], x[["s"]]) } #' @export cdf.dist_logistic <- function(x, q, ...){ stats::plogis(q, x[["l"]], x[["s"]]) } #' @export generate.dist_logistic <- function(x, times, ...){ stats::rlogis(times, x[["l"]], x[["s"]]) } #' @export mean.dist_logistic <- function(x, ...){ x[["l"]] } #' @export covariance.dist_logistic <- function(x, ...){ (x[["s"]]*pi)^2/3 } #' @export skewness.dist_logistic <- function(x, ...) 0 #' @export kurtosis.dist_logistic <- function(x, ...) 6 / 5 #' @export has_symmetry.dist_logistic <- function(x, ...) TRUEdistributional/R/dist_gev.R0000644000176200001440000001703615131550367015453 0ustar liggesusers#' The Generalized Extreme Value Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The GEV distribution is widely used in extreme value theory to model #' the distribution of maxima (or minima) of samples. The parametric form #' encompasses the Gumbel, Frechet, and reverse Weibull distributions. #' #' @param location the location parameter \eqn{\mu} of the GEV distribution. #' @param scale the scale parameter \eqn{\sigma} of the GEV distribution. #' Must be strictly positive. #' @param shape the shape parameter \eqn{\xi} of the GEV distribution. #' Determines the tail behavior: \eqn{\xi = 0} gives Gumbel, #' \eqn{\xi > 0} gives Frechet, \eqn{\xi < 0} gives reverse Weibull. #' #' @details #' #' `r pkgdown_doc_link("dist_gev")` #' #' In the following, let \eqn{X} be a GEV random variable with parameters #' `location` = \eqn{\mu}, `scale` = \eqn{\sigma}, and `shape` = \eqn{\xi}. #' #' **Support**: #' * \eqn{x \in \mathbb{R}} (all real numbers) if \eqn{\xi = 0} #' * \eqn{x \geq \mu - \sigma/\xi} if \eqn{\xi > 0} #' * \eqn{x \leq \mu - \sigma/\xi} if \eqn{\xi < 0} #' #' **Mean**: #' \deqn{ #' E(X) = \begin{cases} #' \mu + \sigma \gamma & \text{if } \xi = 0 \\ #' \mu + \sigma \frac{\Gamma(1-\xi) - 1}{\xi} & \text{if } \xi < 1 \\ #' \infty & \text{if } \xi \geq 1 #' \end{cases} #' }{ #' E(X) = \mu + \sigma \gamma (if \xi = 0) #' \mu + \sigma (\Gamma(1-\xi) - 1) / \xi (if \xi < 1) #' Inf (if \xi >= 1) #' } #' where \eqn{\gamma \approx 0.5772} is the Euler-Mascheroni constant and #' \eqn{\Gamma(\cdot)} is the gamma function. #' #' **Median**: #' \deqn{ #' \text{Median}(X) = \begin{cases} #' \mu - \sigma \log(\log 2) & \text{if } \xi = 0 \\ #' \mu + \sigma \frac{(\log 2)^{-\xi} - 1}{\xi} & \text{if } \xi \neq 0 #' \end{cases} #' }{ #' Median(X) = \mu - \sigma log(log 2) (if \xi = 0) #' \mu + \sigma (log(2)^(-\xi) - 1) / \xi (if \xi != 0) #' } #' #' **Variance**: #' \deqn{ #' \text{Var}(X) = \begin{cases} #' \frac{\pi^2 \sigma^2}{6} & \text{if } \xi = 0 \\ #' \frac{\sigma^2}{\xi^2} [\Gamma(1-2\xi) - \Gamma(1-\xi)^2] & \text{if } \xi < 0.5 \\ #' \infty & \text{if } \xi \geq 0.5 #' \end{cases} #' }{ #' Var(X) = \pi^2 \sigma^2 / 6 (if \xi = 0) #' \sigma^2 [\Gamma(1-2\xi) - \Gamma(1-\xi)^2] / \xi^2 (if \xi < 0.5) #' Inf (if \xi >= 0.5) #' } #' #' **Probability density function (p.d.f)**: #' #' For \eqn{\xi = 0} (Gumbel): #' \deqn{ #' f(x) = \frac{1}{\sigma} \exp\left(-\frac{x-\mu}{\sigma}\right) #' \exp\left[-\exp\left(-\frac{x-\mu}{\sigma}\right)\right] #' }{ #' f(x) = (1/\sigma) exp(-(x-\mu)/\sigma) exp(-exp(-(x-\mu)/\sigma)) #' } #' #' For \eqn{\xi \neq 0}: #' \deqn{ #' f(x) = \frac{1}{\sigma} \left[1 + \xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi-1} #' \exp\left\{-\left[1 + \xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi}\right\} #' }{ #' f(x) = (1/\sigma) [1 + \xi(x-\mu)/\sigma]^(-1/\xi-1) exp(-[1 + \xi(x-\mu)/\sigma]^(-1/\xi)) #' } #' where \eqn{1 + \xi(x-\mu)/\sigma > 0}. #' #' **Cumulative distribution function (c.d.f)**: #' #' For \eqn{\xi = 0} (Gumbel): #' \deqn{ #' F(x) = \exp\left[-\exp\left(-\frac{x-\mu}{\sigma}\right)\right] #' }{ #' F(x) = exp(-exp(-(x-\mu)/\sigma)) #' } #' #' For \eqn{\xi \neq 0}: #' \deqn{ #' F(x) = \exp\left\{-\left[1+\xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi}\right\} #' }{ #' F(x) = exp(-[1 + \xi(x-\mu)/\sigma]^(-1/\xi)) #' } #' where \eqn{1 + \xi(x-\mu)/\sigma > 0}. #' #' **Quantile function**: #' #' For \eqn{\xi = 0} (Gumbel): #' \deqn{ #' Q(p) = \mu - \sigma \log(-\log p) #' }{ #' Q(p) = \mu - \sigma log(-log p) #' } #' #' For \eqn{\xi \neq 0}: #' \deqn{ #' Q(p) = \mu + \frac{\sigma}{\xi}\left[(-\log p)^{-\xi} - 1\right] #' }{ #' Q(p) = \mu + \sigma [(-log p)^(-\xi) - 1] / \xi #' } #' #' @references Jenkinson, A. F. (1955) The frequency distribution of the annual #' maximum (or minimum) of meteorological elements. \emph{Quart. J. R. Met. Soc.}, #' \bold{81}, 158–171. #' #' @seealso [evd::dgev()] #' #' @examples #' # Create GEV distributions with different shape parameters #' #' # Gumbel distribution (shape = 0) #' gumbel <- dist_gev(location = 0, scale = 1, shape = 0) #' #' # Frechet distribution (shape > 0, heavy-tailed) #' frechet <- dist_gev(location = 0, scale = 1, shape = 0.3) #' #' # Reverse Weibull distribution (shape < 0, bounded above) #' weibull <- dist_gev(location = 0, scale = 1, shape = -0.2) #' #' dist <- c(gumbel, frechet, weibull) #' dist #' #' # Statistical properties #' mean(dist) #' median(dist) #' variance(dist) #' #' # Generate random samples #' generate(dist, 10) #' #' # Evaluate density #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' # Evaluate cumulative distribution #' cdf(dist, 4) #' #' # Calculate quantiles #' quantile(dist, 0.95) #' #' @export dist_gev <- function(location, scale, shape) { location <- vctrs::vec_cast(unname(location), double()) shape <- vctrs::vec_cast(unname(shape), double()) scale <- vctrs::vec_cast(unname(scale), double()) if (any(scale <= 0)) { stop("The scale parameter of a GEV distribution must be strictly positive") } distributional::new_dist(location = location, scale = scale, shape = shape, class = "dist_gev") } #' @export format.dist_gev <- function(x, digits = 2, ...) { sprintf( "GEV(%s, %s, %s)", format(x[["location"]], digits = digits, ...), format(x[["scale"]], digits = digits, ...), format(x[["shape"]], digits = digits, ...) ) } #' @exportS3Method distributional::log_density log_density.dist_gev <- function(x, at, ...) { z <- (at - x[["location"]]) / x[["scale"]] if (x[["shape"]] == 0) { pdf <- -z - exp(-z) } else { xx <- 1 + x[["shape"]] * z xx[xx <= 0] <- NA_real_ pdf <- -xx^(-1 / x[["shape"]]) - (1 / x[["shape"]] + 1) * log(xx) pdf[is.na(pdf)] <- -Inf } pdf - log(x[["scale"]]) } #' @exportS3Method stats::density density.dist_gev <- function(x, at, ...) { exp(log_density.dist_gev(x, at, ...)) } #' @exportS3Method distributional::cdf cdf.dist_gev <- function(x, q, ...) { z <- (q - x[["location"]]) / x[["scale"]] if (x[["shape"]] == 0) { exp(-exp(-z)) } else { exp(-pmax(1 + x[["shape"]] * z, 0)^(-1 / x[["shape"]])) } } #' @exportS3Method stats::quantile quantile.dist_gev <- function(x, p, ...) { if (x[["shape"]] == 0) { x[["location"]] - x[["scale"]] * log(-log(p)) } else { x[["location"]] + x[["scale"]] * ((-log(p))^(-x[["shape"]]) - 1) / x[["shape"]] } } #' @exportS3Method distributional::generate generate.dist_gev <- function(x, times, ...) { z <- stats::rexp(times) if (x[["shape"]] == 0) { x[["location"]] - x[["scale"]] * log(z) } else { x[["location"]] + x[["scale"]] * (z^(-x[["shape"]]) - 1) / x[["shape"]] } } #' @export mean.dist_gev <- function(x, ...) { if (x[["shape"]] == 0) { x[["location"]] + x[["scale"]] * 0.57721566490153286 } else if (x[["shape"]] < 1) { x[["location"]] + x[["scale"]] * (gamma(1 - x[["shape"]]) - 1) / x[["shape"]] } else { Inf } } #' @exportS3Method stats::median median.dist_gev <- function(x, ...) { if (x[["shape"]] == 0) { x[["location"]] - x[["scale"]] * log(log(2)) } else { x[["location"]] + x[["scale"]] * (log(2)^(-x[["shape"]]) - 1) / x[["shape"]] } } #' @exportS3Method distributional::covariance covariance.dist_gev <- function(x, ...) { if (x[["shape"]] == 0) { x[["scale"]]^2 * pi^2 / 6 } else if (x[["shape"]] < 0.5) { g2 <- gamma(1 - 2 * x[["shape"]]) g1 <- gamma(1 - x[["shape"]]) x[["scale"]]^2 * (g2 - g1^2) / x[["shape"]]^2 } else { Inf } } distributional/R/dist_bernoulli.R0000644000176200001440000000705715131566042016664 0ustar liggesusers#' The Bernoulli distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Bernoulli distributions are used to represent events like coin flips #' when there is single trial that is either successful or unsuccessful. #' The Bernoulli distribution is a special case of the [Binomial()] #' distribution with `n = 1`. #' #' @inheritParams dist_binomial #' #' @details #' #' `r pkgdown_doc_link("dist_bernoulli")` #' #' In the following, let \eqn{X} be a Bernoulli random variable with parameter #' `prob` = \eqn{p}. Some textbooks also define \eqn{q = 1 - p}, or use #' \eqn{\pi} instead of \eqn{p}. #' #' The Bernoulli probability distribution is widely used to model #' binary variables, such as 'failure' and 'success'. The most #' typical example is the flip of a coin, when \eqn{p} is thought as the #' probability of flipping a head, and \eqn{q = 1 - p} is the #' probability of flipping a tail. #' #' **Support**: \eqn{\{0, 1\}}{{0, 1}} #' #' **Mean**: \eqn{p} #' #' **Variance**: \eqn{p \cdot (1 - p) = p \cdot q}{p (1 - p)} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = x) = p^x (1 - p)^{1-x} = p^x q^{1-x} #' }{ #' P(X = x) = p^x (1 - p)^(1-x) #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X \le x) = #' \left \{ #' \begin{array}{ll} #' 0 & x < 0 \\ #' 1 - p & 0 \leq x < 1 \\ #' 1 & x \geq 1 #' \end{array} #' \right. #' }{ #' P(X \le x) = (1 - p) 1_{[0, 1)}(x) + 1_{1}(x) #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = (1 - p) + p e^t #' }{ #' E(e^(tX)) = (1 - p) + p e^t #' } #' #' **Skewness**: #' #' \deqn{ #' \frac{1 - 2p}{\sqrt{p(1-p)}} = \frac{q - p}{\sqrt{pq}} #' }{ #' (1 - 2p) / sqrt(p(1-p)) #' } #' #' **Excess Kurtosis**: #' #' \deqn{ #' \frac{1 - 6p(1-p)}{p(1-p)} = \frac{1 - 6pq}{pq} #' }{ #' (1 - 6p(1-p)) / (p(1-p)) #' } #' #' @seealso [stats::Binomial] #' #' @examples #' dist <- dist_bernoulli(prob = c(0.05, 0.5, 0.3, 0.9, 0.1)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_bernoulli #' @export dist_bernoulli <- function(prob){ prob <- vec_cast(prob, double()) if(any((prob < 0) | (prob > 1))){ abort("The probability of success must be between 0 and 1.") } new_dist(p = prob, class = "dist_bernoulli") } #' @export format.dist_bernoulli <- function(x, digits = 2, ...){ sprintf( "Bernoulli(%s)", format(x[["p"]], digits = digits, ...) ) } #' @export density.dist_bernoulli <- function(x, at, ...){ stats::dbinom(at, 1, x[["p"]]) } #' @export log_density.dist_bernoulli <- function(x, at, ...){ stats::dbinom(at, 1, x[["p"]], log = TRUE) } #' @export quantile.dist_bernoulli <- function(x, p, ...){ as.logical(stats::qbinom(p, 1, x[["p"]])) } #' @export cdf.dist_bernoulli <- function(x, q, ...){ stats::pbinom(q, 1, x[["p"]]) } #' @export generate.dist_bernoulli <- function(x, times, ...){ as.logical(stats::rbinom(times, 1, x[["p"]])) } #' @export mean.dist_bernoulli <- function(x, ...){ x[["p"]] } #' @export covariance.dist_bernoulli <- function(x, ...){ x[["p"]]*(1-x[["p"]]) } #' @export skewness.dist_bernoulli <- function(x, ...) { p <- x[["p"]] q <- 1 - x[["p"]] (1 - (2 * p)) / sqrt(p * q) } #' @export kurtosis.dist_bernoulli <- function(x, ...) { p <- x[["p"]] q <- 1 - x[["p"]] (1 - (6 * p * q)) / (p * q) } distributional/R/distribution.R0000644000176200001440000004137315131640147016363 0ustar liggesusers#' Construct distributions #' #' @description #' `r lifecycle::badge('maturing')` #' #' Allows extension package developers to define a new distribution class #' compatible with the distributional package. #' #' @param ... Parameters of the distribution (named). #' @param class The class of the distribution for S3 dispatch. #' @param dimnames The names of the variables in the distribution (optional). #' #' @export new_dist <- function(..., class = NULL, dimnames = NULL){ args <- transpose(vctrs::vec_recycle_common(...)) wrap_dist( lapply(args, enclass_dist, class = class), dimnames = dimnames ) } enclass_dist <- function(x, class) { structure(x, class = c(class, "dist_default")) } wrap_dist <- function(x, dimnames = NULL){ vctrs::new_vctr(x, vars = dimnames, class = "distribution") } #' @export vec_ptype_abbr.distribution <- function(x, ...){ "dist" } #' @export format.distribution <- function(x, ...){ x <- vec_data(x) out <- vapply(x, format, character(1L), ...) out[vapply(x, is.null, logical(1L))] <- "NA" out } #' @importFrom pillar pillar_shaft new_pillar_shaft get_max_extent #' @export pillar_shaft.distribution <- function(x, ...) { dist = format(x) dist_min = format(x, width = 30) pillar::new_pillar_shaft( list(dist = dist, dist_min = dist_min), width = pillar::get_max_extent(dist), min_width = pillar::get_max_extent(dist_min), class = "pillar_distribution" ) } #' @export #' @importFrom pillar new_ornament format.pillar_distribution <- function(x, width, ...) { if (get_max_extent(x$dist) <= width) { ornament <- x$dist } else { ornament <- x$dist_min } pillar::new_ornament(ornament, align = "right") } #' @export `dimnames<-.distribution` <- function(x, value){ attr(x, "vars") <- value x } #' @export dimnames.distribution <- function(x){ attr(x, "vars") } #' @export `[[.distribution` <- `[` #' The probability density/mass function #' #' @description #' `r lifecycle::badge('stable')` #' #' Computes the probability density function for a continuous distribution, or #' the probability mass function for a discrete distribution. #' #' @param x The distribution(s). #' @param at The point at which to compute the density/mass. #' @param ... Additional arguments passed to methods. #' @param log If `TRUE`, probabilities will be given as log probabilities. #' #' @importFrom stats density #' @export density.distribution <- function(x, at, ..., log = FALSE){ if(log) return(log_density(x, at, ...)) at <- arg_listable(at, .ptype = NULL) dist_apply(x, density, at = at, ...) } log_density <- function(x, at, ...) { UseMethod("log_density") } #' @export log_density.distribution <- function(x, at, ...){ at <- arg_listable(at, .ptype = NULL) dist_apply(x, log_density, at = at, ...) } #' Distribution Quantiles #' #' @description #' `r lifecycle::badge('stable')` #' #' Computes the quantiles of a distribution. #' #' @inheritParams density.distribution #' @param p The probability of the quantile. #' @param ... Additional arguments passed to methods. #' #' @importFrom stats quantile #' @export quantile.distribution <- function(x, p, ..., log = FALSE){ if(log) return(log_quantile(x, p, ...)) p <- arg_listable(p, .ptype = double()) dist_apply(x, quantile, p = p, ...) } log_quantile <- function(x, p, ...) { UseMethod("log_quantile") } #' @export log_quantile.distribution <- function(x, p, ...){ vec_assert(q, double(), 1L) p <- arg_listable(p, .ptype = double()) dist_apply(x, log_quantile, p = p, ...) } #' The cumulative distribution function #' #' @description #' `r lifecycle::badge('stable')` #' #' @inheritParams density.distribution #' @param q The quantile at which the cdf is calculated. #' #' @name cdf #' @export cdf <- function (x, q, ..., log = FALSE){ if(log) return(log_cdf(x, q, ...)) UseMethod("cdf") } #' @rdname cdf #' @export cdf.distribution <- function(x, q, ...){ q <- arg_listable(q, .ptype = NULL) dist_apply(x, cdf, q = q, ...) } log_cdf <- function(x, q, ...) { UseMethod("log_cdf") } #' @export log_cdf.distribution <- function(x, q, ...){ q <- arg_listable(q, .ptype = NULL) dist_apply(x, log_cdf, q = q, ...) } #' Randomly sample values from a distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Generate random samples from probability distributions. #' #' @param x The distribution(s). #' @param times The number of samples. #' @param ... Additional arguments used by methods. #' #' @export generate.distribution <- function(x, times, ...){ times <- vec_cast(times, integer()) times <- vec_recycle(times, size = length(x)) dn <- dimnames(x) x <- vec_data(x) dist_is_na <- vapply(x, is.null, logical(1L)) x[dist_is_na] <- list(structure(list(), class = c("dist_na", "dist_default"))) mapply( function(x, ...) { y <- generate(x, ...) if (is.matrix(y)) colnames(y) <- dn y }, x, times = times, ..., SIMPLIFY = FALSE ) # dist_apply(x, generate, times = times, ...) # Needs work to structure MV appropriately. } #' The (log) likelihood of a sample matching a distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @name likelihood #' @export likelihood <- function (x, ...){ UseMethod("likelihood") } #' @rdname likelihood #' @param sample A list of sampled values to compare to distribution(s). #' @param log If `TRUE`, the log-likelihood will be computed. #' @export likelihood.distribution <- function(x, sample, ..., log = FALSE){ if(vec_is(sample, numeric())) { warn("The `sample` argument of `likelihood()` should contain a list of numbers. The same sample will be used for each distribution, i.e. `sample = list(sample)`.") sample <- list(sample) } if(log){ dist_apply(x, log_likelihood, sample = sample, ...) } else { dist_apply(x, likelihood, sample = sample, ...) } } #' @rdname likelihood #' @export log_likelihood <- function(x, ...) { UseMethod("log_likelihood") } #' @export log_likelihood.distribution <- function(x, sample, ...){ dist_apply(x, log_likelihood, sample = sample, ...) } #' Extract the parameters of a distribution #' #' @description #' `r lifecycle::badge('experimental')` #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @name parameters #' @examples #' dist <- c( #' dist_normal(1:2), #' dist_poisson(3), #' dist_multinomial(size = c(4, 3), #' prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) #' ) #' parameters(dist) #' @export parameters <- function(x, ...) { UseMethod("parameters") } #' @rdname parameters #' @export parameters.distribution <- function(x, ...) { x <- lapply(vec_data(x), parameters) x <- lapply(x, function(z) data_frame(!!!z, .name_repair = "minimal")) vec_rbind(!!!x) } #' Extract the name of the distribution family #' #' @description #' `r lifecycle::badge('experimental')` #' #' @param object The distribution(s). #' @param ... Additional arguments used by methods. #' #' @examples #' dist <- c( #' dist_normal(1:2), #' dist_poisson(3), #' dist_multinomial(size = c(4, 3), #' prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) #' ) #' family(dist) #' #' @importFrom stats family #' @export family.distribution <- function(object, ...) { vapply(vec_data(object), family, character(1L)) } #' Region of support of a distribution #' #' @description #' `r lifecycle::badge('experimental')` #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @name support #' @export support <- function(x, ...) { UseMethod("support") } #' @rdname support #' @export support.distribution <- function(x, ...) { dist_apply(x, support, ...) } #' Mean of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Returns the empirical mean of the probability distribution. If the method #' does not exist, the mean of a random sample will be returned. #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @export mean.distribution <- function(x, ...){ dist_apply(x, mean, ...) } #' Variance #' #' @description #' `r lifecycle::badge('stable')` #' #' A generic function for computing the variance of an object. #' #' @param x An object. #' @param ... Additional arguments used by methods. #' #' @details #' #' The implementation of `variance()` for numeric variables coerces the input to #' a vector then uses [`stats::var()`] to compute the variance. This means that, #' unlike [`stats::var()`], if `variance()` is passed a matrix or a 2-dimensional #' array, it will still return the variance ([`stats::var()`] returns the #' covariance matrix in that case). #' #' @seealso [`variance.distribution()`], [`covariance()`] #' #' @export variance <- function(x, ...){ UseMethod("variance") } #' @export variance.default <- function(x, ...){ stop( "The variance() method is not supported for objects of type ", paste(deparse(class(x)), collapse = "") ) } #' @rdname variance #' @export variance.numeric <- function(x, ...){ stats::var(as.vector(x), ...) } #' @rdname variance #' @export variance.matrix <- function(x, ...){ diag(stats::cov(x, ...)) } #' Variance of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Returns the empirical variance of the probability distribution. If the method #' does not exist, the variance of a random sample will be returned. #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @export variance.distribution <- function(x, ...){ dist_apply(x, variance, ...) } #' Covariance #' #' @description #' `r lifecycle::badge('stable')` #' #' A generic function for computing the covariance of an object. #' #' @param x An object. #' @param ... Additional arguments used by methods. #' #' @seealso [`covariance.distribution()`], [`variance()`] #' #' @export covariance <- function(x, ...){ UseMethod("covariance") } #' @export covariance.default <- function(x, ...){ stop( "The covariance() method is not supported for objects of type ", paste(deparse(class(x)), collapse = "") ) } #' @rdname variance #' @export covariance.numeric <- function(x, ...){ stats::cov(x, ...) } #' Covariance of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Returns the empirical covariance of the probability distribution. If the #' method does not exist, the covariance of a random sample will be returned. #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @export covariance.distribution <- function(x, ...){ dist_apply(x, covariance, ...) } #' Skewness of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @export skewness <- function(x, ...) { UseMethod("skewness") } #' @rdname skewness #' @export skewness.distribution <- function(x, ...){ dist_apply(x, skewness, ...) } #' Kurtosis of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @export kurtosis <- function(x, ...) { UseMethod("kurtosis") } #' @rdname kurtosis #' @export kurtosis.distribution <- function(x, ...){ dist_apply(x, kurtosis, ...) } #' Median of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Returns the median (50th percentile) of a probability distribution. This is #' equivalent to `quantile(x, p=0.5)`. #' #' @param x The distribution(s). #' @param na.rm Unused, included for consistency with the generic function. #' @param ... Additional arguments used by methods. #' #' @importFrom stats median #' @export median.distribution <- function(x, na.rm = FALSE, ...){ dist_apply(x, median, na.rm = na.rm, ...) } #' Probability intervals of a probability distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Returns a `hilo` central probability interval with probability coverage of #' `size`. By default, the distribution's [`quantile()`] will be used to compute #' the lower and upper bound for a centered interval #' #' @param x The distribution(s). #' @param size The size of the interval (between 0 and 100). #' @param ... Additional arguments used by methods. #' #' @seealso [`hdr.distribution()`] #' #' @importFrom stats median #' @export hilo.distribution <- function(x, size = 95, ...){ size <- arg_listable(size, .ptype = double()) dist_apply(x, hilo, size = size, ...) } #' Highest density regions of probability distributions #' #' @description #' `r lifecycle::badge('maturing')` #' #' This function is highly experimental and will change in the future. In #' particular, improved functionality for object classes and visualisation tools #' will be added in a future release. #' #' Computes minimally sized probability intervals highest density regions. #' #' @param x The distribution(s). #' @param size The size of the interval (between 0 and 100). #' @param n The resolution used to estimate the distribution's density. #' @param ... Additional arguments used by methods. #' #' @export hdr.distribution <- function(x, size = 95, n = 512, ...){ size <- arg_listable(size, .ptype = double()) dist_apply(x, hdr, size = size, n = n, ...) } #' @export sum.distribution <- function(x, ...){ vec_restore(list(Reduce("+", x)), x) } #' @method vec_arith distribution #' @export vec_arith.distribution <- function(op, x, y, ...){ UseMethod("vec_arith.distribution", y) } #' @method vec_arith.distribution default #' @export vec_arith.distribution.default <- function(op, x, y, ...){ dist_is_na <- vapply(x, is.null, logical(1L)) x[dist_is_na] <- list(structure(list(), class = c("dist_na", "dist_default"))) if(is_empty(y)){ out <- lapply(vec_data(x), get(op)) } else { x <- vec_recycle_common(x = x, y = y) y <- x[["y"]] if(is_distribution(y)) y <- vec_data(y) x <- x[["x"]] out <- .mapply(get(op), dots = list(x = vec_data(x), y = y), MoreArgs = NULL) } vec_restore(out, x) } #' @method vec_arith.numeric distribution #' @export vec_arith.numeric.distribution <- function(op, x, y, ...){ x <- vec_recycle_common(x = x, y = y) y <- x[["y"]] x <- x[["x"]] out <- .mapply(get(op), list(x = x, y = vec_data(y)), MoreArgs = NULL) vec_restore(out, y) } #' @method vec_math distribution #' @export vec_math.distribution <- function(.fn, .x, ...) { if(.fn %in% c("is.nan", "is.infinite")) return(rep_len(FALSE, length(.x))) if(.fn == "is.finite") return(rep_len(TRUE, length(.x))) out <- lapply(vec_data(.x), get(.fn), ...) vec_restore(out, .x) } #' @export vec_ptype2.distribution.distribution <- function(x, y, ...){ if(!identical(dimnames(x), dimnames(y))){ abort("Distributions must have the same `dimnames` to be combined.") } x } #' @export vec_ptype2.double.distribution <- function(x, y, ...) new_dist() #' @export vec_ptype2.distribution.double <- function(x, y, ...) new_dist() #' @export vec_ptype2.integer.distribution <- function(x, y, ...) new_dist() #' @export vec_ptype2.distribution.integer <- function(x, y, ...) new_dist() #' @export vec_cast.distribution.distribution <- function(x, to, ...){ dimnames(x) <- dimnames(to) x } #' @export vec_cast.distribution.double <- function(x, to, ...){ x <- dist_degenerate(x) dimnames(x) <- dimnames(to) x } #' @export vec_cast.distribution.integer <- vec_cast.distribution.double #' @export vec_cast.character.distribution <- function(x, to, ...){ format(x) } #' Test if the object is a distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' This function returns `TRUE` for distributions and `FALSE` for all other objects. #' #' @param x An object. #' #' @return TRUE if the object inherits from the distribution class. #' @rdname is-distribution #' @examples #' dist <- dist_normal() #' is_distribution(dist) #' is_distribution("distributional") #' @export is_distribution <- function(x) { inherits(x, "distribution") } #' Check if a distribution is symmetric #' #' @description #' `r lifecycle::badge('experimental')` #' #' Determines whether a probability distribution is symmetric around its center. #' #' @param x The distribution(s). #' @param ... Additional arguments used by methods. #' #' @return A logical value indicating whether the distribution is symmetric. #' #' @examples #' # Normal distribution is symmetric #' has_symmetry(dist_normal(mu = 0, sigma = 1)) #' has_symmetry(dist_normal(mu = 5, sigma = 2)) #' #' # Beta distribution symmetry depends on parameters #' has_symmetry(dist_beta(shape1 = 2, shape2 = 2)) # symmetric #' has_symmetry(dist_beta(shape1 = 2, shape2 = 5)) # not symmetric #' #' @export has_symmetry <- function(x, ...) { UseMethod("has_symmetry") } #' @export has_symmetry.distribution <- function(x, ...) { dist_apply(x, has_symmetry, ...) }distributional/R/dist_logarithmic.R0000644000176200001440000000630315131570041017156 0ustar liggesusers#' The Logarithmic distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Logarithmic distribution is a discrete probability distribution #' derived from the logarithmic series. It is useful in modeling the #' abundance of species and other phenomena where the frequency of #' an event follows a logarithmic pattern. #' #' @inheritParams actuar::dlogarithmic #' #' @details #' #' `r pkgdown_doc_link("dist_logarithmic")` #' #' In the following, let \eqn{X} be a Logarithmic random variable with #' parameter `prob` = \eqn{p}. #' #' **Support**: \eqn{\{1, 2, 3, ...\}}{{1, 2, 3, ...}} #' #' **Mean**: \eqn{\frac{-1}{\log(1-p)} \cdot \frac{p}{1-p}}{-1/log(1-p) * p/(1-p)} #' #' **Variance**: \eqn{\frac{-(p^2 + p\log(1-p))}{[(1-p)\log(1-p)]^2}}{-(p^2 + p*log(1-p))/((1-p)*log(1-p))^2} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = k) = \frac{-1}{\log(1-p)} \cdot \frac{p^k}{k} #' }{ #' P(X = k) = -1/log(1-p) * p^k/k #' } #' #' for \eqn{k = 1, 2, 3, \ldots}{k = 1, 2, 3, ...} #' #' **Cumulative distribution function (c.d.f)**: #' #' The c.d.f. does not have a simple closed form. It is computed #' using the recurrence relationship #' \eqn{P(X = k+1) = \frac{p \cdot k}{k+1} \cdot P(X = k)}{P(X = k+1) = (p*k)/(k+1) * P(X = k)} #' starting from \eqn{P(X = 1) = \frac{-p}{\log(1-p)}}{P(X = 1) = -p/log(1-p)}. #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \frac{\log(1 - pe^t)}{\log(1-p)} #' }{ #' E(e^(tX)) = log(1 - p*e^t)/log(1-p) #' } #' #' for \eqn{pe^t < 1}{p*e^t < 1} #' #' @seealso [actuar::Logarithmic] #' #' @examples #' dist <- dist_logarithmic(prob = c(0.33, 0.66, 0.99)) #' dist #' #' @examplesIf requireNamespace("actuar", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' @name dist_logarithmic #' @export dist_logarithmic <- function(prob){ prob <- vec_cast(prob, double()) if(any((prob < 0) | (prob > 1))){ abort("The prob parameter of a Logarithmic distribution must be between 0 and 1.") } new_dist(p = prob, class = "dist_logarithmic") } #' @export format.dist_logarithmic <- function(x, digits = 2, ...){ sprintf( "Logarithmic(%s)", format(x[["p"]], digits = digits, ...) ) } #' @export density.dist_logarithmic <- function(x, at, ...){ require_package("actuar") actuar::dlogarithmic(at, x[["p"]]) } #' @export log_density.dist_logarithmic <- function(x, at, ...){ require_package("actuar") actuar::dlogarithmic(at, x[["p"]], log = TRUE) } #' @export quantile.dist_logarithmic <- function(x, p, ...){ require_package("actuar") actuar::qlogarithmic(p, x[["p"]]) } #' @export cdf.dist_logarithmic <- function(x, q, ...){ require_package("actuar") actuar::plogarithmic(q, x[["p"]]) } #' @export generate.dist_logarithmic <- function(x, times, ...){ require_package("actuar") actuar::rlogarithmic(times, x[["p"]]) } #' @export mean.dist_logarithmic <- function(x, ...){ p <- x[["p"]] (-1/(log(1-p)))*(p/(1-p)) } #' @export covariance.dist_logarithmic <- function(x, ...){ p <- x[["p"]] -(p^2 + p*log(1-p))/((1-p)*log(1-p))^2 } distributional/R/dist_studentized_range.R0000644000176200001440000000427115131564662020410 0ustar liggesusers#' The Studentized Range distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Tukey's studentized range distribution, used for Tukey's #' honestly significant differences test in ANOVA. #' #' @inheritParams stats::qtukey #' #' @details #' #' `r pkgdown_doc_link("dist_studentized_range")` #' #' In the following, let \eqn{Q} be a Studentized Range random variable with #' parameters `nmeans` = \eqn{k} (number of groups), `df` = \eqn{\nu} (degrees #' of freedom), and `nranges` = \eqn{n} (number of ranges). #' #' **Support**: \eqn{R^+}, the set of positive real numbers. #' #' **Mean**: Approximated numerically. #' #' **Variance**: Approximated numerically. #' #' **Probability density function (p.d.f)**: The density does not have a #' closed-form expression and is computed numerically. #' #' **Cumulative distribution function (c.d.f)**: The c.d.f does not have a #' simple closed-form expression. For \eqn{n = 1} (single range), it involves #' integration over the joint distribution of the sample range and an #' independent chi-square variable. The general form is computed numerically #' using algorithms described in the references for [stats::ptukey()]. #' #' **Moment generating function (m.g.f)**: Does not exist in closed form. #' #' @seealso [stats::Tukey] #' #' @examples #' dist <- dist_studentized_range(nmeans = c(6, 2), df = c(5, 4), nranges = c(1, 1)) #' #' dist #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_studentized_range #' @export dist_studentized_range <- function(nmeans, df, nranges){ nmeans <- vec_cast(nmeans, double()) df <- vec_cast(df, double()) new_dist(nm = nmeans, df = df, nr = nranges, class = "dist_studentized_range") } #' @export format.dist_studentized_range <- function(x, digits = 2, ...){ sprintf( "StudentizedRange(%s, %s, %s)", format(x[["nm"]], digits = digits, ...), format(x[["df"]], digits = digits, ...), format(x[["nr"]], digits = digits, ...) ) } #' @export quantile.dist_studentized_range <- function(x, p, ...){ stats::qtukey(p, x[["nm"]], x[["df"]], x[["nr"]]) } #' @export cdf.dist_studentized_range <- function(x, q, ...){ stats::ptukey(q, x[["nm"]], x[["df"]], x[["nr"]]) } distributional/R/dist_gamma.R0000644000176200001440000000752715131601333015746 0ustar liggesusers#' The Gamma distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' Several important distributions are special cases of the Gamma #' distribution. When the shape parameter is `1`, the Gamma is an #' exponential distribution with parameter \eqn{1/\beta}. When the #' \eqn{shape = n/2} and \eqn{rate = 1/2}, the Gamma is a equivalent to #' a chi squared distribution with n degrees of freedom. Moreover, if #' we have \eqn{X_1} is \eqn{Gamma(\alpha_1, \beta)} and #' \eqn{X_2} is \eqn{Gamma(\alpha_2, \beta)}, a function of these two variables #' of the form \eqn{\frac{X_1}{X_1 + X_2}} \eqn{Beta(\alpha_1, \alpha_2)}. #' This last property frequently appears in another distributions, and it #' has extensively been used in multivariate methods. More about the Gamma #' distribution will be added soon. #' #' @inheritParams stats::dgamma #' #' @details #' #' `r pkgdown_doc_link("dist_gamma")` #' #' In the following, let \eqn{X} be a Gamma random variable #' with parameters #' `shape` = \eqn{\alpha} and #' `rate` = \eqn{\beta}. #' #' **Support**: \eqn{x \in (0, \infty)} #' #' **Mean**: \eqn{\frac{\alpha}{\beta}} #' #' **Variance**: \eqn{\frac{\alpha}{\beta^2}} #' #' **Probability density function (p.m.f)**: #' #' \deqn{ #' f(x) = \frac{\beta^{\alpha}}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} #' }{ #' f(x) = \frac{\beta^{\alpha}}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' f(x) = \frac{\Gamma(\alpha, \beta x)}{\Gamma{\alpha}} #' }{ #' f(x) = \frac{\Gamma(\alpha, \beta x)}{\Gamma{\alpha}} #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \Big(\frac{\beta}{ \beta - t}\Big)^{\alpha}, \thinspace t < \beta #' }{ #' E(e^(tX)) = \Big(\frac{\beta}{ \beta - t}\Big)^{\alpha}, \thinspace t < \beta #' } #' #' @seealso [stats::GammaDist] #' #' @examples #' dist <- dist_gamma(shape = c(1,2,3,5,9,7.5,0.5), rate = c(0.5,0.5,0.5,1,2,1,1)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_gamma #' @export dist_gamma <- function(shape, rate = 1/scale, scale = 1/rate){ shape <- vec_cast(shape, double()) if(missing(rate) && missing(scale)) { abort("Either the `rate` or `scale` must be specified for Gamma distributions.") } rate <- vec_cast(rate, double()) if(any(shape[!is.na(shape)] < 0)){ abort("The shape parameter of a Gamma distribution must be non-negative.") } if(any(rate[!is.na(rate)] <= 0)){ abort("The rate parameter of a Gamma distribution must be strictly positive.") } new_dist(shape = shape, rate = 1/scale, class = "dist_gamma") } #' @export format.dist_gamma <- function(x, digits = 2, ...){ sprintf( if (is_utf8_output()) "\u0393(%s, %s)" else "Gamma(%s, %s)", format(x[["shape"]], digits = digits, ...), format(x[["rate"]], digits = digits, ...) ) } #' @export density.dist_gamma <- function(x, at, ...){ stats::dgamma(at, x[["shape"]], x[["rate"]]) } #' @export log_density.dist_gamma <- function(x, at, ...){ stats::dgamma(at, x[["shape"]], x[["rate"]], log = TRUE) } #' @export quantile.dist_gamma <- function(x, p, ...){ stats::qgamma(p, x[["shape"]], x[["rate"]]) } #' @export cdf.dist_gamma <- function(x, q, ...){ stats::pgamma(q, x[["shape"]], x[["rate"]]) } #' @export generate.dist_gamma <- function(x, times, ...){ stats::rgamma(times, x[["shape"]], x[["rate"]]) } #' @export mean.dist_gamma <- function(x, ...){ x[["shape"]] / x[["rate"]] } #' @export covariance.dist_gamma <- function(x, ...){ x[["shape"]] / x[["rate"]]^2 } #' @export skewness.dist_gamma <- function(x, ...) 2 / sqrt(x[["shape"]]) #' @export kurtosis.dist_gamma <- function(x, ...) 6 / x[["shape"]] distributional/R/dist_geometric.R0000644000176200001440000000603315131570034016634 0ustar liggesusers#' The Geometric Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The Geometric distribution can be thought of as a generalization #' of the [dist_bernoulli()] distribution where we ask: "if I keep flipping a #' coin with probability `p` of heads, what is the probability I need #' \eqn{k} flips before I get my first heads?" The Geometric #' distribution is a special case of Negative Binomial distribution. #' #' @inheritParams stats::dgeom #' #' @details #' #' `r pkgdown_doc_link("dist_geometric")` #' #' In the following, let \eqn{X} be a Geometric random variable with #' success probability `prob` = \eqn{p}. Note that there are multiple #' parameterizations of the Geometric distribution. #' #' **Support**: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} #' #' **Mean**: \eqn{\frac{1-p}{p}} #' #' **Variance**: \eqn{\frac{1-p}{p^2}} #' #' **Probability mass function (p.m.f)**: #' #' \deqn{ #' P(X = k) = p(1-p)^k #' }{ #' P(X = k) = p(1-p)^k #' } #' #' **Cumulative distribution function (c.d.f)**: #' #' \deqn{ #' P(X \le k) = 1 - (1-p)^{k+1} #' }{ #' P(X \le k) = 1 - (1-p)^(k+1) #' } #' #' **Moment generating function (m.g.f)**: #' #' \deqn{ #' E(e^{tX}) = \frac{pe^t}{1 - (1-p)e^t} #' }{ #' E(e^(tX)) = pe^t / (1 - (1-p)e^t) #' } #' #' **Skewness**: #' #' \deqn{ #' \frac{2 - p}{\sqrt{1 - p}} #' }{ #' (2 - p) / sqrt(1 - p) #' } #' #' **Excess Kurtosis**: #' #' \deqn{ #' 6 + \frac{p^2}{1 - p} #' }{ #' 6 + p^2 / (1 - p) #' } #' #' @seealso [stats::Geometric] #' #' @examples #' dist <- dist_geometric(prob = c(0.2, 0.5, 0.8)) #' #' dist #' mean(dist) #' variance(dist) #' skewness(dist) #' kurtosis(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' @name dist_geometric #' @export dist_geometric <- function(prob){ prob <- vec_cast(prob, double()) if(any((prob < 0) | (prob > 1))){ abort("The prob parameter of an Geometric distribution must be between 0 and 1.") } new_dist(p = prob, class = "dist_geometric") } #' @export format.dist_geometric <- function(x, digits = 2, ...){ sprintf( "Geometric(%s)", format(x[["p"]], digits = digits, ...) ) } #' @export density.dist_geometric <- function(x, at, ...){ stats::dgeom(at, x[["p"]]) } #' @export log_density.dist_geometric <- function(x, at, ...){ stats::dgeom(at, x[["p"]], log = TRUE) } #' @export quantile.dist_geometric <- function(x, p, ...){ stats::qgeom(p, x[["p"]]) } #' @export cdf.dist_geometric <- function(x, q, ...){ stats::pgeom(q, x[["p"]]) } #' @export generate.dist_geometric <- function(x, times, ...){ stats::rgeom(times, x[["p"]]) } #' @export mean.dist_geometric <- function(x, ...){ 1/x[["p"]] - 1 } #' @export covariance.dist_geometric <- function(x, ...){ (1 - x[["p"]])/x[["p"]]^2 } #' @export skewness.dist_geometric <- function(x, ...) (2 - x[["p"]]) / sqrt(1 - x[["p"]]) #' @export kurtosis.dist_geometric <- function(x, ...) 6 + (x[["p"]]^2 / (1 - x[["p"]])) distributional/R/dist_gk.R0000644000176200001440000000612715131601302015254 0ustar liggesusers#' The g-and-k Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The g-and-k distribution is a flexible distribution often used to model univariate data. #' It is particularly known for its ability to handle skewness and heavy-tailed behavior. #' #' @inheritParams gk::dgk #' #' @details #' #' `r pkgdown_doc_link("dist_gk")` #' #' In the following, let \eqn{X} be a g-k random variable with parameters #' `A`, `B`, `g`, `k`, and `c`. #' #' **Support**: \eqn{(-\infty, \infty)} #' #' **Mean**: Not available in closed form. #' #' **Variance**: Not available in closed form. #' #' **Probability density function (p.d.f)**: #' #' The g-k distribution does not have a closed-form expression for its density. Instead, #' it is defined through its quantile function: #' #' \deqn{ #' Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) (1 + z(u)^2)^k z(u) #' }{ #' Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * (1 + z(u)^2)^k * z(u) #' } #' #' where \eqn{z(u) = \Phi^{-1}(u)}, the standard normal quantile of u. #' #' **Cumulative distribution function (c.d.f)**: #' #' The cumulative distribution function is typically evaluated numerically due to the lack #' of a closed-form expression. #' #' @seealso [gk::dgk], [distributional::dist_gh] #' #' @examples #' dist <- dist_gk(A = 0, B = 1, g = 0, k = 0.5) #' dist #' #' @examplesIf requireNamespace("gk", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_gk #' @export dist_gk <- function(A, B, g, k, c = 0.8){ A <- vec_cast(A, double()) B <- vec_cast(B, double()) g <- vec_cast(g, double()) k <- vec_cast(k, double()) c <- vec_cast(c, double()) if(any(B <= 0)){ abort("The B parameter (scale) of the gk distribution must be strictly positive.") } new_dist(A = A, B = B, g = g, k = k, c = c, class = "dist_gk") } #' @export format.dist_gk <- function(x, digits = 2, ...){ sprintf( "gk(A = %s, B = %s, g = %s, k = %s%s)", format(x[["A"]], digits = digits, ...), format(x[["B"]], digits = digits, ...), format(x[["g"]], digits = digits, ...), format(x[["k"]], digits = digits, ...), if (x[["c"]]==0.8) "" else paste0(", c = ", format(x[["c"]], digits = digits, ...)) ) } #' @export density.dist_gk <- function(x, at, ...){ require_package("gk") gk::dgk(at, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) } #' @export log_density.dist_gk <- function(x, at, ...){ require_package("gk") gk::dgk(at, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]], log = TRUE) } #' @export quantile.dist_gk <- function(x, p, ...){ require_package("gk") gk::qgk(p, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) } #' @export cdf.dist_gk <- function(x, q, ...){ require_package("gk") gk::pgk(q, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) } #' @export generate.dist_gk <- function(x, times, ...){ require_package("gk") gk::rgk(times, x[["A"]], x[["B"]], x[["g"]], x[["k"]], x[["c"]]) } distributional/R/dist_gh.R0000644000176200001440000000745715131550671015274 0ustar liggesusers#' The generalised g-and-h Distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' The generalised g-and-h distribution is a flexible distribution used to model univariate data, similar to the g-k distribution. #' It is known for its ability to handle skewness and heavy-tailed behavior. #' #' @inheritParams gk::dgh #' #' @details #' #' `r pkgdown_doc_link("dist_gh")` #' #' In the following, let \eqn{X} be a g-and-h random variable with parameters #' `A` = \eqn{A}, `B` = \eqn{B}, `g` = \eqn{g}, `h` = \eqn{h}, and `c` = \eqn{c}. #' #' **Support**: \eqn{(-\infty, \infty)}{R} #' #' **Mean**: Does not have a closed-form expression. Approximated numerically. #' #' **Variance**: Does not have a closed-form expression. Approximated numerically. #' #' **Probability density function (p.d.f)**: #' #' The g-and-h distribution does not have a closed-form expression for its density. #' The density is approximated numerically from the quantile function. #' The distribution is defined through its quantile function: #' #' \deqn{ #' Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) \exp(h z(u)^2/2) z(u) #' }{ #' Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * exp(h * z(u)^2/2) * z(u) #' } #' #' where \eqn{z(u) = \Phi^{-1}(u)} is the standard normal quantile function. #' #' **Cumulative distribution function (c.d.f)**: #' #' Does not have a closed-form expression. The cumulative distribution function is #' approximated numerically by inverting the quantile function. #' #' **Quantile function**: #' #' \deqn{ #' Q(p) = A + B \left( 1 + c \frac{1 - \exp(-g\Phi^{-1}(p))}{1 + \exp(-g\Phi^{-1}(p))} \right) \exp(h (\Phi^{-1}(p))^2/2) \Phi^{-1}(p) #' }{ #' Q(p) = A + B * (1 + c * ((1 - exp(-g * qnorm(p))) / (1 + exp(-g * qnorm(p))))) * exp(h * qnorm(p)^2/2) * qnorm(p) #' } #' #' where \eqn{\Phi^{-1}(p)} is the standard normal quantile function. #' #' @seealso [gk::dgh()], [gk::pgh()], [gk::qgh()], [gk::rgh()], [distributional::dist_gk()] #' #' @examples #' dist <- dist_gh(A = 0, B = 1, g = 0, h = 0.5) #' dist #' #' @examplesIf requireNamespace("gk", quietly = TRUE) #' mean(dist) #' variance(dist) #' support(dist) #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @name dist_gh #' @export dist_gh <- function(A, B, g, h, c = 0.8){ A <- vec_cast(A, double()) B <- vec_cast(B, double()) g <- vec_cast(g, double()) h <- vec_cast(h, double()) c <- vec_cast(c, double()) if(any(B <= 0)){ abort("The B parameter (scale) of the g-and-h distribution must be strictly positive.") } new_dist(A = A, B = B, g = g, h = h, c = c, class = "dist_gh") } #' @export format.dist_gh <- function(x, digits = 2, ...){ sprintf( "gh(A = %s, B = %s, g = %s, h = %s%s)", format(x[["A"]], digits = digits, ...), format(x[["B"]], digits = digits, ...), format(x[["g"]], digits = digits, ...), format(x[["h"]], digits = digits, ...), if (x[["c"]]==0.8) "" else paste0(", c = ", format(x[["c"]], digits = digits, ...)) ) } #' @export density.dist_gh <- function(x, at, ...){ require_package("gk") gk::dgh(at, x[["A"]], x[["B"]], x[["g"]], x[["h"]], x[["c"]]) } #' @export log_density.dist_gh <- function(x, at, ...){ require_package("gk") gk::dgh(at, x[["A"]], x[["B"]], x[["g"]], x[["h"]], x[["c"]], log = TRUE) } #' @export quantile.dist_gh <- function(x, p, ...){ require_package("gk") gk::qgh(p, x[["A"]], x[["B"]], x[["g"]], x[["h"]], x[["c"]]) } #' @export cdf.dist_gh <- function(x, q, ...){ require_package("gk") gk::pgh(q, x[["A"]], x[["B"]], x[["g"]], x[["h"]], x[["c"]]) } #' @export generate.dist_gh <- function(x, times, ...){ require_package("gk") gk::rgh(times, x[["A"]], x[["B"]], x[["g"]], x[["h"]], x[["c"]]) } distributional/R/dist_missing.R0000644000176200001440000000511115131575022016325 0ustar liggesusers#' Missing distribution #' #' @description #' `r lifecycle::badge('maturing')` #' #' A placeholder distribution for handling missing values in a vector of #' distributions. #' #' @param length The number of missing distributions #' #' @details #' #' `r pkgdown_doc_link("dist_missing")` #' #' The missing distribution represents the absence of distributional #' information. It is used as a placeholder when distribution values are #' not available or not applicable, similar to how `NA` is used for missing #' scalar values. #' #' **Support**: Undefined #' #' **Mean**: \eqn{\text{NA}} #' #' **Variance**: \eqn{\text{NA}} #' #' **Skewness**: \eqn{\text{NA}} #' #' **Kurtosis**: \eqn{\text{NA}} #' #' **Probability density function (p.d.f)**: Undefined #' #' \deqn{ #' f(x) = \text{NA} #' } #' #' **Cumulative distribution function (c.d.f)**: Undefined #' #' \deqn{ #' F(t) = \text{NA} #' } #' #' **Quantile function**: Undefined #' #' \deqn{ #' Q(p) = \text{NA} #' } #' #' **Moment generating function (m.g.f)**: Undefined #' #' \deqn{ #' E(e^{tX}) = \text{NA} #' } #' #' All statistical operations on missing distributions return `NA` values #' of appropriate length, propagating the missingness through calculations. #' #' @seealso [base::NA] #' #' @name dist_missing #' #' @examples #' dist <- dist_missing(3L) #' #' dist #' mean(dist) #' variance(dist) #' #' generate(dist, 10) #' #' density(dist, 2) #' density(dist, 2, log = TRUE) #' #' cdf(dist, 4) #' #' quantile(dist, 0.7) #' #' @export dist_missing <- function(length = 1) { vctrs::vec_rep(NA_dist_, length) } NA_dist_ <- structure(list(NULL), class = c("distribution", "vctrs_vctr", "list")) #' @export format.dist_na <- function(x, ...) { "NA" } #' @export density.dist_na <- function(x, at, ...){ rep_len(NA_real_, length(at)) } #' @export log_density.dist_na <- density.dist_na #' @export quantile.dist_na <- function(x, p, ...){ rep_len(NA_real_, length(p)) } #' @export log_quantile.dist_na <- quantile.dist_na #' @export cdf.dist_na <- function(x, q, ...){ rep_len(NA_real_, length(q)) } #' @export log_cdf.dist_na <- cdf.dist_na #' @export generate.dist_na <- function(x, times, ...){ rep(NA_real_, times) } #' @export mean.dist_na <- function(x, ...) NA_real_ #' @export covariance.dist_na <- function(x, ...) NA_real_ #' @export skewness.dist_na <- function(x, ...) NA_real_ #' @export kurtosis.dist_na <- function(x, ...) NA_real_ #' @export Math.dist_na <- function(x, ...) { x } #' @export Ops.dist_na <- function(e1, e2) { dist_missing(max(length(e1), length(e2))) } distributional/R/reexports.R0000644000176200001440000000007715112047012015662 0ustar liggesusers#' @importFrom generics generate #' @export generics::generate distributional/R/dist_hypergeometric.R0000644000176200001440000001017115131554443017710 0ustar liggesusers#' The Hypergeometric distribution #' #' @description #' `r lifecycle::badge('stable')` #' #' To understand the HyperGeometric distribution, consider a set of #' \eqn{r} objects, of which \eqn{m} are of the type I and #' \eqn{n} are of the type II. A sample with size \eqn{k} (\eqn{k` else `>=` d[which(at %less_than% limits[1] | at %greater_than% limits[2])] <- 0 } d } #' @export cdf.dist_transformed <- function(x, q, ...){ inv <- function(v) suppressWarnings(x[["inverse"]](v)) p <- cdf(x[["dist"]], inv(q), ...) # TODO - remove null dist check when dist_na is structured correctly (revdep temp fix) if(!is.null(x[["dist"]]) && !monotonic_increasing(x[["transform"]], support(x[["dist"]]))) p <- 1 - p # TODO: Rework for support of closed limits and prevent computation x_sup <- support(x) x_lim <- field(x_sup, "lim")[[1]] x_cls <- field(x_sup, "closed")[[1]] if (!any(is.na(x_lim))) { p[q <= x_lim[1] & !x_cls[1]] <- 0 p[q >= x_lim[2] & !x_cls[2]] <- 1 } p } #' @export quantile.dist_transformed <- function(x, p, ...){ # TODO - remove null dist check when dist_na is structured correctly (revdep temp fix) if(!is.null(x[["dist"]]) && !monotonic_increasing(x[["transform"]], support(x[["dist"]]))) p <- 1 - p x[["transform"]](quantile(x[["dist"]], p, ...)) } #' @export generate.dist_transformed <- function(x, ...){ x[["transform"]](generate(x[["dist"]], ...)) } #' @export mean.dist_transformed <- function(x, ...){ mu <- mean(x[["dist"]]) sigma2 <- variance(x[["dist"]]) if(is.na(sigma2)){ # warning("Could not compute the transformed distribution's mean as the base distribution's variance is unknown. The transformed distribution's median has been returned instead.") return(x[["transform"]](mu)) } drop( x[["transform"]](mu) + numDeriv::hessian(x[["transform"]], mu, method.args=list(d = 0.01))/2*sigma2 ) } #' @export covariance.dist_transformed <- function(x, ...){ mu <- mean(x[["dist"]]) sigma2 <- variance(x[["dist"]]) if(is.na(sigma2)) return(NA_real_) drop( numDeriv::jacobian(x[["transform"]], mu)^2*sigma2 + (numDeriv::hessian(x[["transform"]], mu, method.args=list(d = 0.01))*sigma2)^2/2 ) } #' @method Math dist_transformed #' @export Math.dist_transformed <- function(x, ...) { trans <- new_function(exprs(x = ), body = expr((!!sym(.Generic))((!!x$transform)(x), !!!dots_list(...)))) inverse_fun <- get_unary_inverse(.Generic) inverse <- new_function(exprs(x = ), body = expr((!!x$inverse)((!!inverse_fun)(x, !!!dots_list(...))))) vec_data(dist_transformed(wrap_dist(list(x[["dist"]])), trans, inverse))[[1]] } #' @method Ops dist_transformed #' @export Ops.dist_transformed <- function(e1, e2) { if(.Generic %in% c("-", "+") && missing(e2)){ e2 <- e1 e1 <- if(.Generic == "+") 1 else -1 .Generic <- "*" } is_dist <- c(inherits(e1, "dist_default"), inherits(e2, "dist_default")) trans <- if(all(is_dist)) { if(identical(e1$dist, e2$dist)){ new_function(exprs(x = ), expr((!!sym(.Generic))((!!e1$transform)(x), (!!e2$transform)(x)))) } else { stop(sprintf("The %s operation is not supported for <%s> and <%s>", .Generic, class(e1)[1], class(e2)[1])) } } else if(is_dist[1]){ new_function(exprs(x = ), body = expr((!!sym(.Generic))((!!e1$transform)(x), !!e2))) } else { new_function(exprs(x = ), body = expr((!!sym(.Generic))(!!e1, (!!e2$transform)(x)))) } inverse <- if(all(is_dist)) { invert_fail } else if(is_dist[1]){ inverse_fun <- get_binary_inverse_1(.Generic, e2) new_function(exprs(x = ), body = expr((!!e1$inverse)((!!inverse_fun)(x)))) } else { inverse_fun <- get_binary_inverse_2(.Generic, e1) new_function(exprs(x = ), body = expr((!!e2$inverse)((!!inverse_fun)(x)))) } vec_data(dist_transformed(wrap_dist(list(list(e1,e2)[[which(is_dist)[1]]][["dist"]])), trans, inverse))[[1]] } monotonic_increasing <- function(f, support) { # Shortcut for identity function (used widely in ggdist) if(!is.primitive(f) && identical(body(f), as.name(names(formals(f))))) { return(TRUE) } # Currently assumes (without checking, #9) monotonicity of f over the domain x <- f(field(support, "lim")[[1]]) isTRUE(x[[2L]] >= x[[1L]]) } distributional/NAMESPACE0000644000176200001440000004322215131627763014543 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("$",hilo) S3method("[[",distribution) S3method("dimnames<-",distribution) S3method("names<-",hilo) S3method(.DollarNames,hilo) S3method(Math,dist_default) S3method(Math,dist_lognormal) S3method(Math,dist_na) S3method(Math,dist_normal) S3method(Math,dist_sample) S3method(Math,dist_transformed) S3method(Ops,dist_default) S3method(Ops,dist_na) S3method(Ops,dist_normal) S3method(Ops,dist_sample) S3method(Ops,dist_transformed) S3method(cdf,dist_bernoulli) S3method(cdf,dist_beta) S3method(cdf,dist_binomial) S3method(cdf,dist_burr) S3method(cdf,dist_categorical) S3method(cdf,dist_cauchy) S3method(cdf,dist_chisq) S3method(cdf,dist_default) S3method(cdf,dist_degenerate) S3method(cdf,dist_exponential) S3method(cdf,dist_f) S3method(cdf,dist_gamma) S3method(cdf,dist_geometric) S3method(cdf,dist_gh) S3method(cdf,dist_gk) S3method(cdf,dist_gumbel) S3method(cdf,dist_hypergeometric) S3method(cdf,dist_inflated) S3method(cdf,dist_inverse_exponential) S3method(cdf,dist_inverse_gamma) S3method(cdf,dist_inverse_gaussian) S3method(cdf,dist_laplace) S3method(cdf,dist_logarithmic) S3method(cdf,dist_logistic) S3method(cdf,dist_lognormal) S3method(cdf,dist_mixture) S3method(cdf,dist_multinomial) S3method(cdf,dist_mvnorm) S3method(cdf,dist_mvt) S3method(cdf,dist_na) S3method(cdf,dist_negbin) S3method(cdf,dist_normal) S3method(cdf,dist_pareto) S3method(cdf,dist_percentile) S3method(cdf,dist_poisson) S3method(cdf,dist_poisson_inverse_gaussian) S3method(cdf,dist_sample) S3method(cdf,dist_student_t) S3method(cdf,dist_studentized_range) S3method(cdf,dist_transformed) S3method(cdf,dist_truncated) S3method(cdf,dist_uniform) S3method(cdf,dist_weibull) S3method(cdf,dist_wrap) S3method(cdf,distribution) S3method(covariance,default) S3method(covariance,dist_bernoulli) S3method(covariance,dist_beta) S3method(covariance,dist_binomial) S3method(covariance,dist_burr) S3method(covariance,dist_categorical) S3method(covariance,dist_cauchy) S3method(covariance,dist_chisq) S3method(covariance,dist_default) S3method(covariance,dist_degenerate) S3method(covariance,dist_exponential) S3method(covariance,dist_f) S3method(covariance,dist_gamma) S3method(covariance,dist_geometric) S3method(covariance,dist_gumbel) S3method(covariance,dist_hypergeometric) S3method(covariance,dist_inflated) S3method(covariance,dist_inverse_exponential) S3method(covariance,dist_inverse_gamma) S3method(covariance,dist_inverse_gaussian) S3method(covariance,dist_logarithmic) S3method(covariance,dist_logistic) S3method(covariance,dist_lognormal) S3method(covariance,dist_mixture) S3method(covariance,dist_multinomial) S3method(covariance,dist_mvnorm) S3method(covariance,dist_mvt) S3method(covariance,dist_na) S3method(covariance,dist_negbin) S3method(covariance,dist_normal) S3method(covariance,dist_pareto) S3method(covariance,dist_poisson) S3method(covariance,dist_poisson_inverse_gaussian) S3method(covariance,dist_sample) S3method(covariance,dist_student_t) S3method(covariance,dist_transformed) S3method(covariance,dist_uniform) S3method(covariance,dist_weibull) S3method(covariance,distribution) S3method(covariance,numeric) S3method(density,dist_bernoulli) S3method(density,dist_beta) S3method(density,dist_binomial) S3method(density,dist_burr) S3method(density,dist_categorical) S3method(density,dist_cauchy) S3method(density,dist_chisq) S3method(density,dist_default) S3method(density,dist_degenerate) S3method(density,dist_exponential) S3method(density,dist_f) S3method(density,dist_gamma) S3method(density,dist_geometric) S3method(density,dist_gh) S3method(density,dist_gk) S3method(density,dist_gumbel) S3method(density,dist_hypergeometric) S3method(density,dist_inflated) S3method(density,dist_inverse_exponential) S3method(density,dist_inverse_gamma) S3method(density,dist_inverse_gaussian) S3method(density,dist_laplace) S3method(density,dist_logarithmic) S3method(density,dist_logistic) S3method(density,dist_lognormal) S3method(density,dist_mixture) S3method(density,dist_multinomial) S3method(density,dist_mvnorm) S3method(density,dist_mvt) S3method(density,dist_na) S3method(density,dist_negbin) S3method(density,dist_normal) S3method(density,dist_pareto) S3method(density,dist_percentile) S3method(density,dist_poisson) S3method(density,dist_poisson_inverse_gaussian) S3method(density,dist_sample) S3method(density,dist_student_t) S3method(density,dist_transformed) S3method(density,dist_truncated) S3method(density,dist_uniform) S3method(density,dist_weibull) S3method(density,dist_wrap) S3method(density,distribution) S3method(dim,dist_default) S3method(dim,dist_mixture) S3method(dim,dist_multinomial) S3method(dim,dist_mvnorm) S3method(dim,dist_mvt) S3method(dimnames,distribution) S3method(distributional::cdf,dist_gev) S3method(distributional::cdf,dist_gpd) S3method(distributional::covariance,dist_gev) S3method(distributional::covariance,dist_gpd) S3method(distributional::generate,dist_gev) S3method(distributional::generate,dist_gpd) S3method(distributional::log_density,dist_gev) S3method(distributional::log_density,dist_gpd) S3method(family,dist_default) S3method(family,distribution) S3method(format,dist_bernoulli) S3method(format,dist_beta) S3method(format,dist_binomial) S3method(format,dist_burr) S3method(format,dist_categorical) S3method(format,dist_cauchy) S3method(format,dist_chisq) S3method(format,dist_default) S3method(format,dist_degenerate) S3method(format,dist_exponential) S3method(format,dist_f) S3method(format,dist_gamma) S3method(format,dist_geometric) S3method(format,dist_gev) S3method(format,dist_gh) S3method(format,dist_gk) S3method(format,dist_gpd) S3method(format,dist_gumbel) S3method(format,dist_hypergeometric) S3method(format,dist_inflated) S3method(format,dist_inverse_exponential) S3method(format,dist_inverse_gamma) S3method(format,dist_inverse_gaussian) S3method(format,dist_laplace) S3method(format,dist_logarithmic) S3method(format,dist_logistic) S3method(format,dist_lognormal) S3method(format,dist_mixture) S3method(format,dist_multinomial) S3method(format,dist_mvnorm) S3method(format,dist_mvt) S3method(format,dist_na) S3method(format,dist_negbin) S3method(format,dist_normal) S3method(format,dist_pareto) S3method(format,dist_percentile) S3method(format,dist_poisson) S3method(format,dist_poisson_inverse_gaussian) S3method(format,dist_sample) S3method(format,dist_student_t) S3method(format,dist_studentized_range) S3method(format,dist_transformed) S3method(format,dist_truncated) S3method(format,dist_uniform) S3method(format,dist_weibull) S3method(format,dist_wrap) S3method(format,distribution) S3method(format,hdr) S3method(format,hilo) S3method(format,pillar_distribution) S3method(format,support_region) S3method(generate,dist_bernoulli) S3method(generate,dist_beta) S3method(generate,dist_binomial) S3method(generate,dist_burr) S3method(generate,dist_categorical) S3method(generate,dist_cauchy) S3method(generate,dist_chisq) S3method(generate,dist_default) S3method(generate,dist_degenerate) S3method(generate,dist_exponential) S3method(generate,dist_f) S3method(generate,dist_gamma) S3method(generate,dist_geometric) S3method(generate,dist_gh) S3method(generate,dist_gk) S3method(generate,dist_gumbel) S3method(generate,dist_hypergeometric) S3method(generate,dist_inflated) S3method(generate,dist_inverse_exponential) S3method(generate,dist_inverse_gamma) S3method(generate,dist_inverse_gaussian) S3method(generate,dist_laplace) S3method(generate,dist_logarithmic) S3method(generate,dist_logistic) S3method(generate,dist_lognormal) S3method(generate,dist_mixture) S3method(generate,dist_multinomial) S3method(generate,dist_mvnorm) S3method(generate,dist_mvt) S3method(generate,dist_na) S3method(generate,dist_negbin) S3method(generate,dist_normal) S3method(generate,dist_pareto) S3method(generate,dist_percentile) S3method(generate,dist_poisson) S3method(generate,dist_poisson_inverse_gaussian) S3method(generate,dist_sample) S3method(generate,dist_student_t) S3method(generate,dist_transformed) S3method(generate,dist_uniform) S3method(generate,dist_weibull) S3method(generate,dist_wrap) S3method(generate,distribution) S3method(has_symmetry,dist_beta) S3method(has_symmetry,dist_binomial) S3method(has_symmetry,dist_cauchy) S3method(has_symmetry,dist_default) S3method(has_symmetry,dist_laplace) S3method(has_symmetry,dist_logistic) S3method(has_symmetry,dist_normal) S3method(has_symmetry,dist_student_t) S3method(has_symmetry,dist_uniform) S3method(has_symmetry,distribution) S3method(hdr,default) S3method(hdr,dist_default) S3method(hdr,distribution) S3method(hilo,default) S3method(hilo,dist_default) S3method(hilo,distribution) S3method(is.na,hilo) S3method(kurtosis,dist_bernoulli) S3method(kurtosis,dist_beta) S3method(kurtosis,dist_binomial) S3method(kurtosis,dist_categorical) S3method(kurtosis,dist_cauchy) S3method(kurtosis,dist_chisq) S3method(kurtosis,dist_degenerate) S3method(kurtosis,dist_exponential) S3method(kurtosis,dist_f) S3method(kurtosis,dist_gamma) S3method(kurtosis,dist_geometric) S3method(kurtosis,dist_gumbel) S3method(kurtosis,dist_hypergeometric) S3method(kurtosis,dist_laplace) S3method(kurtosis,dist_logistic) S3method(kurtosis,dist_lognormal) S3method(kurtosis,dist_na) S3method(kurtosis,dist_negbin) S3method(kurtosis,dist_normal) S3method(kurtosis,dist_poisson) S3method(kurtosis,dist_uniform) S3method(kurtosis,dist_weibull) S3method(kurtosis,distribution) S3method(likelihood,dist_default) S3method(likelihood,distribution) S3method(log_cdf,dist_default) S3method(log_cdf,dist_lognormal) S3method(log_cdf,dist_na) S3method(log_cdf,dist_normal) S3method(log_cdf,distribution) S3method(log_density,dist_bernoulli) S3method(log_density,dist_beta) S3method(log_density,dist_binomial) S3method(log_density,dist_burr) S3method(log_density,dist_cauchy) S3method(log_density,dist_chisq) S3method(log_density,dist_default) S3method(log_density,dist_exponential) S3method(log_density,dist_f) S3method(log_density,dist_gamma) S3method(log_density,dist_geometric) S3method(log_density,dist_gh) S3method(log_density,dist_gk) S3method(log_density,dist_gumbel) S3method(log_density,dist_hypergeometric) S3method(log_density,dist_inverse_exponential) S3method(log_density,dist_inverse_gamma) S3method(log_density,dist_inverse_gaussian) S3method(log_density,dist_laplace) S3method(log_density,dist_logarithmic) S3method(log_density,dist_logistic) S3method(log_density,dist_lognormal) S3method(log_density,dist_multinomial) S3method(log_density,dist_mvnorm) S3method(log_density,dist_mvt) S3method(log_density,dist_na) S3method(log_density,dist_negbin) S3method(log_density,dist_normal) S3method(log_density,dist_pareto) S3method(log_density,dist_poisson) S3method(log_density,dist_poisson_inverse_gaussian) S3method(log_density,dist_student_t) S3method(log_density,dist_uniform) S3method(log_density,dist_weibull) S3method(log_density,dist_wrap) S3method(log_density,distribution) S3method(log_likelihood,dist_default) S3method(log_likelihood,distribution) S3method(log_quantile,dist_default) S3method(log_quantile,dist_lognormal) S3method(log_quantile,dist_na) S3method(log_quantile,dist_normal) S3method(log_quantile,distribution) S3method(mean,dist_bernoulli) S3method(mean,dist_beta) S3method(mean,dist_binomial) S3method(mean,dist_burr) S3method(mean,dist_categorical) S3method(mean,dist_cauchy) S3method(mean,dist_chisq) S3method(mean,dist_default) S3method(mean,dist_degenerate) S3method(mean,dist_exponential) S3method(mean,dist_f) S3method(mean,dist_gamma) S3method(mean,dist_geometric) S3method(mean,dist_gev) S3method(mean,dist_gpd) S3method(mean,dist_gumbel) S3method(mean,dist_hypergeometric) S3method(mean,dist_inflated) S3method(mean,dist_inverse_exponential) S3method(mean,dist_inverse_gamma) S3method(mean,dist_inverse_gaussian) S3method(mean,dist_laplace) S3method(mean,dist_logarithmic) S3method(mean,dist_logistic) S3method(mean,dist_lognormal) S3method(mean,dist_mixture) S3method(mean,dist_multinomial) S3method(mean,dist_mvnorm) S3method(mean,dist_mvt) S3method(mean,dist_na) S3method(mean,dist_negbin) S3method(mean,dist_normal) S3method(mean,dist_pareto) S3method(mean,dist_percentile) S3method(mean,dist_poisson) S3method(mean,dist_poisson_inverse_gaussian) S3method(mean,dist_sample) S3method(mean,dist_student_t) S3method(mean,dist_transformed) S3method(mean,dist_truncated) S3method(mean,dist_uniform) S3method(mean,dist_weibull) S3method(mean,distribution) S3method(median,dist_default) S3method(median,dist_mvnorm) S3method(median,dist_sample) S3method(median,distribution) S3method(parameters,dist_default) S3method(parameters,dist_wrap) S3method(parameters,distribution) S3method(pillar_shaft,distribution) S3method(print,dist_default) S3method(quantile,dist_bernoulli) S3method(quantile,dist_beta) S3method(quantile,dist_binomial) S3method(quantile,dist_burr) S3method(quantile,dist_categorical) S3method(quantile,dist_cauchy) S3method(quantile,dist_chisq) S3method(quantile,dist_default) S3method(quantile,dist_degenerate) S3method(quantile,dist_exponential) S3method(quantile,dist_f) S3method(quantile,dist_gamma) S3method(quantile,dist_geometric) S3method(quantile,dist_gh) S3method(quantile,dist_gk) S3method(quantile,dist_gumbel) S3method(quantile,dist_hypergeometric) S3method(quantile,dist_inflated) S3method(quantile,dist_inverse_exponential) S3method(quantile,dist_inverse_gamma) S3method(quantile,dist_inverse_gaussian) S3method(quantile,dist_laplace) S3method(quantile,dist_logarithmic) S3method(quantile,dist_logistic) S3method(quantile,dist_lognormal) S3method(quantile,dist_mixture) S3method(quantile,dist_mvnorm) S3method(quantile,dist_mvt) S3method(quantile,dist_na) S3method(quantile,dist_negbin) S3method(quantile,dist_normal) S3method(quantile,dist_pareto) S3method(quantile,dist_percentile) S3method(quantile,dist_poisson) S3method(quantile,dist_poisson_inverse_gaussian) S3method(quantile,dist_sample) S3method(quantile,dist_student_t) S3method(quantile,dist_studentized_range) S3method(quantile,dist_transformed) S3method(quantile,dist_truncated) S3method(quantile,dist_uniform) S3method(quantile,dist_weibull) S3method(quantile,dist_wrap) S3method(quantile,distribution) S3method(skewness,dist_bernoulli) S3method(skewness,dist_beta) S3method(skewness,dist_binomial) S3method(skewness,dist_categorical) S3method(skewness,dist_cauchy) S3method(skewness,dist_chisq) S3method(skewness,dist_degenerate) S3method(skewness,dist_exponential) S3method(skewness,dist_f) S3method(skewness,dist_gamma) S3method(skewness,dist_geometric) S3method(skewness,dist_gumbel) S3method(skewness,dist_hypergeometric) S3method(skewness,dist_laplace) S3method(skewness,dist_logistic) S3method(skewness,dist_lognormal) S3method(skewness,dist_na) S3method(skewness,dist_negbin) S3method(skewness,dist_normal) S3method(skewness,dist_poisson) S3method(skewness,dist_sample) S3method(skewness,dist_uniform) S3method(skewness,dist_weibull) S3method(skewness,distribution) S3method(stats::density,dist_gev) S3method(stats::density,dist_gpd) S3method(stats::median,dist_gev) S3method(stats::median,dist_gpd) S3method(stats::quantile,dist_gev) S3method(stats::quantile,dist_gpd) S3method(sum,distribution) S3method(support,dist_categorical) S3method(support,dist_default) S3method(support,dist_gpd) S3method(support,dist_percentile) S3method(support,dist_sample) S3method(support,dist_transformed) S3method(support,distribution) S3method(variance,default) S3method(variance,dist_default) S3method(variance,dist_laplace) S3method(variance,distribution) S3method(variance,matrix) S3method(variance,numeric) S3method(vec_arith,distribution) S3method(vec_arith,hilo) S3method(vec_arith.distribution,default) S3method(vec_arith.numeric,distribution) S3method(vec_arith.numeric,hilo) S3method(vec_cast,character.distribution) S3method(vec_cast,character.hilo) S3method(vec_cast,distribution.distribution) S3method(vec_cast,distribution.double) S3method(vec_cast,distribution.integer) S3method(vec_math,distribution) S3method(vec_math,hilo) S3method(vec_ptype2,distribution.distribution) S3method(vec_ptype2,distribution.double) S3method(vec_ptype2,distribution.integer) S3method(vec_ptype2,double.distribution) S3method(vec_ptype2,hilo.hilo) S3method(vec_ptype2,integer.distribution) S3method(vec_ptype_abbr,distribution) S3method(vec_ptype_abbr,support_region) export(cdf) export(covariance) export(dist_bernoulli) export(dist_beta) export(dist_binomial) export(dist_burr) export(dist_categorical) export(dist_cauchy) export(dist_chisq) export(dist_degenerate) export(dist_exponential) export(dist_f) export(dist_gamma) export(dist_geometric) export(dist_gev) export(dist_gh) export(dist_gk) export(dist_gpd) export(dist_gumbel) export(dist_hypergeometric) export(dist_inflated) export(dist_inverse_exponential) export(dist_inverse_gamma) export(dist_inverse_gaussian) export(dist_laplace) export(dist_logarithmic) export(dist_logistic) export(dist_lognormal) export(dist_missing) export(dist_mixture) export(dist_multinomial) export(dist_multivariate_normal) export(dist_multivariate_t) export(dist_negative_binomial) export(dist_normal) export(dist_pareto) export(dist_percentile) export(dist_poisson) export(dist_poisson_inverse_gaussian) export(dist_sample) export(dist_student_t) export(dist_studentized_range) export(dist_transformed) export(dist_truncated) export(dist_uniform) export(dist_weibull) export(dist_wrap) export(generate) export(has_symmetry) export(hdr) export(hilo) export(is_distribution) export(is_hdr) export(is_hilo) export(kurtosis) export(likelihood) export(log_likelihood) export(new_dist) export(new_hdr) export(new_hilo) export(parameters) export(skewness) export(support) export(variance) import(rlang) import(vctrs) importFrom(generics,generate) importFrom(lifecycle,deprecate_soft) importFrom(lifecycle,deprecated) importFrom(pillar,get_max_extent) importFrom(pillar,new_ornament) importFrom(pillar,new_pillar_shaft) importFrom(pillar,pillar_shaft) importFrom(stats,density) importFrom(stats,family) importFrom(stats,median) importFrom(stats,quantile) importFrom(utils,.DollarNames) distributional/NEWS.md0000644000176200001440000003373115131656102014413 0ustar liggesusers# distributional 0.6.0 Extensively updated documentation for each distribution to include additional details about the statistics associated with each distribution. ## New features * Added `has_symmetry()` generic to identify if a distribution is symmetric. ### Probability distributions * Added `dist_laplace()` for the Laplace distribution. * Added `dist_multivariate_t()` for the multivariate t-distribution. ## Improvements * Extended documentation for many distributions to include detailed mathematical formulas for the statistics of each distribution (#141). * `hdr()` now produces exact results for symmetric distributions (#123). * `hdr()` uses more quantiles by default (512 -> 4096) for more accurate HDRs (#123). * Transformed distributions now work better for monotonic increasing functions when the domain of the untransformed distribution is R (#129). * Added `cdf()` and `quantile()` methods for dist_categorical() distributions with ordered factors (#136). * Added `support()` method for `dist_gpd()` distributions. * Added `cdf()` method for `dist_multinomial()` distributions. ## Bug fixes * Fixed `dist_gamma()` not allowing the `scale` parameter to be used (#132). * Fixed `dist_negative_binomial()` calculation of skewness and kurtosis for p!=0.5. * Fixed floating point issue that caused `dist_mixture()` to incorrectly throw an error when weights did sum to 1 (@statasaurus, #134) ## Breaking changes * The `type` argument for `dist_multivariate_normal()`'s `quantile()` method has been renamed to `kind` to avoid name clashes with `quantile.default()`. # distributional 0.5.0 ## New features * `support()` now shows whether the interval of support is open or closed (@venpopov, #97). * Added default method for `cdf()` which estimates the CDF using Monte Carlo integration (@robjhyndman, #122). ### Probability distributions * Added `dist_gk()` for g-and-k distributions. * Added `dist_gh()` for g-and-h distributions. * Added `dist_gev()` for the Generalised Extreme Value distribution and `dist_gpd()` for the Generalised Pareto distribution (@robjhyndman, #124). ## Improvements * `dist_mixture()` now displays the components of the mixture when the output width is sufficiently wide (@statasaurus, #112). * `generate()` now respects `dimnames()` for multivariate distributions. * `dist_mixture()` now supports multivariate distributions (@robjhyndman, #122). ## Bug fixes * Fixed error when using '-' as a unary operator on a distribution different from `dist_normal()` (@venpopov, #95) * Density for transformed distributions now correctly gives 0 instead of NaNs for values outside the support of the distribution (@venpopov, #97) * Fixed `quantile()` and `cdf()` for transformed distributions with monotonically decreasing transformations (#100). * Fixed multivariate `dist_sample()` methods not structuring multivariate results correctly as matrices. * The `cdf()` method for `dist_multivariate_normal()` now gives the P(X <= q) rather than P(X > q) for consistency with all other `cdf()` methods. * The `quantile()` method for `dist_multivariate_normal()` now correctly gives the boundaries when `p=0` or `p=1` when `type="equicoordinate"`. * The `quantile()` method for `dist_multivariate_normal()` now only square roots the diagonal elements when `type="marginal"`. # distributional 0.4.0 ## Breaking changes * All graphics related functionality has been removed from the package in favour of the ggdist (https://cran.r-project.org/package=ggdist) package. This breaking change was done to substantially reduce the package's dependencies, focusing the functionality on representing vectors of distributions. # distributional 0.3.2 Small patch to resolve issues with CRAN checks. ## Bug fixes * Fixed object structure resulting from transforming sample distributions (#81). * Improved reliability of `quantile()`. * Defined `cdf()` as Pr(X <= x), not Pr(X < x). * Fixed S3 generic argument name `p` for `log_quantile()`. # distributional 0.3.1 ## New features * Add Math and Ops methods for sample distribution, which applies the functions directly to the samples. * Added `mean` and `sd` as aliases for `mu` and `sigma` respectively in `dist_normal()` and `dist_student_t()` to match arguments of the stats package interface (#76). * Added `scale` argument for alternative specification for `dist_burr()` and `dist_gamma()`. ## Improvements * Generics introduced by this package now allow `na.rm` and other parameters to be passed to distribution methods, even if these parameters aren't used. The package no longer checks the usage of `...` with the `ellipsis` package, if you'd like to check that all `...` are used, you can write your own wrapping functions. * Lists of functions can now be used in `dist_transformed()`, allowing the transformation to differ for each distribution. * `covariance()` and other matrix output functions of multivariate distributions now name the result using the distribution's dimension names. * Improve handling of mixture distribution quantiles at boundaries {0,1}. ## Bug fixes * Fixed issue with computing multiple values from a univariate distribution with named dimensions (#79). # distributional 0.3.0 ## New features ### Probability distributions * Added `dist_categorical()` for the Categorical distribution. * Added `dist_lognormal()` for the log-normal distribution. Mathematical conversion shortcuts have also been added, so `exp(dist_normal())` produces `dist_lognormal()`. ### Generics * Added `parameters()` generic for obtaining the distribution's parameters. * Added `family()` for getting the distribution's family name. * Added `covariance()` to return the covariance of a distribution. * Added `support()` to identify the distribution's region of support (#8). * Added `log_likelihood()` for computing the log-likelihood of observing a sample from a distribution. ## Improvements * `variance()` now always returns a variance. It will not default to providing a covariance matrix for matrices. This also applies to multivariate distributions such as `dist_multivariate_normal()`. The covariance can now be obtained using the `covariance()` function. * `dist_wrap()` can now search for distribution functions in any environment, not just packages. If the `package` argument is `NULL`, it will search the calling environment for the functions. You can also provide a package name as before, and additionally an arbitrary environment to this argument. * `median()` methods will now ignore the `na.rm` option when it does not apply to that distribution type (#72). * `dist_sample()` now allows for missing values to be stored. Note that `density()`, `quantile()` and `cdf()` will remove these missing values by default. This behaviour can be changed with the `na.rm` argument. * `` objects now support non-numeric and multivariate distributions. `` vectors that have different bound types cannot be mixed (#74). * Improved performance of default methods of `mean()` and `variance()`, which no longer use sampling based means and variances for univariate continuous distributions (#71, @mjskay) * `dist_binomial()` distributions now return integers for `quantile()` and `generate()` methods. * Added conditional examples for distributions using functions from supported packages. ## Bug fixes * Fixed fallback `format()` function for distributions classes that have not defined this method (#67). ## Breaking changes * `variance()` on a `dist_multivariate_normal()` will now return the diagonal instead of the complete variance-covariance matrix. * `dist_bernoulli()` will now return logical values for `quantile()` and `generate()`. # distributional 0.2.2 ## New features * Added `is_distribution()` to identify if an object is a distribution. ## Improvements * Improved NA structure of distributions, allowing it to work with `is.na()` and `vctrs` vector resizing / filling functionality. * Added `as.character()` method, allowing datasets containing `hilo()` objects to be saved as a text file (#57). ## Bug fixes * Fixed issue with `hdr()` range `size` incorrectly being treated as `100-size`, giving 5% ranges for 95% sizes and vice-versa (#61). # distributional 0.2.1 A small performance and methods release. Some issues with truncated distributions have been fixed, and some more distribution methods have been added which improve performance of common tasks. ## New features ### Probability distributions * Added `dist_missing()` for representing unknown or missing (NA) distributions. ## Improvements * Documentation improvements. * Added `cdf()` method for `dist_sample()` which uses the emperical cdf. * `dist_mixture()` now preserves `dimnames()` if all distributions have the same `dimnames()`. * Added `density()` and `generate()` methods for sample distributions. * Added `skewness()` method for `dist_sample()`. * Improved performance for truncated Normal and sample distributions (#49). * Improved vectorisation of distribution methods. ## Bug fixes * Fixed issue with computing the median of `dist_truncated()` distributions. * Fixed format method for `dist_truncated()` distributions with no upper or lower limit. * Fixed issue with naming objects giving an invalid structure. It now gives an informative error (#23). * Fixed documentation for Negative Binomial distribution (#46). # distributional 0.2.0 ## New features ### Probability distributions * Added `dist_wrap()` for wrapping distributions not yet added in the package. ### Methods * Added `likelihood()` for computing the likelihood of observing a sample from a distribution. * Added `skewness()` for computing the skewness of a distribution. * Added `kurtosis()` for computing the kurtosis of a distribution. * The `density()`, `cdf()` and `quantile()` methods now accept a `log` argument which will use/return probabilities as log probabilities. ## Improvements * Improved documentation for most distributions to include equations for the region of support, summary statistics, density functions and moments. This is the work of @alexpghayes in the `distributions3` package. * Documentation improvements * Added support for displaying distributions with `View()`. * `hilo()` intervals can no longer be added to other intervals, as this is a common mistake when aggregating forecasts. * Incremented `d` for `numDeriv::hessian()` when computing mean and variance of transformed distributions. ## Deprecated features * Graphics functionality provided by `autoplot.distribution()` is now deprecated in favour of using the `ggdist` package. The `ggdist` package allows distributions produced by distributional to be used directly with ggplot2 as aesthetics. # distributional 0.1.0 First release. ## New features ### Object classes * `distribution`: Distributions are represented in a vectorised format using the [vctrs](https://cran.r-project.org/package=vctrs) package. This makes distributions suitable for inclusion in model prediction output. A `distribution` is a container for distribution-specific S3 classes. * `hilo`: Intervals are also stored in a vector. A `hilo` consists of a `lower` bound, `upper` bound, and confidence `level`. Each numerical element can be extracted using `$`, for example my_hilo$lower to obtain the lower bounds. * `hdr`: Highest density regions are currently stored as lists of `hilo` values. This is an experimental feature, and is likely to be expanded upon in an upcoming release. ### Generic functions Values of interest can be computed from the distribution using generic functions. The first release provides 9 functions for interacting with distributions: * `density()`: The probability density/mass function (equivalent to `d...()`). * `cdf()`: The cumulative distribution function (equivalent to `p...()`). * `generate()`: Random generation from the distribution (equivalent to `r...()`). * `quantile()`: Compute quantiles of the distribution (equivalent to `q...()`). * `hilo()`: Compute probability intervals of probability distribution(s). * `hdr()`: Compute highest density regions of probability distribution(s). * `mean()`: Obtain the mean(s) of probability distribution(s). * `median()`: Obtain the median(s) of probability distribution(s). * `variance()`: Obtain the variance(s) of probability distribution(s). ### Graphics * Added an `autoplot()` method for visualising the probability density function ([`density()`]) or cumulative distribution function ([`cdf()`]) of one or more distribution. * Added `geom_hilo_ribbon()` and `geom_hilo_linerange()` geometries for ggplot2. These geoms allow uncertainty to be shown graphically with `hilo()` intervals. ### Probability distributions * Added 20 continuous probability distributions: `dist_beta()`, `dist_burr()`, `dist_cauchy()`, `dist_chisq()`, `dist_exponential()`, `dist_f()`, `dist_gamma()`, `dist_gumbel()`, `dist_hypergeometric()`, `dist_inverse_exponential()`, `dist_inverse_gamma()`, `dist_inverse_gaussian()`, `dist_logistic()`, `dist_multivariate_normal()`, `dist_normal()`, `dist_pareto()`, `dist_student_t()`, `dist_studentized_range()`, `dist_uniform()`, `dist_weibull()` * Added 8 discrete probability distributions: `dist_bernoulli()`, `dist_binomial()`, `dist_geometric()`, `dist_logarithmic()`, `dist_multinomial()`, `dist_negative_binomial()`, `dist_poisson()`, `dist_poisson_inverse_gaussian()` * Added 3 miscellaneous probability distributions: `dist_degenerate()`, `dist_percentile()`, `dist_sample()` ### Distribution modifiers * Added `dist_inflated()` which inflates a specific value of a distribution by a given probability. This can be used to produce zero-inflated distributions. * Added `dist_transformed()` for transforming distributions. This can be used to produce log distributions such as logNormal: `dist_transformed(dist_normal(), transform = exp, inverse = log)` * Added `dist_mixture()` for producing weighted mixtures of distributions. * Added `dist_truncated()` to impose boundaries on a distribution's domain via truncation. distributional/README.md0000644000176200001440000001276415131610176014600 0ustar liggesusers # distributional [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![R-CMD-check](https://github.com/mitchelloharawild/distributional/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mitchelloharawild/distributional/actions/workflows/R-CMD-check.yaml) [![CRAN status](https://www.r-pkg.org/badges/version/distributional)](https://CRAN.R-project.org/package=distributional) ![Download count](https://cranlogs.r-pkg.org/badges/last-month/distributional) The distributional package allows distributions to be used in a vectorised context. It provides methods which are minimal wrappers to the standard d, p, q, and r distribution functions which are applied to each distribution in the vector. Additional distributional statistics can be computed, including the `mean()`, `median()`, `variance()`, and intervals with `hilo()`. The distributional nature of a model’s predictions is often understated, with default output of prediction methods usually only producing point predictions. Some R packages (such as [forecast](https://CRAN.R-project.org/package=forecast)) further emphasise uncertainty by producing point forecasts and intervals by default, however the user’s ability to interact with them is limited. This package vectorises distributions and provides methods for working with them, making distributions compatible with prediction outputs of modelling functions. These vectorised distributions can be illustrated with [ggplot2](https://CRAN.R-project.org/package=ggplot2) using the [ggdist](https://CRAN.R-project.org/package=ggdist) package, providing further opportunity to visualise the uncertainty of predictions and teach distributional theory. ## Installation You can install the released version of distributional from [CRAN](https://CRAN.R-project.org/package=distributional) with: ``` r install.packages("distributional") ``` The development version can be installed from [GitHub](https://github.com/mitchelloharawild/distributional) with: ``` r # install.packages("remotes") remotes::install_github("mitchelloharawild/distributional") ``` ## Examples Distributions are created using `dist_*()` functions. A list of included distribution shapes can be found here: ``` r library(distributional) my_dist <- c(dist_normal(mu = 0, sigma = 1), dist_student_t(df = 10)) my_dist #> #> [1] N(0, 1) t(10, 0, 1) ``` The standard four distribution functions in R are usable via these generics: ``` r density(my_dist, 0) # c(dnorm(0, mean = 0, sd = 1), dt(0, df = 10)) #> [1] 0.3989423 0.3891084 cdf(my_dist, 5) # c(pnorm(5, mean = 0, sd = 1), pt(5, df = 10)) #> [1] 0.9999997 0.9997313 quantile(my_dist, 0.1) # c(qnorm(0.1, mean = 0, sd = 1), qt(0.1, df = 10)) #> [1] -1.281552 -1.372184 generate(my_dist, 10) # list(rnorm(10, mean = 0, sd = 1), rt(10, df = 10)) #> [[1]] #> [1] 1.262954285 -0.326233361 1.329799263 1.272429321 0.414641434 #> [6] -1.539950042 -0.928567035 -0.294720447 -0.005767173 2.404653389 #> #> [[2]] #> [1] 0.99165484 -1.36999677 -0.40943004 -0.85261144 -1.37728388 0.81020460 #> [7] -1.82965813 -0.06142032 -1.33933588 -0.28491414 ``` You can also compute intervals using `hilo()` ``` r hilo(my_dist, 0.95) #> #> [1] [-0.01190677, 0.01190677]0.95 [-0.01220773, 0.01220773]0.95 ``` Additionally, some distributions may support other methods such as mathematical operations and summary measures. If the methods aren’t supported, a transformed distribution will be created. ``` r my_dist #> #> [1] N(0, 1) t(10, 0, 1) my_dist*3 + 2 #> #> [1] N(2, 9) t(t(10, 0, 1)) mean(my_dist) #> [1] 0 0 variance(my_dist) #> [1] 1.00 1.25 ``` You can also visualise the distribution(s) using the [ggdist](https://mjskay.github.io/ggdist/) package. ``` r library(ggdist) library(ggplot2) df <- data.frame( name = c("Gamma(2,1)", "Normal(5,1)", "Mixture"), dist = c(dist_gamma(2,1), dist_normal(5,1), dist_mixture(dist_gamma(2,1), dist_normal(5, 1), weights = c(0.4, 0.6))) ) ggplot(df, aes(y = factor(name, levels = rev(name)))) + stat_dist_halfeye(aes(dist = dist)) + labs(title = "Density function for a mixture of distributions", y = NULL, x = NULL) ``` ## Related work There are several packages which unify interfaces for distributions in R: - stats provides functions to work with possibly multiple distributions (comparisons made below). - [distributions3](https://cran.r-project.org/package=distributions3) represents singular distributions using S3, with particularly nice documentation. - [distr](https://cran.r-project.org/package=distr) represents singular distributions using S4. - [distr6](https://cran.r-project.org/package=distr6) represents singular distributions using R6. - Many more in the [CRAN task view](https://cran.r-project.org/view=Distributions) This package differs from the above libraries by storing the distributions in a vectorised format. It does this using [vctrs](https://vctrs.r-lib.org/), so it should play nicely with the tidyverse (try putting distributions into a tibble!). distributional/man/0000755000176200001440000000000015131547234014066 5ustar liggesusersdistributional/man/dist_cauchy.Rd0000644000176200001440000000365015131604123016647 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_cauchy.R \name{dist_cauchy} \alias{dist_cauchy} \title{The Cauchy distribution} \usage{ dist_cauchy(location, scale) } \arguments{ \item{location, scale}{location and scale parameters.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Cauchy distribution is the student's t distribution with one degree of freedom. The Cauchy distribution does not have a well defined mean or variance. Cauchy distributions often appear as priors in Bayesian contexts due to their heavy tails. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_cauchy.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_cauchy.html} In the following, let \eqn{X} be a Cauchy variable with mean \verb{location =} \eqn{x_0} and \code{scale} = \eqn{\gamma}. \strong{Support}: \eqn{R}, the set of all real numbers \strong{Mean}: Undefined. \strong{Variance}: Undefined. \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{\pi \gamma \left[1 + \left(\frac{x - x_0}{\gamma} \right)^2 \right]} }{ f(x) = 1 / (\pi \gamma (1 + ((x - x_0) / \gamma)^2) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(t) = \frac{1}{\pi} \arctan \left( \frac{t - x_0}{\gamma} \right) + \frac{1}{2} }{ F(t) = arctan((t - x_0) / \gamma) / \pi + 1/2 } \strong{Moment generating function (m.g.f)}: Does not exist. } \examples{ dist <- dist_cauchy(location = c(0, 0, 0, -2), scale = c(0.5, 1, 2, 1)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Cauchy]{stats::Cauchy} } distributional/man/dist_normal.Rd0000644000176200001440000000601115131560632016663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_normal.R \name{dist_normal} \alias{dist_normal} \title{The Normal distribution} \usage{ dist_normal(mu = 0, sigma = 1, mean = mu, sd = sigma) } \arguments{ \item{mu, mean}{The mean (location parameter) of the distribution, which is also the mean of the distribution. Can be any real number.} \item{sigma, sd}{The standard deviation (scale parameter) of the distribution. Can be any positive number. If you would like a Normal distribution with \strong{variance} \eqn{\sigma^2}, be sure to take the square root, as this is a common source of errors.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Normal distribution is ubiquitous in statistics, partially because of the central limit theorem, which states that sums of i.i.d. random variables eventually become Normal. Linear transformations of Normal random variables result in new random variables that are also Normal. If you are taking an intro stats course, you'll likely use the Normal distribution for Z-tests and in simple linear regression. Under regularity conditions, maximum likelihood estimators are asymptotically Normal. The Normal distribution is also called the gaussian distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_normal.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_normal.html} In the following, let \eqn{X} be a Normal random variable with mean \code{mu} = \eqn{\mu} and standard deviation \code{sigma} = \eqn{\sigma}. \strong{Support}: \eqn{R}, the set of all real numbers \strong{Mean}: \eqn{\mu} \strong{Variance}: \eqn{\sigma^2} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{\sqrt{2 \pi \sigma^2}} e^{-(x - \mu)^2 / 2 \sigma^2} }{ f(x) = 1 / sqrt(2 \pi \sigma^2) exp(-(x - \mu)^2 / (2 \sigma^2)) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(t) = \int_{-\infty}^t \frac{1}{\sqrt{2 \pi \sigma^2}} e^{-(x - \mu)^2 / 2 \sigma^2} dx }{ F(t) = integral_{-\infty}^t 1 / sqrt(2 \pi \sigma^2) exp(-(x - \mu)^2 / (2 \sigma^2)) dx } This integral does not have a closed form solution and is approximated numerically. The c.d.f. of a standard Normal is sometimes called the "error function". The notation \eqn{\Phi(t)} also stands for the c.d.f. of a standard Normal evaluated at \eqn{t}. Z-tables list the value of \eqn{\Phi(t)} for various \eqn{t}. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = e^{\mu t + \sigma^2 t^2 / 2} }{ E(e^(tX)) = e^(\mu t + \sigma^2 t^2 / 2) } } \examples{ dist <- dist_normal(mu = 1:5, sigma = 3) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Normal]{stats::Normal} } distributional/man/hilo.distribution.Rd0000644000176200001440000000156515112047012020021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{hilo.distribution} \alias{hilo.distribution} \title{Probability intervals of a probability distribution} \usage{ \method{hilo}{distribution}(x, size = 95, ...) } \arguments{ \item{x}{The distribution(s).} \item{size}{The size of the interval (between 0 and 100).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Returns a \code{hilo} central probability interval with probability coverage of \code{size}. By default, the distribution's \code{\link[=quantile]{quantile()}} will be used to compute the lower and upper bound for a centered interval } \seealso{ \code{\link[=hdr.distribution]{hdr.distribution()}} } distributional/man/median.distribution.Rd0000644000176200001440000000131215112047012020311 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{median.distribution} \alias{median.distribution} \title{Median of a probability distribution} \usage{ \method{median}{distribution}(x, na.rm = FALSE, ...) } \arguments{ \item{x}{The distribution(s).} \item{na.rm}{Unused, included for consistency with the generic function.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Returns the median (50th percentile) of a probability distribution. This is equivalent to \code{quantile(x, p=0.5)}. } distributional/man/dist_inverse_gaussian.Rd0000644000176200001440000000541615131557626020761 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_inverse_gaussian.R \name{dist_inverse_gaussian} \alias{dist_inverse_gaussian} \title{The Inverse Gaussian distribution} \usage{ dist_inverse_gaussian(mean, shape) } \arguments{ \item{mean, shape}{parameters. Must be strictly positive. Infinite values are supported.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } \details{ The inverse Gaussian distribution (also known as the Wald distribution) is commonly used to model positive-valued data, particularly in contexts involving first passage times and reliability analysis. We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_gaussian.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_gaussian.html} In the following, let \eqn{X} be an Inverse Gaussian random variable with parameters \code{mean} = \eqn{\mu} and \code{shape} = \eqn{\lambda}. \strong{Support}: \eqn{(0, \infty)} \strong{Mean}: \eqn{\mu} \strong{Variance}: \eqn{\frac{\mu^3}{\lambda}} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \sqrt{\frac{\lambda}{2\pi x^3}} \exp\left(-\frac{\lambda(x - \mu)^2}{2\mu^2 x}\right) }{ f(x) = sqrt(\lambda / (2 \pi x^3)) exp(-\lambda (x - \mu)^2 / (2 \mu^2 x)) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \Phi\left(\sqrt{\frac{\lambda}{x}} \left(\frac{x}{\mu} - 1\right)\right) + \exp\left(\frac{2\lambda}{\mu}\right) \Phi\left(-\sqrt{\frac{\lambda}{x}} \left(\frac{x}{\mu} + 1\right)\right) }{ F(x) = \Phi(sqrt(\lambda/x) (x/\mu - 1)) + exp(2\lambda/\mu) \Phi(-sqrt(\lambda/x) (x/\mu + 1)) } where \eqn{\Phi} is the standard normal c.d.f. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \exp\left(\frac{\lambda}{\mu} \left(1 - \sqrt{1 - \frac{2\mu^2 t}{\lambda}}\right)\right) }{ E(e^(tX)) = exp((\lambda/\mu) (1 - sqrt(1 - 2\mu^2 t/\lambda))) } for \eqn{t < \frac{\lambda}{2\mu^2}}. \strong{Skewness}: \eqn{3\sqrt{\frac{\mu}{\lambda}}} \strong{Excess Kurtosis}: \eqn{\frac{15\mu}{\lambda}} \strong{Quantiles}: No closed-form expression, approximated numerically. } \examples{ dist <- dist_inverse_gaussian(mean = c(1,1,1,3,3), shape = c(0.2, 1, 3, 0.2, 1)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:InverseGaussian]{actuar::InverseGaussian} } distributional/man/dist_degenerate.Rd0000644000176200001440000000372715131575653017522 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_degenerate.R \name{dist_degenerate} \alias{dist_degenerate} \title{The degenerate distribution} \usage{ dist_degenerate(x) } \arguments{ \item{x}{The value of the distribution (location parameter). Can be any real number.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The degenerate distribution takes a single value which is certain to be observed. It takes a single parameter, which is the value that is observed by the distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_degenerate.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_degenerate.html} In the following, let \eqn{X} be a degenerate random variable with value \code{x} = \eqn{k_0}. \strong{Support}: \eqn{\{k_0\}}, a single point \strong{Mean}: \eqn{\mu = k_0} \strong{Variance}: \eqn{\sigma^2 = 0} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = 1 \textrm{ for } x = k_0 }{ f(x) = 1 for x = k_0 } \deqn{ f(x) = 0 \textrm{ for } x \neq k_0 }{ f(x) = 0 for x != k_0 } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(t) = 0 \textrm{ for } t < k_0 }{ F(t) = 0 for t < k_0 } \deqn{ F(t) = 1 \textrm{ for } t \ge k_0 }{ F(t) = 1 for t >= k_0 } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = e^{k_0 t} }{ E(e^(tX)) = e^(k_0 t) } \strong{Skewness}: Undefined (NA) \strong{Excess Kurtosis}: Undefined (NA) } \examples{ dist <- dist_degenerate(x = 1:5) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Distributions]{stats::Distributions} } distributional/man/likelihood.Rd0000644000176200001440000000140215112047012016461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{likelihood} \alias{likelihood} \alias{likelihood.distribution} \alias{log_likelihood} \title{The (log) likelihood of a sample matching a distribution} \usage{ likelihood(x, ...) \method{likelihood}{distribution}(x, sample, ..., log = FALSE) log_likelihood(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} \item{sample}{A list of sampled values to compare to distribution(s).} \item{log}{If \code{TRUE}, the log-likelihood will be computed.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } distributional/man/dist_f.Rd0000644000176200001440000001065115131604123015617 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_f.R \name{dist_f} \alias{dist_f} \title{The F Distribution} \usage{ dist_f(df1, df2, ncp = NULL) } \arguments{ \item{df1}{Degrees of freedom for the numerator. Can be any positive number.} \item{df2}{Degrees of freedom for the denominator. Can be any positive number.} \item{ncp}{Non-centrality parameter. If \code{NULL} (default), the central F distribution is used. If specified, must be non-negative.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The F distribution is commonly used in statistical inference, particularly in the analysis of variance (ANOVA), testing the equality of variances, and in regression analysis. It arises as the ratio of two scaled chi-squared distributions divided by their respective degrees of freedom. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_f.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_f.html} In the following, let \eqn{X} be an F random variable with numerator degrees of freedom \code{df1} = \eqn{d_1} and denominator degrees of freedom \code{df2} = \eqn{d_2}. \strong{Support}: \eqn{x \in (0, \infty)} \strong{Mean}: For the central F distribution (\code{ncp = NULL}): \deqn{ E(X) = \frac{d_2}{d_2 - 2} }{ E(X) = d_2 / (d_2 - 2) } for \eqn{d_2 > 2}, otherwise undefined. For the non-central F distribution with non-centrality parameter \code{ncp} = \eqn{\lambda}: \deqn{ E(X) = \frac{d_2 (d_1 + \lambda)}{d_1 (d_2 - 2)} }{ E(X) = d_2 (d_1 + \lambda) / (d_1 (d_2 - 2)) } for \eqn{d_2 > 2}, otherwise undefined. \strong{Variance}: For the central F distribution (\code{ncp = NULL}): \deqn{ \text{Var}(X) = \frac{2 d_2^2 (d_1 + d_2 - 2)}{d_1 (d_2 - 2)^2 (d_2 - 4)} }{ Var(X) = 2 d_2^2 (d_1 + d_2 - 2) / (d_1 (d_2 - 2)^2 (d_2 - 4)) } for \eqn{d_2 > 4}, otherwise undefined. For the non-central F distribution with non-centrality parameter \code{ncp} = \eqn{\lambda}: \deqn{ \text{Var}(X) = \frac{2 d_2^2}{d_1^2} \cdot \frac{(d_1 + \lambda)^2 + (d_1 + 2\lambda)(d_2 - 2)}{(d_2 - 2)^2 (d_2 - 4)} }{ Var(X) = 2 d_2^2 / d_1^2 * ((d_1 + lambda)^2 + (d_1 + 2*lambda)(d_2 - 2)) / ((d_2 - 2)^2 (d_2 - 4)) } for \eqn{d_2 > 4}, otherwise undefined. \strong{Skewness}: For the central F distribution (\code{ncp = NULL}): \deqn{ \text{Skew}(X) = \frac{(2 d_1 + d_2 - 2) \sqrt{8 (d_2 - 4)}}{(d_2 - 6) \sqrt{d_1 (d_1 + d_2 - 2)}} }{ Skew(X) = (2 d_1 + d_2 - 2) sqrt(8 (d_2 - 4)) / ((d_2 - 6) sqrt(d_1 (d_1 + d_2 - 2))) } for \eqn{d_2 > 6}, otherwise undefined. For the non-central F distribution, skewness has no simple closed form and is not computed. \strong{Excess Kurtosis}: For the central F distribution (\code{ncp = NULL}): \deqn{ \text{Kurt}(X) = \frac{12[d_1 (5 d_2 - 22)(d_1 + d_2 - 2) + (d_2 - 4)(d_2 - 2)^2]}{d_1 (d_2 - 6)(d_2 - 8)(d_1 + d_2 - 2)} }{ Kurt(X) = 12[d_1 (5 d_2 - 22)(d_1 + d_2 - 2) + (d_2 - 4)(d_2 - 2)^2] / (d_1 (d_2 - 6)(d_2 - 8)(d_1 + d_2 - 2)) } for \eqn{d_2 > 8}, otherwise undefined. For the non-central F distribution, kurtosis has no simple closed form and is not computed. \strong{Probability density function (p.d.f)}: For the central F distribution (\code{ncp = NULL}): \deqn{ f(x) = \frac{\sqrt{\frac{(d_1 x)^{d_1} d_2^{d_2}}{(d_1 x + d_2)^{d_1 + d_2}}}}{x \, B(d_1/2, d_2/2)} }{ f(x) = sqrt((d_1 x)^d_1 d_2^d_2 / (d_1 x + d_2)^(d_1 + d_2)) / (x B(d_1/2, d_2/2)) } where \eqn{B(\cdot, \cdot)} is the beta function. For the non-central F distribution, the density involves an infinite series and is approximated numerically. \strong{Cumulative distribution function (c.d.f)}: The c.d.f. does not have a simple closed form expression and is approximated numerically using regularized incomplete beta functions and related special functions. \strong{Moment generating function (m.g.f)}: The moment generating function for the F distribution does not exist in general (it diverges for \eqn{t > 0}). } \examples{ dist <- dist_f(df1 = c(1,2,5,10,100), df2 = c(1,1,2,1,100)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Fdist]{stats::FDist} } distributional/man/dist_inverse_exponential.Rd0000644000176200001440000000373515131557626021477 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_inverse_exponential.R \name{dist_inverse_exponential} \alias{dist_inverse_exponential} \title{The Inverse Exponential distribution} \usage{ dist_inverse_exponential(rate) } \arguments{ \item{rate}{an alternative way to specify the scale.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Inverse Exponential distribution is used to model the reciprocal of exponentially distributed variables. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_exponential.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_exponential.html} In the following, let \eqn{X} be an Inverse Exponential random variable with parameter \code{rate} = \eqn{\lambda}. \strong{Support}: \eqn{x > 0} \strong{Mean}: Does not exist, returns NA \strong{Variance}: Does not exist, returns NA \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{\lambda}{x^2} e^{-\lambda/x} }{ f(x) = \lambda / x^2 * e^(-\lambda/x) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = e^{-\lambda/x} }{ F(x) = e^(-\lambda/x) } \strong{Quantile function (inverse c.d.f)}: \deqn{ F^{-1}(p) = -\frac{\lambda}{\log(p)} }{ F^(-1)(p) = -\lambda / log(p) } \strong{Moment generating function (m.g.f)}: Does not exist (divergent integral). } \examples{ dist <- dist_inverse_exponential(rate = 1:5) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:InverseExponential]{actuar::InverseExponential} } distributional/man/new_hilo.Rd0000644000176200001440000000137315112047012016151 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hilo.R \name{new_hilo} \alias{new_hilo} \title{Construct hilo intervals} \usage{ new_hilo(lower = double(), upper = double(), size = double()) } \arguments{ \item{lower, upper}{A numeric vector of values for lower and upper limits.} \item{size}{Size of the interval between [0, 100].} } \value{ A "hilo" vector } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Class constructor function to help with manually creating hilo interval objects. } \examples{ new_hilo(lower = rnorm(10), upper = rnorm(10) + 5, size = 95) } \author{ Earo Wang & Mitchell O'Hara-Wild } distributional/man/hdr.Rd0000644000176200001440000000060615112047012015120 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hdr.R \name{hdr} \alias{hdr} \title{Compute highest density regions} \usage{ hdr(x, ...) } \arguments{ \item{x}{Object to create hilo from.} \item{...}{Additional arguments used by methods.} } \description{ Used to extract a specified prediction interval at a particular confidence level from a distribution. } distributional/man/variance.Rd0000644000176200001440000000227215112047012016134 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{variance} \alias{variance} \alias{variance.numeric} \alias{variance.matrix} \alias{covariance.numeric} \title{Variance} \usage{ variance(x, ...) \method{variance}{numeric}(x, ...) \method{variance}{matrix}(x, ...) \method{covariance}{numeric}(x, ...) } \arguments{ \item{x}{An object.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} A generic function for computing the variance of an object. } \details{ The implementation of \code{variance()} for numeric variables coerces the input to a vector then uses \code{\link[stats:cor]{stats::var()}} to compute the variance. This means that, unlike \code{\link[stats:cor]{stats::var()}}, if \code{variance()} is passed a matrix or a 2-dimensional array, it will still return the variance (\code{\link[stats:cor]{stats::var()}} returns the covariance matrix in that case). } \seealso{ \code{\link[=variance.distribution]{variance.distribution()}}, \code{\link[=covariance]{covariance()}} } distributional/man/has_symmetry.Rd0000644000176200001440000000171315131030222017063 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{has_symmetry} \alias{has_symmetry} \title{Check if a distribution is symmetric} \usage{ has_symmetry(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \value{ A logical value indicating whether the distribution is symmetric. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Determines whether a probability distribution is symmetric around its center. } \examples{ # Normal distribution is symmetric has_symmetry(dist_normal(mu = 0, sigma = 1)) has_symmetry(dist_normal(mu = 5, sigma = 2)) # Beta distribution symmetry depends on parameters has_symmetry(dist_beta(shape1 = 2, shape2 = 2)) # symmetric has_symmetry(dist_beta(shape1 = 2, shape2 = 5)) # not symmetric } distributional/man/dist_inflated.Rd0000644000176200001440000000604415131577024017172 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_inflated.R \name{dist_inflated} \alias{dist_inflated} \title{Inflate a value of a probability distribution} \usage{ dist_inflated(dist, prob, x = 0) } \arguments{ \item{dist}{The distribution(s) to inflate.} \item{prob}{The added probability of observing \code{x}.} \item{x}{The value to inflate. The default of \code{x = 0} is for zero-inflation.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Inflated distributions add extra probability mass at a specific value, most commonly zero (zero-inflation). These distributions are useful for modeling data with excess observations at a particular value compared to what the base distribution would predict. Common applications include zero-inflated Poisson or negative binomial models for count data with many zeros. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_inflated.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_inflated.html} In the following, let \eqn{Y} be an inflated random variable based on a base distribution \eqn{X}, with inflation value \code{x} = \eqn{c} and inflation probability \code{prob} = \eqn{p}. \strong{Support}: Same as the base distribution, but with additional probability mass at \eqn{c} \strong{Mean}: (when \code{x} is numeric) \deqn{ E(Y) = p \cdot c + (1-p) \cdot E(X) }{ E(Y) = p * c + (1-p) * E(X) } \strong{Variance}: (when \code{x = 0}) \deqn{ \text{Var}(Y) = (1-p) \cdot \text{Var}(X) + p(1-p) \cdot [E(X)]^2 }{ Var(Y) = (1-p) * Var(X) + p(1-p) * [E(X)]^2 } For non-zero inflation values, the variance is not computed in closed form. \strong{Probability mass/density function (p.m.f/p.d.f)}: For discrete distributions: \deqn{ f_Y(y) = \begin{cases} p + (1-p) \cdot f_X(c) & \text{if } y = c \\ (1-p) \cdot f_X(y) & \text{if } y \neq c \end{cases} }{ f_Y(y) = p + (1-p) * f_X(c) if y = c, (1-p) * f_X(y) if y != c } For continuous distributions: \deqn{ f_Y(y) = \begin{cases} p & \text{if } y = c \\ (1-p) \cdot f_X(y) & \text{if } y \neq c \end{cases} }{ f_Y(y) = p if y = c, (1-p) * f_X(y) if y != c } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F_Y(q) = \begin{cases} (1-p) \cdot F_X(q) & \text{if } q < c \\ p + (1-p) \cdot F_X(q) & \text{if } q \geq c \end{cases} }{ F_Y(q) = (1-p) * F_X(q) if q < c, p + (1-p) * F_X(q) if q >= c } \strong{Quantile function}: The quantile function is computed numerically by inverting the inflated CDF, accounting for the jump in probability at the inflation point. } \examples{ # Zero-inflated Poisson dist <- dist_inflated(dist_poisson(lambda = 2), prob = 0.3, x = 0) dist mean(dist) variance(dist) generate(dist, 10) density(dist, 0) density(dist, 1) cdf(dist, 2) quantile(dist, 0.5) } distributional/man/new_dist.Rd0000644000176200001440000000130715131620157016166 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{new_dist} \alias{new_dist} \title{Construct distributions} \usage{ new_dist(..., class = NULL, dimnames = NULL) } \arguments{ \item{...}{Parameters of the distribution (named).} \item{class}{The class of the distribution for S3 dispatch.} \item{dimnames}{The names of the variables in the distribution (optional).} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} Allows extension package developers to define a new distribution class compatible with the distributional package. } distributional/man/dist_transformed.Rd0000644000176200001440000000757315131601157017733 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_transformed.R \name{dist_transformed} \alias{dist_transformed} \title{Modify a distribution with a transformation} \usage{ dist_transformed(dist, transform, inverse) } \arguments{ \item{dist}{A univariate distribution vector.} \item{transform}{A function used to transform the distribution. This transformation should be monotonic over appropriate domain.} \item{inverse}{The inverse of the \code{transform} function.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} A transformed distribution applies a monotonic transformation to an existing distribution. This is useful for creating derived distributions such as log-normal (exponential transformation of normal), or other custom transformations of base distributions. The \code{\link[=density]{density()}}, \code{\link[=mean]{mean()}}, and \code{\link[=variance]{variance()}} methods are approximate as they are based on numerical derivatives. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_transformed.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_transformed.html} Let \eqn{Y = g(X)} where \eqn{X} is the base distribution with transformation function \code{transform} = \eqn{g} and \code{inverse} = \eqn{g^{-1}}. The transformation \eqn{g} must be monotonic over the support of \eqn{X}. \strong{Support}: \eqn{g(S_X)} where \eqn{S_X} is the support of \eqn{X} \strong{Mean}: Approximated numerically using a second-order Taylor expansion: \deqn{ E(Y) \approx g(\mu_X) + \frac{1}{2}g''(\mu_X)\sigma_X^2 }{ E(Y) ~= g(mu_X) + (1/2) g''(mu_X) sigma_X^2 } where \eqn{\mu_X} and \eqn{\sigma_X^2} are the mean and variance of the base distribution \eqn{X}, and \eqn{g''} is the second derivative of the transformation. The derivative is computed numerically using \code{\link[numDeriv:hessian]{numDeriv::hessian()}}. \strong{Variance}: Approximated numerically using the delta method: \deqn{ \mathrm{Var}(Y) \approx [g'(\mu_X)]^2\sigma_X^2 + \frac{1}{2}[g''(\mu_X)\sigma_X^2]^2 }{ Var(Y) ~= [g'(mu_X)]^2 sigma_X^2 + (1/2) [g''(mu_X) sigma_X^2]^2 } where \eqn{g'} is the first derivative (Jacobian) computed numerically using \code{\link[numDeriv:jacobian]{numDeriv::jacobian()}}. \strong{Probability density function (p.d.f)}: Using the change of variables formula: \deqn{ f_Y(y) = f_X(g^{-1}(y)) \left|\frac{d}{dy}g^{-1}(y)\right| }{ f_Y(y) = f_X(g^(-1)(y)) |d/dy g^(-1)(y)| } where \eqn{f_X} is the p.d.f. of the base distribution and the Jacobian \eqn{|d/dy \, g^{-1}(y)|} is computed numerically using \code{\link[numDeriv:jacobian]{numDeriv::jacobian()}}. \strong{Cumulative distribution function (c.d.f)}: For monotonically increasing \eqn{g}: \deqn{ F_Y(y) = F_X(g^{-1}(y)) }{ F_Y(y) = F_X(g^(-1)(y)) } For monotonically decreasing \eqn{g}: \deqn{ F_Y(y) = 1 - F_X(g^{-1}(y)) }{ F_Y(y) = 1 - F_X(g^(-1)(y)) } where \eqn{F_X} is the c.d.f. of the base distribution. \strong{Quantile function}: The inverse of the c.d.f. For monotonically increasing \eqn{g}: \deqn{ Q_Y(p) = g(Q_X(p)) }{ Q_Y(p) = g(Q_X(p)) } For monotonically decreasing \eqn{g}: \deqn{ Q_Y(p) = g(Q_X(1-p)) }{ Q_Y(p) = g(Q_X(1-p)) } where \eqn{Q_X} is the quantile function of the base distribution. } \examples{ # Create a log normal distribution dist <- dist_transformed(dist_normal(0, 0.5), exp, log) density(dist, 1) # dlnorm(1, 0, 0.5) cdf(dist, 4) # plnorm(4, 0, 0.5) quantile(dist, 0.1) # qlnorm(0.1, 0, 0.5) generate(dist, 10) # rlnorm(10, 0, 0.5) } \seealso{ \code{\link[numDeriv:jacobian]{numDeriv::jacobian()}}, \code{\link[numDeriv:hessian]{numDeriv::hessian()}} } distributional/man/family.distribution.Rd0000644000176200001440000000131215112047012020335 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{family.distribution} \alias{family.distribution} \title{Extract the name of the distribution family} \usage{ \method{family}{distribution}(object, ...) } \arguments{ \item{object}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ dist <- c( dist_normal(1:2), dist_poisson(3), dist_multinomial(size = c(4, 3), prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) ) family(dist) } distributional/man/dist_bernoulli.Rd0000644000176200001440000000520715131566053017377 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_bernoulli.R \name{dist_bernoulli} \alias{dist_bernoulli} \title{The Bernoulli distribution} \usage{ dist_bernoulli(prob) } \arguments{ \item{prob}{The probability of success on each trial, \code{prob} can be any value in \verb{[0, 1]}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Bernoulli distributions are used to represent events like coin flips when there is single trial that is either successful or unsuccessful. The Bernoulli distribution is a special case of the \code{\link[=Binomial]{Binomial()}} distribution with \code{n = 1}. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_bernoulli.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_bernoulli.html} In the following, let \eqn{X} be a Bernoulli random variable with parameter \code{prob} = \eqn{p}. Some textbooks also define \eqn{q = 1 - p}, or use \eqn{\pi} instead of \eqn{p}. The Bernoulli probability distribution is widely used to model binary variables, such as 'failure' and 'success'. The most typical example is the flip of a coin, when \eqn{p} is thought as the probability of flipping a head, and \eqn{q = 1 - p} is the probability of flipping a tail. \strong{Support}: \eqn{\{0, 1\}}{{0, 1}} \strong{Mean}: \eqn{p} \strong{Variance}: \eqn{p \cdot (1 - p) = p \cdot q}{p (1 - p)} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = x) = p^x (1 - p)^{1-x} = p^x q^{1-x} }{ P(X = x) = p^x (1 - p)^(1-x) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X \le x) = \left \{ \begin{array}{ll} 0 & x < 0 \\ 1 - p & 0 \leq x < 1 \\ 1 & x \geq 1 \end{array} \right. }{ P(X \le x) = (1 - p) 1_{[0, 1)}(x) + 1_{1}(x) } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = (1 - p) + p e^t }{ E(e^(tX)) = (1 - p) + p e^t } \strong{Skewness}: \deqn{ \frac{1 - 2p}{\sqrt{p(1-p)}} = \frac{q - p}{\sqrt{pq}} }{ (1 - 2p) / sqrt(p(1-p)) } \strong{Excess Kurtosis}: \deqn{ \frac{1 - 6p(1-p)}{p(1-p)} = \frac{1 - 6pq}{pq} }{ (1 - 6p(1-p)) / (p(1-p)) } } \examples{ dist <- dist_bernoulli(prob = c(0.05, 0.5, 0.3, 0.9, 0.1)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Binomial]{stats::Binomial} } distributional/man/is-distribution.Rd0000644000176200001440000000123515112047012017472 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{is_distribution} \alias{is_distribution} \title{Test if the object is a distribution} \usage{ is_distribution(x) } \arguments{ \item{x}{An object.} } \value{ TRUE if the object inherits from the distribution class. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} This function returns \code{TRUE} for distributions and \code{FALSE} for all other objects. } \examples{ dist <- dist_normal() is_distribution(dist) is_distribution("distributional") } distributional/man/dist_percentile.Rd0000644000176200001440000000534615131575014017537 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_percentile.R \name{dist_percentile} \alias{dist_percentile} \title{Percentile distribution} \usage{ dist_percentile(x, percentile) } \arguments{ \item{x}{A list of values} \item{percentile}{A list of percentiles} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Percentile distribution is a non-parametric distribution defined by a set of quantiles at specified percentile values. This distribution is useful for representing empirical distributions or elicited expert knowledge when only percentile information is available. The distribution uses linear interpolation between percentiles and can be used to approximate complex distributions that may not have simple parametric forms. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_percentile.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_percentile.html} In the following, let \eqn{X} be a Percentile random variable defined by values \eqn{x_1, x_2, \ldots, x_n} at percentiles \eqn{p_1, p_2, \ldots, p_n} where \eqn{0 \le p_i \le 100}. \strong{Support}: \eqn{[\min(x_i), \max(x_i)]} if \eqn{\min(p_i) > 0} or \eqn{\max(p_i) < 100}, otherwise support is approximated from the specified percentiles. \strong{Mean}: Approximated numerically using spline interpolation and numerical integration: \deqn{ E(X) \approx \int_0^1 Q(u) du }{ E(X) ≈ integral_0^1 Q(u) du } where \eqn{Q(u)} is a spline function interpolating the percentile values. \strong{Variance}: Approximated numerically. \strong{Probability density function (p.d.f)}: Approximated numerically using kernel density estimation from generated samples. \strong{Cumulative distribution function (c.d.f)}: Defined by linear interpolation: \deqn{ F(t) = \begin{cases} p_1/100 & \text{if } t < x_1 \\ p_i/100 + \frac{(t - x_i)(p_{i+1} - p_i)}{100(x_{i+1} - x_i)} & \text{if } x_i \le t < x_{i+1} \\ p_n/100 & \text{if } t \ge x_n \end{cases} }{ F(t) = p_i/100 + (t - x_i)(p_{i+1} - p_i) / (100(x_{i+1} - x_i)) for x_i ≤ t < x_{i+1} } \strong{Quantile function}: Defined by linear interpolation: \deqn{ Q(u) = x_i + \frac{(100u - p_i)(x_{i+1} - x_i)}{p_{i+1} - p_i} }{ Q(u) = x_i + (100u - p_i)(x_{i+1} - x_i) / (p_{i+1} - p_i) } for \eqn{p_i/100 \le u \le p_{i+1}/100}. } \examples{ dist <- dist_normal() percentiles <- seq(0.01, 0.99, by = 0.01) x <- vapply(percentiles, quantile, double(1L), x = dist) dist_percentile(list(x), list(percentiles*100)) } distributional/man/dist_negative_binomial.Rd0000644000176200001440000000517515131604151021054 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_negative_binomial.R \name{dist_negative_binomial} \alias{dist_negative_binomial} \title{The Negative Binomial distribution} \usage{ dist_negative_binomial(size, prob) } \arguments{ \item{size}{The number of successful trials (target number of successes). Must be a positive number. Also called the dispersion parameter.} \item{prob}{The probability of success in each trial. Must be between 0 and 1.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} A generalization of the geometric distribution. It is the number of failures in a sequence of i.i.d. Bernoulli trials before a specified number of successes (\code{size}) occur. The probability of success in each trial is given by \code{prob}. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_negative_binomial.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_negative_binomial.html} In the following, let \eqn{X} be a Negative Binomial random variable with success probability \code{prob} = \eqn{p} and the number of successes \code{size} = \eqn{r}. \strong{Support}: \eqn{\{0, 1, 2, 3, ...\}} \strong{Mean}: \eqn{\frac{r(1-p)}{p}} \strong{Variance}: \eqn{\frac{r(1-p)}{p^2}} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = k) = \binom{k + r - 1}{k} (1-p)^r p^k }{ P(X = k) = choose(k+r-1, k) * (1-p)^r * p^k } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(k) = \sum_{i=0}^{\lfloor k \rfloor} \binom{i + r - 1}{i} (1-p)^r p^i }{ F(k) = sum_{i=0}^floor(k) choose(i+r-1, i) * (1-p)^r * p^i } This can also be expressed in terms of the regularized incomplete beta function, and is computed numerically. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \left(\frac{1-p}{1-pe^t}\right)^r, \quad t < -\log p }{ E(e^(tX)) = ((1-p)/(1-p*e^t))^r, t < -log p } \strong{Skewness}: \deqn{ \gamma_1 = \frac{2-p}{\sqrt{r(1-p)}} }{ \gamma_1 = (2-p) / sqrt(r(1-p)) } \strong{Excess Kurtosis}: \deqn{ \gamma_2 = \frac{6}{r} + \frac{p^2}{r(1-p)} }{ \gamma_2 = 6/r + p^2/(r(1-p)) } } \examples{ dist <- dist_negative_binomial(size = 10, prob = 0.5) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:NegBinomial]{stats::NegBinomial} } distributional/man/dist_student_t.Rd0000644000176200001440000001216315131564425017415 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_student_t.R \name{dist_student_t} \alias{dist_student_t} \title{The (non-central) location-scale Student t Distribution} \usage{ dist_student_t(df, mu = 0, sigma = 1, ncp = NULL) } \arguments{ \item{df}{degrees of freedom (\eqn{> 0}, maybe non-integer). \code{df = Inf} is allowed.} \item{mu}{The location parameter of the distribution. If \code{ncp == 0} (or \code{NULL}), this is the median.} \item{sigma}{The scale parameter of the distribution.} \item{ncp}{non-centrality parameter \eqn{\delta}{delta}; currently except for \code{rt()}, accurate only for \code{abs(ncp) <= 37.62}. If omitted, use the central t distribution.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Student's T distribution is closely related to the \code{\link[=Normal]{Normal()}} distribution, but has heavier tails. As \eqn{\nu} increases to \eqn{\infty}, the Student's T converges to a Normal. The T distribution appears repeatedly throughout classic frequentist hypothesis testing when comparing group means. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_student_t.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_student_t.html} In the following, let \eqn{X} be a location-scale Student's T random variable with \code{df} = \eqn{\nu}, \code{mu} = \eqn{\mu}, \code{sigma} = \eqn{\sigma}, and \code{ncp} = \eqn{\delta} (non-centrality parameter). If \eqn{Z} follows a standard Student's T distribution (with \code{df} = \eqn{\nu} and \code{ncp} = \eqn{\delta}), then \eqn{X = \mu + \sigma Z}. \strong{Support}: \eqn{R}, the set of all real numbers \strong{Mean}: For the central distribution (\code{ncp} = 0 or \code{NULL}): \deqn{ E(X) = \mu }{ E(X) = \mu } for \eqn{\nu > 1}, and undefined otherwise. For the non-central distribution (\code{ncp} \eqn{\neq} 0): \deqn{ E(X) = \mu + \delta \sqrt{\frac{\nu}{2}} \frac{\Gamma((\nu-1)/2)}{\Gamma(\nu/2)} \sigma }{ E(X) = \mu + \delta \sqrt(\nu / 2) \Gamma((\nu-1)/2) / \Gamma(\nu/2) \sigma } for \eqn{\nu > 1}, and undefined otherwise. \strong{Variance}: For the central distribution (\code{ncp} = 0 or \code{NULL}): \deqn{ \mathrm{Var}(X) = \frac{\nu}{\nu - 2} \sigma^2 }{ Var(X) = \nu / (\nu - 2) \sigma^2 } for \eqn{\nu > 2}. Undefined if \eqn{\nu \le 1}, infinite when \eqn{1 < \nu \le 2}. For the non-central distribution (\code{ncp} \eqn{\neq} 0): \deqn{ \mathrm{Var}(X) = \left[\frac{\nu(1+\delta^2)}{\nu-2} - \left(\delta \sqrt{\frac{\nu}{2}} \frac{\Gamma((\nu-1)/2)}{\Gamma(\nu/2)}\right)^2\right] \sigma^2 }{ Var(X) = [(\nu(1+\delta^2))/(\nu-2) - (\delta \sqrt(\nu / 2) \Gamma((\nu-1)/2) / \Gamma(\nu/2))^2] \sigma^2 } for \eqn{\nu > 2}. Undefined if \eqn{\nu \le 1}, infinite when \eqn{1 < \nu \le 2}. \strong{Probability density function (p.d.f)}: For the central distribution (\code{ncp} = 0 or \code{NULL}), the standard t distribution with \code{df} = \eqn{\nu} has density: \deqn{ f_Z(z) = \frac{\Gamma((\nu + 1)/2)}{\sqrt{\pi \nu} \Gamma(\nu/2)} \left(1 + \frac{z^2}{\nu} \right)^{- (\nu + 1)/2} }{ f_Z(z) = \Gamma((\nu + 1) / 2) / (\sqrt(\pi \nu) \Gamma(\nu / 2)) (1 + z^2 / \nu)^(- (\nu + 1) / 2) } The location-scale version with \code{mu} = \eqn{\mu} and \code{sigma} = \eqn{\sigma} has density: \deqn{ f(x) = \frac{1}{\sigma} f_Z\left(\frac{x - \mu}{\sigma}\right) }{ f(x) = (1/\sigma) f_Z((x - \mu) / \sigma) } For the non-central distribution (\code{ncp} \eqn{\neq} 0), the density is computed numerically via \code{\link[stats:TDist]{stats::dt()}}. \strong{Cumulative distribution function (c.d.f)}: For the central distribution (\code{ncp} = 0 or \code{NULL}), the cumulative distribution function is computed numerically via \code{\link[stats:TDist]{stats::pt()}}, which uses the relationship to the incomplete beta function: \deqn{ F_\nu(t) = \frac{1}{2} I_x\left(\frac{\nu}{2}, \frac{1}{2}\right) }{ F_\nu(t) = (1/2) I_x(\nu/2, 1/2) } for \eqn{t \le 0}, where \eqn{x = \nu/(\nu + t^2)} and \eqn{I_x(a,b)} is the incomplete beta function (\code{\link[stats:Beta]{stats::pbeta()}}). For \eqn{t \ge 0}: \deqn{ F_\nu(t) = 1 - \frac{1}{2} I_x\left(\frac{\nu}{2}, \frac{1}{2}\right) }{ F_\nu(t) = 1 - (1/2) I_x(\nu/2, 1/2) } The location-scale version is: \eqn{F(x) = F_\nu((x - \mu)/\sigma)}. For the non-central distribution (\code{ncp} \eqn{\neq} 0), the cumulative distribution function is computed numerically via \code{\link[stats:TDist]{stats::pt()}}. \strong{Moment generating function (m.g.f)}: Does not exist in closed form. Moments are computed using the formulas for mean and variance above where available. } \examples{ dist <- dist_student_t(df = c(1,2,5), mu = c(0,1,2), sigma = c(1,2,3)) dist mean(dist) variance(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:TDist]{stats::TDist} } distributional/man/variance.distribution.Rd0000644000176200001440000000122315112047012020645 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{variance.distribution} \alias{variance.distribution} \title{Variance of a probability distribution} \usage{ \method{variance}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Returns the empirical variance of the probability distribution. If the method does not exist, the variance of a random sample will be returned. } distributional/man/dist_gev.Rd0000644000176200001440000001170215131557626016170 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gev.R \name{dist_gev} \alias{dist_gev} \title{The Generalized Extreme Value Distribution} \usage{ dist_gev(location, scale, shape) } \arguments{ \item{location}{the location parameter \eqn{\mu} of the GEV distribution.} \item{scale}{the scale parameter \eqn{\sigma} of the GEV distribution. Must be strictly positive.} \item{shape}{the shape parameter \eqn{\xi} of the GEV distribution. Determines the tail behavior: \eqn{\xi = 0} gives Gumbel, \eqn{\xi > 0} gives Frechet, \eqn{\xi < 0} gives reverse Weibull.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The GEV distribution is widely used in extreme value theory to model the distribution of maxima (or minima) of samples. The parametric form encompasses the Gumbel, Frechet, and reverse Weibull distributions. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gev.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gev.html} In the following, let \eqn{X} be a GEV random variable with parameters \code{location} = \eqn{\mu}, \code{scale} = \eqn{\sigma}, and \code{shape} = \eqn{\xi}. \strong{Support}: \itemize{ \item \eqn{x \in \mathbb{R}} (all real numbers) if \eqn{\xi = 0} \item \eqn{x \geq \mu - \sigma/\xi} if \eqn{\xi > 0} \item \eqn{x \leq \mu - \sigma/\xi} if \eqn{\xi < 0} } \strong{Mean}: \deqn{ E(X) = \begin{cases} \mu + \sigma \gamma & \text{if } \xi = 0 \\ \mu + \sigma \frac{\Gamma(1-\xi) - 1}{\xi} & \text{if } \xi < 1 \\ \infty & \text{if } \xi \geq 1 \end{cases} }{ E(X) = \mu + \sigma \gamma (if \xi = 0) \mu + \sigma (\Gamma(1-\xi) - 1) / \xi (if \xi < 1) Inf (if \xi >= 1) } where \eqn{\gamma \approx 0.5772} is the Euler-Mascheroni constant and \eqn{\Gamma(\cdot)} is the gamma function. \strong{Median}: \deqn{ \text{Median}(X) = \begin{cases} \mu - \sigma \log(\log 2) & \text{if } \xi = 0 \\ \mu + \sigma \frac{(\log 2)^{-\xi} - 1}{\xi} & \text{if } \xi \neq 0 \end{cases} }{ Median(X) = \mu - \sigma log(log 2) (if \xi = 0) \mu + \sigma (log(2)^(-\xi) - 1) / \xi (if \xi != 0) } \strong{Variance}: \deqn{ \text{Var}(X) = \begin{cases} \frac{\pi^2 \sigma^2}{6} & \text{if } \xi = 0 \\ \frac{\sigma^2}{\xi^2} [\Gamma(1-2\xi) - \Gamma(1-\xi)^2] & \text{if } \xi < 0.5 \\ \infty & \text{if } \xi \geq 0.5 \end{cases} }{ Var(X) = \pi^2 \sigma^2 / 6 (if \xi = 0) \sigma^2 [\Gamma(1-2\xi) - \Gamma(1-\xi)^2] / \xi^2 (if \xi < 0.5) Inf (if \xi >= 0.5) } \strong{Probability density function (p.d.f)}: For \eqn{\xi = 0} (Gumbel): \deqn{ f(x) = \frac{1}{\sigma} \exp\left(-\frac{x-\mu}{\sigma}\right) \exp\left[-\exp\left(-\frac{x-\mu}{\sigma}\right)\right] }{ f(x) = (1/\sigma) exp(-(x-\mu)/\sigma) exp(-exp(-(x-\mu)/\sigma)) } For \eqn{\xi \neq 0}: \deqn{ f(x) = \frac{1}{\sigma} \left[1 + \xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi-1} \exp\left\{-\left[1 + \xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi}\right\} }{ f(x) = (1/\sigma) [1 + \xi(x-\mu)/\sigma]^(-1/\xi-1) exp(-[1 + \xi(x-\mu)/\sigma]^(-1/\xi)) } where \eqn{1 + \xi(x-\mu)/\sigma > 0}. \strong{Cumulative distribution function (c.d.f)}: For \eqn{\xi = 0} (Gumbel): \deqn{ F(x) = \exp\left[-\exp\left(-\frac{x-\mu}{\sigma}\right)\right] }{ F(x) = exp(-exp(-(x-\mu)/\sigma)) } For \eqn{\xi \neq 0}: \deqn{ F(x) = \exp\left\{-\left[1+\xi\left(\frac{x-\mu}{\sigma}\right)\right]^{-1/\xi}\right\} }{ F(x) = exp(-[1 + \xi(x-\mu)/\sigma]^(-1/\xi)) } where \eqn{1 + \xi(x-\mu)/\sigma > 0}. \strong{Quantile function}: For \eqn{\xi = 0} (Gumbel): \deqn{ Q(p) = \mu - \sigma \log(-\log p) }{ Q(p) = \mu - \sigma log(-log p) } For \eqn{\xi \neq 0}: \deqn{ Q(p) = \mu + \frac{\sigma}{\xi}\left[(-\log p)^{-\xi} - 1\right] }{ Q(p) = \mu + \sigma [(-log p)^(-\xi) - 1] / \xi } } \examples{ # Create GEV distributions with different shape parameters # Gumbel distribution (shape = 0) gumbel <- dist_gev(location = 0, scale = 1, shape = 0) # Frechet distribution (shape > 0, heavy-tailed) frechet <- dist_gev(location = 0, scale = 1, shape = 0.3) # Reverse Weibull distribution (shape < 0, bounded above) weibull <- dist_gev(location = 0, scale = 1, shape = -0.2) dist <- c(gumbel, frechet, weibull) dist # Statistical properties mean(dist) median(dist) variance(dist) # Generate random samples generate(dist, 10) # Evaluate density density(dist, 2) density(dist, 2, log = TRUE) # Evaluate cumulative distribution cdf(dist, 4) # Calculate quantiles quantile(dist, 0.95) } \references{ Jenkinson, A. F. (1955) The frequency distribution of the annual maximum (or minimum) of meteorological elements. \emph{Quart. J. R. Met. Soc.}, \bold{81}, 158–171. } \seealso{ \code{\link[evd:gev]{evd::dgev()}} } distributional/man/dist_multinomial.Rd0000644000176200001440000000717115131607047017737 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_multinomial.R \name{dist_multinomial} \alias{dist_multinomial} \title{The Multinomial distribution} \usage{ dist_multinomial(size, prob) } \arguments{ \item{size}{The number of draws from the Categorical distribution.} \item{prob}{The probability of an event occurring from each draw.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The multinomial distribution is a generalization of the binomial distribution to multiple categories. It is perhaps easiest to think that we first extend a \code{\link[=dist_bernoulli]{dist_bernoulli()}} distribution to include more than two categories, resulting in a \code{\link[=dist_categorical]{dist_categorical()}} distribution. We then extend repeat the Categorical experiment several (\eqn{n}) times. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_multinomial.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_multinomial.html} In the following, let \eqn{X = (X_1, ..., X_k)} be a Multinomial random variable with success probability \code{prob} = \eqn{p}. Note that \eqn{p} is vector with \eqn{k} elements that sum to one. Assume that we repeat the Categorical experiment \code{size} = \eqn{n} times. \strong{Support}: Each \eqn{X_i} is in \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}}. \strong{Mean}: The mean of \eqn{X_i} is \eqn{n p_i}. \strong{Variance}: The variance of \eqn{X_i} is \eqn{n p_i (1 - p_i)}. For \eqn{i \neq j}, the covariance of \eqn{X_i} and \eqn{X_j} is \eqn{-n p_i p_j}. \strong{Probability mass function (p.m.f)}: \deqn{ P(X_1 = x_1, ..., X_k = x_k) = \frac{n!}{x_1! x_2! \cdots x_k!} p_1^{x_1} \cdot p_2^{x_2} \cdot \ldots \cdot p_k^{x_k} }{ P(X_1 = x_1, ..., X_k = x_k) = n! / (x_1! x_2! ... x_k!) p_1^x_1 p_2^x_2 ... p_k^x_k } where \eqn{\sum_{i=1}^k x_i = n} and \eqn{\sum_{i=1}^k p_i = 1}. \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X_1 \le q_1, ..., X_k \le q_k) = \sum_{\substack{x_1, \ldots, x_k \ge 0 \\ x_i \le q_i \text{ for all } i \\ \sum_{i=1}^k x_i = n}} \frac{n!}{x_1! x_2! \cdots x_k!} p_1^{x_1} \cdot p_2^{x_2} \cdot \ldots \cdot p_k^{x_k} }{ P(X_1 <= q_1, ..., X_k <= q_k) = sum over all (x_1, ..., x_k) with x_i <= q_i and sum x_i = n of n! / (x_1! ... x_k!) p_1^x_1 ... p_k^x_k } The c.d.f. is computed as a finite sum of the p.m.f. over all integer vectors in the support that satisfy the componentwise inequalities. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{t'X}) = \left(\sum_{i=1}^k p_i e^{t_i}\right)^n }{ E(e^(t'X)) = (p_1 e^t_1 + p_2 e^t_2 + ... + p_k e^t_k)^n } where \eqn{t = (t_1, ..., t_k)} is a vector of the same dimension as \eqn{X}. \strong{Skewness}: The skewness of \eqn{X_i} is \deqn{ \frac{1 - 2p_i}{\sqrt{n p_i (1 - p_i)}} }{ (1 - 2p_i) / sqrt(n p_i (1 - p_i)) } \strong{Excess Kurtosis}: The excess kurtosis of \eqn{X_i} is \deqn{ \frac{1 - 6p_i(1 - p_i)}{n p_i (1 - p_i)} }{ (1 - 6p_i(1 - p_i)) / (n p_i (1 - p_i)) } } \examples{ dist <- dist_multinomial(size = c(4, 3), prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) dist mean(dist) variance(dist) generate(dist, 10) density(dist, list(d = rbind(cbind(1,2,1), cbind(0,2,1)))) density(dist, list(d = rbind(cbind(1,2,1), cbind(0,2,1))), log = TRUE) cdf(dist, cbind(1,2,1)) } \seealso{ \code{\link[stats:Multinom]{stats::dmultinom()}}, \code{\link[stats:Multinom]{stats::rmultinom()}} } distributional/man/hdr.distribution.Rd0000644000176200001440000000164215112047012017637 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{hdr.distribution} \alias{hdr.distribution} \title{Highest density regions of probability distributions} \usage{ \method{hdr}{distribution}(x, size = 95, n = 512, ...) } \arguments{ \item{x}{The distribution(s).} \item{size}{The size of the interval (between 0 and 100).} \item{n}{The resolution used to estimate the distribution's density.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} This function is highly experimental and will change in the future. In particular, improved functionality for object classes and visualisation tools will be added in a future release. Computes minimally sized probability intervals highest density regions. } distributional/man/skewness.Rd0000644000176200001440000000103015112047012016175 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{skewness} \alias{skewness} \alias{skewness.distribution} \title{Skewness of a probability distribution} \usage{ skewness(x, ...) \method{skewness}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } distributional/man/dist_inverse_gamma.Rd0000644000176200001440000000527615131557626020235 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_inverse_gamma.R \name{dist_inverse_gamma} \alias{dist_inverse_gamma} \title{The Inverse Gamma distribution} \usage{ dist_inverse_gamma(shape, rate = 1/scale, scale) } \arguments{ \item{shape, scale}{parameters. Must be strictly positive.} \item{rate}{an alternative way to specify the scale.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Inverse Gamma distribution is commonly used as a prior distribution in Bayesian statistics, particularly for variance parameters. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_gamma.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_inverse_gamma.html} In the following, let \eqn{X} be an Inverse Gamma random variable with shape parameter \code{shape} = \eqn{\alpha} and rate parameter \code{rate} = \eqn{\beta} (equivalently, scale = \eqn{1/\beta}). \strong{Support}: \eqn{x \in (0, \infty)} \strong{Mean}: \eqn{\frac{\beta}{\alpha - 1}} for \eqn{\alpha > 1}, otherwise undefined \strong{Variance}: \eqn{\frac{\beta^2}{(\alpha - 1)^2 (\alpha - 2)}} for \eqn{\alpha > 2}, otherwise undefined \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{\beta^\alpha}{\Gamma(\alpha)} x^{-\alpha - 1} e^{-\beta/x} }{ f(x) = \beta^\alpha / \Gamma(\alpha) x^(-\alpha - 1) e^(-\beta/x) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \frac{\Gamma(\alpha, \beta/x)}{\Gamma(\alpha)} = Q(\alpha, \beta/x) }{ F(x) = \Gamma(\alpha, \beta/x) / \Gamma(\alpha) = Q(\alpha, \beta/x) } where \eqn{\Gamma(\alpha, z)} is the upper incomplete gamma function and \eqn{Q} is the regularized incomplete gamma function. \strong{Moment generating function (m.g.f)}: \deqn{ M_X(t) = \frac{2 (-\beta t)^{\alpha/2}}{\Gamma(\alpha)} K_\alpha\left(\sqrt{-4\beta t}\right) }{ M_X(t) = 2 (-\beta t)^(\alpha/2) / \Gamma(\alpha) K_\alpha(sqrt(-4\beta t)) } for \eqn{t < 0}, where \eqn{K_\alpha} is the modified Bessel function of the second kind. The MGF does not exist for \eqn{t \ge 0}. } \examples{ dist <- dist_inverse_gamma(shape = c(1,2,3,3), rate = c(1,1,1,2)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:InverseGamma]{actuar::InverseGamma} } distributional/man/hilo.Rd0000644000176200001440000000163115112047012015275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hilo.R \name{hilo} \alias{hilo} \title{Compute intervals} \usage{ hilo(x, ...) } \arguments{ \item{x}{Object to create hilo from.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Used to extract a specified prediction interval at a particular confidence level from a distribution. The numeric lower and upper bounds can be extracted from the interval using \verb{$lower} and \verb{$upper} as shown in the examples below. } \examples{ # 95\% interval from a standard normal distribution interval <- hilo(dist_normal(0, 1), 95) interval # Extract the individual quantities with `$lower`, `$upper`, and `$level` interval$lower interval$upper interval$level } distributional/man/dist_categorical.Rd0000644000176200001440000000705115131571433017656 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_categorical.R \name{dist_categorical} \alias{dist_categorical} \title{The Categorical distribution} \usage{ dist_categorical(prob, outcomes = NULL) } \arguments{ \item{prob}{A list of probabilities of observing each outcome category.} \item{outcomes}{The list of vectors where each value represents each outcome.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Categorical distributions are used to represent events with multiple outcomes, such as what number appears on the roll of a dice. This is also referred to as the 'generalised Bernoulli' or 'multinoulli' distribution. The Categorical distribution is a special case of the \code{\link[=Multinomial]{Multinomial()}} distribution with \code{n = 1}. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_categorical.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_categorical.html} In the following, let \eqn{X} be a Categorical random variable with probability parameters \code{prob} = \eqn{\{p_1, p_2, \ldots, p_k\}}. The Categorical probability distribution is widely used to model the occurance of multiple events. A simple example is the roll of a dice, where \eqn{p = \{1/6, 1/6, 1/6, 1/6, 1/6, 1/6\}} giving equal chance of observing each number on a 6 sided dice. \strong{Support}: \eqn{\{1, \ldots, k\}}{{1, ..., k}} \strong{Mean}: Not defined for unordered categories. For ordered categories with integer outcomes \eqn{\{1, 2, \ldots, k\}}, the mean is: \deqn{ E(X) = \sum_{i=1}^{k} i \cdot p_i }{ E(X) = sum(i * p_i) for i = 1 to k } \strong{Variance}: Not defined for unordered categories. For ordered categories with integer outcomes \eqn{\{1, 2, \ldots, k\}}, the variance is: \deqn{ \text{Var}(X) = \sum_{i=1}^{k} i^2 \cdot p_i - \left(\sum_{i=1}^{k} i \cdot p_i\right)^2 }{ Var(X) = sum(i^2 * p_i) - [sum(i * p_i)]^2 } \strong{Probability mass function (p.m.f)}: \deqn{ P(X = i) = p_i }{ P(X = i) = p_i } \strong{Cumulative distribution function (c.d.f)}: The c.d.f is undefined for unordered categories. For ordered categories with outcomes \eqn{x_1 < x_2 < \ldots < x_k}, the c.d.f is: \deqn{ P(X \le x_j) = \sum_{i=1}^{j} p_i }{ P(X <= x_j) = sum(p_i) for i = 1 to j } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \sum_{i=1}^{k} e^{tx_i} \cdot p_i }{ E(e^(tX)) = sum(e^(t * x_i) * p_i) for i = 1 to k } \strong{Skewness}: Approximated numerically for ordered categories. \strong{Kurtosis}: Approximated numerically for ordered categories. } \examples{ dist <- dist_categorical(prob = list(c(0.05, 0.5, 0.15, 0.2, 0.1), c(0.3, 0.1, 0.6))) dist generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) # The outcomes aren't ordered, so many statistics are not applicable. cdf(dist, 0.6) quantile(dist, 0.7) mean(dist) variance(dist) skewness(dist) kurtosis(dist) # Some of these statistics are meaningful for ordered outcomes dist <- dist_categorical(list(rpois(26, 3)), list(ordered(letters))) dist cdf(dist, "m") quantile(dist, 0.5) dist <- dist_categorical( prob = list(c(0.05, 0.5, 0.15, 0.2, 0.1), c(0.3, 0.1, 0.6)), outcomes = list(letters[1:5], letters[24:26]) ) generate(dist, 10) density(dist, "a") density(dist, "z", log = TRUE) } \seealso{ \link[stats:Multinom]{stats::Multinomial} } distributional/man/is_hilo.Rd0000644000176200001440000000035615112047012015773 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hilo.R \name{is_hilo} \alias{is_hilo} \title{Is the object a hilo} \usage{ is_hilo(x) } \arguments{ \item{x}{An object.} } \description{ Is the object a hilo } distributional/man/dist_pareto.Rd0000644000176200001440000000544515131564160016677 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_pareto.R \name{dist_pareto} \alias{dist_pareto} \title{The Pareto Distribution} \usage{ dist_pareto(shape, scale) } \arguments{ \item{shape, scale}{parameters. Must be strictly positive.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Pareto distribution is a power-law probability distribution commonly used in actuarial science to model loss severity and in economics to model income distributions and firm sizes. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_pareto.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_pareto.html} In the following, let \eqn{X} be a Pareto random variable with parameters \code{shape} = \eqn{\alpha} and \code{scale} = \eqn{\theta}. \strong{Support}: \eqn{(0, \infty)}{(0, Inf)} \strong{Mean}: \eqn{\frac{\theta}{\alpha - 1}}{\theta / (\alpha - 1)} for \eqn{\alpha > 1}, undefined otherwise \strong{Variance}: \eqn{\frac{\alpha\theta^2}{(\alpha - 1)^2(\alpha - 2)}}{\alpha\theta^2 / ((\alpha - 1)^2(\alpha - 2))} for \eqn{\alpha > 2}, undefined otherwise \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{\alpha\theta^\alpha}{(x + \theta)^{\alpha + 1}} }{ f(x) = \alpha\theta^\alpha / (x + \theta)^(\alpha + 1) } for \eqn{x > 0}, \eqn{\alpha > 0} and \eqn{\theta > 0}. \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = 1 - \left(\frac{\theta}{x + \theta}\right)^\alpha }{ F(x) = 1 - (\theta / (x + \theta))^\alpha } for \eqn{x > 0}. \strong{Moment generating function (m.g.f)}: Does not exist in closed form, but the \eqn{k}th raw moment \eqn{E[X^k]} exists for \eqn{-1 < k < \alpha}. } \note{ There are many different definitions of the Pareto distribution in the literature; see Arnold (2015) or Kleiber and Kotz (2003). This implementation uses the Pareto distribution without a location parameter as described in \link[actuar:Pareto]{actuar::Pareto}. } \examples{ dist <- dist_pareto(shape = c(10, 3, 2, 1), scale = rep(1, 4)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \references{ Kleiber, C. and Kotz, S. (2003), \emph{Statistical Size Distributions in Economics and Actuarial Sciences}, Wiley. Klugman, S. A., Panjer, H. H. and Willmot, G. E. (2012), \emph{Loss Models, From Data to Decisions, Fourth Edition}, Wiley. } \seealso{ \link[actuar:Pareto]{actuar::Pareto} } distributional/man/density.distribution.Rd0000644000176200001440000000144415112047012020541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{density.distribution} \alias{density.distribution} \title{The probability density/mass function} \usage{ \method{density}{distribution}(x, at, ..., log = FALSE) } \arguments{ \item{x}{The distribution(s).} \item{at}{The point at which to compute the density/mass.} \item{...}{Additional arguments passed to methods.} \item{log}{If \code{TRUE}, probabilities will be given as log probabilities.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Computes the probability density function for a continuous distribution, or the probability mass function for a discrete distribution. } distributional/man/new_support_region.Rd0000644000176200001440000000101015131620157020271 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/support.R \name{new_support_region} \alias{new_support_region} \title{Construct support regions} \usage{ new_support_region(x = numeric(), limits = list(), closed = list()) } \arguments{ \item{x}{A list of prototype vectors defining the distribution type.} \item{limits}{A list of value limits for the distribution.} \item{closed}{A list of logical(2L) indicating whether the limits are closed.} } \description{ Construct support regions } distributional/man/dist_beta.Rd0000644000176200001440000000471015131557626016323 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_beta.R \name{dist_beta} \alias{dist_beta} \title{The Beta distribution} \usage{ dist_beta(shape1, shape2) } \arguments{ \item{shape1, shape2}{The non-negative shape parameters of the Beta distribution.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Beta distribution is a continuous probability distribution defined on the interval [0, 1], commonly used to model probabilities and proportions. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_beta.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_beta.html} In the following, let \eqn{X} be a Beta random variable with parameters \code{shape1} = \eqn{\alpha} and \code{shape2} = \eqn{\beta}. \strong{Support}: \eqn{x \in [0, 1]} \strong{Mean}: \eqn{\frac{\alpha}{\alpha + \beta}} \strong{Variance}: \eqn{\frac{\alpha\beta}{(\alpha + \beta)^2(\alpha + \beta + 1)}} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{x^{\alpha - 1}(1-x)^{\beta - 1}}{B(\alpha, \beta)} = \frac{\Gamma(\alpha + \beta)}{\Gamma(\alpha)\Gamma(\beta)} x^{\alpha - 1}(1-x)^{\beta - 1} }{ f(x) = x^(alpha - 1) * (1-x)^(beta - 1) / B(alpha, beta) } where \eqn{B(\alpha, \beta) = \frac{\Gamma(\alpha)\Gamma(\beta)}{\Gamma(\alpha + \beta)}} is the Beta function. \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = I_x(alpha, beta) = \frac{B(x; \alpha, \beta)}{B(\alpha, \beta)} }{ F(x) = I_x(\alpha, \beta) } where \eqn{I_x(\alpha, \beta)} is the regularized incomplete beta function and \eqn{B(x; \alpha, \beta) = \int_0^x t^{\alpha-1}(1-t)^{\beta-1} dt}. \strong{Moment generating function (m.g.f)}: The moment generating function does not have a simple closed form, but the moments can be calculated as: \deqn{ E(X^k) = \prod_{r=0}^{k-1} \frac{\alpha + r}{\alpha + \beta + r} }{ E(X^k) = prod((alpha + r) / (alpha + beta + r)) for r = 0, ..., k-1 } } \examples{ dist <- dist_beta(shape1 = c(0.5, 5, 1, 2, 2), shape2 = c(0.5, 1, 3, 2, 5)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Beta]{stats::Beta} } distributional/man/dist_wrap.Rd0000644000176200001440000000367415131577035016365 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_wrap.R \name{dist_wrap} \alias{dist_wrap} \title{Create a distribution from p/d/q/r style functions} \usage{ dist_wrap(dist, ..., package = NULL) } \arguments{ \item{dist}{The name of the distribution used in the functions (name that is prefixed by p/d/q/r)} \item{...}{Named arguments used to parameterise the distribution.} \item{package}{The package from which the distribution is provided. If NULL, the calling environment's search path is used to find the distribution functions. Alternatively, an arbitrary environment can also be provided here.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} If a distribution is not yet supported, you can vectorise p/d/q/r functions using this function. \code{dist_wrap()} stores the distributions parameters, and provides wrappers which call the appropriate p/d/q/r functions. Using this function to wrap a distribution should only be done if the distribution is not yet available in this package. If you need a distribution which isn't in the package yet, consider making a request at https://github.com/mitchelloharawild/distributional/issues. } \details{ The \code{dist_wrap()} function provides a generic interface to create distribution objects from any set of p/d/q/r style functions. The statistical properties depend on the specific distribution being wrapped. } \examples{ dist <- dist_wrap("norm", mean = 1:3, sd = c(3, 9, 2)) density(dist, 1) # dnorm() cdf(dist, 4) # pnorm() quantile(dist, 0.975) # qnorm() generate(dist, 10) # rnorm() library(actuar) dist <- dist_wrap("invparalogis", package = "actuar", shape = 2, rate = 2) density(dist, 1) # actuar::dinvparalogis() cdf(dist, 4) # actuar::pinvparalogis() quantile(dist, 0.975) # actuar::qinvparalogis() generate(dist, 10) # actuar::rinvparalogis() } distributional/man/dist_uniform.Rd0000644000176200001440000000374715131565231017067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_uniform.R \name{dist_uniform} \alias{dist_uniform} \title{The Uniform distribution} \usage{ dist_uniform(min, max) } \arguments{ \item{min, max}{lower and upper limits of the distribution. Must be finite.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} A distribution with constant density on an interval. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_uniform.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_uniform.html} In the following, let \eqn{X} be a Uniform random variable with parameters \code{min} = \eqn{a} and \code{max} = \eqn{b}. \strong{Support}: \eqn{[a, b]} \strong{Mean}: \eqn{\frac{a + b}{2}} \strong{Variance}: \eqn{\frac{(b - a)^2}{12}} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{b - a} }{ f(x) = 1 / (b - a) } for \eqn{x \in [a, b]}{x in [a, b]}, and \eqn{f(x) = 0} otherwise. \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \frac{x - a}{b - a} }{ F(x) = (x - a) / (b - a) } for \eqn{x \in [a, b]}{x in [a, b]}, with \eqn{F(x) = 0} for \eqn{x < a} and \eqn{F(x) = 1} for \eqn{x > b}. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \frac{e^{tb} - e^{ta}}{t(b - a)} }{ E(e^(tX)) = (e^(tb) - e^(ta)) / (t(b - a)) } for \eqn{t \neq 0}{t != 0}, and \eqn{E(e^{tX}) = 1} for \eqn{t = 0}. \strong{Skewness}: \eqn{0} \strong{Excess Kurtosis}: \eqn{-\frac{6}{5}}{-6/5} } \examples{ dist <- dist_uniform(min = c(3, -2), max = c(5, 4)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Uniform]{stats::Uniform} } distributional/man/dist_studentized_range.Rd0000644000176200001440000000420415131564625021121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_studentized_range.R \name{dist_studentized_range} \alias{dist_studentized_range} \title{The Studentized Range distribution} \usage{ dist_studentized_range(nmeans, df, nranges) } \arguments{ \item{nmeans}{sample size for range (same for each group).} \item{df}{degrees of freedom for \eqn{s} (see below).} \item{nranges}{number of \emph{groups} whose \bold{maximum} range is considered.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Tukey's studentized range distribution, used for Tukey's honestly significant differences test in ANOVA. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_studentized_range.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_studentized_range.html} In the following, let \eqn{Q} be a Studentized Range random variable with parameters \code{nmeans} = \eqn{k} (number of groups), \code{df} = \eqn{\nu} (degrees of freedom), and \code{nranges} = \eqn{n} (number of ranges). \strong{Support}: \eqn{R^+}, the set of positive real numbers. \strong{Mean}: Approximated numerically. \strong{Variance}: Approximated numerically. \strong{Probability density function (p.d.f)}: The density does not have a closed-form expression and is computed numerically. \strong{Cumulative distribution function (c.d.f)}: The c.d.f does not have a simple closed-form expression. For \eqn{n = 1} (single range), it involves integration over the joint distribution of the sample range and an independent chi-square variable. The general form is computed numerically using algorithms described in the references for \code{\link[stats:Tukey]{stats::ptukey()}}. \strong{Moment generating function (m.g.f)}: Does not exist in closed form. } \examples{ dist <- dist_studentized_range(nmeans = c(6, 2), df = c(5, 4), nranges = c(1, 1)) dist cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Tukey]{stats::Tukey} } distributional/man/dist_hypergeometric.Rd0000644000176200001440000000534415131557626020442 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_hypergeometric.R \name{dist_hypergeometric} \alias{dist_hypergeometric} \title{The Hypergeometric distribution} \usage{ dist_hypergeometric(m, n, k) } \arguments{ \item{m}{The number of type I elements available.} \item{n}{The number of type II elements available.} \item{k}{The size of the sample taken.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} To understand the HyperGeometric distribution, consider a set of \eqn{r} objects, of which \eqn{m} are of the type I and \eqn{n} are of the type II. A sample with size \eqn{k} (\eqn{k 0} \strong{Mean}: \eqn{\frac{\lambda^{-1/\alpha} \gamma B(\gamma - 1/\alpha, 1 + 1/\alpha)}{\gamma}} (for \eqn{\alpha \gamma > 1}) \strong{Variance}: \eqn{\frac{\lambda^{-2/\alpha} \gamma B(\gamma - 2/\alpha, 1 + 2/\alpha)}{\gamma} - \mu^2} (for \eqn{\alpha \gamma > 2}) \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \alpha \gamma \lambda x^{\alpha - 1} (1 + \lambda x^\alpha)^{-\gamma - 1} }{ f(x) = \alpha \gamma \lambda x^(\alpha - 1) (1 + \lambda x^\alpha)^(-\gamma - 1) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = 1 - (1 + \lambda x^\alpha)^{-\gamma} }{ F(x) = 1 - (1 + \lambda x^\alpha)^(-\gamma) } \strong{Quantile function}: \deqn{ F^{-1}(p) = \lambda^{-1/\alpha} ((1 - p)^{-1/\gamma} - 1)^{1/\alpha} }{ F^(-1)(p) = \lambda^(-1/\alpha) ((1 - p)^(-1/\gamma) - 1)^(1/\alpha) } \strong{Moment generating function (m.g.f)}: Does not exist in closed form. } \examples{ dist <- dist_burr(shape1 = c(1,1,1,2,3,0.5), shape2 = c(1,2,3,1,1,2)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:Burr]{actuar::Burr} } distributional/man/dist_laplace.Rd0000644000176200001440000000441515131604123016774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_laplace.R \name{dist_laplace} \alias{dist_laplace} \title{The Laplace distribution} \usage{ dist_laplace(mu, sigma) } \arguments{ \item{mu}{The location parameter (mean) of the Laplace distribution.} \item{sigma}{The positive scale parameter of the Laplace distribution.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Laplace distribution, also known as the double exponential distribution, is a continuous probability distribution that is symmetric around its location parameter. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_laplace.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_laplace.html} In the following, let \eqn{X} be a Laplace random variable with location parameter \code{mu} = \eqn{\mu} and scale parameter \code{sigma} = \eqn{\sigma}. \strong{Support}: \eqn{R}, the set of all real numbers \strong{Mean}: \eqn{\mu} \strong{Variance}: \eqn{2\sigma^2} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{2\sigma} \exp\left(-\frac{|x - \mu|}{\sigma}\right) }{ f(x) = 1 / (2 * sigma) * exp(-|x - mu| / sigma) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \begin{cases} \frac{1}{2} \exp\left(\frac{x - \mu}{\sigma}\right) & \text{if } x < \mu \\ 1 - \frac{1}{2} \exp\left(-\frac{x - \mu}{\sigma}\right) & \text{if } x \geq \mu \end{cases} }{ F(x) = 1/2 * exp((x - mu) / sigma) if x < mu, 1 - 1/2 * exp(-(x - mu) / sigma) if x >= mu } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \frac{\exp(\mu t)}{1 - \sigma^2 t^2} \text{ for } |t| < \frac{1}{\sigma} }{ E(e^(tX)) = exp(mu * t) / (1 - sigma^2 * t^2) for |t| < 1/sigma } } \examples{ dist <- dist_laplace(mu = c(0, 2, -1), sigma = c(1, 2, 0.5)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 0) density(dist, 0, log = TRUE) cdf(dist, 1) quantile(dist, 0.7) } \seealso{ \link[extraDistr:Laplace]{extraDistr::Laplace} } distributional/man/dist_lognormal.Rd0000644000176200001440000000523015131607047017371 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_lognormal.R \name{dist_lognormal} \alias{dist_lognormal} \title{The log-normal distribution} \usage{ dist_lognormal(mu = 0, sigma = 1) } \arguments{ \item{mu}{The mean (location parameter) of the distribution, which is the mean of the associated Normal distribution. Can be any real number.} \item{sigma}{The standard deviation (scale parameter) of the distribution. Can be any positive number.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The log-normal distribution is a commonly used transformation of the Normal distribution. If \eqn{X} follows a log-normal distribution, then \eqn{\ln{X}} would be characterised by a Normal distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_lognormal.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_lognormal.html} In the following, let \eqn{X} be a log-normal random variable with \code{mu} = \eqn{\mu} and \code{sigma} = \eqn{\sigma}. \strong{Support}: \eqn{R^+}, the set of positive real numbers. \strong{Mean}: \eqn{e^{\mu + \sigma^2/2}}{exp(\mu + \sigma^2/2)} \strong{Variance}: \eqn{(e^{\sigma^2} - 1) e^{2\mu + \sigma^2}}{(exp(\sigma^2) - 1) exp(2\mu + \sigma^2)} \strong{Skewness}: \eqn{(e^{\sigma^2} + 2) \sqrt{e^{\sigma^2} - 1}}{(exp(\sigma^2) + 2) sqrt(exp(\sigma^2) - 1)} \strong{Excess Kurtosis}: \eqn{e^{4\sigma^2} + 2 e^{3\sigma^2} + 3 e^{2\sigma^2} - 6}{exp(4\sigma^2) + 2 exp(3\sigma^2) + 3 exp(2\sigma^2) - 6} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{x\sqrt{2 \pi \sigma^2}} e^{-(\ln{x} - \mu)^2 / (2 \sigma^2)} }{ f(x) = 1 / (x sqrt(2 \pi \sigma^2)) exp(-(\log(x) - \mu)^2 / (2 \sigma^2)) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \Phi\left(\frac{\ln{x} - \mu}{\sigma}\right) }{ F(x) = \Phi((\log(x) - \mu) / \sigma) } where \eqn{\Phi} is the c.d.f. of the standard Normal distribution. \strong{Moment generating function (m.g.f)}: Does not exist in closed form. } \examples{ dist <- dist_lognormal(mu = 1:5, sigma = 0.1) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) # A log-normal distribution X is exp(Y), where Y is a Normal distribution of # the same parameters. So log(X) will produce the Normal distribution Y. log(dist) } \seealso{ \link[stats:Lognormal]{stats::Lognormal} } distributional/man/dist_gumbel.Rd0000644000176200001440000000720115131557626016661 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gumbel.R \name{dist_gumbel} \alias{dist_gumbel} \title{The Gumbel distribution} \usage{ dist_gumbel(alpha, scale) } \arguments{ \item{alpha}{location parameter.} \item{scale}{parameter. Must be strictly positive.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Gumbel distribution is a special case of the Generalized Extreme Value distribution, obtained when the GEV shape parameter \eqn{\xi} is equal to 0. It may be referred to as a type I extreme value distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gumbel.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gumbel.html} In the following, let \eqn{X} be a Gumbel random variable with location parameter \code{alpha} = \eqn{\alpha} and scale parameter \code{scale} = \eqn{\sigma}. \strong{Support}: \eqn{R}, the set of all real numbers. \strong{Mean}: \deqn{ E(X) = \alpha + \sigma\gamma }{ E(X) = alpha + sigma*gamma } where \eqn{\gamma} is the Euler-Mascheroni constant, approximately equal to 0.5772157. \strong{Variance}: \deqn{ \textrm{Var}(X) = \frac{\pi^2 \sigma^2}{6} }{ Var(X) = (pi^2 * sigma^2) / 6 } \strong{Skewness}: \deqn{ \textrm{Skew}(X) = \frac{12\sqrt{6}\zeta(3)}{\pi^3} \approx 1.1395 }{ Skew(X) = (12*sqrt(6)*zeta(3)) / pi^3 } where \eqn{\zeta(3)} is Apery's constant, approximately equal to 1.2020569. Note that skewness is independent of the distribution parameters. \strong{Kurtosis (excess)}: \deqn{ \textrm{Kurt}(X) = \frac{12}{5} = 2.4 }{ Kurt(X) = 12/5 } Note that excess kurtosis is independent of the distribution parameters. \strong{Median}: \deqn{ \textrm{Median}(X) = \alpha - \sigma\ln(\ln 2) }{ Median(X) = alpha - sigma*ln(ln(2)) } \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{1}{\sigma} \exp\left[-\frac{x - \alpha}{\sigma}\right] \exp\left\{-\exp\left[-\frac{x - \alpha}{\sigma}\right]\right\} }{ f(x) = (1/sigma) * exp[-(x - alpha)/sigma] * exp(-exp[-(x - alpha)/sigma]) } for \eqn{x} in \eqn{R}, the set of all real numbers. \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \exp\left\{-\exp\left[-\frac{x - \alpha}{\sigma}\right]\right\} }{ F(x) = exp(-exp[-(x - alpha)/sigma]) } for \eqn{x} in \eqn{R}, the set of all real numbers. \strong{Quantile function (inverse c.d.f)}: \deqn{ F^{-1}(p) = \alpha - \sigma \ln(-\ln p) }{ F^(-1)(p) = alpha - sigma * ln(-ln(p)) } for \eqn{p} in (0, 1). \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \Gamma(1 - \sigma t) e^{\alpha t} }{ E(e^(tX)) = Gamma(1 - sigma*t) * e^(alpha*t) } for \eqn{\sigma t < 1}, where \eqn{\Gamma} is the gamma function. } \examples{ dist <- dist_gumbel(alpha = c(0.5, 1, 1.5, 3), scale = c(2, 2, 3, 4)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) skewness(dist) kurtosis(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:Gumbel]{actuar::Gumbel}, \code{\link[actuar:Gumbel]{actuar::dgumbel()}}, \code{\link[actuar:Gumbel]{actuar::pgumbel()}}, \code{\link[actuar:Gumbel]{actuar::qgumbel()}}, \code{\link[actuar:Gumbel]{actuar::rgumbel()}}, \code{\link[actuar:Gumbel]{actuar::mgumbel()}} } distributional/man/covariance.distribution.Rd0000644000176200001440000000123715112047012021174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{covariance.distribution} \alias{covariance.distribution} \title{Covariance of a probability distribution} \usage{ \method{covariance}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Returns the empirical covariance of the probability distribution. If the method does not exist, the covariance of a random sample will be returned. } distributional/man/reexports.Rd0000644000176200001440000000062415112047012016376 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reexports.R \docType{import} \name{reexports} \alias{reexports} \alias{generate} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{generics}{\code{\link[generics]{generate}}} }} distributional/man/kurtosis.Rd0000644000176200001440000000103015112047012016216 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{kurtosis} \alias{kurtosis} \alias{kurtosis.distribution} \title{Kurtosis of a probability distribution} \usage{ kurtosis(x, ...) \method{kurtosis}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } distributional/man/dist_geometric.Rd0000644000176200001440000000434015131571433017355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_geometric.R \name{dist_geometric} \alias{dist_geometric} \title{The Geometric Distribution} \usage{ dist_geometric(prob) } \arguments{ \item{prob}{probability of success in each trial. \code{0 < prob <= 1}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Geometric distribution can be thought of as a generalization of the \code{\link[=dist_bernoulli]{dist_bernoulli()}} distribution where we ask: "if I keep flipping a coin with probability \code{p} of heads, what is the probability I need \eqn{k} flips before I get my first heads?" The Geometric distribution is a special case of Negative Binomial distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_geometric.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_geometric.html} In the following, let \eqn{X} be a Geometric random variable with success probability \code{prob} = \eqn{p}. Note that there are multiple parameterizations of the Geometric distribution. \strong{Support}: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} \strong{Mean}: \eqn{\frac{1-p}{p}} \strong{Variance}: \eqn{\frac{1-p}{p^2}} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = k) = p(1-p)^k }{ P(X = k) = p(1-p)^k } \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X \le k) = 1 - (1-p)^{k+1} }{ P(X \le k) = 1 - (1-p)^(k+1) } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \frac{pe^t}{1 - (1-p)e^t} }{ E(e^(tX)) = pe^t / (1 - (1-p)e^t) } \strong{Skewness}: \deqn{ \frac{2 - p}{\sqrt{1 - p}} }{ (2 - p) / sqrt(1 - p) } \strong{Excess Kurtosis}: \deqn{ 6 + \frac{p^2}{1 - p} }{ 6 + p^2 / (1 - p) } } \examples{ dist <- dist_geometric(prob = c(0.2, 0.5, 0.8)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Geometric]{stats::Geometric} } distributional/man/mean.distribution.Rd0000644000176200001440000000117315112047012020001 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{mean.distribution} \alias{mean.distribution} \title{Mean of a probability distribution} \usage{ \method{mean}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Returns the empirical mean of the probability distribution. If the method does not exist, the mean of a random sample will be returned. } distributional/man/dist_multivariate_normal.Rd0000644000176200001440000000624615131635552021467 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_multivariate_normal.R \name{dist_multivariate_normal} \alias{dist_multivariate_normal} \title{The multivariate normal distribution} \usage{ dist_multivariate_normal(mu = 0, sigma = diag(1)) } \arguments{ \item{mu}{A list of numeric vectors for the distribution's mean.} \item{sigma}{A list of matrices for the distribution's variance-covariance matrix.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The multivariate normal distribution is a generalization of the univariate normal distribution to higher dimensions. It is widely used in multivariate statistics and describes the joint distribution of multiple correlated continuous random variables. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_multivariate_normal.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_multivariate_normal.html} In the following, let \eqn{\mathbf{X}} be a \eqn{k}-dimensional multivariate normal random variable with mean vector \code{mu} = \eqn{\boldsymbol{\mu}} and variance-covariance matrix \code{sigma} = \eqn{\boldsymbol{\Sigma}}. \strong{Support}: \eqn{\mathbf{x} \in \mathbb{R}^k} \strong{Mean}: \eqn{\boldsymbol{\mu}} \strong{Variance-covariance matrix}: \eqn{\boldsymbol{\Sigma}} \strong{Probability density function (p.d.f)}: \deqn{ f(\mathbf{x}) = \frac{1}{(2\pi)^{k/2} |\boldsymbol{\Sigma}|^{1/2}} \exp\left(-\frac{1}{2}(\mathbf{x} - \boldsymbol{\mu})^T \boldsymbol{\Sigma}^{-1}(\mathbf{x} - \boldsymbol{\mu})\right) }{ f(x) = 1 / ((2\pi)^(k/2) |\Sigma|^(1/2)) exp(-1/2 (x - \mu)^T \Sigma^(-1) (x - \mu)) } where \eqn{|\boldsymbol{\Sigma}|}{|\Sigma|} is the determinant of \eqn{\boldsymbol{\Sigma}}{\Sigma}. \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(\mathbf{X} \le \mathbf{q}) = P(X_1 \le q_1, \ldots, X_k \le q_k) }{ P(X \le q) = P(X_1 \le q_1, ..., X_k \le q_k) } The c.d.f. does not have a closed-form expression and is computed numerically. \strong{Moment generating function (m.g.f)}: \deqn{ M(\mathbf{t}) = E(e^{\mathbf{t}^T \mathbf{X}}) = \exp\left(\mathbf{t}^T \boldsymbol{\mu} + \frac{1}{2}\mathbf{t}^T \boldsymbol{\Sigma} \mathbf{t}\right) }{ M(t) = E(e^(t^T X)) = exp(t^T \mu + 1/2 t^T \Sigma t) } } \examples{ dist <- dist_multivariate_normal(mu = list(c(1,2)), sigma = list(matrix(c(4,2,2,3), ncol=2))) dimnames(dist) <- c("x", "y") dist \dontshow{if (requireNamespace("mvtnorm", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, cbind(2, 1)) density(dist, cbind(2, 1), log = TRUE) cdf(dist, 4) quantile(dist, 0.7, kind = "equicoordinate") quantile(dist, 0.7, kind = "marginal") \dontshow{\}) # examplesIf} } \seealso{ \code{\link[mvtnorm:Mvnorm]{mvtnorm::dmvnorm()}}, \code{\link[mvtnorm:pmvnorm]{mvtnorm::pmvnorm()}}, \code{\link[mvtnorm:qmvnorm]{mvtnorm::qmvnorm()}}, \code{\link[mvtnorm:Mvnorm]{mvtnorm::rmvnorm()}} } distributional/man/distributional-package.Rd0000644000176200001440000000314315112047012020767 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distributional-package.R \docType{package} \name{distributional-package} \alias{distributional} \alias{distributional-package} \title{distributional: Vectorised Probability Distributions} \description{ Vectorised distribution objects with tools for manipulating, visualising, and using probability distributions. Designed to allow model prediction outputs to return distributions rather than their parameters, allowing users to directly interact with predictive distributions in a data-oriented workflow. In addition to providing generic replacements for p/d/q/r functions, other useful statistics can be computed including means, variances, intervals, and highest density regions. } \seealso{ Useful links: \itemize{ \item \url{https://pkg.mitchelloharawild.com/distributional/} \item \url{https://github.com/mitchelloharawild/distributional} \item Report bugs at \url{https://github.com/mitchelloharawild/distributional/issues} } } \author{ \strong{Maintainer}: Mitchell O'Hara-Wild \email{mail@mitchelloharawild.com} (\href{https://orcid.org/0000-0001-6729-7695}{ORCID}) Authors: \itemize{ \item Matthew Kay (\href{https://orcid.org/0000-0001-9446-0419}{ORCID}) \item Alex Hayes (\href{https://orcid.org/0000-0002-4985-5160}{ORCID}) \item Rob Hyndman (\href{https://orcid.org/0000-0002-2140-5352}{ORCID}) } Other contributors: \itemize{ \item Earo Wang (\href{https://orcid.org/0000-0001-6448-5260}{ORCID}) [contributor] \item Vencislav Popov (\href{https://orcid.org/0000-0002-8073-4199}{ORCID}) [contributor] } } \keyword{internal} distributional/man/dist_truncated.Rd0000644000176200001440000000567215131601157017376 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_truncated.R \name{dist_truncated} \alias{dist_truncated} \title{Truncate a distribution} \usage{ dist_truncated(dist, lower = -Inf, upper = Inf) } \arguments{ \item{dist}{The distribution(s) to truncate.} \item{lower, upper}{The range of values to keep from a distribution.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Note that the samples are generated using inverse transform sampling, and the means and variances are estimated from samples. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_truncated.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_truncated.html} In the following, let \eqn{X} be a truncated random variable with underlying distribution \eqn{Y}, truncation bounds \code{lower} = \eqn{a} and \code{upper} = \eqn{b}, where \eqn{F_Y(x)} is the c.d.f. of \eqn{Y} and \eqn{f_Y(x)} is the p.d.f. of \eqn{Y}. \strong{Support}: \eqn{[a, b]} \strong{Mean}: For the general case, the mean is approximated numerically. For a truncated Normal distribution with underlying mean \eqn{\mu} and standard deviation \eqn{\sigma}, the mean is: \deqn{ E(X) = \mu + \frac{\phi(\alpha) - \phi(\beta)}{\Phi(\beta) - \Phi(\alpha)} \sigma }{ E(X) = mu + (phi(alpha) - phi(beta)) / (Phi(beta) - Phi(alpha)) * sigma } where \eqn{\alpha = (a - \mu)/\sigma}, \eqn{\beta = (b - \mu)/\sigma}, \eqn{\phi} is the standard Normal p.d.f., and \eqn{\Phi} is the standard Normal c.d.f. \strong{Variance}: Approximated numerically for all distributions. \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \begin{cases} \frac{f_Y(x)}{F_Y(b) - F_Y(a)} & \text{if } a \le x \le b \\ 0 & \text{otherwise} \end{cases} }{ f(x) = f_Y(x) / (F_Y(b) - F_Y(a)) if a <= x <= b, 0 otherwise } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \begin{cases} 0 & \text{if } x < a \\ \frac{F_Y(x) - F_Y(a)}{F_Y(b) - F_Y(a)} & \text{if } a \le x \le b \\ 1 & \text{if } x > b \end{cases} }{ F(x) = 0 if x < a, (F_Y(x) - F_Y(a)) / (F_Y(b) - F_Y(a)) if a <= x <= b, 1 if x > b } \strong{Quantile function}: \deqn{ Q(p) = F_Y^{-1}(F_Y(a) + p(F_Y(b) - F_Y(a))) }{ Q(p) = F_Y^(-1)(F_Y(a) + p(F_Y(b) - F_Y(a))) } clamped to the interval \eqn{[a, b]}. } \examples{ dist <- dist_truncated(dist_normal(2,1), lower = 0) dist mean(dist) variance(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) if(requireNamespace("ggdist")) { library(ggplot2) ggplot() + ggdist::stat_dist_halfeye( aes(y = c("Normal", "Truncated"), dist = c(dist_normal(2,1), dist_truncated(dist_normal(2,1), lower = 0))) ) } } distributional/man/dist_gk.Rd0000644000176200001440000000445415131604123015777 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gk.R \name{dist_gk} \alias{dist_gk} \title{The g-and-k Distribution} \usage{ dist_gk(A, B, g, k, c = 0.8) } \arguments{ \item{A}{Vector of A (location) parameters.} \item{B}{Vector of B (scale) parameters. Must be positive.} \item{g}{Vector of g parameters.} \item{k}{Vector of k parameters. Must be at least -0.5.} \item{c}{Vector of c parameters. Often fixed at 0.8 which is the default.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The g-and-k distribution is a flexible distribution often used to model univariate data. It is particularly known for its ability to handle skewness and heavy-tailed behavior. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gk.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gk.html} In the following, let \eqn{X} be a g-k random variable with parameters \code{A}, \code{B}, \code{g}, \code{k}, and \code{c}. \strong{Support}: \eqn{(-\infty, \infty)} \strong{Mean}: Not available in closed form. \strong{Variance}: Not available in closed form. \strong{Probability density function (p.d.f)}: The g-k distribution does not have a closed-form expression for its density. Instead, it is defined through its quantile function: \deqn{ Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) (1 + z(u)^2)^k z(u) }{ Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * (1 + z(u)^2)^k * z(u) } where \eqn{z(u) = \Phi^{-1}(u)}, the standard normal quantile of u. \strong{Cumulative distribution function (c.d.f)}: The cumulative distribution function is typically evaluated numerically due to the lack of a closed-form expression. } \examples{ dist <- dist_gk(A = 0, B = 1, g = 0, k = 0.5) dist \dontshow{if (requireNamespace("gk", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[gk:g-and-k]{gk::dgk}, \link{dist_gh} } distributional/man/figures/0000755000176200001440000000000015131610243015521 5ustar liggesusersdistributional/man/figures/README-plot-1.png0000644000176200001440000003467315131610243020313 0ustar liggesusersPNG  IHDRHc pHYsod IDATxg`e2)eFA QDEQP@ō qTEEd -&CMҴMrI(,hN#h@<D mm'j tJZ[[|d !L[/L[sZ?eа6챸{5#w+oKاnW k;NwsࠕVr K~\C B]gΚ5k֬7x:.{w[;ӳ++ՈS9X!KfͱR@KOMqow \;kFFYrsu˼Oݮ]+c;"(]#&=Ѻ`#L}Ơ~/N53fQVEg0HoO(X/_d52fhsi~TC-l#/˺.B6w8ݳp.-+:W?COYu9ޞF;#?b9eW )_L_Fjbk%CPTl`3?ͺGڑA]G54Eܷ!zb^7S0uuhK͍A}_l0C [g'99;ml6ǫ?vx,. U-ckOIEwZ0_K;s]1iOd]/rxf:wiw^eėi9ojVBRa{|R 2I:Shv.#9\;<0(vlvr/O410`d=_ESÉ\l95WQgmewPg6ϼs݈-oynO3~xڕvbSh97WSnmQ]~*o`N6'E)|ы퍆&ʑeY]vlC!݈.xq72.8#˹shZQxm'+Ug%,9^mh3yyj@]Ġ姭rKgօC;>z q|fήBo]w%}z[CIlMߵ~bs7~i.%U9^(}@ݾ^GoiW֢ûow-U8wgl3ֺw9[8_=vft8{ց EK{t->emsCuƍ(%CӼ.Ǎgoq,~zGSqmi 8=ꊭmU|qe˓%Sc^3;W5GFU ÃRlu^_fLH<{%s~lPtr>hf~5"\ }u,9L}DG#^,A?SJ34>-[-g5OOth|wrQ/45VcU-'c wUC Z,֣:nbskےT7s(<|/FNOMu!Aw5cuWP8`y5)1ɖW$_jwsYQYpЯ5m!wTUqΕ%cj8P7dߴtԣ}C>q` Kޣ{M;֭W#膁|[CJ쐡JBC 9쩋d;vm6~¢YN~TfUqW>RMr.zhǛ>z'ޯ},~kܛیyu7W\5:ʱ1k,:Xhqxve?k'CX͵ py.09e3wZ?gȉO1\{ zۥy {ٲߊekWP5h5^:[J.'Rۼݠ|/m l!zoX-ꟍ _s7ZvMrl+RG>tɉ_֚v^*w`v-t۹/a9j'ǃ*\P=K{>ĿЍ-iǧNaJo_k筥O4ϝ+雎{ny_#xxLNZgpPyѱ,:Uι=*FϾ ܽs`!DXA7^)C Eexi؄q]#.l]6s_˭Ho=;ۭ{pdww\@3kWЯ+|hJ^6/ɑخ(ߒ uvWӹ?^ fצoТ[>(5~l yʛwm+@M]}Z]SJW=[Gl {Su*"+[ 7MKk7;J%Î>N+z ]%X1yĝ,e~|.#Bv5S es}_ɕGkL%[ev%\f6"ucù_V{߹Avӻ}Wva?~9|vqQ7F$!I MPeٖk>MTm;p}kyuܦfS`DNwM8}Dlɖ3;p}wʡ!Qb_?  6G Yyzj&sexYl$&qKT2~[O88Puت::;o+_eY=F4?ߏh8>!V7o[)٠%N7Eg4IvC,293y+rW QQ!]"6^l|)W8@{*Ǫ_t]޺1u W9\'{xɢ@= "h@<D x4OMbbe6CCCl6;Φ2!##)YYYiii%O rңp`7*a6YYYJ"##ҴwPe2Š  "h@<D x4A "h@<D x4A "h@ JWիF>5_$v9NEGx-pc-o!si@d|a^{Gr'y?v@@Q3@aǨ0dT5wxA? TԀ_%exLyP x>jI#Sy2^+"!*yex(F~?9 ^u0xLs<xp "-Sy(?P^-YqL<Ԏ@xU `%b@x :9b>9mN% <@qs"4Εxqx9>ݸȹ]XF]ngXZǖyv}tpMKv21#:U->SwY[vmBȩ;.K4We C۷ٳ*T[Abcc`럡|ř;JRҁM}37F9k,˒[o?OjD |wiW|4q_yrNlΎowmB6׏og$*11qԩgϞ1c\ʎ (2|PHBTn6qLBll54]d.t΀}s_ɦFßܯ^\|BX-`IdqcCtB45f~/lR;.Kſ$S+bҥu/`Z,YҸq㈈EF(-_YJnaQ n1lb !8ׇZ0y BXN Ίz {ߜ8n:{T}68cg:]7,AB!OQEWhRO.虳coj{󽩽{jϟ4iңpUVV2naÆ)= *}ga?)iB?ER`'f#'m=n5kֱcDŽ/R7]N駟 .}zHHңPmICWz} <1wlvox),Bɲ$ }Q W"eraNJEk$*V*,r1B!Id2$!dq wg[\>95pҜj"?aaaBGl-r?e˖47>"++ŋ]&][ڵsbvwy?]rC WJ```ppVGRRR<f!DVV7 fsVVVFFq0/!e2nCvRw{mG!OY M*U0 [?q[g7*5 Bjq&S4Sv|9b-*˩g/Tv9yӉ1ukb]vBBk4 CՑ#Gnܸ۷o*."""++8 `@/?s{>8G^:zra/8ɳIirq,!lg%7An>>~axGf9Vô ;՚5kV h6}]CYOuƷr;>]~ jo;~gQ&^3}>444))IoҔV9gB n޼9>>r-[?~|Æ 82 }V_LKKK*?L&S'̊P0$dѦmV2 uohc=ߟh5l!nO>O>@7B j>r|7t;@r`[wSq lF0@<D Cv  @Ie !`9(*s㹋%@K<AAx^<_Aqqq^{Vq %s-B\EÜcxy+Jt4V *5ԏVDFˌM#"QxO'`3B<Ȓ1NʋC_c5 wD(uX,;w<~xJJJjZlYF GÇWj$Ke򅯞}duظQIBg_<‡{`4+&. ~-U%!D恏;5?=!ySwY[vmBȩ;.K4We%&&kf9444))fkj`4 CFFqCΞ=;k֬g"I7|]wIs}XXؕ+W3&),,%%]vBǰRZ6E :!e,go?OjD &rgn $sS?,[k%!}Mlq9;ܧegg.\(|,|MxxxJ^y`+>̓#e V~*\1{ WLZrkퟜ=f3uM65~ r/)XI#E":O q>AT>d|a엗8bmޱF]L Ijק$*^~vںuң(… E^>;r 'ƍ3x%N\vA-kn9V5o8U)3?7^ٷG 8ɰﬨIooً#,z|ᦣG58Ç?}sL^6Cc>›nB'd_9iԙ$QIQ٢b3\=svИf>ߛڻGhnٲŋB.]}]+69z^xk=\|oUz:zE)X,;v(= ;*U¢URGJ¾R=k8pFRea]z)wwئn6VSm3www $0뚹;[BW}u*>SA.nFo1Inj92d`]Q8r-s_`1WiܱC[W}sETkFwޱc*= xƍK!u/^(|OHHCpbؽŗӔ:=wAW)b+B*FF?FBH$&IY俾fcŢ{tW5'JBo8soj?ɻYu0RRmB~[ANd2^t+fee{bᑑ6mĒKTڕO,Y7pu1''c#r($$čW"*U+%444%%EQ8ⷻ_fL Jx&넰]bK7/]s_>j5wK!!g3d!|xuJBH:`0LSHp-~퇠,{^Χ@Z4oBYrra*-dzY +ԪLh,t.\OZ$Vw7 !SgO'ԭa ~WzE 4(>>~ݺuEn7L^JTA;t[~f~0ra/8ɳIjN&B  lU=m 5%z@hwr=T<5ڽ bhQSy \H5k(=!D\\TFU"r5: Sy({/#1R޻ `*ExpsT^FHt&arW?3/a/ *•]䞽W6l "Zp}Kg3ޕGxU*"mݍ|C|뱗SyxW}SOٹ(p;N.J xe5`/@|Rs~wgU'Ɨw{є9Kϛ!+˹T k6nb̛7OQxVhhiӔ{)=Juـ[xCvRTvsv^?wiַxpf0p]XB勻n=dJ@Ig)[;4_QF۷OQj 6q>/$..>+rAJJĉ{cJ\T5s>|xddңpbؽ۟*kg;*T^ 'M]þ`L+/B._c#;**jȑ#/^( r\`0 ><--k vרOIdZ֭[w!{r`_+th#m,9G.j㉏V=b޷_wl=h&Ʈ1Q1;foOw0G9szY/ZH(+@̒ڃ{_ MJJ;jp%z_ 9Ylٲu֓'OV^CzRiuN.1Lv7V201F'zNyjW8dEGJyɥ% m_>*}aiދkJʒk]~݋ _k;_%HX]n9~37HBHI*R]7>dBN۳xꏍ:T*78TK!T5EC`on?.ˮ /76ڋ|Sr!tz@0|pr# *~'jpiV7)[m;PSpI9c2dBrʎ/7G rxkީY [j_MX/V񆮢N.JkסZ-^K}S"X΢'2 Q8}%9fXZa6ٹʰZ6ugm/U7m#ʞ3 [}c)lN.ٻ`J/V`Gpr@x x4A "h@<D  9[ IENDB`distributional/man/figures/lifecycle-questioning.svg0000644000176200001440000000171415112047012022544 0ustar liggesuserslifecyclelifecyclequestioningquestioning distributional/man/figures/lifecycle-stable.svg0000644000176200001440000000167415112047012021456 0ustar liggesuserslifecyclelifecyclestablestable distributional/man/figures/lifecycle-experimental.svg0000644000176200001440000000171615112047012022676 0ustar liggesuserslifecyclelifecycleexperimentalexperimental distributional/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171215112047012022275 0ustar liggesuserslifecyclelifecycledeprecateddeprecated distributional/man/figures/lifecycle-superseded.svg0000644000176200001440000000171315112047012022341 0ustar liggesusers lifecyclelifecyclesupersededsuperseded distributional/man/figures/lifecycle-archived.svg0000644000176200001440000000170715112047012021766 0ustar liggesusers lifecyclelifecyclearchivedarchived distributional/man/figures/lifecycle-defunct.svg0000644000176200001440000000170415112047012021626 0ustar liggesuserslifecyclelifecycledefunctdefunct distributional/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172615112047012023253 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated distributional/man/figures/lifecycle-maturing.svg0000644000176200001440000000170615112047012022026 0ustar liggesuserslifecyclelifecyclematuringmaturing distributional/man/figures/lifecycle-retired.svg0000644000176200001440000000170515112047012021635 0ustar liggesusers lifecyclelifecycleretiredretired distributional/man/dist_binomial.Rd0000644000176200001440000000636215131571433017177 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_binomial.R \name{dist_binomial} \alias{dist_binomial} \title{The Binomial distribution} \usage{ dist_binomial(size, prob) } \arguments{ \item{size}{The number of trials. Must be an integer greater than or equal to one. When \code{size = 1L}, the Binomial distribution reduces to the Bernoulli distribution. Often called \code{n} in textbooks.} \item{prob}{The probability of success on each trial, \code{prob} can be any value in \verb{[0, 1]}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Binomial distributions are used to represent situations can that can be thought as the result of \eqn{n} Bernoulli experiments (here the \eqn{n} is defined as the \code{size} of the experiment). The classical example is \eqn{n} independent coin flips, where each coin flip has probability \code{p} of success. In this case, the individual probability of flipping heads or tails is given by the Bernoulli(p) distribution, and the probability of having \eqn{x} equal results (\eqn{x} heads, for example), in \eqn{n} trials is given by the Binomial(n, p) distribution. The equation of the Binomial distribution is directly derived from the equation of the Bernoulli distribution. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_binomial.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_binomial.html} The Binomial distribution comes up when you are interested in the portion of people who do a thing. The Binomial distribution also comes up in the sign test, sometimes called the Binomial test (see \code{\link[stats:binom.test]{stats::binom.test()}}), where you may need the Binomial C.D.F. to compute p-values. In the following, let \eqn{X} be a Binomial random variable with parameter \code{size} = \eqn{n} and \code{p} = \eqn{p}. Some textbooks define \eqn{q = 1 - p}, or called \eqn{\pi} instead of \eqn{p}. \strong{Support}: \eqn{\{0, 1, 2, ..., n\}}{{0, 1, 2, ..., n}} \strong{Mean}: \eqn{np} \strong{Variance}: \eqn{np \cdot (1 - p) = np \cdot q}{np (1 - p)} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = k) = {n \choose k} p^k (1 - p)^{n-k} }{ P(X = k) = choose(n, k) p^k (1 - p)^(n - k) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X \le k) = \sum_{i=0}^{\lfloor k \rfloor} {n \choose i} p^i (1 - p)^{n-i} }{ P(X \le k) = \sum_{i=0}^k choose(n, i) p^i (1 - p)^(n-i) } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = (1 - p + p e^t)^n }{ E(e^(tX)) = (1 - p + p e^t)^n } \strong{Skewness}: \deqn{ \frac{1 - 2p}{\sqrt{np(1-p)}} }{ (1 - 2p) / sqrt(np(1-p)) } \strong{Excess kurtosis}: \deqn{ \frac{1 - 6p(1-p)}{np(1-p)} }{ (1 - 6p(1-p)) / (np(1-p)) } } \examples{ dist <- dist_binomial(size = 1:5, prob = c(0.05, 0.5, 0.3, 0.9, 0.1)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Binomial]{stats::Binomial} } distributional/man/dist_gamma.Rd0000644000176200001440000000524715131604123016461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gamma.R \name{dist_gamma} \alias{dist_gamma} \title{The Gamma distribution} \usage{ dist_gamma(shape, rate = 1/scale, scale = 1/rate) } \arguments{ \item{shape, scale}{shape and scale parameters. Must be positive, \code{scale} strictly.} \item{rate}{an alternative way to specify the scale.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Several important distributions are special cases of the Gamma distribution. When the shape parameter is \code{1}, the Gamma is an exponential distribution with parameter \eqn{1/\beta}. When the \eqn{shape = n/2} and \eqn{rate = 1/2}, the Gamma is a equivalent to a chi squared distribution with n degrees of freedom. Moreover, if we have \eqn{X_1} is \eqn{Gamma(\alpha_1, \beta)} and \eqn{X_2} is \eqn{Gamma(\alpha_2, \beta)}, a function of these two variables of the form \eqn{\frac{X_1}{X_1 + X_2}} \eqn{Beta(\alpha_1, \alpha_2)}. This last property frequently appears in another distributions, and it has extensively been used in multivariate methods. More about the Gamma distribution will be added soon. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gamma.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gamma.html} In the following, let \eqn{X} be a Gamma random variable with parameters \code{shape} = \eqn{\alpha} and \code{rate} = \eqn{\beta}. \strong{Support}: \eqn{x \in (0, \infty)} \strong{Mean}: \eqn{\frac{\alpha}{\beta}} \strong{Variance}: \eqn{\frac{\alpha}{\beta^2}} \strong{Probability density function (p.m.f)}: \deqn{ f(x) = \frac{\beta^{\alpha}}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} }{ f(x) = \frac{\beta^{\alpha}}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} } \strong{Cumulative distribution function (c.d.f)}: \deqn{ f(x) = \frac{\Gamma(\alpha, \beta x)}{\Gamma{\alpha}} }{ f(x) = \frac{\Gamma(\alpha, \beta x)}{\Gamma{\alpha}} } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \Big(\frac{\beta}{ \beta - t}\Big)^{\alpha}, \thinspace t < \beta }{ E(e^(tX)) = \Big(\frac{\beta}{ \beta - t}\Big)^{\alpha}, \thinspace t < \beta } } \examples{ dist <- dist_gamma(shape = c(1,2,3,5,9,7.5,0.5), rate = c(0.5,0.5,0.5,1,2,1,1)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:GammaDist]{stats::GammaDist} } distributional/man/dist_gh.Rd0000644000176200001440000000613415131557626016010 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gh.R \name{dist_gh} \alias{dist_gh} \title{The generalised g-and-h Distribution} \usage{ dist_gh(A, B, g, h, c = 0.8) } \arguments{ \item{A}{Vector of A (location) parameters.} \item{B}{Vector of B (scale) parameters. Must be positive.} \item{g}{Vector of g parameters.} \item{h}{Vector of h parameters. Must be non-negative.} \item{c}{Vector of c parameters (used for generalised g-and-h). Often fixed at 0.8 which is the default.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The generalised g-and-h distribution is a flexible distribution used to model univariate data, similar to the g-k distribution. It is known for its ability to handle skewness and heavy-tailed behavior. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gh.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gh.html} In the following, let \eqn{X} be a g-and-h random variable with parameters \code{A} = \eqn{A}, \code{B} = \eqn{B}, \code{g} = \eqn{g}, \code{h} = \eqn{h}, and \code{c} = \eqn{c}. \strong{Support}: \eqn{(-\infty, \infty)}{R} \strong{Mean}: Does not have a closed-form expression. Approximated numerically. \strong{Variance}: Does not have a closed-form expression. Approximated numerically. \strong{Probability density function (p.d.f)}: The g-and-h distribution does not have a closed-form expression for its density. The density is approximated numerically from the quantile function. The distribution is defined through its quantile function: \deqn{ Q(u) = A + B \left( 1 + c \frac{1 - \exp(-gz(u))}{1 + \exp(-gz(u))} \right) \exp(h z(u)^2/2) z(u) }{ Q(u) = A + B * (1 + c * ((1 - exp(-g * z(u))) / (1 + exp(-g * z(u))))) * exp(h * z(u)^2/2) * z(u) } where \eqn{z(u) = \Phi^{-1}(u)} is the standard normal quantile function. \strong{Cumulative distribution function (c.d.f)}: Does not have a closed-form expression. The cumulative distribution function is approximated numerically by inverting the quantile function. \strong{Quantile function}: \deqn{ Q(p) = A + B \left( 1 + c \frac{1 - \exp(-g\Phi^{-1}(p))}{1 + \exp(-g\Phi^{-1}(p))} \right) \exp(h (\Phi^{-1}(p))^2/2) \Phi^{-1}(p) }{ Q(p) = A + B * (1 + c * ((1 - exp(-g * qnorm(p))) / (1 + exp(-g * qnorm(p))))) * exp(h * qnorm(p)^2/2) * qnorm(p) } where \eqn{\Phi^{-1}(p)} is the standard normal quantile function. } \examples{ dist <- dist_gh(A = 0, B = 1, g = 0, h = 0.5) dist \dontshow{if (requireNamespace("gk", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \code{\link[gk:g-and-h]{gk::dgh()}}, \code{\link[gk:g-and-h]{gk::pgh()}}, \code{\link[gk:g-and-h]{gk::qgh()}}, \code{\link[gk:g-and-h]{gk::rgh()}}, \code{\link[=dist_gk]{dist_gk()}} } distributional/man/dist_sample.Rd0000644000176200001440000000730515131575653016674 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_sample.R \name{dist_sample} \alias{dist_sample} \title{Sampling distribution} \usage{ dist_sample(x) } \arguments{ \item{x}{A list of sampled values. For univariate distributions, each element should be a numeric vector. For multivariate distributions, each element should be a matrix where columns represent variables and rows represent observations.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The sampling distribution represents an empirical distribution based on observed samples. It is useful for bootstrapping, representing posterior distributions from Markov Chain Monte Carlo (MCMC) algorithms, or working with any empirical data where the parametric form is unknown. Unlike parametric distributions, the sampling distribution makes no assumptions about the underlying data-generating process and instead uses the sample itself to estimate distributional properties. The distribution can handle both univariate and multivariate samples. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_sample.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_sample.html} In the following, let \eqn{X} be a random variable with sample \eqn{x_1, x_2, \ldots, x_n} of size \eqn{n}. \strong{Support}: The observed range of the sample \strong{Mean} (univariate): \deqn{ \bar{x} = \frac{1}{n} \sum_{i=1}^{n} x_i }{ mean(x) = (1/n) sum(x_i) } \strong{Mean} (multivariate): Computed independently for each variable. \strong{Variance} (univariate): \deqn{ s^2 = \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \bar{x})^2 }{ s^2 = (1/(n-1)) sum((x_i - mean(x))^2) } \strong{Covariance} (multivariate): The sample covariance matrix. \strong{Skewness} (univariate): \deqn{ g_1 = \frac{\sqrt{n} \sum_{i=1}^{n} (x_i - \bar{x})^3}{\left(\sum_{i=1}^{n} (x_i - \bar{x})^2\right)^{3/2}} \left(1 - \frac{1}{n}\right)^{3/2} }{ g_1 = (sqrt(n) sum((x_i - mean(x))^3) / (sum((x_i - mean(x))^2)^(3/2))) * (1 - 1/n)^(3/2) } \strong{Probability density function}: Approximated numerically using kernel density estimation. \strong{Cumulative distribution function} (univariate): \deqn{ F(q) = \frac{1}{n} \sum_{i=1}^{n} I(x_i \leq q) }{ F(q) = (1/n) sum(I(x_i <= q)) } where \eqn{I(\cdot)} is the indicator function. \strong{Cumulative distribution function} (multivariate): \deqn{ F(\mathbf{q}) = \frac{1}{n} \sum_{i=1}^{n} I(\mathbf{x}_i \leq \mathbf{q}) }{ F(q) = (1/n) sum(I(x_i <= q)) } where the inequality is applied element-wise. \strong{Quantile function} (univariate): The sample quantile, computed using the specified quantile type (see \code{\link[stats:quantile]{stats::quantile()}}). \strong{Quantile function} (multivariate): Marginal quantiles are computed independently for each variable. \strong{Random generation}: Bootstrap sampling with replacement from the empirical sample. } \examples{ # Univariate numeric samples dist <- dist_sample(x = list(rnorm(100), rnorm(100, 10))) dist mean(dist) variance(dist) skewness(dist) generate(dist, 10) density(dist, 1) # Multivariate numeric samples dist <- dist_sample(x = list(cbind(rnorm(100), rnorm(100, 10)))) dimnames(dist) <- c("x", "y") dist mean(dist) variance(dist) generate(dist, 10) quantile(dist, 0.4) # Returns the marginal quantiles cdf(dist, matrix(c(0.3,9), nrow = 1)) } \seealso{ \code{\link[stats:density]{stats::density()}}, \code{\link[stats:quantile]{stats::quantile()}}, \code{\link[stats:cor]{stats::cov()}} } distributional/man/dist_poisson.Rd0000644000176200001440000000471415131575653017106 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_poisson.R \name{dist_poisson} \alias{dist_poisson} \title{The Poisson Distribution} \usage{ dist_poisson(lambda) } \arguments{ \item{lambda}{The rate parameter (mean and variance) of the distribution. Can be any positive number. This represents the expected number of events in the given interval.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Poisson distributions are frequently used to model counts. The Poisson distribution is commonly used to model the number of events occurring in a fixed interval of time or space when these events occur with a known constant mean rate and independently of the time since the last event. Examples include the number of emails received per hour, the number of decay events per second from a radioactive source, or the number of customers arriving at a store per day. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_poisson.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_poisson.html} In the following, let \eqn{X} be a Poisson random variable with parameter \code{lambda} = \eqn{\lambda}. \strong{Support}: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} \strong{Mean}: \eqn{\lambda} \strong{Variance}: \eqn{\lambda} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!} }{ P(X = k) = \lambda^k e^(-\lambda) / k! } \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X \le k) = e^{-\lambda} \sum_{i = 0}^{\lfloor k \rfloor} \frac{\lambda^i}{i!} }{ P(X \le k) = e^(-\lambda) \sum_{i = 0}^k \lambda^i / i! } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = e^{\lambda (e^t - 1)} }{ E(e^(tX)) = e^(\lambda (e^t - 1)) } \strong{Skewness}: \deqn{ \gamma_1 = \frac{1}{\sqrt{\lambda}} }{ \gamma_1 = 1 / sqrt(\lambda) } \strong{Excess kurtosis}: \deqn{ \gamma_2 = \frac{1}{\lambda} }{ \gamma_2 = 1 / \lambda } } \examples{ dist <- dist_poisson(lambda = c(1, 4, 10)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Poisson]{stats::Poisson} } distributional/man/parameters.Rd0000644000176200001440000000134615112047012016510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{parameters} \alias{parameters} \alias{parameters.distribution} \title{Extract the parameters of a distribution} \usage{ parameters(x, ...) \method{parameters}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ dist <- c( dist_normal(1:2), dist_poisson(3), dist_multinomial(size = c(4, 3), prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4))) ) parameters(dist) } distributional/man/dist_multivariate_t.Rd0000644000176200001440000000762515131656214020442 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_multivariate_t.R \name{dist_multivariate_t} \alias{dist_multivariate_t} \title{The multivariate t-distribution} \usage{ dist_multivariate_t(df = 1, mu = 0, sigma = diag(1)) } \arguments{ \item{df}{A numeric vector of degrees of freedom (must be positive).} \item{mu}{A list of numeric vectors for the distribution location parameter.} \item{sigma}{A list of matrices for the distribution scale matrix.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The multivariate t-distribution is a generalization of the univariate Student's t-distribution to multiple dimensions. It is commonly used for modeling heavy-tailed multivariate data and in robust statistics. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_multivariate_t.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_multivariate_t.html} In the following, let \eqn{\mathbf{X}} be a multivariate t random vector with degrees of freedom \code{df} = \eqn{\nu}, location parameter \code{mu} = \eqn{\boldsymbol{\mu}}, and scale matrix \code{sigma} = \eqn{\boldsymbol{\Sigma}}. \strong{Support}: \eqn{\mathbf{x} \in \mathbb{R}^k}, where \eqn{k} is the dimension of the distribution \strong{Mean}: \eqn{\boldsymbol{\mu}} for \eqn{\nu > 1}, undefined otherwise \strong{Covariance matrix}: \deqn{ \text{Cov}(\mathbf{X}) = \frac{\nu}{\nu - 2} \boldsymbol{\Sigma} }{ Cov(X) = \nu / (\nu - 2) \Sigma } for \eqn{\nu > 2}, undefined otherwise \strong{Probability density function (p.d.f)}: \deqn{ f(\mathbf{x}) = \frac{\Gamma\left(\frac{\nu + k}{2}\right)} {\Gamma\left(\frac{\nu}{2}\right) \nu^{k/2} \pi^{k/2} |\boldsymbol{\Sigma}|^{1/2}} \left[1 + \frac{1}{\nu}(\mathbf{x} - \boldsymbol{\mu})^T \boldsymbol{\Sigma}^{-1} (\mathbf{x} - \boldsymbol{\mu})\right]^{-\frac{\nu + k}{2}} }{ f(x) = \Gamma((\nu + k)/2) / (\Gamma(\nu/2) \nu^(k/2) \pi^(k/2) |\Sigma|^(1/2)) [1 + 1/\nu (x - \mu)^T \Sigma^(-1) (x - \mu)]^(-(\nu + k)/2) } where \eqn{k} is the dimension of the distribution and \eqn{\Gamma(\cdot)} is the gamma function. \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(\mathbf{t}) = \int_{-\infty}^{t_1} \cdots \int_{-\infty}^{t_k} f(\mathbf{x}) \, d\mathbf{x} }{ F(t) = integral_{-\infty}^{t_1} ... integral_{-\infty}^{t_k} f(x) dx } This integral does not have a closed form solution and is approximated numerically. \strong{Quantile function}: The equicoordinate quantile function finds \eqn{q} such that: \deqn{ P(X_1 \leq q, \ldots, X_k \leq q) = p }{ P(X_1 <= q, ..., X_k <= q) = p } This does not have a closed form solution and is approximated numerically. The marginal quantile function for each dimension \eqn{i} is: \deqn{ Q_i(p) = \mu_i + \sqrt{\Sigma_{ii}} \cdot t_{\nu}^{-1}(p) }{ Q_i(p) = \mu_i + sqrt(\Sigma_{ii}) * t_\nu^(-1)(p) } where \eqn{t_{\nu}^{-1}(p)} is the quantile function of the univariate Student's t-distribution with \eqn{\nu} degrees of freedom, and \eqn{\Sigma_{ii}} is the \eqn{i}-th diagonal element of \code{sigma}. } \examples{ dist <- dist_multivariate_t( df = 5, mu = list(c(1, 2)), sigma = list(matrix(c(4, 2, 2, 3), ncol = 2)) ) dimnames(dist) <- c("x", "y") dist \dontshow{if (requireNamespace("mvtnorm", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, cbind(2, 1)) density(dist, cbind(2, 1), log = TRUE) cdf(dist, 4) quantile(dist, 0.7) quantile(dist, 0.7, kind = "marginal") \dontshow{\}) # examplesIf} } \seealso{ \link[mvtnorm:Mvt]{mvtnorm::dmvt}, \link[mvtnorm:pmvt]{mvtnorm::pmvt}, \link[mvtnorm:qmvt]{mvtnorm::qmvt}, \link[mvtnorm:Mvt]{mvtnorm::rmvt} } distributional/man/dist_poisson_inverse_gaussian.Rd0000644000176200001440000000576015131565735022535 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_poisson_inverse_gaussian.R \name{dist_poisson_inverse_gaussian} \alias{dist_poisson_inverse_gaussian} \title{The Poisson-Inverse Gaussian distribution} \usage{ dist_poisson_inverse_gaussian(mean, shape) } \arguments{ \item{mean, shape}{parameters. Must be strictly positive. Infinite values are supported.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Poisson-Inverse Gaussian distribution is a compound Poisson distribution where the rate parameter follows an Inverse Gaussian distribution. It is useful for modeling overdispersed count data. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_poisson_inverse_gaussian.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_poisson_inverse_gaussian.html} In the following, let \eqn{X} be a Poisson-Inverse Gaussian random variable with parameters \code{mean} = \eqn{\mu} and \code{shape} = \eqn{\phi}. \strong{Support}: \eqn{\{0, 1, 2, 3, ...\}}{{0, 1, 2, 3, ...}} \strong{Mean}: \eqn{\mu} \strong{Variance}: \eqn{\frac{\mu}{\phi}(\mu^2 + \phi)}{\mu/\phi (\mu^2 + \phi)} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = x) = \frac{e^{\phi}}{\sqrt{2\pi}} \left(\frac{\phi}{\mu^2}\right)^{x/2} \frac{1}{x!} \int_0^\infty u^{x-1/2} \exp\left(-\frac{\phi u}{2} - \frac{\phi}{2\mu^2 u}\right) du }{ P(X = x) = (e^\phi / sqrt(2\pi)) (\phi/\mu^2)^(x/2) (1/x!) \int_0^\infty u^(x-1/2) exp(-\phi u/2 - \phi/(2\mu^2 u)) du } for \eqn{x = 0, 1, 2, \ldots}{x = 0, 1, 2, ...} \strong{Cumulative distribution function (c.d.f)}: \deqn{ P(X \le x) = \sum_{k=0}^{\lfloor x \rfloor} P(X = k) }{ P(X <= x) = sum_{k=0}^x P(X = k) } The c.d.f does not have a closed form and is approximated numerically. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \exp\left\{\phi\left[1 - \sqrt{1 - \frac{2\mu^2}{\phi}(e^t - 1)}\right]\right\} }{ E(e^(tX)) = exp{\phi [1 - sqrt(1 - 2\mu^2/\phi (e^t - 1))]} } for \eqn{t < -\log(1 + \phi/(2\mu^2))}{t < -log(1 + \phi/(2\mu^2))} } \examples{ dist <- dist_poisson_inverse_gaussian(mean = rep(0.1, 3), shape = c(0.4, 0.8, 1)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:PoissonInverseGaussian]{actuar::PoissonInverseGaussian}, \code{\link[actuar:PoissonInverseGaussian]{actuar::dpoisinvgauss()}}, \code{\link[actuar:PoissonInverseGaussian]{actuar::ppoisinvgauss()}}, \code{\link[actuar:PoissonInverseGaussian]{actuar::qpoisinvgauss()}}, \code{\link[actuar:PoissonInverseGaussian]{actuar::rpoisinvgauss()}} } distributional/man/quantile.distribution.Rd0000644000176200001440000000125215112047012020701 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{quantile.distribution} \alias{quantile.distribution} \title{Distribution Quantiles} \usage{ \method{quantile}{distribution}(x, p, ..., log = FALSE) } \arguments{ \item{x}{The distribution(s).} \item{p}{The probability of the quantile.} \item{...}{Additional arguments passed to methods.} \item{log}{If \code{TRUE}, probabilities will be given as log probabilities.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Computes the quantiles of a distribution. } distributional/man/cdf.Rd0000644000176200001440000000123115112047012015072 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{cdf} \alias{cdf} \alias{cdf.distribution} \title{The cumulative distribution function} \usage{ cdf(x, q, ..., log = FALSE) \method{cdf}{distribution}(x, q, ...) } \arguments{ \item{x}{The distribution(s).} \item{q}{The quantile at which the cdf is calculated.} \item{...}{Additional arguments passed to methods.} \item{log}{If \code{TRUE}, probabilities will be given as log probabilities.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} } distributional/man/covariance.Rd0000644000176200001440000000114315112047012016452 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{covariance} \alias{covariance} \title{Covariance} \usage{ covariance(x, ...) } \arguments{ \item{x}{An object.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} A generic function for computing the covariance of an object. } \seealso{ \code{\link[=covariance.distribution]{covariance.distribution()}}, \code{\link[=variance]{variance()}} } distributional/man/dist_logistic.Rd0000644000176200001440000000377715131557626017241 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_logistic.R \name{dist_logistic} \alias{dist_logistic} \title{The Logistic distribution} \usage{ dist_logistic(location, scale) } \arguments{ \item{location, scale}{location and scale parameters.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} A continuous distribution on the real line. For binary outcomes the model given by \eqn{P(Y = 1 | X) = F(X \beta)} where \eqn{F} is the Logistic \code{\link[=cdf]{cdf()}} is called \emph{logistic regression}. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_logistic.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_logistic.html} In the following, let \eqn{X} be a Logistic random variable with \code{location} = \eqn{\mu} and \code{scale} = \eqn{s}. \strong{Support}: \eqn{R}, the set of all real numbers \strong{Mean}: \eqn{\mu} \strong{Variance}: \eqn{s^2 \pi^2 / 3} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{e^{-\frac{x - \mu}{s}}}{s \left[1 + e^{-\frac{x - \mu}{s}}\right]^2} }{ f(x) = e^(-(x - mu) / s) / (s (1 + e^(-(x - \mu) / s))^2) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = \frac{1}{1 + e^{-\frac{x - \mu}{s}}} }{ F(x) = 1 / (1 + e^(-(x - mu) / s)) } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = e^{\mu t} B(1 - st, 1 + st) }{ E(e^(tX)) = e^(mu t) B(1 - st, 1 + st) } for \eqn{-1 < st < 1}, where \eqn{B(a, b)} is the Beta function. } \examples{ dist <- dist_logistic(location = c(5,9,9,6,2), scale = c(2,3,4,2,1)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Logistic]{stats::Logistic} } distributional/man/dist_weibull.Rd0000644000176200001440000000572315131565735017060 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_weibull.R \name{dist_weibull} \alias{dist_weibull} \title{The Weibull distribution} \usage{ dist_weibull(shape, scale) } \arguments{ \item{shape, scale}{shape and scale parameters, the latter defaulting to 1.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Generalization of the gamma distribution. Often used in survival and time-to-event analyses. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_weibull.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_weibull.html} In the following, let \eqn{X} be a Weibull random variable with shape parameter \code{shape} = \eqn{k} and scale parameter \code{scale} = \eqn{\lambda}. \strong{Support}: \eqn{[0, \infty)} \strong{Mean}: \deqn{ E(X) = \lambda \Gamma\left(1 + \frac{1}{k}\right) }{ E(X) = \lambda \Gamma(1 + 1/k) } where \eqn{\Gamma} is the gamma function. \strong{Variance}: \deqn{ \text{Var}(X) = \lambda^2 \left[\Gamma\left(1 + \frac{2}{k}\right) - \left(\Gamma\left(1 + \frac{1}{k}\right)\right)^2\right] }{ Var(X) = \lambda^2 [\Gamma(1 + 2/k) - (\Gamma(1 + 1/k))^2] } \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \frac{k}{\lambda}\left(\frac{x}{\lambda}\right)^{k-1}e^{-(x/\lambda)^k}, \quad x \ge 0 }{ f(x) = (k/\lambda)(x/\lambda)^(k-1) e^(-(x/\lambda)^k), x >= 0 } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = 1 - e^{-(x/\lambda)^k}, \quad x \ge 0 }{ F(x) = 1 - e^(-(x/\lambda)^k), x >= 0 } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \sum_{n=0}^\infty \frac{t^n\lambda^n}{n!} \Gamma\left(1+\frac{n}{k}\right) }{ E(e^(tX)) = \sum_(n=0)^\infty (t^n \lambda^n / n!) \Gamma(1 + n/k) } \strong{Skewness}: \deqn{ \gamma_1 = \frac{\mu^3 - 3\mu\sigma^2 - \mu^3}{\sigma^3} }{ \gamma_1 = (\mu_3 - 3\mu\sigma^2 - \mu^3) / \sigma^3 } where \eqn{\mu = E(X)}, \eqn{\sigma^2 = \text{Var}(X)}, and the third raw moment is \deqn{ \mu^3 = \lambda^3 \Gamma\left(1 + \frac{3}{k}\right) }{ \mu_3 = \lambda^3 \Gamma(1 + 3/k) } \strong{Excess Kurtosis}: \deqn{ \gamma_2 = \frac{\mu^4 - 4\gamma_1\mu\sigma^3 - 6\mu^2\sigma^2 - \mu^4}{\sigma^4} - 3 }{ \gamma_2 = (\mu_4 - 4\gamma_1\mu\sigma^3 - 6\mu^2\sigma^2 - \mu^4) / \sigma^4 - 3 } where the fourth raw moment is \deqn{ \mu^4 = \lambda^4 \Gamma\left(1 + \frac{4}{k}\right) }{ \mu_4 = \lambda^4 \Gamma(1 + 4/k) } } \examples{ dist <- dist_weibull(shape = c(0.5, 1, 1.5, 5), scale = rep(1, 4)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Weibull]{stats::Weibull} } distributional/man/dist_gpd.Rd0000644000176200001440000000601015131557626016155 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_gpd.R \name{dist_gpd} \alias{dist_gpd} \title{The Generalized Pareto Distribution} \usage{ dist_gpd(location, scale, shape) } \arguments{ \item{location}{the location parameter \eqn{a} of the GPD distribution.} \item{scale}{the scale parameter \eqn{b} of the GPD distribution.} \item{shape}{the shape parameter \eqn{s} of the GPD distribution.} } \description{ The GPD distribution is commonly used to model the tails of distributions, particularly in extreme value theory. The Pickands–Balkema–De Haan theorem states that for a large class of distributions, the tail (above some threshold) can be approximated by a GPD. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_gpd.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_gpd.html} In the following, let \eqn{X} be a Generalized Pareto random variable with parameters \code{location} = \eqn{a}, \code{scale} = \eqn{b > 0}, and \code{shape} = \eqn{s}. \strong{Support}: \eqn{x \ge a} if \eqn{s \ge 0}, \eqn{a \le x \le a - b/s} if \eqn{s < 0} \strong{Mean}: \deqn{ E(X) = a + \frac{b}{1 - s} \quad \textrm{for } s < 1 }{ E(X) = a + b/(1 - s) for s < 1 } \eqn{E(X) = \infty} for \eqn{s \ge 1} \strong{Variance}: \deqn{ \textrm{Var}(X) = \frac{b^2}{(1-s)^2(1-2s)} \quad \textrm{for } s < 0.5 }{ Var(X) = b^2 / ((1-s)^2 (1-2s)) for s < 0.5 } \eqn{\textrm{Var}(X) = \infty} for \eqn{s \ge 0.5} \strong{Probability density function (p.d.f)}: For \eqn{s = 0}: \deqn{ f(x) = \frac{1}{b}\exp\left(-\frac{x-a}{b}\right) \quad \textrm{for } x \ge a }{ f(x) = (1/b) exp(-(x-a)/b) for x >= a } For \eqn{s \ne 0}: \deqn{ f(x) = \frac{1}{b}\left(1 + s\frac{x-a}{b}\right)^{-1/s - 1} }{ f(x) = (1/b)(1 + s(x-a)/b)^(-1/s - 1) } where \eqn{1 + s(x-a)/b > 0} \strong{Cumulative distribution function (c.d.f)}: For \eqn{s = 0}: \deqn{ F(x) = 1 - \exp\left(-\frac{x-a}{b}\right) \quad \textrm{for } x \ge a }{ F(x) = 1 - exp(-(x-a)/b) for x >= a } For \eqn{s \ne 0}: \deqn{ F(x) = 1 - \left(1 + s\frac{x-a}{b}\right)^{-1/s} }{ F(x) = 1 - (1 + s(x-a)/b)^(-1/s) } where \eqn{1 + s(x-a)/b > 0} \strong{Quantile function}: For \eqn{s = 0}: \deqn{ Q(p) = a - b\log(1-p) }{ Q(p) = a - b log(1-p) } For \eqn{s \ne 0}: \deqn{ Q(p) = a + \frac{b}{s}\left[(1-p)^{-s} - 1\right] }{ Q(p) = a + (b/s)[(1-p)^(-s) - 1] } \strong{Median}: For \eqn{s = 0}: \deqn{ \textrm{Median}(X) = a + b\log(2) }{ Median(X) = a + b log(2) } For \eqn{s \ne 0}: \deqn{ \textrm{Median}(X) = a + \frac{b}{s}\left(2^s - 1\right) }{ Median(X) = a + (b/s)(2^s - 1) } \strong{Skewness and Kurtosis}: No closed-form expressions; approximated numerically. } \examples{ dist <- dist_gpd(location = 0, scale = 1, shape = 0) dist mean(dist) variance(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \code{\link[evd:gpd]{evd::dgpd()}} } distributional/man/dist_logarithmic.Rd0000644000176200001440000000454215131571433017705 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_logarithmic.R \name{dist_logarithmic} \alias{dist_logarithmic} \title{The Logarithmic distribution} \usage{ dist_logarithmic(prob) } \arguments{ \item{prob}{parameter. \code{0 <= prob < 1}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} The Logarithmic distribution is a discrete probability distribution derived from the logarithmic series. It is useful in modeling the abundance of species and other phenomena where the frequency of an event follows a logarithmic pattern. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_logarithmic.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_logarithmic.html} In the following, let \eqn{X} be a Logarithmic random variable with parameter \code{prob} = \eqn{p}. \strong{Support}: \eqn{\{1, 2, 3, ...\}}{{1, 2, 3, ...}} \strong{Mean}: \eqn{\frac{-1}{\log(1-p)} \cdot \frac{p}{1-p}}{-1/log(1-p) * p/(1-p)} \strong{Variance}: \eqn{\frac{-(p^2 + p\log(1-p))}{[(1-p)\log(1-p)]^2}}{-(p^2 + p*log(1-p))/((1-p)*log(1-p))^2} \strong{Probability mass function (p.m.f)}: \deqn{ P(X = k) = \frac{-1}{\log(1-p)} \cdot \frac{p^k}{k} }{ P(X = k) = -1/log(1-p) * p^k/k } for \eqn{k = 1, 2, 3, \ldots}{k = 1, 2, 3, ...} \strong{Cumulative distribution function (c.d.f)}: The c.d.f. does not have a simple closed form. It is computed using the recurrence relationship \eqn{P(X = k+1) = \frac{p \cdot k}{k+1} \cdot P(X = k)}{P(X = k+1) = (p*k)/(k+1) * P(X = k)} starting from \eqn{P(X = 1) = \frac{-p}{\log(1-p)}}{P(X = 1) = -p/log(1-p)}. \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \frac{\log(1 - pe^t)}{\log(1-p)} }{ E(e^(tX)) = log(1 - p*e^t)/log(1-p) } for \eqn{pe^t < 1}{p*e^t < 1} } \examples{ dist <- dist_logarithmic(prob = c(0.33, 0.66, 0.99)) dist \dontshow{if (requireNamespace("actuar", quietly = TRUE)) withAutoprint(\{ # examplesIf} mean(dist) variance(dist) support(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) \dontshow{\}) # examplesIf} } \seealso{ \link[actuar:Logarithmic]{actuar::Logarithmic} } distributional/man/is_hdr.Rd0000644000176200001440000000035015112047012015607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hdr.R \name{is_hdr} \alias{is_hdr} \title{Is the object a hdr} \usage{ is_hdr(x) } \arguments{ \item{x}{An object.} } \description{ Is the object a hdr } distributional/man/dist_mixture.Rd0000644000176200001440000000732215131601157017074 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_mixture.R \name{dist_mixture} \alias{dist_mixture} \title{Create a mixture of distributions} \usage{ dist_mixture(..., weights = numeric()) } \arguments{ \item{...}{Distributions to be used in the mixture. Can be any distributional objects.} \item{weights}{A numeric vector of non-negative weights that sum to 1. The length must match the number of distributions passed to \code{...}. Each weight \eqn{w_i} represents the probability that a random draw comes from the \eqn{i}-th component distribution.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} A mixture distribution combines multiple component distributions with specified weights. The resulting distribution can model complex, multimodal data by representing it as a weighted sum of simpler distributions. } \details{ In the following, let \eqn{X} be a mixture random variable composed of \eqn{K} component distributions \eqn{F_1, F_2, \ldots, F_K} with corresponding weights \eqn{w_1, w_2, \ldots, w_K} where \eqn{\sum_{i=1}^K w_i = 1} and \eqn{w_i \geq 0} for all \eqn{i}. \strong{Support}: The union of the supports of all component distributions \strong{Mean}: For univariate mixtures: \deqn{ E(X) = \sum_{i=1}^K w_i \mu_i }{ E(X) = sum_{i=1}^K w_i * mu_i } where \eqn{\mu_i} is the mean of the \eqn{i}-th component distribution. For multivariate mixtures: \deqn{ E(\mathbf{X}) = \sum_{i=1}^K w_i \boldsymbol{\mu}_i }{ E(X) = sum_{i=1}^K w_i * mu_i } where \eqn{\boldsymbol{\mu}_i} is the mean vector of the \eqn{i}-th component distribution. \strong{Variance}: For univariate mixtures: \deqn{ \text{Var}(X) = \sum_{i=1}^K w_i (\mu_i^2 + \sigma_i^2) - \left(\sum_{i=1}^K w_i \mu_i\right)^2 }{ Var(X) = sum_{i=1}^K w_i * (mu_i^2 + sigma_i^2) - (sum_{i=1}^K w_i * mu_i)^2 } where \eqn{\sigma_i^2} is the variance of the \eqn{i}-th component distribution. \strong{Covariance}: For multivariate mixtures: \deqn{ \text{Cov}(\mathbf{X}) = \sum_{i=1}^K w_i \left[ (\boldsymbol{\mu}_i - \bar{\boldsymbol{\mu}})(\boldsymbol{\mu}_i - \bar{\boldsymbol{\mu}})^T + \boldsymbol{\Sigma}_i \right] }{ Cov(X) = sum_{i=1}^K w_i * [ (mu_i - mu_bar)(mu_i - mu_bar)^T + Sigma_i ] } where \eqn{\bar{\boldsymbol{\mu}} = \sum_{i=1}^K w_i \boldsymbol{\mu}_i} is the overall mean vector and \eqn{\boldsymbol{\Sigma}_i} is the covariance matrix of the \eqn{i}-th component distribution. \strong{Probability density/mass function (p.d.f/p.m.f)}: \deqn{ f(x) = \sum_{i=1}^K w_i f_i(x) }{ f(x) = sum_{i=1}^K w_i * f_i(x) } where \eqn{f_i(x)} is the density or mass function of the \eqn{i}-th component distribution. \strong{Cumulative distribution function (c.d.f)}: For univariate mixtures: \deqn{ F(x) = \sum_{i=1}^K w_i F_i(x) }{ F(x) = sum_{i=1}^K w_i * F_i(x) } where \eqn{F_i(x)} is the c.d.f. of the \eqn{i}-th component distribution. For multivariate mixtures, the c.d.f. is approximated numerically. \strong{Quantile function}: For univariate mixtures, the quantile function has no closed form and is computed numerically by inverting the c.d.f. using root-finding (\code{\link[stats:uniroot]{stats::uniroot()}}). For multivariate mixtures, quantiles are not yet implemented. } \examples{ # Univariate mixture of two normal distributions dist <- dist_mixture(dist_normal(0, 1), dist_normal(5, 2), weights = c(0.3, 0.7)) dist mean(dist) variance(dist) density(dist, 2) cdf(dist, 2) quantile(dist, 0.5) generate(dist, 10) } \seealso{ \code{\link[stats:uniroot]{stats::uniroot()}}, \code{\link[vctrs:vec_unique]{vctrs::vec_unique_count()}} } distributional/man/new_hdr.Rd0000644000176200001440000000117415112047012015772 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hdr.R \name{new_hdr} \alias{new_hdr} \title{Construct hdr intervals} \usage{ new_hdr( lower = list_of(.ptype = double()), upper = list_of(.ptype = double()), size = double() ) } \arguments{ \item{lower, upper}{A list of numeric vectors specifying the region's lower and upper bounds.} \item{size}{A numeric vector specifying the coverage size of the region.} } \value{ A "hdr" vector } \description{ Construct hdr intervals } \examples{ new_hdr(lower = list(1, c(3,6)), upper = list(10, c(5, 8)), size = c(80, 95)) } \author{ Mitchell O'Hara-Wild } distributional/man/dist_exponential.Rd0000644000176200001440000000342715131557626017742 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_exponential.R \name{dist_exponential} \alias{dist_exponential} \title{The Exponential Distribution} \usage{ dist_exponential(rate) } \arguments{ \item{rate}{vector of rates.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Exponential distributions are frequently used to model waiting times and the time between events in a Poisson process. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_exponential.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_exponential.html} In the following, let \eqn{X} be an Exponential random variable with parameter \code{rate} = \eqn{\lambda}. \strong{Support}: \eqn{x \in [0, \infty)}{x >= 0} \strong{Mean}: \eqn{\frac{1}{\lambda}}{1 / \lambda} \strong{Variance}: \eqn{\frac{1}{\lambda^2}}{1 / \lambda^2} \strong{Probability density function (p.d.f)}: \deqn{ f(x) = \lambda e^{-\lambda x} }{ f(x) = \lambda e^(-\lambda x) } \strong{Cumulative distribution function (c.d.f)}: \deqn{ F(x) = 1 - e^{-\lambda x} }{ F(x) = 1 - e^(-\lambda x) } \strong{Moment generating function (m.g.f)}: \deqn{ E(e^{tX}) = \frac{\lambda}{\lambda - t}, \quad t < \lambda }{ E(e^(tX)) = \lambda / (\lambda - t), for t < \lambda } } \examples{ dist <- dist_exponential(rate = c(2, 1, 2/3)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Exponential]{stats::Exponential} } distributional/man/generate.distribution.Rd0000644000176200001440000000115415112047012020652 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{generate.distribution} \alias{generate.distribution} \title{Randomly sample values from a distribution} \usage{ \method{generate}{distribution}(x, times, ...) } \arguments{ \item{x}{The distribution(s).} \item{times}{The number of samples.} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Generate random samples from probability distributions. } distributional/man/dist_chisq.Rd0000644000176200001440000000724615131604123016507 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_chisq.R \name{dist_chisq} \alias{dist_chisq} \title{The (non-central) Chi-Squared Distribution} \usage{ dist_chisq(df, ncp = 0) } \arguments{ \item{df}{Degrees of freedom (non-centrality parameter). Can be any positive real number.} \item{ncp}{Non-centrality parameter. Can be any non-negative real number. Defaults to 0 (central chi-squared distribution).} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} Chi-square distributions show up often in frequentist settings as the sampling distribution of test statistics, especially in maximum likelihood estimation settings. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_chisq.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_chisq.html} In the following, let \eqn{X} be a \eqn{\chi^2} random variable with \code{df} = \eqn{k} and \code{ncp} = \eqn{\lambda}. \strong{Support}: \eqn{R^+}, the set of positive real numbers \strong{Mean}: \eqn{k + \lambda} \strong{Variance}: \eqn{2(k + 2\lambda)} \strong{Probability density function (p.d.f)}: For the central chi-squared distribution (\eqn{\lambda = 0}): \deqn{ f(x) = \frac{1}{2^{k/2} \Gamma(k/2)} x^{k/2 - 1} e^{-x/2} }{ f(x) = 1 / (2^(k/2) \Gamma(k/2)) x^(k/2 - 1) exp(-x/2) } For the non-central chi-squared distribution (\eqn{\lambda > 0}): \deqn{ f(x) = \frac{1}{2} e^{-(x+\lambda)/2} \left(\frac{x}{\lambda}\right)^{k/4-1/2} I_{k/2-1}\left(\sqrt{\lambda x}\right) }{ f(x) = (1/2) exp(-(x+\lambda)/2) (x/\lambda)^(k/4-1/2) I_(k/2-1)(sqrt(\lambda x)) } where \eqn{I_\nu(z)} is the modified Bessel function of the first kind. \strong{Cumulative distribution function (c.d.f)}: For the central chi-squared distribution (\eqn{\lambda = 0}): \deqn{ F(x) = \frac{\gamma(k/2, x/2)}{\Gamma(k/2)} = P(k/2, x/2) }{ F(x) = \gamma(k/2, x/2) / \Gamma(k/2) = P(k/2, x/2) } where \eqn{\gamma(s, x)} is the lower incomplete gamma function and \eqn{P(s, x)} is the regularized gamma function. For the non-central chi-squared distribution (\eqn{\lambda > 0}): \deqn{ F(x) = \sum_{j=0}^{\infty} \frac{e^{-\lambda/2} (\lambda/2)^j}{j!} P(k/2 + j, x/2) }{ F(x) = sum_(j=0)^(\infty) exp(-\lambda/2) (\lambda/2)^j / j! P(k/2 + j, x/2) } This is approximated numerically. \strong{Moment generating function (m.g.f)}: For the central chi-squared distribution (\eqn{\lambda = 0}): \deqn{ E(e^{tX}) = (1 - 2t)^{-k/2}, \quad t < 1/2 }{ E(e^(tX)) = (1 - 2t)^(-k/2), t < 1/2 } For the non-central chi-squared distribution (\eqn{\lambda > 0}): \deqn{ E(e^{tX}) = \frac{e^{\lambda t / (1 - 2t)}}{(1 - 2t)^{k/2}}, \quad t < 1/2 }{ E(e^(tX)) = exp(\lambda t / (1 - 2t)) / (1 - 2t)^(k/2), t < 1/2 } \strong{Skewness}: \deqn{ \gamma_1 = \frac{2^{3/2}(k + 3\lambda)}{(k + 2\lambda)^{3/2}} }{ \gamma_1 = 2^(3/2) (k + 3\lambda) / (k + 2\lambda)^(3/2) } For the central case (\eqn{\lambda = 0}), this simplifies to \eqn{\sqrt{8/k}}. \strong{Excess Kurtosis}: \deqn{ \gamma_2 = \frac{12(k + 4\lambda)}{(k + 2\lambda)^2} }{ \gamma_2 = 12(k + 4\lambda) / (k + 2\lambda)^2 } For the central case (\eqn{\lambda = 0}), this simplifies to \eqn{12/k}. } \examples{ dist <- dist_chisq(df = c(1,2,3,4,6,9)) dist mean(dist) variance(dist) skewness(dist) kurtosis(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[stats:Chisquare]{stats::Chisquare} } distributional/man/support.Rd0000644000176200001440000000105015112047012016051 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/distribution.R \name{support} \alias{support} \alias{support.distribution} \title{Region of support of a distribution} \usage{ support(x, ...) \method{support}{distribution}(x, ...) } \arguments{ \item{x}{The distribution(s).} \item{...}{Additional arguments used by methods.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } distributional/man/dist_missing.Rd0000644000176200001440000000354715131575653017070 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dist_missing.R \name{dist_missing} \alias{dist_missing} \title{Missing distribution} \usage{ dist_missing(length = 1) } \arguments{ \item{length}{The number of missing distributions} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} A placeholder distribution for handling missing values in a vector of distributions. } \details{ We recommend reading this documentation on \href{https://pkg.mitchelloharawild.com/distributional/reference/dist_missing.html}{pkgdown} which renders math nicely. \url{https://pkg.mitchelloharawild.com/distributional/reference/dist_missing.html} The missing distribution represents the absence of distributional information. It is used as a placeholder when distribution values are not available or not applicable, similar to how \code{NA} is used for missing scalar values. \strong{Support}: Undefined \strong{Mean}: \eqn{\text{NA}} \strong{Variance}: \eqn{\text{NA}} \strong{Skewness}: \eqn{\text{NA}} \strong{Kurtosis}: \eqn{\text{NA}} \strong{Probability density function (p.d.f)}: Undefined \deqn{ f(x) = \text{NA} } \strong{Cumulative distribution function (c.d.f)}: Undefined \deqn{ F(t) = \text{NA} } \strong{Quantile function}: Undefined \deqn{ Q(p) = \text{NA} } \strong{Moment generating function (m.g.f)}: Undefined \deqn{ E(e^{tX}) = \text{NA} } All statistical operations on missing distributions return \code{NA} values of appropriate length, propagating the missingness through calculations. } \examples{ dist <- dist_missing(3L) dist mean(dist) variance(dist) generate(dist, 10) density(dist, 2) density(dist, 2, log = TRUE) cdf(dist, 4) quantile(dist, 0.7) } \seealso{ \link[base:NA]{base::NA} } distributional/DESCRIPTION0000644000176200001440000000523115131672333015021 0ustar liggesusersPackage: distributional Title: Vectorised Probability Distributions Version: 0.6.0 Authors@R: c(person(given = "Mitchell", family = "O'Hara-Wild", role = c("aut", "cre"), email = "mail@mitchelloharawild.com", comment = c(ORCID = "0000-0001-6729-7695")), person(given = "Matthew", family = "Kay", role = c("aut"), comment = c(ORCID = "0000-0001-9446-0419")), person(given = "Alex", family = "Hayes", role = c("aut"), comment = c(ORCID = "0000-0002-4985-5160")), person(given = "Rob", family = "Hyndman", role = c("aut"), comment = c(ORCID = "0000-0002-2140-5352")), person(given = "Earo", family = "Wang", role = c("ctb"), comment = c(ORCID = "0000-0001-6448-5260")), person(given = "Vencislav", family = "Popov", role = c("ctb"), comment = c(ORCID = "0000-0002-8073-4199"))) Description: Vectorised distribution objects with tools for manipulating, visualising, and using probability distributions. Designed to allow model prediction outputs to return distributions rather than their parameters, allowing users to directly interact with predictive distributions in a data-oriented workflow. In addition to providing generic replacements for p/d/q/r functions, other useful statistics can be computed including means, variances, intervals, and highest density regions. License: GPL-3 Depends: R (>= 4.0.0) Imports: vctrs (>= 0.3.0), rlang (>= 0.4.5), generics, stats, numDeriv, utils, lifecycle, pillar Suggests: testthat (>= 2.1.0), covr, mvtnorm, actuar (>= 2.0.0), evd, ggdist, ggplot2, gk, pkgdown RdMacros: lifecycle URL: https://pkg.mitchelloharawild.com/distributional/, https://github.com/mitchelloharawild/distributional BugReports: https://github.com/mitchelloharawild/distributional/issues Encoding: UTF-8 Language: en-GB RoxygenNote: 7.3.3 NeedsCompilation: no Packaged: 2026-01-14 10:04:53 UTC; mitchell Author: Mitchell O'Hara-Wild [aut, cre] (ORCID: ), Matthew Kay [aut] (ORCID: ), Alex Hayes [aut] (ORCID: ), Rob Hyndman [aut] (ORCID: ), Earo Wang [ctb] (ORCID: ), Vencislav Popov [ctb] (ORCID: ) Maintainer: Mitchell O'Hara-Wild Repository: CRAN Date/Publication: 2026-01-14 10:50:03 UTC