ruby-statistics-4.1.0/0000755000004100000410000000000014745414510014722 5ustar www-datawww-dataruby-statistics-4.1.0/bin/0000755000004100000410000000000014745414510015472 5ustar www-datawww-dataruby-statistics-4.1.0/bin/setup0000755000004100000410000000020314745414510016553 0ustar www-datawww-data#!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' set -vx bundle install # Do any other automated setup that you need to do here ruby-statistics-4.1.0/bin/console0000755000004100000410000000053614745414510017066 0ustar www-datawww-data#!/usr/bin/env ruby require "bundler/setup" require "ruby-statistics" # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. # (If you use this, don't forget to add pry to your Gemfile!) # require "pry" # Pry.start require "irb" IRB.start(__FILE__) ruby-statistics-4.1.0/.gitignore0000644000004100000410000000225714745414510016720 0ustar www-datawww-data*.gem *.rbc /.config /coverage/ /InstalledFiles /pkg/ /spec/reports/ /spec/examples.txt /test/tmp/ /test/version_tmp/ /tmp/ # Used by dotenv library to load environment variables. # .env ## Specific to RubyMotion: .dat* .repl_history build/ *.bridgesupport build-iPhoneOS/ build-iPhoneSimulator/ ## Specific to RubyMotion (use of CocoaPods): # # We recommend against adding the Pods directory to your .gitignore. 
However # you should judge for yourself, the pros and cons are mentioned at: # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control # # vendor/Pods/ ## Documentation cache and generated files: /.yardoc/ /_yardoc/ /doc/ /rdoc/ ## Environment normalization: /.bundle/ /vendor/bundle /lib/bundler/man/ # for a library or gem, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # Gemfile.lock .ruby-version .ruby-gemset # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: .rvmrc /.bundle/ /.yardoc /Gemfile.lock /_yardoc/ /coverage/ /doc/ /pkg/ /spec/reports/ /tmp/ # rspec failure tracking .rspec_status # byebug .byebug_history ruby-statistics-4.1.0/CONTRIBUTING.md0000644000004100000410000000045614745414510017160 0ustar www-datawww-dataBug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant code of conduct](https://www.contributor-covenant.org/). ruby-statistics-4.1.0/.github/0000755000004100000410000000000014745414510016262 5ustar www-datawww-dataruby-statistics-4.1.0/.github/dependabot.yml0000644000004100000410000000124414745414510021113 0ustar www-datawww-data# To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. 
# Please see the documentation for all configuration options: # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates version: 2 updates: - package-ecosystem: "bundler" # See documentation for possible values directory: "/" # Location of package manifests schedule: interval: "weekly" - package-ecosystem: "github-actions" # See documentation for possible values directory: "/" # Location of package manifests schedule: interval: "weekly" ruby-statistics-4.1.0/.github/workflows/0000755000004100000410000000000014745414510020317 5ustar www-datawww-dataruby-statistics-4.1.0/.github/workflows/ruby.yml0000644000004100000410000000255514745414510022032 0ustar www-datawww-dataname: Ruby on: [push, pull_request] jobs: build: # Latest ruby runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Ruby 3.3 uses: ruby/setup-ruby@v1.207.0 with: ruby-version: 3.3 - name: Build and test with Rake run: | gem install bundler bundle install --jobs 2 --retry 1 bundle exec rake build_3_2: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Ruby 3.2 uses: ruby/setup-ruby@v1.207.0 with: ruby-version: 3.2 - name: Build and test with Rake run: | gem install bundler bundle install --jobs 2 --retry 1 bundle exec rake build_3_0: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Ruby 3.0 uses: ruby/setup-ruby@v1.207.0 with: ruby-version: 3.0 - name: Build and test with Rake run: | gem install bundler bundle install --jobs 2 --retry 1 bundle exec rake build_3_1: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Ruby 3.1 uses: ruby/setup-ruby@v1.207.0 with: ruby-version: 3.1 - name: Build and test with Rake run: | gem install bundler bundle install --jobs 2 --retry 1 bundle exec rake ruby-statistics-4.1.0/lib/0000755000004100000410000000000014745414510015470 5ustar 
www-datawww-dataruby-statistics-4.1.0/lib/ruby-statistics.rb0000644000004100000410000000034414745414510021167 0ustar www-datawww-datarequire File.dirname(__FILE__) + '/enumerable' require File.dirname(__FILE__) + '/math' Dir[ File.dirname(__FILE__) + '/ruby-statistics/**/*.rb'].each {|file| require file } module RubyStatistics # Your code goes here... end ruby-statistics-4.1.0/lib/math.rb0000644000004100000410000001144714745414510016755 0ustar www-datawww-datamodule Math def self.factorial(n) return if n < 0 n = n.to_i # Only integers. return 1 if n == 0 || n == 1 Math.gamma(n + 1) # Math.gamma(x) == (n - 1)! for integer values end def self.combination(n, r) self.factorial(n)/(self.factorial(r) * self.factorial(n - r)).to_r # n!/(r! * [n - r]!) end def self.permutation(n, k) self.factorial(n)/self.factorial(n - k).to_r end # Function adapted from the python implementation that exists in https://en.wikipedia.org/wiki/Simpson%27s_rule#Sample_implementation # Finite integral in the interval [a, b] split up in n-intervals def self.simpson_rule(a, b, n, &block) unless n.even? puts "The composite simpson's rule needs even intervals!" return end h = (b - a)/n.to_r resA = yield(a) resB = yield(b) sum = resA + resB (1..n).step(2).each do |number| res = yield(a + number * h) sum += 4 * res end (1..(n-1)).step(2).each do |number| res = yield(a + number * h) sum += 2 * res end return sum * h / 3.0 end def self.lower_incomplete_gamma_function(s, x) base_iterator = x.round(1) base_iterator += 1 if x < 1.0 && !x.zero? # The greater the iterations, the better. That's why we are iterating 10_000 * x times iterator = (10_000 * base_iterator).round iterator = 100_000 if iterator.zero? self.simpson_rule(0, x.to_r, iterator) do |t| (t ** (s - 1)) * Math.exp(-t) end end # Algorithm implementation translated from the ASA147 C++ version https://people.sc.fsu.edu/~jburkardt/cpp_src/asa147/asa147.html # translated from FORTRAN by John Burkardt. Original algorithm written by Chi Leung Lau. 
# It contains a modification on the error and underflow parameters to use maximum available float number # and it performs the series using `Rational` objects to avoid memory exhaustion and reducing precision errors. # # This algorithm is licensed with MIT license. # # Reference: # # Chi Leung Lau, # Algorithm AS 147: # A Simple Series for the Incomplete Gamma Integral, # Applied Statistics, # Volume 29, Number 1, 1980, pages 113-114. def self.normalised_lower_incomplete_gamma_function(s, x) return 0.0 if s.negative? || x.zero? || x.negative? # e = 1.0e-308 # uflo = 1.0e-47 e = Float::MIN uflo = Float::MIN lgamma, sign = Math.lgamma(s + 1.0) arg = s * Math.log(x) - (sign * lgamma) - x return 0.0 if arg < Math.log(uflo) f = Math.exp(arg).to_r return 0.0 if f.zero? c = 1r value = 1r a = s.to_r rational_x = x.to_r loop do a += 1r c = c * (rational_x / a) value += c break if c <= (e * value) end (value * f).to_f end def self.beta_function(x, y) return 1 if x == 1 && y == 1 (Math.gamma(x) * Math.gamma(y))/Math.gamma(x + y) end ### This implementation is an adaptation of the incomplete beta function made in C by ### Lewis Van Winkle, which released the code under the zlib license. ### The whole math behind this code is described in the following post: https://codeplea.com/incomplete-beta-function-c def self.incomplete_beta_function(x, alp, bet) return if x < 0.0 return 1.0 if x > 1.0 tiny = 1.0E-50 if x > ((alp + 1.0)/(alp + bet + 2.0)) return 1.0 - self.incomplete_beta_function(1.0 - x, bet, alp) end # To avoid overflow problems, the implementation applies the logarithm properties # to calculate in a faster and safer way the values. 
lbet_ab = (Math.lgamma(alp)[0] + Math.lgamma(bet)[0] - Math.lgamma(alp + bet)[0]).freeze front = (Math.exp(Math.log(x) * alp + Math.log(1.0 - x) * bet - lbet_ab) / alp.to_r).freeze # This is the non-log version of the left part of the formula (before the continuous fraction) # down_left = alp * self.beta_function(alp, bet) # upper_left = (x ** alp) * ((1.0 - x) ** bet) # front = upper_left/down_left f, c, d = 1.0, 1.0, 0.0 returned_value = nil # Let's do more iterations than the proposed implementation (200 iters) (0..500).each do |number| m = number/2 numerator = if number == 0 1.0 elsif number % 2 == 0 (m * (bet - m) * x)/((alp + 2.0 * m - 1.0)* (alp + 2.0 * m)) else top = -((alp + m) * (alp + bet + m) * x) down = ((alp + 2.0 * m) * (alp + 2.0 * m + 1.0)) top/down end d = 1.0 + numerator * d d = tiny if d.abs < tiny d = 1.0 / d.to_r c = 1.0 + numerator / c.to_r c = tiny if c.abs < tiny cd = (c*d).freeze f = f * cd if (1.0 - cd).abs < 1.0E-10 returned_value = front * (f - 1.0) break end end returned_value end end ruby-statistics-4.1.0/lib/enumerable.rb0000644000004100000410000000046014745414510020134 0ustar www-datawww-data# TODO: Avoid monkey-patching. 
module Enumerable def mean self.reduce(:+) / self.length.to_f end def variance mean = self.mean self.reduce(0) { |memo, value| memo + ((value - mean) ** 2) } / (self.length - 1).to_f end def standard_deviation Math.sqrt(self.variance) end end ruby-statistics-4.1.0/lib/ruby-statistics/0000755000004100000410000000000014745414510020641 5ustar www-datawww-dataruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/0000755000004100000410000000000014745414510024224 5ustar www-datawww-dataruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/wilcoxon_rank_sum_test.rb0000644000004100000410000000620414745414510031353 0ustar www-datawww-datamodule RubyStatistics module StatisticalTest class WilcoxonRankSumTest def rank(elements) ranked_elements = {} elements.sort.each_with_index do |element, index| if ranked_elements.fetch(element, false) # This allow us to solve the ties easily when performing the rank summation per group ranked_elements[element][:counter] += 1 ranked_elements[element][:rank] += (index + 1) else ranked_elements[element] = { counter: 1, rank: (index + 1) } end end # ranked_elements = [{ x => { counter: 1, rank: y } ] ranked_elements end # Steps to perform the calculation are based on http://www.mit.edu/~6.s085/notes/lecture5.pdf def perform(alpha, tails, group_one, group_two) # Size for each group n1, n2 = group_one.size, group_two.size # Rank all data total_ranks = rank(group_one + group_two) # sum rankings per group r1 = ranked_sum_for(total_ranks, group_one) r2 = ranked_sum_for(total_ranks, group_two) # calculate U statistic u1 = (n1 * (n1 + 1)/2.0) - r1 u2 = (n2 * (n2 + 1)/2.0 ) - r2 u_statistic = [u1.abs, u2.abs].min median_u = (n1 * n2)/2.0 ties = total_ranks.values.select { |element| element[:counter] > 1 } std_u = if ties.size > 0 corrected_sigma(ties, n1, n2) else Math.sqrt((n1 * n2 * (n1 + n2 + 1))/12.0) end z = (u_statistic - median_u)/std_u # Most literature are not very specific about the normal distribution to be used. 
# We ran multiple tests with a Normal(median_u, std_u) and Normal(0, 1) and we found # the latter to be more aligned with the results. probability = Distribution::StandardNormal.new.cumulative_function(z.abs) p_value = 1 - probability p_value *= 2 if tails == :two_tail { probability: probability, u: u_statistic, z: z, p_value: p_value, alpha: alpha, null: alpha < p_value, alternative: p_value <= alpha, confidence_level: 1 - alpha } end # Formula extracted from http://www.statstutor.ac.uk/resources/uploaded/mannwhitney.pdf private def corrected_sigma(ties, total_group_one, total_group_two) n = total_group_one + total_group_two rank_sum = ties.reduce(0) do |memo, t| memo += ((t[:counter] ** 3) - t[:counter])/12.0 end left = (total_group_one * total_group_two)/(n * (n - 1)).to_r right = (((n ** 3) - n)/12.0) - rank_sum Math.sqrt(left * right) end private def ranked_sum_for(total, group) # sum rankings per group group.reduce(0) do |memo, element| rank_of_element = total[element][:rank] / total[element][:counter].to_r memo += rank_of_element end end end # Both test are the same. To keep the selected name, we just alias the class # with the implementation. MannWhitneyU = WilcoxonRankSumTest end end ruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/t_test.rb0000644000004100000410000000717714745414510026067 0ustar www-datawww-datamodule RubyStatistics module StatisticalTest class TTest # Errors for Zero std class ZeroStdError < StandardError STD_ERROR_MSG = 'Standard deviation for the difference or group is zero. Please, reconsider sample contents'.freeze end # Perform a T-Test for one or two samples. # For the tails param, we need a symbol: :one_tail or :two_tail def self.perform(alpha, tails, *args) return if args.size < 2 degrees_of_freedom = 0 # If the comparison mean has been specified t_score = if args[0].is_a? 
Numeric data_mean = args[1].mean data_std = args[1].standard_deviation raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if data_std == 0 comparison_mean = args[0] degrees_of_freedom = args[1].size - 1 (data_mean - comparison_mean)/(data_std / Math.sqrt(args[1].size).to_r).to_r else sample_left_mean = args[0].mean sample_left_variance = args[0].variance sample_right_variance = args[1].variance sample_right_mean = args[1].mean degrees_of_freedom = args.flatten.size - 2 left_root = sample_left_variance/args[0].size.to_r right_root = sample_right_variance/args[1].size.to_r standard_error = Math.sqrt(left_root + right_root) (sample_left_mean - sample_right_mean).abs/standard_error.to_r end t_distribution = Distribution::TStudent.new(degrees_of_freedom) probability = t_distribution.cumulative_function(t_score) # Steps grabbed from https://support.minitab.com/en-us/minitab/18/help-and-how-to/statistics/basic-statistics/supporting-topics/basics/manually-calculate-a-p-value/ # See https://github.com/estebanz01/ruby-statistics/issues/23 p_value = if tails == :two_tail 2 * (1 - t_distribution.cumulative_function(t_score.abs)) else 1 - probability end { t_score: t_score, probability: probability, p_value: p_value, alpha: alpha, null: alpha < p_value, alternative: p_value <= alpha, confidence_level: 1 - alpha } end def self.paired_test(alpha, tails, left_group, right_group) raise StandardError, 'both samples are the same' if left_group == right_group # Handy snippet grabbed from https://stackoverflow.com/questions/2682411/ruby-sum-corresponding-members-of-two-or-more-arrays differences = [left_group, right_group].transpose.map { |value| value.reduce(:-) } degrees_of_freedom = differences.size - 1 difference_std = differences.standard_deviation raise ZeroStdError, ZeroStdError::STD_ERROR_MSG if difference_std == 0 down = difference_std/Math.sqrt(differences.size) t_score = (differences.mean - 0)/down.to_r t_distribution = Distribution::TStudent.new(degrees_of_freedom) probability 
= t_distribution.cumulative_function(t_score) p_value = if tails == :two_tail 2 * (1 - t_distribution.cumulative_function(t_score.abs)) else 1 - probability end { t_score: t_score, probability: probability, p_value: p_value, alpha: alpha, null: alpha < p_value, alternative: p_value <= alpha, confidence_level: 1 - alpha } end end end end ruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/f_test.rb0000644000004100000410000000675514745414510026052 0ustar www-datawww-datamodule RubyStatistics module StatisticalTest class FTest # This method calculates the one-way ANOVA F-test statistic. # We assume that all specified arguments are arrays. # It returns an array with three elements: # [F-statistic or F-score, degrees of freedom numerator, degrees of freedom denominator]. # # Formulas extracted from: # https://courses.lumenlearning.com/boundless-statistics/chapter/one-way-anova/ # http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/BS704_HypothesisTesting-ANOVA/BS704_HypothesisTesting-Anova_print.html def self.anova_f_score(*args) # If only two groups have been specified as arguments, we follow the classic F-Test for # equality of variances, which is the ratio between the variances. 
f_score = nil df1 = nil df2 = nil if args.size == 2 variances = [args[0].variance, args[1].variance] f_score = variances.max/variances.min.to_r df1 = 1 # k-1 (k = 2) df2 = args.flatten.size - 2 # N-k (k = 2) elsif args.size > 2 total_groups = args.size total_elements = args.flatten.size overall_mean = args.flatten.mean sample_sizes = args.map(&:size) sample_means = args.map(&:mean) sample_stds = args.map(&:standard_deviation) # Variance between groups iterator = sample_sizes.each_with_index variance_between_groups = iterator.reduce(0) do |summation, (size, index)| inner_calculation = size * ((sample_means[index] - overall_mean) ** 2) summation += (inner_calculation / (total_groups - 1).to_r) end # Variance within groups variance_within_groups = (0...total_groups).reduce(0) do |outer_summation, group_index| outer_summation += args[group_index].reduce(0) do |inner_sumation, observation| inner_calculation = ((observation - sample_means[group_index]) ** 2) inner_sumation += (inner_calculation / (total_elements - total_groups).to_r) end end f_score = variance_between_groups/variance_within_groups.to_r df1 = total_groups - 1 df2 = total_elements - total_groups end [f_score, df1, df2] end # This method expects the alpha value and the groups to calculate the one-way ANOVA test. # It returns a hash with multiple information and the test result (if reject the null hypotesis or not). # Keep in mind that the values for the alternative key (true/false) does not imply that the alternative hypothesis # is TRUE or FALSE. It's a minor notation advantage to decide if reject the null hypothesis or not. def self.one_way_anova(alpha, *args) f_score, df1, df2 = *self.anova_f_score(*args) # Splat array result return if f_score.nil? || df1.nil? || df2.nil? 
probability = Distribution::F.new(df1, df2).cumulative_function(f_score) p_value = 1 - probability # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. { probability: probability, p_value: p_value, alpha: alpha, null: alpha < p_value, alternative: p_value <= alpha, confidence_level: 1 - alpha } end end end end ruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/chi_squared_test.rb0000644000004100000410000000332214745414510030077 0ustar www-datawww-datamodule RubyStatistics module StatisticalTest class ChiSquaredTest def self.chi_statistic(expected, observed) # If the expected is a number, we asumme that all expected observations # has the same probability to occur, hence we expect to see the same number # of expected observations per each observed value statistic = if expected.is_a? Numeric observed.reduce(0) do |memo, observed_value| up = (observed_value - expected) ** 2 memo += (up/expected.to_r) end else expected.each_with_index.reduce(0) do |memo, (expected_value, index)| up = (observed[index] - expected_value) ** 2 memo += (up/expected_value.to_r) end end [statistic, observed.size - 1] end def self.goodness_of_fit(alpha, expected, observed) chi_score, df = *self.chi_statistic(expected, observed) # Splat array result return if chi_score.nil? || df.nil? probability = Distribution::ChiSquared.new(df).cumulative_function(chi_score) p_value = 1 - probability # According to https://stats.stackexchange.com/questions/29158/do-you-reject-the-null-hypothesis-when-p-alpha-or-p-leq-alpha # We can assume that if p_value <= alpha, we can safely reject the null hypothesis, ie. accept the alternative hypothesis. 
{ probability: probability, p_value: p_value, alpha: alpha, null: alpha < p_value, alternative: p_value <= alpha, confidence_level: 1 - alpha } end end end end ruby-statistics-4.1.0/lib/ruby-statistics/statistical_test/kolmogorov_smirnov_test.rb0000644000004100000410000000622014745414510031563 0ustar www-datawww-datamodule RubyStatistics module StatisticalTest class KolmogorovSmirnovTest # Common alpha, and critical D are calculated following formulas from: https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test def self.two_samples(group_one:, group_two:, alpha: 0.05) samples = group_one + group_two # We can use unbalaced group samples ecdf_one = Distribution::Empirical.new(samples: group_one) ecdf_two = Distribution::Empirical.new(samples: group_two) d_max = samples.sort.map do |sample| d1 = ecdf_one.cumulative_function(x: sample) d2 = ecdf_two.cumulative_function(x: sample) (d1 - d2).abs end.max # TODO: Validate calculation of Common alpha. common_alpha = Math.sqrt((-0.5 * Math.log(alpha))) radicand = (group_one.size + group_two.size) / (group_one.size * group_two.size).to_r critical_d = common_alpha * Math.sqrt(radicand) # critical_d = self.critical_d(alpha: alpha, n: samples.size) # We are unable to calculate the p_value, because we don't have the Kolmogorov distribution # defined. We reject the null hypotesis if Dmax is > than Dcritical. { d_max: d_max, d_critical: critical_d, total_samples: samples.size, alpha: alpha, null: d_max <= critical_d, alternative: d_max > critical_d, confidence_level: 1.0 - alpha } end # This is an implementation of the formula presented by Paul Molin and Hervé Abdi in a paper, # called "New Table and numerical approximations for Kolmogorov-Smirnov / Lilliefors / Van Soest # normality test". # In this paper, the authors defines a couple of 6th-degree polynomial functions that allow us # to find an aproximation of the real critical value. 
This is based in the conclusions made by # Dagnelie (1968), where indicates that critical values given by Lilliefors can be approximated # numerically. # # In general, the formula found is: # C(N, alpha) ^ -2 = A(alpha) * N + B(alpha). # # Where A(alpha), B(alpha) are two 6th degree polynomial functions computed using the principle # of Monte Carlo simulations. # # paper can be found here: https://utdallas.edu/~herve/MolinAbdi1998-LillieforsTechReport.pdf # def self.critical_d(alpha:, n:) # confidence = 1.0 - alpha # a_alpha = 6.32207539843126 -17.1398870006148 * confidence + # 38.42812675101057 * (confidence ** 2) - 45.93241384693391 * (confidence ** 3) + # 7.88697700041829 * (confidence ** 4) + 29.79317711037858 * (confidence ** 5) - # 18.48090137098585 * (confidence ** 6) # b_alpha = 12.940399038404 - 53.458334259532 * confidence + # 186.923866119699 * (confidence ** 2) - 410.582178349305 * (confidence ** 3) + # 517.377862566267 * (confidence ** 4) - 343.581476222384 * (confidence ** 5) + # 92.123451358715 * (confidence ** 6) # Math.sqrt(1.0 / (a_alpha * n + b_alpha)) # end end KSTest = KolmogorovSmirnovTest # Alias end end ruby-statistics-4.1.0/lib/ruby-statistics/spearman_rank_coefficient.rb0000644000004100000410000000524014745414510026346 0ustar www-datawww-datamodule RubyStatistics class SpearmanRankCoefficient def self.rank(data:, return_ranks_only: true) descending_order_data = data.sort { |a, b| b <=> a } rankings = {} data.each do |value| # If we have ties, the find_index method will only retrieve the index of the # first element in the list (i.e, the most close to the left of the array), # so when a tie is detected, we increase the temporal ranking by the number of # counted elements at that particular time and then we increase the counter. 
temporal_ranking = descending_order_data.find_index(value) + 1 # 0-index if rankings.fetch(value, false) rankings[value][:rank] += (temporal_ranking + rankings[value][:counter]) rankings[value][:counter] += 1 rankings[value][:tie_rank] = rankings[value][:rank] / rankings[value][:counter].to_r else rankings[value] = { counter: 1, rank: temporal_ranking, tie_rank: temporal_ranking } end end if return_ranks_only data.map do |value| rankings[value][:tie_rank] end else rankings end end # Formulas extracted from: https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php def self.coefficient(set_one, set_two) raise 'Both group sets must have the same number of cases.' if set_one.size != set_two.size return if set_one.size == 0 && set_two.size == 0 set_one_mean, set_two_mean = set_one.mean, set_two.mean have_tie_ranks = (set_one + set_two).any? { |rank| rank.is_a?(Float) || rank.is_a?(Rational) } if have_tie_ranks numerator = 0 squared_differences_set_one = 0 squared_differences_set_two = 0 set_one.size.times do |idx| local_diff_one = (set_one[idx] - set_one_mean) local_diff_two = (set_two[idx] - set_two_mean) squared_differences_set_one += local_diff_one ** 2 squared_differences_set_two += local_diff_two ** 2 numerator += local_diff_one * local_diff_two end denominator = Math.sqrt(squared_differences_set_one * squared_differences_set_two) numerator / denominator.to_r # This is rho or spearman's coefficient. else sum_squared_differences = set_one.each_with_index.reduce(0) do |memo, (rank_one, index)| memo += ((rank_one - set_two[index]) ** 2) memo end numerator = 6 * sum_squared_differences denominator = ((set_one.size ** 3) - set_one.size) 1.0 - (numerator / denominator.to_r) # This is rho or spearman's coefficient. 
end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution.rb0000644000004100000410000000021214745414510023700 0ustar www-datawww-dataDir[File.dirname(__FILE__) + '/distribution/**/*.rb'].each {|file| require file } module RubyStatistics module Distribution end end ruby-statistics-4.1.0/lib/ruby-statistics/statistical_test.rb0000644000004100000410000000047014745414510024552 0ustar www-datawww-dataDir[File.dirname(__FILE__) + '/statistical_test/**/*.rb'].each {|file| require file } module RubyStatistics module StatisticalTest end end # If StatisticalTest is not defined, setup alias. if defined?(RubyStatistics) && !(defined?(StatisticalTest)) StatisticalTest = RubyStatistics::StatisticalTest end ruby-statistics-4.1.0/lib/ruby-statistics/version.rb0000644000004100000410000000005614745414510022654 0ustar www-datawww-datamodule RubyStatistics VERSION = "4.1.0" end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/0000755000004100000410000000000014745414510023360 5ustar www-datawww-dataruby-statistics-4.1.0/lib/ruby-statistics/distribution/empirical.rb0000644000004100000410000000115014745414510025647 0ustar www-datawww-datamodule RubyStatistics module Distribution class Empirical attr_accessor :samples def initialize(samples:) self.samples = samples end # Formula grabbed from here: https://statlect.com/asymptotic-theory/empirical-distribution def cumulative_function(x:) cumulative_sum = samples.reduce(0) do |summation, sample| summation += if sample <= x 1 else 0 end summation end cumulative_sum / samples.size.to_r end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/f.rb0000644000004100000410000000204214745414510024130 0ustar www-datawww-datamodule RubyStatistics module Distribution class F attr_accessor :d1, :d2 # Degrees of freedom #1 and #2 def initialize(k, j) self.d1 = k self.d2 = j end # Formula extracted from http://www.itl.nist.gov/div898/handbook/eda/section3/eda3665.htm#CDF def cumulative_function(value) k = d2/(d2 + d1 * 
value.to_r) 1 - Math.incomplete_beta_function(k, d2/2.0, d1/2.0) end def density_function(value) return if d1 < 0 || d2 < 0 # F-pdf is well defined for the [0, +infinity) interval. val = value.to_r upper = ((d1 * val) ** d1) * (d2**d2) lower = (d1 * val + d2) ** (d1 + d2) up = Math.sqrt(upper/lower.to_r) down = val * Math.beta_function(d1/2.0, d2/2.0) up/down.to_r end def mean return if d2 <= 2 d2/(d2 - 2).to_r end def mode return if d1 <= 2 left = (d1 - 2)/d1.to_r right = d2/(d2 + 2).to_r (left * right).to_f end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/gamma.rb0000644000004100000410000000372314745414510024774 0ustar www-datawww-data# frozen_string_literal: true module RubyStatistics module Distribution class Gamma attr_reader :shape, :scale, :rate def initialize(shape:, scale: nil) @shape = shape @scale = scale # If the scale is nil, it means we want the distribution to behave with a rate parameter # instead of a scale parameter @rate = if scale.nil? 1.0 / shape else nil end end def as_rate? scale.nil? end def mean if as_rate? self.shape / self.rate else self.shape * self.scale end end def mode return 0.0 if self.shape < 1.0 if as_rate? (self.shape - 1.0) / self.rate else (self.shape - 1.0) * self.scale end end def variance if as_rate? self.shape / (self.rate ** 2.0) else self.shape * (self.scale ** 2.0) end end def skewness 2.0 / Math.sqrt(self.shape) end def density_function(x) euler = if as_rate? Math.exp(- self.rate * x) else Math.exp(-x / self.scale.to_r) end left = if as_rate? (self.rate ** self.shape).to_r / Math.gamma(self.shape).to_r else 1r / (Math.gamma(self.shape).to_r * (self.scale ** self.shape).to_r) end left * (x ** (self.shape - 1)) * euler end def cumulative_function(x) upper = if as_rate? 
self.rate * x.to_r else x / self.scale.to_r end # left = 1.0 / Math.gamma(self.shape) # right = Math.lower_incomplete_gamma_function(self.shape, upper) # left * right Math.normalised_lower_incomplete_gamma_function(self.shape, upper) end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/chi_squared.rb0000644000004100000410000000211514745414510026173 0ustar www-datawww-datamodule RubyStatistics module Distribution class ChiSquared attr_accessor :degrees_of_freedom alias_method :mean, :degrees_of_freedom def initialize(k) self.degrees_of_freedom = k end def cumulative_function(value) if degrees_of_freedom == 2 # Special case where DF = 2 https://en.wikipedia.org/wiki/Chi-squared_distribution#Cumulative_distribution_function 1.0 - Math.exp((-1.0 * value / 2.0)) else k = degrees_of_freedom/2.0 # Math.lower_incomplete_gamma_function(k, value/2.0)/Math.gamma(k) Math.normalised_lower_incomplete_gamma_function(k, value / 2.0) end end def density_function(value) return 0 if value < 0 common = degrees_of_freedom/2.0 left_down = (2 ** common) * Math.gamma(common) right = (value ** (common - 1)) * Math.exp(-(value/2.0)) right / left_down end def mode [degrees_of_freedom - 2, 0].max end def variance degrees_of_freedom * 2 end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/uniform.rb0000644000004100000410000000133514745414510025366 0ustar www-datawww-datamodule RubyStatistics module Distribution class Uniform attr_accessor :left, :right def initialize(a, b) self.left = a.to_r self.right = b.to_r end def density_function(value) if value >= left && value <= right 1/(right - left) else 0 end end def cumulative_function(value) if value < left 0 elsif value >= left && value <= right (value - left)/(right - left) else 1 end end def mean (1/2.0) * ( left + right ) end alias_method :median, :mean def variance (1/12.0) * ( right - left ) ** 2 end end end end 
ruby-statistics-4.1.0/lib/ruby-statistics/distribution/binomial.rb0000644000004100000410000000244514745414510025504 0ustar www-datawww-datamodule RubyStatistics module Distribution class Binomial attr_accessor :number_of_trials, :probability_per_trial def initialize(n, p) self.number_of_trials = n.to_i self.probability_per_trial = p end def probability_mass_function(k) return if k < 0 || k > number_of_trials k = k.to_i Math.combination(number_of_trials, k) * (probability_per_trial ** k) * ((1 - probability_per_trial) ** (number_of_trials - k)) end def cumulative_function(k) return if k < 0 || k > number_of_trials k = k.to_i p = 1 - probability_per_trial Math.incomplete_beta_function(p, number_of_trials - k, 1 + k) end def mean number_of_trials * probability_per_trial end def variance mean * (1 - probability_per_trial) end def mode test = (number_of_trials + 1) * probability_per_trial returned = if test == 0 || (test % 1 != 0) test.floor elsif (test % 1 == 0) && (test >= 1 && test <= number_of_trials) [test, test - 1] elsif test == number_of_trials + 1 number_of_trials end returned end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/tables/0000755000004100000410000000000014745414510024632 5ustar www-datawww-dataruby-statistics-4.1.0/lib/ruby-statistics/distribution/tables/chi_squared.rb0000644000004100000410000001152014745414510027445 0ustar www-datawww-datamodule RubyStatistics module Distribution module Tables class ChiSquared # Values retrieved from the following table provided by the University of Arizona. 
# https://math.arizona.edu/~jwatkins/chi-square-table.pdf TABLE = [ [0.000, 0.000, 0.001, 0.004, 0.016, 2.706, 3.841, 5.024, 6.635, 7.879], [0.010, 0.020, 0.051, 0.103, 0.211, 4.605, 5.991, 7.378, 9.210, 10.597], [0.072, 0.115, 0.216, 0.352, 0.584, 6.251, 7.815, 9.348, 11.345, 12.838], [0.207, 0.297, 0.484, 0.711, 1.064, 7.779, 9.488, 11.143, 13.277, 14.860], [0.412, 0.554, 0.831, 1.145, 1.610, 9.236, 11.070, 12.833, 15.086, 16.750], [0.676, 0.872, 1.237, 1.635, 2.204, 10.645, 12.592, 14.449, 16.812, 18.548], [0.989, 1.239, 1.690, 2.167, 2.833, 12.017, 14.067, 16.013, 18.475, 20.278], [1.344, 1.646, 2.180, 2.733, 3.490, 13.362, 15.507, 17.535, 20.090, 21.955], [1.735, 2.088, 2.700, 3.325, 4.168, 14.684, 16.919, 19.023, 21.666, 23.589], [2.156, 2.558, 3.247, 3.940, 4.865, 15.987, 18.307, 20.483, 23.209, 25.188], [2.603, 3.053, 3.816, 4.575, 5.578, 17.275, 19.675, 21.920, 24.725, 26.757], [3.074, 3.571, 4.404, 5.226, 6.304, 18.549, 21.026, 23.337, 26.217, 28.300], [3.565, 4.107, 5.009, 5.892, 7.042, 19.812, 22.362, 24.736, 27.688, 29.819], [4.075, 4.660, 5.629, 6.571, 7.790, 21.064, 23.685, 26.119, 29.141, 31.319], [4.601, 5.229, 6.262, 7.261, 8.547, 22.307, 24.996, 27.488, 30.578, 32.801], [5.142, 5.812, 6.908, 7.962, 9.312, 23.542, 26.296, 28.845, 32.000, 34.267], [5.697, 6.408, 7.564, 8.672, 10.085, 24.769, 27.587, 30.191, 33.409, 35.718], [6.265, 7.015, 8.231, 9.390, 10.865, 25.989, 28.869, 31.526, 34.805, 37.156], [6.844, 7.633, 8.907, 10.117, 11.651, 27.204, 30.144, 32.852, 36.191, 38.582], [7.434, 8.260, 9.591, 10.851, 12.443, 28.412, 31.410, 34.170, 37.566, 39.997], [8.034, 8.897, 10.283, 11.591, 13.240, 29.615, 32.671, 35.479, 38.932, 41.401], [8.643, 9.542, 10.982, 12.338, 14.041, 30.813, 33.924, 36.781, 40.289, 42.796], [9.260, 10.196, 11.689, 13.091, 14.848, 32.007, 35.172, 38.076, 41.638, 44.181], [9.886, 10.856, 12.401, 13.848, 15.659, 33.196, 36.415, 39.364, 42.980, 45.559], [10.520, 11.524, 13.120, 14.611, 16.473, 34.382, 37.652, 40.646, 44.314, 
46.928], [11.160, 12.198, 13.844, 15.379, 17.292, 35.563, 38.885, 41.923, 45.642, 48.290], [11.808, 12.879, 14.573, 16.151, 18.114, 36.741, 40.113, 43.195, 46.963, 49.645], [12.461, 13.565, 15.308, 16.928, 18.939, 37.916, 41.337, 44.461, 48.278, 50.993], [13.121, 14.256, 16.047, 17.708, 19.768, 39.087, 42.557, 45.722, 49.588, 52.336], [13.787, 14.953, 16.791, 18.493, 20.599, 40.256, 43.773, 46.979, 50.892, 53.672], [20.707, 22.164, 24.433, 26.509, 29.051, 51.805, 55.758, 59.342, 63.691, 66.766], [27.991, 29.707, 32.357, 34.764, 37.689, 63.167, 67.505, 71.420, 76.154, 79.490], [35.534, 37.485, 40.482, 43.188, 46.459, 74.397, 79.082, 83.298, 88.379, 91.952], [43.275, 45.442, 48.758, 51.739, 55.329, 85.527, 90.531, 95.023, 100.425, 104.215], [51.172, 53.540, 57.153, 60.391, 64.278, 96.578, 101.879, 106.629, 112.329, 116.321], [59.196, 61.754, 65.647, 69.126, 73.291, 107.565, 113.145, 118.136, 124.116, 128.299], [67.328, 70.065, 74.222, 77.929, 82.358, 118.498, 124.342, 129.561, 135.807, 140.169] ].freeze ALPHA_HEADER = { 0.995 => 0, 0.990 => 1, 0.975 => 2, 0.95 => 3, 0.9 => 4, 0.1 => 5, 0.05 => 6, 0.025 => 7, 0.01 => 8, 0.005 => 9 }.freeze DEGREES_OF_FREEDOM = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 40, 50, 60, 70, 80, 90, 100].freeze # return an array of the alpha values in correct order # i.e. 0.995 -> 0, 0.990 -> 1 etc def self.alpha_values ALPHA_HEADER.keys end # Checks if a valid alpha value is passed to look up values def self.valid_alpha?(alpha) self.alpha_values.include?(alpha) end # Return a whole column of the distribution table for a certain alpha value def self.alpha_column(alpha) raise "Undefined alpha value." unless self.valid_alpha?(alpha) # return an array of hashes for an alpha value # including the degree of freedom for each critical value # i.e. [{df: 1, critival_value: x},...] 
TABLE.map.with_index do |row, index| { df: DEGREES_OF_FREEDOM[index], critical_value: row[ALPHA_HEADER[alpha]] } end end end end end end ruby-statistics-4.1.0/lib/ruby-statistics/distribution/t_student.rb0000644000004100000410000000461414745414510025723 0ustar www-datawww-datamodule RubyStatistics module Distribution class TStudent attr_accessor :degrees_of_freedom attr_reader :mode def initialize(v) self.degrees_of_freedom = v @mode = 0 end ### Extracted from https://codeplea.com/incomplete-beta-function-c ### This function is shared under zlib license and the author is Lewis Van Winkle def cumulative_function(value) upper = (value + Math.sqrt(value * value + degrees_of_freedom)) lower = (2.0 * Math.sqrt(value * value + degrees_of_freedom)) x = upper/lower alpha = degrees_of_freedom/2.0 beta = degrees_of_freedom/2.0 Math.incomplete_beta_function(x, alpha, beta) end def density_function(value) return if degrees_of_freedom <= 0 upper = Math.gamma((degrees_of_freedom + 1)/2.0) lower = Math.sqrt(degrees_of_freedom * Math::PI) * Math.gamma(degrees_of_freedom/2.0) left = upper/lower right = (1 + ((value ** 2)/degrees_of_freedom.to_r)) ** -((degrees_of_freedom + 1)/2.0) left * right end def mean 0 if degrees_of_freedom > 1 end def variance if degrees_of_freedom > 1 && degrees_of_freedom <= 2 Float::INFINITY elsif degrees_of_freedom > 2 degrees_of_freedom/(degrees_of_freedom - 2.0) end end # Quantile function extracted from http://www.jennessent.com/arcview/idf.htm # TODO: Make it truly Student's T sample. def random(elements: 1, seed: Random.new_seed) warn 'This is an alpha version code. 
module RubyStatistics
  module Distribution
    # Geometric distribution.
    #
    # When +always_success_allowed+ is false, results are reported for k >= 1
    # and k <= 0 yields nil. When it is true, results are reported for k >= 0
    # and negative k yields nil.
    class Geometric
      attr_accessor :probability_of_success, :always_success_allowed

      # @param p probability of success (converted with #to_r)
      # @param always_success selects the k >= 0 variant when true
      def initialize(p, always_success: false)
        self.probability_of_success = p.to_r
        self.always_success_allowed = always_success
      end

      # Probability at +k+ (truncated with #to_i); nil outside the supported range.
      def density_function(k)
        trial = k.to_i

        if always_success_allowed
          trial < 0 ? nil : (failure_probability ** trial) * probability_of_success
        else
          trial <= 0 ? nil : (failure_probability ** (trial - 1.0)) * probability_of_success
        end
      end

      # Cumulative probability up to +k+ (truncated with #to_i); nil outside the supported range.
      def cumulative_function(k)
        trial = k.to_i

        if always_success_allowed
          trial < 0 ? nil : 1.0 - (failure_probability ** (trial + 1.0))
        else
          trial <= 0 ? nil : 1.0 - (failure_probability ** trial)
        end
      end

      # (1 - p) / p for the k >= 0 variant, 1 / p otherwise.
      def mean
        numerator = always_success_allowed ? failure_probability : 1.0
        numerator / probability_of_success
      end

      # ceil(-1 / log2(1 - p)), shifted down by one (as a Float) for the k >= 0 variant.
      def median
        threshold = (-1.0 / Math.log2(failure_probability)).ceil
        always_success_allowed ? threshold - 1.0 : threshold
      end

      # 0.0 for the k >= 0 variant, 1.0 otherwise.
      def mode
        always_success_allowed ? 0.0 : 1.0
      end

      # (1 - p) / p^2
      def variance
        failure_probability / (probability_of_success ** 2)
      end

      # (2 - p) / sqrt(1 - p)
      def skewness
        (2.0 - probability_of_success) / Math.sqrt(failure_probability)
      end

      # 6 + p^2 / (1 - p)
      def kurtosis
        6.0 + ((probability_of_success ** 2) / failure_probability)
      end

      private

      # Probability that a single trial fails, as a Float.
      def failure_probability
        1.0 - probability_of_success
      end
    end
  end
end
loop do x = 2.0 * rand - 1.0 y = 2.0 * rand - 1.0 r = (x ** 2) + (y ** 2) break unless r >= 1.0 || r == 0 end # Project the random point to the required random distance r = Math.sqrt(-2.0 * Math.log(r) / r) # Transform the random distance to a gaussian value and append it to the results array results << mean + x * r * standard_deviation end if elements == 1 results.first else results end end end class StandardNormal < Normal def initialize super(0, 1) # Mean = 0, Std = 1 end def density_function(value) pow = (value**2)/2.0 euler = Math.exp(-pow) euler/Math.sqrt(2 * Math::PI) end end # Inverse Standard Normal distribution: # References: # https://en.wikipedia.org/wiki/Inverse_distribution # http://www.source-code.biz/snippets/vbasic/9.htm class InverseStandardNormal < StandardNormal A1 = -39.6968302866538 A2 = 220.946098424521 A3 = -275.928510446969 A4 = 138.357751867269 A5 = -30.6647980661472 A6 = 2.50662827745924 B1 = -54.4760987982241 B2 = 161.585836858041 B3 = -155.698979859887 B4 = 66.8013118877197 B5 = -13.2806815528857 C1 = -7.78489400243029E-03 C2 = -0.322396458041136 C3 = -2.40075827716184 C4 = -2.54973253934373 C5 = 4.37466414146497 C6 = 2.93816398269878 D1 = 7.78469570904146E-03 D2 = 0.32246712907004 D3 = 2.445134137143 D4 = 3.75440866190742 P_LOW = 0.02425 P_HIGH = 1 - P_LOW def density_function(_) raise NotImplementedError end def random(elements: 1, seed: Random.new_seed) raise NotImplementedError end def cumulative_function(value) return if value < 0.0 || value > 1.0 return -1.0 * Float::INFINITY if value.zero? 
module RubyStatistics
  module Distribution
    # Weibull distribution with shape k and scale lambda.
    # Its CDF, as implemented below, is 1 - exp(-(x / scale) ** shape) for x >= 0.
    class Weibull
      attr_accessor :shape, :scale # k and lambda

      # @param k shape parameter (converted with #to_r)
      # @param lamb scale parameter (converted with #to_r)
      def initialize(k, lamb)
        self.shape = k.to_r
        self.scale = lamb.to_r
      end

      # Cumulative distribution function; 0 for negative values.
      def cumulative_function(random_value)
        return 0 if random_value < 0

        1 - Math.exp(-((random_value/scale) ** shape))
      end

      # Probability density at +value+.
      # Returns nil when shape or scale is not positive, and 0 for negative values.
      def density_function(value)
        return if shape <= 0 || scale <= 0
        return 0 if value < 0

        left = shape/scale
        center = (value/scale)**(shape - 1)
        right = Math.exp(-((value/scale)**shape))

        left * center * right
      end

      # scale * gamma(1 + 1/shape)
      def mean
        scale * Math.gamma(1 + (1/shape))
      end

      # 0 when shape <= 1, otherwise scale * ((shape - 1) / shape) ** (1 / shape)
      def mode
        return 0 if shape <= 1

        scale * (((shape - 1)/shape) ** (1/shape))
      end

      # scale^2 * (gamma(1 + 2/shape) - gamma(1 + 1/shape)^2)
      def variance
        left = Math.gamma(1 + (2/shape))
        right = Math.gamma(1 + (1/shape)) ** 2

        (scale ** 2) * (left - right)
      end

      # Draws a random sample by inverse transform sampling: a uniform draw u in [0, 1)
      # is pushed through the inverse of #cumulative_function.
      #
      # Solving u = 1 - exp(-(x / scale) ** shape) for x gives
      #   x = scale * (-ln(1 - u)) ** (1 / shape)
      # The scale must multiply the result; folding it into the logarithm as 1/scale
      # only agrees with the CDF above when scale == 1.
      #
      # Background on the technique: https://www.taygeta.com/random/weibull.html
      #
      # @param elements number of values to generate
      # @param seed seed handed to Kernel#srand, so a given seed reproduces the sample
      # @return a single Float when +elements+ is 1, otherwise an Array of Floats
      def random(elements: 1, seed: Random.new_seed)
        srand(seed)

        results = elements.times.map do
          scale * ((-Math.log(1 - rand)) ** (1/shape))
        end

        elements == 1 ? results.first : results
      end
    end
  end
end
module RubyStatistics
  module Distribution
    # Beta distribution with shape parameters alpha and beta.
    class Beta
      attr_accessor :alpha, :beta

      # @param alp first shape parameter (converted with #to_r)
      # @param bet second shape parameter (converted with #to_r)
      def initialize(alp, bet)
        self.alpha = alp.to_r
        self.beta = bet.to_r
      end

      # Cumulative distribution function at +value+.
      # Math.incomplete_beta_function is not core Ruby; it is expected to be supplied by the gem.
      def cumulative_function(value)
        Math.incomplete_beta_function(value, alpha, beta)
      end

      # Probability density at +value+; 0 outside the [0, 1] interval.
      # Math.beta_function is not core Ruby; it is expected to be supplied by the gem.
      def density_function(value)
        outside_support = value < 0 || value > 1
        return 0 if outside_support

        numerator = (value ** (alpha - 1)) * ((1 - value) ** (beta - 1))
        numerator / Math.beta_function(alpha, beta)
      end

      # (alpha - 1) / (alpha + beta - 2); nil unless both parameters are greater than 1.
      def mode
        return unless alpha > 1 && beta > 1

        numerator = alpha - 1
        numerator / (alpha + beta - 2)
      end

      # alpha / (alpha + beta); nil when the parameters add up to zero.
      def mean
        total = alpha + beta
        total == 0 ? nil : alpha / total
      end
    end
  end
end
module RubyStatistics
  module Distribution
    # Negative binomial distribution parameterised by a number of failures r
    # and a per-trial probability p.
    class NegativeBinomial
      attr_accessor :number_of_failures, :probability_per_trial

      # @param r number of failures (converted with #to_i)
      # @param p per-trial probability, stored as given
      def initialize(r, p)
        self.number_of_failures = r.to_i
        self.probability_per_trial = p
      end

      # C(k + r - 1, k) * (1 - p)^r * p^k
      # Returns nil when r is negative or +k+ lies outside 0..r.
      # NOTE(review): the upper bound k <= r is enforced by the code but is not an obvious
      # property of the distribution - confirm it is intentional.
      # Math.combination is not core Ruby; it is expected to be supplied by the gem.
      def probability_mass_function(k)
        return if number_of_failures < 0 || k < 0 || k > number_of_failures

        arrangements = Math.combination(k + number_of_failures - 1, k)
        weight = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k)

        arrangements * weight
      end

      # One minus the incomplete beta function evaluated at p with parameters (k + 1, r).
      # Returns nil when +k+ lies outside 0..r.
      # Math.incomplete_beta_function is not core Ruby; it is expected to be supplied by the gem.
      def cumulative_function(k)
        return if k < 0 || k > number_of_failures

        count = k.to_i
        1.0 - Math.incomplete_beta_function(probability_per_trial, count + 1, number_of_failures)
      end

      # p * r / (1 - p)
      def mean
        complement = (1 - probability_per_trial).to_r
        (probability_per_trial * number_of_failures) / complement
      end

      # p * r / (1 - p)^2
      def variance
        squared_complement = ((1 - probability_per_trial) ** 2).to_r
        (probability_per_trial * number_of_failures) / squared_complement
      end

      # (1 + p) / sqrt(p * r)
      def skewness
        spread = Math.sqrt(probability_per_trial * number_of_failures)
        (1 + probability_per_trial).to_r / spread
      end

      # floor(p * (r - 1) / (1 - p)) when r > 1, otherwise 0.0.
      def mode
        return 0.0 if number_of_failures <= 1
        return unless number_of_failures > 1

        numerator = probability_per_trial * (number_of_failures - 1)
        denominator = (1 - probability_per_trial).to_r

        (numerator / denominator).floor
      end
    end
  end
end
and breaking the calculation on logs. # So, we default to the basic definition of the CDF which is # the integral (-Inf, K) of the PDF, with P(X <= x) which can # be solved as a summation of all PDFs from 1 to K. Note that the summation approach # only applies to discrete distributions. # # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p) # 1.0 + right result = 0.0 1.upto(k) do |number| result += self.density_function(number, p) end result end def self.mode 1.0 end def self.mean(p) (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p)) end def self.variance(p) up = p + Math.log(1.0 - p) down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2) (-1.0 * p) * (up / down.to_r) end end end end ruby-statistics-4.1.0/LICENSE.txt0000644000004100000410000000207114745414510016545 0ustar www-datawww-dataThe MIT License (MIT) Copyright (c) 2017 esteban zapata Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# coding: utf-8
# Gem specification for ruby-statistics.

# Make lib/ loadable so the version constant can be required below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "ruby-statistics/version"

Gem::Specification.new do |spec|
  spec.name = "ruby-statistics"
  spec.version = RubyStatistics::VERSION
  spec.authors = ["esteban zapata"]
  spec.email = ["ruby@estebanz.email"]

  spec.summary = %q{A ruby gem for some specific statistics. Inspired by the jStat js library.}
  spec.description = %q{This gem is intended to accomplish the same purpose as jStat js library: to provide ruby with statistical capabilities without the need of a statistical programming language like R or Octave. Some functions and capabilities are an implementation from other authors and are referenced properly in the class/method.}
  spec.homepage = "https://github.com/estebanz01/ruby-statistics"
  spec.license = "MIT"

  # Package every file tracked by git except tests, specs and features.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Minimum required ruby version
  spec.required_ruby_version = '>= 3.0'

  spec.add_development_dependency "rake", '~> 13.0', '>= 12.0.0'
  spec.add_development_dependency "rspec", '~> 3.13', '>= 3.10.0'
  spec.add_development_dependency 'byebug', '~> 11.1', '>= 11.1.0'
  spec.add_development_dependency 'pry', '~> 0.14', '>= 0.14.0'
  spec.add_development_dependency 'bigdecimal', '~> 3.1', '>= 3.1.9'
end
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at ezapata@altavistaed.com. 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ ruby-statistics-4.1.0/Gemfile0000644000004100000410000000024514745414510016216 0ustar www-datawww-datasource "https://rubygems.org" git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } # Specify your gem's dependencies in statistics.gemspec gemspec ruby-statistics-4.1.0/LICENSE0000644000004100000410000000206514745414510015732 0ustar www-datawww-dataMIT License Copyright (c) 2017 Esteban Zapata Rojas Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ruby-statistics-4.1.0/.travis.yml0000644000004100000410000000021314745414510017027 0ustar www-datawww-datasudo: false language: ruby rvm: - 2.5.1 - 2.6.0 - 2.6.3 - 2.6.5 - 2.7 before_install: gem update --system && gem install bundler ruby-statistics-4.1.0/README.md0000644000004100000410000000772514745414510016214 0ustar www-datawww-data# Ruby Statistics ![](https://github.com/estebanz01/ruby-statistics/actions/workflows/ruby.yml/badge.svg) ## Note regarding Versions 3.x and 4.x Hola! :wave: right now the gem got an update that introduced a breaking change in master where the Outermost namespace was changed to be `ruby-statistics` instead of `statistics`. This change is in _master_ already and released in version 4.0. If you're relying on branch instead of gem version, you can checkout branch `3.x`. This branch will be supported but no additional features will be added. ## Note regarding Big Decimal support This gem also provides basic support for `BigDecimal` and the gem is listed as an optional `development` dependency on the gemspec. Since it's no longer supported as a bundled feature since 3.4.0, the functionality around this gem is going to be limited as well. It is not a required dependency to run or use any of the features developed so far. :grin: --- A basic ruby gem that implements some statistical methods, functions and concepts to be used in any ruby environment without depending on any mathematical software like `R`, `Matlab`, `Octave` or similar. Unit test runs under the following ruby versions: * Ruby 3.0. * Ruby 3.1. * Ruby 3.2. * Ruby 3.3. 
We got the inspiration from the folks at [JStat](https://github.com/jstat/jstat) and some interesting lectures about [Keystroke dynamics](http://www.biometric-solutions.com/keystroke-dynamics.html). Some logic and algorithms are extractions or adaptations from other authors, which are referenced in the comments. This software is released under the MIT License. ## Installation Add this line to your application's Gemfile: ```ruby gem 'ruby-statistics' ``` And then execute: $ bundle Or install it yourself as: $ gem install ruby-statistics ## Basic Usage Just require the `ruby-statistics` gem in order to load it. If you haven't already defined a `Distribution` namespace, the gem will assign an alias, reducing the number of namespaces needed to use a class. Right now you can load: * The whole statistics gem. `require 'ruby-statistics'` * A namespace. `require 'ruby-statistics/distribution'` * A class. `require 'ruby-statistics/distribution/normal'` Feel free to use whichever is most convenient for you. ### Hello-World Example ```ruby require 'ruby-statistics' poisson = Distribution::Poisson.new(l) # Using Distribution alias. normal = RubyStatistics::Distribution::StandardNormal.new # Using all namespaces. ``` ## Documentation You can find more detailed documentation of all available distributions, tests and functions in the [Documentation Index](https://github.com/estebanz01/ruby-statistics/wiki). ## Development After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
## Contributing Bug reports and pull requests are welcome on GitHub at https://github.com/estebanz01/ruby-statistics. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct. ## License The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). ## Code of Conduct Everyone interacting in the RubyStatistics project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/estebanz01/ruby-statistics/blob/master/CODE_OF_CONDUCT.md). ## Contact You can contact me via: * [Github](https://github.com/estebanz01) * [Twitter](https://twitter.com/estebanz01)