energy/NAMESPACE

useDynLib(energy, .registration=TRUE)
importFrom(Rcpp, evalCpp)
importFrom("stats", "as.dist", "dist", "dnorm", "hclust", "model.matrix",
           "pnorm", "ppois", "pt", "rnorm", "rpois", "sd", "var")
importFrom(boot, boot)
importFrom(gsl, hyperg_1F1)
export(
  bcdcor, D_center, Dcenter, dcor, dcor2d, DCOR, dcor.t, dcor.test,
  dcor.ttest, dcorT, dcorT.test, dcov, dcov2d, dcov.test, dcovU,
  dcovU_stats, disco, disco.between, edist, energy.hclust, eqdist.e,
  eqdist.etest, indep.test, kgroups, ksample.e, mvI, mvI.test,
  mvnorm.e, mvnorm.etest, mvnorm.test, normal.e, normal.test,
  pdcor, pdcor.test, pdcov, pdcov.test, poisson.e, poisson.etest,
  poisson.m, poisson.mtest, poisson.tests, sortrank,
  U_center, U_product, Ucenter
)
S3method(print, disco)
S3method(print, kgroups)
S3method(fitted, kgroups)

energy/README.md

# energy

energy package for R

The energy package for R implements several methods in multivariate analysis
and multivariate inference based on the energy distance, which characterizes
equality of distributions. Distance correlation (multivariate independence),
disco (nonparametric extension of ANOVA), and goodness-of-fit tests are
examples of some of the methods included. energy is named based on the
analogy with potential energy in physics. See the references in the manual
for more details.

[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/energy)](https://cran.r-project.org/package=energy)

energy/data/EVnormal.rda  (binary R data file; contents omitted -- see the EVnormal documentation below)
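A minimal sketch of the exported interface summarized in the README; the sample sizes and the choice of R = 199 replicates here are arbitrary illustrations, and dcor and dcov.test are documented in the man pages that follow:

library(energy)
x <- matrix(rnorm(100), nrow = 50)   # 50 observations in R^2
y <- matrix(rnorm(100), nrow = 50)
dcor(x, y)                # distance correlation of the two samples
dcov.test(x, y, R = 199)  # permutation test of multivariate independence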
[fragment of a help file; its beginning is lost in the binary data above]

n <- 100
x <- rnorm(100)
y <- rnorm(100)
all.equal(dcov(x, y)^2, dcov2d(x, y), check.attributes = FALSE)
all.equal(bcdcor(x, y), dcor2d(x, y, "U"), check.attributes = FALSE)
x <- rlnorm(400)
y <- rexp(400)
dcov.test(x, y, R=199)    #permutation test
dcor.test(x, y, R=199)
}
}
\keyword{ htest }
\keyword{ nonparametric }
\concept{ independence }
\concept{ distance correlation }
\concept{ distance covariance }
\concept{ energy statistics }

energy/man/dcorT.Rd

\name{dcorT}
\alias{dcorT.test}
\alias{dcorT}
\title{ Distance Correlation t-Test}
\description{ Distance correlation t-test of multivariate independence
for high dimension.}
\usage{
dcorT.test(x, y)
dcorT(x, y)
}
\arguments{
  \item{x}{ data or distances of first sample}
  \item{y}{ data or distances of second sample}
}
\details{
\code{dcorT.test} performs a nonparametric t-test of multivariate independence
in high dimension (dimension is close to or larger than sample size).
As dimension goes to infinity, the asymptotic distribution of the test statistic
is approximately Student t with \eqn{n(n-3)/2-1} degrees of freedom and for
\eqn{n \geq 10} the statistic is approximately distributed as standard normal.

The sample sizes (number of rows) of the two samples must agree, and samples
must not contain missing values.

The t statistic (dcorT) is a transformation of a bias corrected version of
distance correlation (see SR 2013 for details). Large values (upper tail) of
the dcorT statistic are significant.
}
\note{ \code{dcor.t} and \code{dcor.ttest} are deprecated. }
\value{
\code{dcorT} returns the dcor t statistic, and \code{dcorT.test} returns
a list with class \code{htest} containing
  \item{ method}{ description of test}
  \item{ statistic}{ observed value of the test statistic}
  \item{ parameter}{ degrees of freedom}
  \item{ estimate}{ (bias corrected) squared dCor(x,y)}
  \item{ p.value}{ p-value of the t-test}
  \item{ data.name}{ description of data}
}
\seealso{
 \code{\link{bcdcor}} \code{\link{dcov.test}} \code{\link{dcor}} \code{\link{DCOR}}
}
\references{
 Szekely, G.J. and Rizzo, M.L. (2013). The distance correlation t-test of
 independence in high dimension. \emph{Journal of Multivariate Analysis},
 Volume 117, pp. 193-213. \cr \doi{10.1016/j.jmva.2013.02.012}

 Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
 Measuring and Testing Dependence by Correlation of Distances,
 \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794. \cr
 \doi{10.1214/009053607000000505}

 Szekely, G.J. and Rizzo, M.L. (2009),
 Brownian Distance Covariance, \emph{Annals of Applied Statistics},
 Vol. 3, No. 4, 1236-1265. \cr \doi{10.1214/09-AOAS312}
}
\author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J.
Szekely } \examples{ x <- matrix(rnorm(100), 10, 10) y <- matrix(runif(100), 10, 10) dcorT(x, y) dcorT.test(x, y) } \keyword{ htest } \keyword{ multivariate } \keyword{ nonparametric } \concept{ independence } \concept{ multivariate } \concept{ distance correlation } \concept{ distance covariance } \concept{ energy statistics } energy/man/eigen.Rd0000644000176200001440000000202214014532550013667 0ustar liggesusers\name{EVnormal} \docType{data} \alias{EVnormal} \alias{eigenvalues} \title{Eigenvalues for the energy Test of Univariate Normality} \description{ Pre-computed eigenvalues corresponding to the asymptotic sampling distribution of the energy test statistic for univariate normality, under the null hypothesis. Four Cases are computed: \enumerate{ \item Simple hypothesis, known parameters. \item Estimated mean, known variance. \item Known mean, estimated variance. \item Composite hypothesis, estimated parameters. } Case 4 eigenvalues are used in the test function \code{normal.test} when \code{method=="limit"}. } \usage{data(EVnormal)} \format{Numeric matrix with 125 rows and 5 columns; column 1 is the index, and columns 2-5 are the eigenvalues of Cases 1-4.} \source{Computed} \references{ Szekely, G. J. and Rizzo, M. L. (2005) A New Test for Multivariate Normality, \emph{Journal of Multivariate Analysis}, 93/1, 58-80, \doi{10.1016/j.jmva.2003.12.002}. } energy/man/pdcor.Rd0000644000176200001440000000436714173561423013734 0ustar liggesusers\name{pdcor} \alias{pdcor} \alias{pdcov} \alias{pdcor.test} \alias{pdcov.test} \title{ Partial distance correlation and covariance } \description{Partial distance correlation pdcor, pdcov, and tests.} \usage{ pdcov.test(x, y, z, R) pdcor.test(x, y, z, R) pdcor(x, y, z) pdcov(x, y, z) } \arguments{ \item{x}{ data matrix or dist object of first sample} \item{y}{ data matrix or dist object of second sample} \item{z}{ data matrix or dist object of third sample} \item{R}{ replicates for permutation test} } \details{ \code{pdcor(x, y, z)} and \code{pdcov(x, y, z)} compute the partial distance correlation and partial distance covariance, respectively, of x and y removing z. A test for zero partial distance correlation (or zero partial distance covariance) is implemented in \code{pdcor.test}, and \code{pdcov.test}. If the argument is a matrix, it is treated as a data matrix and distances are computed (observations in rows). If the arguments are distances or dissimilarities, they must be distance (\code{dist}) objects. For symmetric, zero-diagonal dissimilarity matrices, use \code{as.dist} to convert to a \code{dist} object. } \value{ Each test returns an object of class \code{htest}. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \references{ Szekely, G.J. and Rizzo, M.L. (2014), Partial Distance Correlation with Methods for Dissimilarities. \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412. 
} \examples{ n = 30 R <- 199 ## mutually independent standard normal vectors x <- rnorm(n) y <- rnorm(n) z <- rnorm(n) pdcor(x, y, z) pdcov(x, y, z) set.seed(1) pdcov.test(x, y, z, R=R) set.seed(1) pdcor.test(x, y, z, R=R) \donttest{ if (require(MASS)) { p = 4 mu <- rep(0, p) Sigma <- diag(p) ## linear dependence y <- mvrnorm(n, mu, Sigma) + x print(pdcov.test(x, y, z, R=R)) ## non-linear dependence y <- mvrnorm(n, mu, Sigma) * x print(pdcov.test(x, y, z, R=R)) } } } \keyword{ htest } \keyword{ multivariate } \keyword{ nonparametric } \concept{ independence } \concept{ multivariate } \concept{ distance correlation } \concept{ distance covariance } \concept{ energy statistics } energy/man/energy.hclust.Rd0000644000176200001440000001274414014533130015401 0ustar liggesusers\name{energy.hclust} \alias{energy.hclust} \title{ Hierarchical Clustering by Minimum (Energy) E-distance } \description{ Performs hierarchical clustering by minimum (energy) E-distance method. } \usage{ energy.hclust(dst, alpha = 1) } \arguments{ \item{dst}{\code{dist} object} \item{alpha}{distance exponent} } \details{ Dissimilarities are \eqn{d(x,y) = \|x-y\|^\alpha}{||x-y||^a}, where the exponent \eqn{\alpha}{a} is in the interval (0,2]. This function performs agglomerative hierarchical clustering. Initially, each of the n singletons is a cluster. At each of n-1 steps, the procedure merges the pair of clusters with minimum e-distance. The e-distance between two clusters \eqn{C_i, C_j} of sizes \eqn{n_i, n_j} is given by \deqn{e(C_i, C_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}], } where \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j} \|X_{ip}-X_{jq}\|^\alpha,}{ M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||^a,} \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, and \eqn{X_{ip}}{ X_(ip)} denotes the p-th observation in the i-th cluster. The return value is an object of class \code{hclust}, so \code{hclust} methods such as print or plot methods, \code{plclust}, and \code{cutree} are available. See the documentation for \code{hclust}. The e-distance measures both the heterogeneity between clusters and the homogeneity within clusters. \eqn{\mathcal E}{E}-clustering (\eqn{\alpha=1}{a=1}) is particularly effective in high dimension, and is more effective than some standard hierarchical methods when clusters have equal means (see example below). For other advantages see the references. \code{edist} computes the energy distances for the result (or any partition) and returns the cluster distances in a \code{dist} object. See the \code{edist} examples. } \value{ An object of class \code{hclust} which describes the tree produced by the clustering process. The object is a list with components: \item{merge:}{ an n-1 by 2 matrix, where row i of \code{merge} describes the merging of clusters at step i of the clustering. If an element j in the row is negative, then observation -j was merged at this stage. 
If j is positive then the merge was with the cluster formed at the (earlier) stage j of the algorithm.} \item{height:}{the clustering height: a vector of n-1 non-decreasing real numbers (the e-distance between merging clusters)} \item{order:}{ a vector giving a permutation of the indices of original observations suitable for plotting, in the sense that a cluster plot using this ordering and matrix \code{merge} will not have crossings of the branches.} \item{labels:}{ labels for each of the objects being clustered.} \item{call:}{ the call which produced the result.} \item{method:}{ the cluster method that has been used (e-distance).} \item{dist.method:}{ the distance that has been used to create \code{dst}.} } \note{ Currently \code{stats::hclust} implements Ward's method by \code{method="ward.D2"}, which applies the squared distances. That method was previously \code{"ward"}. Because both \code{hclust} and energy use the same type of Lance-Williams recursive formula to update cluster distances, now with the additional option \code{method="ward.D"} in \code{hclust}, the energy distance method is easily implemented by \code{hclust}. (Some "Ward" algorithms do not use Lance-Williams, however). Energy clustering (with \code{alpha=1}) and "ward.D" now return the same result, except that the cluster heights of energy hierarchical clustering with \code{alpha=1} are two times the heights from \code{hclust}. In order to ensure compatibility with hclust methods, \code{energy.hclust} now passes arguments through to \code{hclust} after possibly applying the optional exponent to distance. } \references{ Szekely, G. J. and Rizzo, M. L. (2005) Hierarchical Clustering via Joint Between-Within Distances: Extending Ward's Minimum Variance Method, \emph{Journal of Classification} 22(2) 151-183. \cr \doi{10.1007/s00357-005-0012-9} Szekely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, \emph{InterStat}, November (5). Szekely, G. J. (2000) Technical Report 03-05: \eqn{\mathcal{E}}{E}-statistics: Energy of Statistical Samples, Department of Mathematics and Statistics, Bowling Green State University. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \seealso{ \code{\link{edist}} \code{\link{ksample.e}} \code{\link{eqdist.etest}} \code{hclust}} \examples{ \dontrun{ library(cluster) data(animals) plot(energy.hclust(dist(animals))) data(USArrests) ecl <- energy.hclust(dist(USArrests)) print(ecl) plot(ecl) cutree(ecl, k=3) cutree(ecl, h=150) ## compare performance of e-clustering, Ward's method, group average method ## when sampled populations have equal means: n=200, d=5, two groups z <- rbind(matrix(rnorm(1000), nrow=200), matrix(rnorm(1000, 0, 5), nrow=200)) g <- c(rep(1, 200), rep(2, 200)) d <- dist(z) e <- energy.hclust(d) a <- hclust(d, method="average") w <- hclust(d^2, method="ward.D2") list("E" = table(cutree(e, k=2) == g), "Ward" = table(cutree(w, k=2) == g), "Avg" = table(cutree(a, k=2) == g)) } } \keyword{ multivariate } \keyword{ cluster } \concept{ energy statistics } energy/man/poisson.Rd0000644000176200001440000001032514014531270014275 0ustar liggesusers\name{Poisson Tests} \alias{poisson.tests} \alias{poisson.e} \alias{poisson.etest} \alias{poisson.m} \alias{poisson.mtest} \title{ Goodness-of-Fit Tests for Poisson Distribution} \description{ Performs the mean distance goodness-of-fit test and the energy goodness-of-fit test of Poisson distribution with unknown parameter. 
} \usage{ poisson.e(x) poisson.m(x) poisson.etest(x, R) poisson.mtest(x, R) poisson.tests(x, R, test="all") } \arguments{ \item{x}{ vector of nonnegative integers, the sample data } \item{R}{ number of bootstrap replicates } \item{test}{ name of test(s) } } \details{ Two distance-based tests of Poissonity are applied in \code{poisson.tests}, "M" and "E". The default is to do all tests and return results in a data frame. Valid choices for \code{test} are "M", "E", or "all" with default "all". If "all" tests, all tests are performed by a single parametric bootstrap computing all test statistics on each sample. The "M" choice is two tests, one based on a Cramer-von Mises distance and the other an Anderson-Darling distance. The "E" choice is the energy goodness-of-fit test. \code{R} must be a positive integer for a test. If \code{R} is missing or 0, a warning is printed but test statistics are computed (without testing). The mean distance test of Poissonity (M-test) is based on the result that the sequence of expected values E|X-j|, j=0,1,2,... characterizes the distribution of the random variable X. As an application of this characterization one can get an estimator \eqn{\hat F(j)} of the CDF. The test statistic (see \code{\link{poisson.m}}) is a Cramer-von Mises type of distance, with M-estimates replacing the usual EDF estimates of the CDF: \deqn{M_n = n\sum_{j=0}^\infty (\hat F(j) - F(j\;; \hat \lambda))^2 f(j\;; \hat \lambda).}{M_n = n sum [j>=0] (\hat F(j) - F(j; \hat \lambda))^2 f(j; \hat \lambda).} In \code{poisson.tests}, an Anderson-Darling type of weight is also applied when \code{test="M"} or \code{test="all"}. The tests are implemented by parametric bootstrap with \code{R} replicates. An energy goodness-of-fit test (E) is based on the test statistic \deqn{Q_n = n (\frac{2}{n} \sum_{i=1}^n E|x_i - X| - E|X-X'| - \frac{1}{n^2} \sum_{i,j=1}^n |x_i - x_j|, }{Q_n = n((2/n) sum[1:n] E|x_i-X| - E|X-X'| - (1/n^2) sum[1:n,1:n] |x_i-x_j|),} where X and X' are iid with the hypothesized null distribution. For a test of H: X ~ Poisson(\eqn{\lambda}), we can express E|X-X'| in terms of Bessel functions, and E|x_i - X| in terms of the CDF of Poisson(\eqn{\lambda}). If test=="all" or not specified, all tests are run with a single parametric bootstrap. \code{poisson.mtest} implements only the Poisson M-test with Cramer-von Mises type distance. \code{poisson.etest} implements only the Poisson energy test. } \value{ The functions \code{poisson.m} and \code{poisson.e} return the test statistics. The function \code{poisson.mtest} or \code{poisson.etest} return an \code{htest} object containing \item{method}{Description of test} \item{statistic}{observed value of the test statistic} \item{p.value}{approximate p-value of the test} \item{data.name}{replicates R} \item{estimate}{sample mean} \code{poisson.tests} returns "M-CvM test", "M-AD test" and "Energy test" results in a data frame with columns \item{estimate}{sample mean} \item{statistic}{observed value of the test statistic} \item{p.value}{approximate p-value of the test} \item{method}{Description of test} which can be coerced to a \code{tibble}. } \note{The running time of the M test is much faster than the E-test.} \references{ Szekely, G. J. and Rizzo, M. L. (2004) Mean Distance Test of Poisson Distribution, \emph{Statistics and Probability Letters}, 67/3, 241-247. \doi{10.1016/j.spl.2004.01.005}. Szekely, G. J. and Rizzo, M. L. 
(2005) A New Test for Multivariate Normality, \emph{Journal of Multivariate Analysis}, 93/1, 58-80, \doi{10.1016/j.jmva.2003.12.002}. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ x <- rpois(50, 2) poisson.m(x) poisson.e(x) \donttest{ poisson.etest(x, R=199) poisson.mtest(x, R=199) poisson.tests(x, R=199) } } \keyword{ htest } \keyword{ energy } energy/man/disco.Rd0000644000176200001440000001142514014532137013711 0ustar liggesusers\name{disco} \alias{disco} \alias{disco.between} \alias{print.disco} \title{ distance components (DISCO)} \description{ E-statistics DIStance COmponents and tests, analogous to variance components and anova. } \usage{ disco(x, factors, distance, index=1.0, R, method=c("disco","discoB","discoF")) disco.between(x, factors, distance, index=1.0, R) } \arguments{ \item{x}{ data matrix or distance matrix or dist object} \item{factors}{ matrix of factor labels or integers (not design matrix)} \item{distance}{ logical, TRUE if x is distance matrix} \item{index}{ exponent on Euclidean distance in (0,2]} \item{R}{ number of replicates for a permutation test} \item{method}{ test statistic } } \details{ \code{disco} calculates the distance components decomposition of total dispersion and if R > 0 tests for significance using the test statistic disco "F" ratio (default \code{method="disco"}), or using the between component statistic (\code{method="discoB"}), each implemented by permutation test. If \code{x} is a \code{dist} object, argument \code{distance} is ignored. If \code{x} is a distance matrix, set \code{distance=TRUE}. In the current release \code{disco} computes the decomposition for one-way models only. } \value{ When \code{method="discoF"}, \code{disco} returns a list similar to the return value from \code{anova.lm}, and the \code{print.disco} method is provided to format the output into a similar table. Details: \code{disco} returns a class \code{disco} object, which is a list containing \item{call}{call} \item{method}{method} \item{statistic}{vector of observed statistics} \item{p.value}{vector of p-values} \item{k}{number of factors} \item{N}{number of observations} \item{between}{between-sample distance components} \item{withins}{one-way within-sample distance components} \item{within}{within-sample distance component} \item{total}{total dispersion} \item{Df.trt}{degrees of freedom for treatments} \item{Df.e}{degrees of freedom for error} \item{index}{index (exponent on distance)} \item{factor.names}{factor names} \item{factor.levels}{factor levels} \item{sample.sizes}{sample sizes} \item{stats}{matrix containing decomposition} When \code{method="discoB"}, \code{disco} passes the arguments to \code{disco.between}, which returns a class \code{htest} object. \code{disco.between} returns a class \code{htest} object, where the test statistic is the between-sample statistic (proportional to the numerator of the F ratio of the \code{disco} test. } \references{ M. L. Rizzo and G. J. Szekely (2010). DISCO Analysis: A Nonparametric Extension of Analysis of Variance, Annals of Applied Statistics, Vol. 4, No. 2, 1034-1055. \cr \doi{10.1214/09-AOAS245} } \note{ The current version does all calculations via matrix arithmetic and boot function. Support for more general additive models and a formula interface is under development. \code{disco} methods have been added to the cluster distance summary function \code{edist}, and energy tests for equality of distribution (see \code{eqdist.etest}). 
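As a quick check of the decomposition described in the Details section above, the components returned by \code{disco} should (up to numerical error) add up: between + within = total. A minimal sketch using the warpbreaks data that also appears in the examples below (treat this as illustrative, not part of the package examples):

data(warpbreaks)
d <- disco(warpbreaks$breaks, factors = warpbreaks$wool, R = 0)
c(between = d$between, within = d$within, total = d$total)
d$between + d$within     ## should equal d$total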
} \seealso{ \code{ \link{edist} } \code{ \link{eqdist.e} } \code{ \link{eqdist.etest} } \code{ \link{ksample.e} } } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ ## warpbreaks one-way decompositions data(warpbreaks) attach(warpbreaks) disco(breaks, factors=wool, R=99) ## When index=2 for univariate data, we get ANOVA decomposition disco(breaks, factors=tension, index=2.0, R=99) aov(breaks ~ tension) ## Multivariate response ## Example on producing plastic film from Krzanowski (1998, p. 381) tear <- c(6.5, 6.2, 5.8, 6.5, 6.5, 6.9, 7.2, 6.9, 6.1, 6.3, 6.7, 6.6, 7.2, 7.1, 6.8, 7.1, 7.0, 7.2, 7.5, 7.6) gloss <- c(9.5, 9.9, 9.6, 9.6, 9.2, 9.1, 10.0, 9.9, 9.5, 9.4, 9.1, 9.3, 8.3, 8.4, 8.5, 9.2, 8.8, 9.7, 10.1, 9.2) opacity <- c(4.4, 6.4, 3.0, 4.1, 0.8, 5.7, 2.0, 3.9, 1.9, 5.7, 2.8, 4.1, 3.8, 1.6, 3.4, 8.4, 5.2, 6.9, 2.7, 1.9) Y <- cbind(tear, gloss, opacity) rate <- factor(gl(2,10), labels=c("Low", "High")) ## test for equal distributions by rate disco(Y, factors=rate, R=99) disco(Y, factors=rate, R=99, method="discoB") ## Just extract the decomposition table disco(Y, factors=rate, R=0)$stats ## Compare eqdist.e methods for rate ## disco between stat is half of original when sample sizes equal eqdist.e(Y, sizes=c(10, 10), method="original") eqdist.e(Y, sizes=c(10, 10), method="discoB") ## The between-sample distance component disco.between(Y, factors=rate, R=0) } \keyword{ htest } \keyword{ multivariate } energy/man/kgroups.Rd0000644000176200001440000000730014005374454014305 0ustar liggesusers\name{kgroups} \alias{kgroups} \title{ K-Groups Clustering } \description{ Perform k-groups clustering by energy distance. } \usage{ kgroups(x, k, iter.max = 10, nstart = 1, cluster = NULL) } \arguments{ \item{x}{Data frame or data matrix or distance object} \item{k}{number of clusters} \item{iter.max}{maximum number of iterations} \item{nstart}{number of restarts} \item{cluster}{initial clustering vector} } \details{ K-groups is based on the multisample energy distance for comparing distributions. Based on the disco decomposition of total dispersion (a Gini type mean distance) the objective function should either maximize the total between cluster energy distance, or equivalently, minimize the total within cluster energy distance. It is more computationally efficient to minimize within distances, and that makes it possible to use a modified version of the Hartigan-Wong algorithm (1979) to implement K-groups clustering. The within cluster Gini mean distance is \deqn{G(C_j) = \frac{1}{n_j^2} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}|} and the K-groups within cluster distance is \deqn{W_j = \frac{n_j}{2}G(C_j) = \frac{1}{2 n_j} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}.} If z is the data matrix for cluster \eqn{C_j}, then \eqn{W_j} could be computed as \code{sum(dist(z)) / nrow(z)}. If cluster is not NULL, the clusters are initialized by this vector (can be a factor or integer vector). Otherwise clusters are initialized with random labels in k approximately equal size clusters. If \code{x} is not a distance object (class(x) == "dist") then \code{x} is converted to a data matrix for analysis. Run up to \code{iter.max} complete passes through the data set until a local min is reached. If \code{nstart > 1}, on second and later starts, clusters are initialized at random, and the best result is returned. 
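The formula for \eqn{W_j} given above can be checked directly against a fitted result; a small sketch using the iris data from the examples below. It assumes that the returned \code{within} component stores \eqn{W_j} (if it stores the Gini mean distance \eqn{G(C_j)} instead, the two quantities differ only by the factor \eqn{n_j/2}):

x <- as.matrix(iris[ , 1:4])
kg <- kgroups(x, k = 3)
z <- x[kg$cluster == 1, ]
c(sum(dist(z)) / nrow(z), kg$within[1])   ## W_1 computed two ways, for comparison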
} \value{ An object of class \code{kgroups} containing the components \item{call}{the function call} \item{cluster}{vector of cluster indices} \item{sizes}{cluster sizes} \item{within}{vector of Gini within cluster distances} \item{W}{sum of within cluster distances} \item{count}{number of moves} \item{iterations}{number of iterations} \item{k}{number of clusters} \code{cluster} is a vector containing the group labels, 1 to k. \code{print.kgroups} prints some of the components of the kgroups object. Expect that count is 0 if the algorithm converged to a local min (that is, 0 moves happened on the last iteration). If iterations equals iter.max and count is positive, then the algorithm did not converge to a local min. } \author{ Maria Rizzo and Songzi Li } \references{ Li, Songzi (2015). "K-groups: A Generalization of K-means by Energy Distance." Ph.D. thesis, Bowling Green State University. Li, S. and Rizzo, M. L. (2017). "K-groups: A Generalization of K-means Clustering". ArXiv e-print 1711.04359. https://arxiv.org/abs/1711.04359 Szekely, G. J., and M. L. Rizzo. "Testing for equal distributions in high dimension." InterStat 5, no. 16.10 (2004). Rizzo, M. L., and G. J. Szekely. "Disco analysis: A nonparametric extension of analysis of variance." The Annals of Applied Statistics (2010): 1034-1055. Hartigan, J. A. and Wong, M. A. (1979). "Algorithm AS 136: A K-means clustering algorithm." Applied Statistics, 28, 100-108. doi: 10.2307/2346830. } \examples{ x <- as.matrix(iris[ ,1:4]) set.seed(123) kg <- kgroups(x, k = 3, iter.max = 5, nstart = 2) kg fitted(kg) \donttest{ d <- dist(x) set.seed(123) kg <- kgroups(d, k = 3, iter.max = 5, nstart = 2) kg kg$cluster fitted(kg) fitted(kg, method = "groups") } } \keyword{ cluster } \keyword{ multivariate } energy/man/dcovu.Rd0000644000176200001440000000457414014532050013731 0ustar liggesusers\name{Unbiased distance covariance} \alias{bcdcor} \alias{dcovU} \title{Unbiased dcov and bias-corrected dcor statistics} \description{ These functions compute unbiased estimators of squared distance covariance and a bias-corrected estimator of (squared) distance correlation. } \usage{ bcdcor(x, y) dcovU(x, y) } \arguments{ \item{x}{ data or dist object of first sample} \item{y}{ data or dist object of second sample} } \details{ The unbiased (squared) dcov is inner product definition of dCov, in the Hilbert space of U-centered distance matrices. The sample sizes (number of rows) of the two samples must agree, and samples must not contain missing values. Arguments \code{x}, \code{y} can optionally be \code{\link{dist}} objects; otherwise these arguments are treated as data. } \value{ \code{dcovU} returns the unbiased estimator of squared dcov. \code{bcdcor} returns a bias-corrected estimator of squared dcor. } \note{ Unbiased distance covariance (SR2014) corresponds to the biased (original) \eqn{\mathrm{dCov^2}}{dCov^2}. Since \code{dcovU} is an unbiased statistic, it is signed and we do not take the square root. For the original distance covariance test of independence (SRB2007, SR2009), the distance covariance test statistic is the V-statistic \eqn{\mathrm{n\, dCov^2} = n \mathcal{V}_n^2}{n V_n^2} (not dCov). Similarly, \code{bcdcor} is bias-corrected, so we do not take the square root as with dCor. } \references{ Szekely, G.J. and Rizzo, M.L. (2014), Partial Distance Correlation with Methods for Dissimilarities. \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412. Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. 
(2007), Measuring and Testing Dependence by Correlation of Distances, \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794. \cr \doi{10.1214/009053607000000505} Szekely, G.J. and Rizzo, M.L. (2009), Brownian Distance Covariance, \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1236-1265. \cr \doi{10.1214/09-AOAS312} } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ x <- iris[1:50, 1:4] y <- iris[51:100, 1:4] dcovU(x, y) bcdcor(x, y) } \keyword{ multivariate } \keyword{ nonparametric } \concept{ independence } \concept{ multivariate } \concept{ distance correlation } \concept{ distance covariance } \concept{ energy statistics } energy/man/dcov.test.Rd0000644000176200001440000001170614173557455014543 0ustar liggesusers\name{dcov.test} \alias{distance covariance} \alias{dcov.test} \alias{dcor.test} \title{ Distance Covariance Test and Distance Correlation test} \description{ Distance covariance test and distance correlation test of multivariate independence. Distance covariance and distance correlation are multivariate measures of dependence.} \usage{ dcov.test(x, y, index = 1.0, R = NULL) dcor.test(x, y, index = 1.0, R) } \arguments{ \item{x}{ data or distances of first sample} \item{y}{ data or distances of second sample} \item{R}{ number of replicates} \item{index}{ exponent on Euclidean distance, in (0,2]} } \details{ \code{dcov.test} and \code{dcor.test} are nonparametric tests of multivariate independence. The test decision is obtained via permutation bootstrap, with \code{R} replicates. The sample sizes (number of rows) of the two samples must agree, and samples must not contain missing values. Arguments \code{x}, \code{y} can optionally be \code{\link{dist}} objects; otherwise these arguments are treated as data. The \code{dcov} test statistic is \eqn{n \mathcal V_n^2}{nV_n^2} where \eqn{\mathcal V_n(x,y)}{V_n(x,y)} = dcov(x,y), which is based on interpoint Euclidean distances \eqn{\|x_{i}-x_{j}\|}{||x_{i}-x_{j}||}. The \code{index} is an optional exponent on Euclidean distance. Similarly, the \code{dcor} test statistic is based on the normalized coefficient, the distance correlation. (See the manual page for \code{dcor}.) Distance correlation is a new measure of dependence between random vectors introduced by Szekely, Rizzo, and Bakirov (2007). For all distributions with finite first moments, distance correlation \eqn{\mathcal R}{R} generalizes the idea of correlation in two fundamental ways: (1) \eqn{\mathcal R(X,Y)}{R(X,Y)} is defined for \eqn{X} and \eqn{Y} in arbitrary dimension. (2) \eqn{\mathcal R(X,Y)=0}{R(X,Y)=0} characterizes independence of \eqn{X} and \eqn{Y}. 
Characterization (2) also holds for powers of Euclidean distance
\eqn{\|x_i-x_j\|^s}{|x_i-x_j|^s}, where \eqn{0<s<2}{0<s<2}.

[the remainder of this help file and the beginning of the DESCRIPTION file are not recoverable from this copy]

energy/DESCRIPTION

Imports: Rcpp (>= 0.12.6), stats, boot, gsl
LinkingTo: Rcpp
Suggests: MASS, CompQuadForm
Depends: R (>= 2.10)
URL: https://github.com/mariarizzo/energy
License: GPL (>= 2)
LazyData: true
NeedsCompilation: yes
Repository: CRAN
RoxygenNote: 7.1.2
Packaged: 2022-01-25 00:51:42 UTC; maria
Author: Maria Rizzo [aut, cre],
  Gabor Szekely [aut]
Maintainer: Maria Rizzo <mrizzo@bgsu.edu>
Date/Publication: 2022-01-25 11:12:45 UTC

energy/build/partial.rdb  (binary build artifact; contents omitted)
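To illustrate the dCov and dCor permutation tests described in dcov.test.Rd above, a minimal sketch; the sample size and R = 199 replicates are arbitrary choices:

x <- matrix(rnorm(60), nrow = 30)
y <- x + matrix(rnorm(60), nrow = 30)   # y depends on x
dcov.test(x, y, R = 199)   # permutation test based on n * dCov^2
dcor.test(x, y, R = 199)   # same permutation scheme, dCor statistic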
energy/src/U-product.cpp

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
double U_product(NumericMatrix U, NumericMatrix V) {
  // U and V are U-centered dissimilarity matrices of the two samples
  int n = U.nrow();
  int i, j;
  double sums = 0.0;

  for (i = 0; i < n; i++)
    for (j = 0; j < i; j++)
      sums += U(i, j) * V(i, j);
  /* closing lines reconstructed (original loop bound, scaling and return
     were lost); this follows the U-centered inner product
     (1/(n(n-3))) * sum_{i != j} U_{ij} V_{ij} used by dcovU and bcdcor */
  return 2.0 * sums / ((double) n * (n - 3));
}

/* [the next file's path and the start of its header comment are missing;
   the comment resumes here]

 Created: 4 Jan 2004
 Updated: 2 April 2008    some functions moved to utilities.c
 Updated: 25 August 2016  mvnEstat converted to c++ in mvnorm.cpp
 Updated: 16 February 2021  poisMstat ported to Rcpp in poissonM.cpp

 ksampleEtest() performs the multivariate E-test for equal distributions,
                complete version, from data matrix
 E2sample()     computes the 2-sample E-statistic without creating distance
*/

#include <R.h>
#include <Rmath.h>   /* second header reconstructed; original name lost */

void ksampleEtest(double *x, int *byrow, int *nsamples, int *sizes, int *dim,
                  int *R, double *e0, double *e, double *pval);
void E2sample(double *x, int *sizes, int *dim, double *stat);

double edist(double **D, int m, int n);
double multisampleE(double **D, int nsamples, int *sizes, int *perm);
double twosampleE(double **D, int m, int n, int *xrows, int *yrows);
double E2(double **x, int *sizes, int *start, int ncol, int *perm);
double Eksample(double *x, int *byrow, int r, int d, int K, int *sizes, int *ix);
void distance(double **bxy, double **D, int N, int d);

/* utilities.c */
extern double **alloc_matrix(int r, int c);
extern int **alloc_int_matrix(int r, int c);
extern void free_matrix(double **matrix, int r, int c);
extern void free_int_matrix(int **matrix, int r, int c);
extern void permute(int *J, int n);
extern void roworder(double *x, int *byrow, int r, int c);
extern void vector2matrix(double *x, double **y, int N, int d, int isroworder);
extern void distance(double **bxy, double **D, int N, int d);
extern void Euclidean_distance(double *x, double **Dx, int n, int d);
extern void index_distance(double *x, double **Dx, int n, int d, double index);
extern void sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum);

void E2sample(double *x, int *sizes, int *dim, double *stat) {
    /* compute test statistic *stat for testing H:F=G
       does not store distance matrix
       x must be in row order: x=as.double(t(x)) where
       x is pooled sample in matrix sum(en) by dim */
    int m=sizes[0], n=sizes[1], d=(*dim);
    int i, j, k, p, q;
    double dif, dsum, sumxx, sumxy, sumyy, w;

    sumxy = 0.0;
    for (i=0; i
    /* [the rest of E2sample() and the opening of ksampleEtest() are missing
       from this copy; the surviving text resumes inside ksampleEtest()] */
         0) {
        data = alloc_matrix(N, d);   /* sample matrix */
        vector2matrix(x, data, N, d, *byrow);
        distance(data, D, N, d);
        free_matrix(data, N, d);
    }
    else
        vector2matrix(x, D, N, N, *byrow);

    *e0 = multisampleE(D, K, sizes, perm);

    /* bootstrap */
    if (B > 0) {
        ek = 0;
        GetRNGstate();
        for (b=0; b
    /* [the replication loop, the remainder of this file, and the header of
       the next C++ source file are missing from this copy] */

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
NumericMatrix calc_dist(NumericMatrix x) {
  int n = x.nrow(), d = x.ncol(), i, j, k;
  double dsum, dk;
  NumericMatrix Dx(n, n);
  for (i = 0; i < n; i++) {
    for (j = i; j < n; j++) {
      if (i == j) {
        Dx(i, i) = 0.0;
      } else {
        dsum = 0.0;
        for (k = 0; k < d; k++) {
          dk = x(i,k) - x(j,k);
          dsum += dk * dk;
        }
        Dx(i, j) = sqrt(dsum);
        Dx(j, i) = sqrt(dsum);
      }
    }
  }
  return Dx;
}
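The C routines above implement the k-sample energy test that backs eqdist.etest() and edist() at the R level. A minimal sketch of how they are typically called from R, assuming the standard calling convention documented in the eqdist.etest help page (sample sizes and R = 199 replicates are arbitrary):

x <- matrix(rnorm(100), nrow = 50)            # sample 1
y <- matrix(rnorm(100, mean = 1), nrow = 50)  # sample 2, shifted mean
pooled <- rbind(x, y)
eqdist.etest(pooled, sizes = c(50, 50), R = 199)  # test of equal distributions
edist(pooled, sizes = c(50, 50))                  # e-distances between the samples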
void *, void *, void *, void *); extern void indepEtest(void *, void *, void *, void *, void *, void *, void *); extern void ksampleEtest(void *, void *, void *, void *, void *, void *, void *, void *, void *); /* .Call calls */ extern SEXP _energy_D_center(SEXP); extern SEXP _energy_dcovU_stats(SEXP, SEXP); extern SEXP _energy_partial_dcor(SEXP, SEXP, SEXP); extern SEXP _energy_partial_dcov(SEXP, SEXP, SEXP); extern SEXP _energy_poisMstat(SEXP); extern SEXP _energy_projection(SEXP, SEXP); extern SEXP _energy_U_center(SEXP); extern SEXP _energy_U_product(SEXP, SEXP); extern SEXP _energy_Btree_sum(SEXP, SEXP); extern SEXP _energy_kgroups_start(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP _energy_calc_dist(SEXP); extern SEXP _energy_dCov2(SEXP, SEXP, SEXP); extern SEXP _energy_dCov2stats(SEXP, SEXP, SEXP); static const R_CMethodDef CEntries[] = { {"dCOV", (DL_FUNC) &dCOV, 7}, {"dCOVtest", (DL_FUNC) &dCOVtest, 8}, {"indepE", (DL_FUNC) &indepE, 5}, {"indepEtest", (DL_FUNC) &indepEtest, 7}, {"ksampleEtest", (DL_FUNC) &ksampleEtest, 9}, {NULL, NULL, 0} }; static const R_CallMethodDef CallEntries[] = { {"_energy_D_center", (DL_FUNC) &_energy_D_center, 1}, {"_energy_dcovU_stats", (DL_FUNC) &_energy_dcovU_stats, 2}, {"_energy_partial_dcor", (DL_FUNC) &_energy_partial_dcor, 3}, {"_energy_partial_dcov", (DL_FUNC) &_energy_partial_dcov, 3}, {"_energy_poisMstat", (DL_FUNC) &_energy_poisMstat, 1}, {"_energy_projection", (DL_FUNC) &_energy_projection, 2}, {"_energy_U_center", (DL_FUNC) &_energy_U_center, 1}, {"_energy_U_product", (DL_FUNC) &_energy_U_product, 2}, {"_energy_Btree_sum", (DL_FUNC) &_energy_Btree_sum, 2}, {"_energy_kgroups_start", (DL_FUNC) &_energy_kgroups_start, 5}, {"_energy_calc_dist", (DL_FUNC) &_energy_calc_dist, 1}, {NULL, NULL, 0} }; void R_init_energy(DllInfo *dll) { R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } energy/src/dcovU.cpp0000644000176200001440000000166514005374454014134 0ustar liggesusers#include using namespace Rcpp; NumericMatrix U_center(NumericMatrix); //[[Rcpp::export]] NumericVector dcovU_stats(NumericMatrix Dx, NumericMatrix Dy) { // x and y must be square distance matrices NumericMatrix A = U_center(Dx); NumericMatrix B = U_center(Dy); double ab = 0.0, aa = 0.0, bb = 0.0; double V, dcorU = 0.0; double eps = std::numeric_limits::epsilon(); //machine epsilon int n = Dx.nrow(); int n2 = n * (n - 3); for (int i=0; i eps) dcorU = ab / sqrt(V); return NumericVector::create( _["dCovU"] = ab, _["bcdcor"] = dcorU, _["dVarXU"] = aa, _["dVarYU"] = bb ); } energy/src/dcov.c0000644000176200001440000002514214005374454013443 0ustar liggesusers/* dcov.c: distance correlation and covariance statistics and dCov test for multivariate independence Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007) "Measuring and testing dependence by correlation of distances" Annals of Statistics, Vol. 35 No. 6, pp. 2769-2794. Software: Maria Rizzo mrizzo at bgsu.edu URL: personal.bgsu.edu/~mrizzo Notes: 1. The distance covariance dCov is not the test statistic. The test statistic is the V-statistic n*dCov^2 or nV^2. We use dCov^2 in the test and return the estimates dCov, dCor, dVarX, dVarY in dCOVtest. 2. dCOVtest is much faster than dCovTest The two methods of computing dCov^2 are algebraically equivalent. dCovTest is not used in the dcov package but kept here for validation and historical reasons. Also note that the returned objects are different types. 
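   3. In dCOVtest the statistic is computed from the double-centered
      distance matrices returned by Akl(): V_n^2 = (1/n^2) sum_{k,l} A_{kl} B_{kl}.
      dCovTest expands the same quantity directly in the raw pairwise
      distances (the sums S1, S2, S3 in its source).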
energy 1.3-0: Changes to support optionally passing distance matrices as arguments are made in dcov.c (and in utilities.c index_distance is revised). Note: argument "dims" has changed in version 1.3-0 energy 1.3-1: In case dcov=0, bypass the unnecessary loop to generate replicates (in dCOVtest and dCovTest) energy 1.6.2: Insert GetRNGstate() ... PutRNGstate() around replication loop */ #include #include void dCOVtest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *DCOV, double *pval); void dCovTest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *Dstat, double *pval); void dCOV(double *x, double *y, int *byrow, int *dims, double *index, int *idx, double *DCOV); double Akl(double **akl, double **A, int n); /* functions in utilities.c */ extern double **alloc_matrix(int r, int c); extern int **alloc_int_matrix(int r, int c); extern void free_matrix(double **matrix, int r, int c); extern void free_int_matrix(int **matrix, int r, int c); extern void permute(int *J, int n); extern void roworder(double *x, int *byrow, int r, int c); extern void Euclidean_distance(double *x, double **Dx, int n, int d); extern void index_distance(double **Dx, int n, double index); extern void vector2matrix(double *x, double **y, int N, int d, int isroworder); void dCOVtest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *DCOV, double *pval) { /* computes dCov(x,y), dCor(x,y), dVar(x), dVar(y) V-statistic is n*dCov^2 where n*dCov^2 --> Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = dst (logical, TRUE if x, y are distances) dims[4] = R (number of replicates) index : exponent for distance DCOV : vector [dCov, dCor, dVar(x), dVar(y), mean(A), mean(B)] */ int i, j, k, n, n2, p, q, r, J, K, M, R; int dst; int* perm; double **Dx, **Dy, **A, **B; double dcov, V; n = dims[0]; p = dims[1]; q = dims[2]; dst = dims[3]; R = dims[4]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } /* critical to pass correct flag dst from R */ Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); if (dst) { vector2matrix(x, Dx, n, n, 1); vector2matrix(y, Dy, n, n, 1); } else { Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); } index_distance(Dx, n, *index); index_distance(Dy, n, *index); A = alloc_matrix(n, n); B = alloc_matrix(n, n); Akl(Dx, A, n); Akl(Dy, B, n); free_matrix(Dx, n, n); free_matrix(Dy, n, n); n2 = ((double) n) * n; /* compute dCov(x,y), dVar(x), dVar(y) */ for (k=0; k<4; k++) DCOV[k] = 0.0; for (k=0; k 0) DCOV[k] = sqrt(DCOV[k]); else DCOV[k] = 0.0; } /* compute dCor(x, y) */ V = DCOV[2]*DCOV[3]; if (V > DBL_EPSILON) DCOV[1] = DCOV[0] / sqrt(V); else DCOV[1] = 0.0; if (R > 0) { /* compute the replicates */ if (DCOV[1] > 0.0) { perm = Calloc(n, int); M = 0; for (i=0; i= DCOV[0]) M++; } *pval = (double) (M+1) / (double) (R+1); PutRNGstate(); Free(perm); } else { *pval = 1.0; } } free_matrix(A, n, n); free_matrix(B, n, n); return; } void dCOV(double *x, double *y, int *byrow, int *dims, double *index, int *idx, double *DCOV) { /* computes dCov(x,y), dCor(x,y), dVar(x), dVar(y) V-statistic is n*dCov^2 where n*dCov^2 --> Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = dst (logical, TRUE if x, y are distances) index : exponent for distance idx : index vector, a permutation of sample indices DCOV : vector [dCov, dCor, 
dVar(x), dVar(y)] */ int j, k, n, n2, p, q, dst; double **Dx, **Dy, **A, **B; double V; n = dims[0]; p = dims[1]; q = dims[2]; dst = dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } /* critical to pass correct flag dst from R */ Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); if (dst) { vector2matrix(x, Dx, n, n, 1); vector2matrix(y, Dy, n, n, 1); } else { Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); } index_distance(Dx, n, *index); index_distance(Dy, n, *index); A = alloc_matrix(n, n); B = alloc_matrix(n, n); Akl(Dx, A, n); Akl(Dy, B, n); free_matrix(Dx, n, n); free_matrix(Dy, n, n); n2 = ((double) n) * n; /* compute dCov(x,y), dVar(x), dVar(y) */ for (k=0; k<4; k++) DCOV[k] = 0.0; for (k=0; k 0) DCOV[k] = sqrt(DCOV[k]); else DCOV[k] = 0.0; } /* compute dCor(x, y) */ V = DCOV[2]*DCOV[3]; if (V > DBL_EPSILON) DCOV[1] = DCOV[0] / sqrt(V); else DCOV[1] = 0.0; free_matrix(A, n, n); free_matrix(B, n, n); return; } double Akl(double **akl, double **A, int n) { /* -computes the A_{kl} or B_{kl} distances from the distance matrix (a_{kl}) or (b_{kl}) for dCov, dCor, dVar dCov = mean(Akl*Bkl), dVar(X) = mean(Akl^2), etc. */ int j, k; double *akbar; double abar; akbar = Calloc(n, double); abar = 0.0; for (k=0; k Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = B (number of replicates, dimension of reps) index : exponent for distance Dstat : the statistic dCov^2 (V_n^2) and S1, S2, S3 */ int b, i, j, k, n, p , q, B, I, J, M; int *perm; double Cx, Cy, Cxy, C3, S1, S2, S3, n2, n3; double **Dx, **Dy; n = dims[0]; p = dims[1]; q = dims[2]; B = dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); index_distance(Dx, n, *index); index_distance(Dy, n, *index); Cx = Cy = Cxy = C3 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; /* compute observed test statistic */ for (i=0; i 0) { GetRNGstate(); if (Dstat[0] > 0.0) { perm = Calloc(n, int); M = 0; for (i=0; i= (*Dstat)) M++; } *pval = (double) (M+1) / (double) (B+1); PutRNGstate(); Free(perm); } else { *pval = 1.0; } } /* test statistic (the V-statistic) is nV_n^2 = n*Dstat[0] a normalized version is n*Dstat[0]/Dstat[2] */ free_matrix(Dx, n, n); free_matrix(Dy, n, n); return; } energy/src/projection.cpp0000644000176200001440000000171414005411516015212 0ustar liggesusers#include using namespace Rcpp; NumericMatrix U_center(NumericMatrix); double U_product(NumericMatrix, NumericMatrix); // [[Rcpp::export]] NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz) { /* returns the projection of A(x) distance matrix Dx onto the orthogonal complement of C(z) distance matrix; both Dx and Dz are n by n distance or dissimilarity matrices the projection is an n by n matrix */ int n = Dx.nrow(); int i, j; NumericMatrix A(n, n), C(n, n), P(n, n); double AC, CC, c1; double eps = std::numeric_limits::epsilon(); //machine epsilon A = U_center(Dx); // U-centering to get A^U etc. 
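  // Next: U-center Dz, then form the inner products (A,C) and (C,C);
  // with c1 = (A,C)/(C,C) (taken as 0 when (C,C) is 0), the returned
  // projection is P = A - c1*C, the component of A orthogonal to C.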
C = U_center(Dz); AC = U_product(A, C); // (A,C) = dcov^U CC = U_product(C, C); c1 = 0.0; // if (C,C)==0 then C==0 so c1=(A,C)=0 if (fabs(CC) > eps) c1 = AC / CC; for (i=0; i using namespace Rcpp; int kgroups_update(NumericMatrix x, int k, IntegerVector clus, IntegerVector sizes, NumericVector within, bool distance); List kgroups_start(NumericMatrix x, int k, IntegerVector clus, int iter_max, bool distance); int kgroups_update(NumericMatrix x, int k, IntegerVector clus, IntegerVector sizes, NumericVector w, bool distance) { /* * k-groups one pass through sample moving one point at a time * x: data matrix or distance * k: number of clusters * clus: clustering vector clus(i)==j ==> x_i is in cluster j * sizes: cluster sizes * within: vector of within cluster dispersions * distance: true if x is distance matrix * update clus, sizes, and withins * return count = number of points moved */ int n = x.nrow(), d = x.ncol(); int i, j, I, J, ix, nI, nJ; NumericVector rowdst(k), e(k); int best, count = 0; double dsum, dif; for (ix = 0; ix < n; ix++) { I = clus(ix); nI = sizes(I); if (nI > 1) { // calculate the E-distances of this point to each cluster rowdst.fill(0.0); for (i = 0; i < n; i++) { J = clus(i); if (distance == true) { rowdst(J) += x(ix, i); } else { dsum = 0.0; for (j = 0; j < d; j++) { dif = x(ix, j) - x(i, j); dsum += dif * dif; } rowdst(J) += sqrt(dsum); } } for (J = 0; J < k; J++) { nJ = sizes(J); e(J) = (2.0 / (double) nJ) * (rowdst(J) - w(J)); } best = Rcpp::which_min(e); if (best != I) { // move this point and update nI = sizes(I); nJ = sizes(best); w(best) = (((double) nJ) * w(best) + rowdst(best)) / ((double) (nJ + 1)); w(I) = (((double) nI) * w(I) - rowdst(I)) / ((double) (nI - 1)); clus(ix) = best; sizes(I) = nI - 1; sizes(best) = nJ + 1; count ++; // number of moves } } } return count; } // [[Rcpp::export]] List kgroups_start(NumericMatrix x, int k, IntegerVector clus, int iter_max, bool distance) { // k-groups clustering with initial clustering vector clus // up to iter_max iterations of n possible moves each // distance: true if x is distance matrix NumericVector within(k, 0.0); IntegerVector sizes(k, 0); double dif, dsum; int I, J, h, i, j; int n = x.nrow(), d = x.ncol(); for (i = 0; i < n; i++) { I = clus(i); sizes(I)++; for (j = 0; j < i; j++) { J = clus(j); if (I == J) { if (distance == true) { within(I) += x(i, j); } else { dsum = 0.0; for (h = 0; h < d; h++) { dif = x(i, h) - x(j, h); dsum += dif * dif; } within(I) += sqrt(dsum); } } } } for (I = 0; I < k; I++) within(I) /= ((double) sizes(I)); int it = 1, count = 1; count = kgroups_update(x, k, clus, sizes, within, distance); while (it < iter_max && count > 0) { count = kgroups_update(x, k, clus, sizes, within, distance); it++; } double W = Rcpp::sum(within); return List::create( _["within"] = within, _["W"] = W, _["sizes"] = sizes, _["cluster"] = clus, _["iterations"] = it, _["count"] = count); } energy/src/centering.cpp0000644000176200001440000000331214005411463015011 0ustar liggesusers// double centering utilities for the energy package // // Maria L. Rizzo // August, 2016 #include using namespace Rcpp; NumericMatrix D_center(NumericMatrix Dx); NumericMatrix U_center(NumericMatrix Dx); // [[Rcpp::export]] NumericMatrix D_center(NumericMatrix Dx) { /* computes the double centered distance matrix for distance matrix Dx for dCov, dCor, etc. 
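     (Dcenter() in R/centering.R is an equivalent reference implementation in R;
     U_center below differs in its denominators, n-1 and n-2 in place of n,
     and in zeroing the diagonal.)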
a_{ij} - a_{i.}/n - a_{.j}/n + a_{..}/n^2, all i, j */ int j, k; int n = Dx.nrow(); NumericVector akbar(n); NumericMatrix A(n, n); double abar = 0.0; for (k=0; k using namespace Rcpp; // compute partial sum using binary search algorithm like AVL // pre-compute powers of two to save repeated calculations IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum); NumericVector gamma1_direct(IntegerVector y, NumericVector z); IntegerVector p2sum(IntegerVector pwr2); IntegerVector powers2 (int L); NumericVector rowsumsDist(NumericVector x, NumericVector sorted, IntegerVector ranks); IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum); // [[Rcpp::export]] NumericVector Btree_sum (IntegerVector y, NumericVector z) { // // y is a permutation of the integers 1:n // z is a numeric vector of length n // compute gamma1(i) = sum(j 0) gamma1(i) += sums(node); } } return gamma1; } IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum) { /* * get the indices of all nodes of binary tree whose closed * intervals contain integer y */ int i, L = pwr2.length(); IntegerVector nodes(L); nodes(0) = y; for (i = 0; i < L-1; i++) { nodes(i+1) = ceil((double) y / pwr2(i)) + psum(i); } return nodes; } IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum) { /* * get indices of nodes whose intervals disjoint union is 1:y */ int L = psum.length(); int idx, k, level, p2; IntegerVector nodes(L); std::fill(nodes.begin(), nodes.end(), -1L); k = y; for (level = L - 1; level > 0; level --) { p2 = pwr2(level - 1); if (k >= p2) { // at index of left node plus an offset idx = psum(level - 1) + (y / p2); nodes(L - level - 1) = idx; k -= p2; } } if (k > 0) nodes(L - 1) = y; return nodes; } IntegerVector powers2 (int L) { // (2, 4, 8, ..., 2^L, 2^(L+1)) int k; IntegerVector pwr2(L); pwr2(0) = 2; for (k = 1; k < L; k++) pwr2(k) = pwr2(k-1) * 2; return pwr2; } IntegerVector p2sum(IntegerVector pwr2) { // computes the cumsum of 2^L, 2^(L-1), ..., 2^2, 2 int i, L = pwr2.length(); IntegerVector psum(L); std::fill(psum.begin(), psum.end(), pwr2(L-1)); for (i = 1; i < L; i++) psum(i) = psum(i-1) + pwr2(L-i-1); return psum; } NumericVector gamma1_direct(IntegerVector y, NumericVector z) { // utility: direct computation of the sum gamm1 // for the purpose of testing and benchmarks int n = y.length(); int i, j; NumericVector gamma1(n); for (i = 1; i < n; i++) { for (j = 0; j < i; j++) { if (y(j) < y(i)) { gamma1(i) += z(j); } } } return gamma1; } energy/src/partial-dcor.cpp0000644000176200001440000000522514005411576015426 0ustar liggesusers#include using namespace Rcpp; NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz); double partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz); NumericMatrix U_center(NumericMatrix); double U_product(NumericMatrix U, NumericMatrix V); NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz); // [[Rcpp::export]] NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz) { /* partial distance correlation, second formulation Dx, Dy, Dz are symmetric distance or dissimilarity matrices with zero diagonals partial_dcor : vector length 4, partial_dcor[0] is pdcor partial_dcor returns vector [Rxyz, Rxy, Rxz, Ryz] starred versions */ int n = Dx.nrow(); NumericMatrix A(n, n), B(n, n), C(n, n); double Rxy=0.0, Rxz=0.0, Ryz=0.0, Rxyz=0.0, den; double AB, AC, BC, AA, BB, CC, pDCOV; double eps = std::numeric_limits::epsilon(); //machine epsilon A = U_center(Dx); /* 
U-centering to get A^U etc. */ B = U_center(Dy); C = U_center(Dz); AB = U_product(A, B); AC = U_product(A, C); BC = U_product(B, C); AA = U_product(A, A); BB = U_product(B, B); CC = U_product(C, C); pDCOV = U_product(projection(Dx, Dz), projection(Dy, Dz)); den = sqrt(AA*BB); if (den > eps) Rxy = AB / den; den = sqrt(AA*CC); if (den > eps) Rxz = AC / den; den = sqrt(BB*CC); if (den > eps) Ryz = BC / den; den = sqrt(1 - Rxz*Rxz) * sqrt(1 - Ryz * Ryz); if (den > eps) Rxyz = (Rxy - Rxz * Ryz) / den; else { Rxyz = 0.0; } return NumericVector::create( _["pdcor"] = Rxyz, _["pdcov"] = pDCOV, _["Rxy"] = Rxy, _["Rxz"] = Rxz, _["Ryz"] = Ryz ); } //[[Rcpp::export]] double partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz) { /* pdcov following the definition via projections Dx, Dy, Dz are symmetric distance or dissimilarity matrices with zero diagonals returns pdcov sample coefficient */ int n = Dx.nrow(); int i, j; NumericMatrix A(n, n), B(n, n), C(n, n), Pxz(n, n), Pyz(n, n); double AC, BC, CC, c1, c2; double eps = std::numeric_limits::epsilon(); //machine epsilon A = U_center(Dx); /* U-centering to get A^U etc. */ B = U_center(Dy); C = U_center(Dz); AC = U_product(A, C); BC = U_product(B, C); CC = U_product(C, C); c1 = c2 = 0.0; // if (C,C)==0 then C=0 and both (A,C)=0 and (B,C)=0 if (fabs(CC) > eps) { c1 = AC / CC; c2 = BC / CC; } for (i=0; i Created: June 15, 2004 (development) Last Modified: April 5, 2008 */ #include #include void indepE(double *x, double *y, int *byrow, int *dims, double *Istat); void indepEtest(double *x, double *y, int *byrow, int *dims, double *Istat, double *reps, double *pval); void squared_distance(double *x, double **D, int n, int d); extern double **alloc_matrix(int r, int c); extern int **alloc_int_matrix(int r, int c); extern void free_matrix(double **matrix, int r, int c); extern void free_int_matrix(int **matrix, int r, int c); extern void permute(int *J, int n); extern void roworder(double *x, int *byrow, int r, int c); extern void Euclidean_distance(double *x, double **D, int n, int d); void indepE(double *x, double *y, int *byrow, int *dims, double *Istat) { /* E statistic for multiv. indep. 
of X in R^p and Y in R^q statistic returned is I_n^2 [nI_n^2 has a limit dist under indep] dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) Istat : the statistic I_n (normalized) */ int i, j, k, m, n, p, q; double Cx, Cy, Cz, C3, C4, n2, n3, n4, v; double **D2x, **D2y; n = dims[0]; p = dims[1]; q = dims[2]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } D2x = alloc_matrix(n, n); D2y = alloc_matrix(n, n); Euclidean_distance(x, D2x, n, p); Euclidean_distance(y, D2y, n, q); Cx = Cy = Cz = C3 = C4 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; n4 = n2 * n2; /* compute observed test statistic */ for (i=0; i Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = B (number of replicates, dimension of reps) Istat : the statistic I_n (normalized) */ int b, i, j, k, m, n, p, q, B, M; int *perm; double Cx, Cy, Cz, C3, C4, n2, n3, n4, v; double **D2x, **D2y; n = dims[0]; p = dims[1]; q = dims[2]; B = dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } D2x = alloc_matrix(n, n); D2y = alloc_matrix(n, n); squared_distance(x, D2x, n, p); squared_distance(y, D2y, n, q); Cx = Cy = Cz = C3 = C4 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; n4 = n2 * n2; /* compute observed test statistic */ for (i=0; i 0) { GetRNGstate(); perm = Calloc(n, int); for (i=0; i= (*Istat)) M++; } *pval = (double) M / (double) B; PutRNGstate(); Free(perm); } free_matrix(D2x, n, n); free_matrix(D2y, n, n); return; } void squared_distance(double *x, double **D2, int n, int d) { /* interpret x as an n by d matrix, in row order (n vectors in R^d) compute the squared distance matrix D2 */ int i, j, k, p, q; double dsum, dif; for (i=1; i using namespace Rcpp; // [[Rcpp::export(.poisMstat)]] NumericVector poisMstat(IntegerVector x) { /* computes the Poisson mean distance statistic */ int i, j, k, n=x.size(); double eps=1.0e-10; double ad, cvm, d, lambda, m, q; double Mcdf1, Mcdf0, Mpdf1, cdf1, cdf0; NumericVector stats(2); lambda = mean(x); q = R::qpois(1.0-eps, lambda, TRUE, FALSE) + 1; m = 0.0; for (j=0; j 1) Mcdf1 = 1.0; cdf1 = R::ppois(i, lambda, TRUE, FALSE); /* MLE of F(i) */ d = Mcdf1 - cdf1; cvm += d * d * (cdf1 - cdf0); ad += d * d * (cdf1 - cdf0) / (cdf1 * (1-cdf1)); cdf0 = cdf1; Mcdf0 = Mcdf1; } cvm *= n; ad *= n; stats(0) = cvm; stats(1) = ad; return stats; } energy/src/utilities.c0000644000176200001440000001402414011761630014511 0ustar liggesusers/* utilities.c: some utilities for the energy package Author: Maria L. Rizzo (see energy package on CRAN or at personal.bgsu.edu/~mrizzo) alloc_matrix, alloc_int_matrix, free_matrix, free_int_matrix: use R (Calloc, Free) instead of C (calloc, free) for memory management permute permutes the first n elements of an integer vector row_order converts arg from column order to row order vector2matrix copies double* arg into double** arg distance computes Euclidean distance matrix from double** Euclidean_distance computes Euclidean distance matrix from double* index_distance computes Euclidean distance matrix D then D^index sumdist sums the distance matrix without creating the matrix Notes: 1. index_distance (declaration and body of the function) revised in energy 1.3-0, 2/2011. 
*/ #include #include double **alloc_matrix(int r, int c); int **alloc_int_matrix(int r, int c); void free_matrix(double **matrix, int r, int c); void free_int_matrix(int **matrix, int r, int c); void permute(int *J, int n); void permute_check(int *J, int *N); void roworder(double *x, int *byrow, int r, int c); void vector2matrix(double *x, double **y, int N, int d, int isroworder); void distance(double **bxy, double **D, int N, int d); void Euclidean_distance(double *x, double **Dx, int n, int d); void index_distance(double **Dx, int n, double index); void sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum); double **alloc_matrix(int r, int c) { /* allocate a matrix with r rows and c columns */ int i; double **matrix; matrix = Calloc(r, double *); for (i = 0; i < r; i++) matrix[i] = Calloc(c, double); return matrix; } int **alloc_int_matrix(int r, int c) { /* allocate an integer matrix with r rows and c columns */ int i; int **matrix; matrix = Calloc(r, int *); for (i = 0; i < r; i++) matrix[i] = Calloc(c, int); return matrix; } void free_matrix(double **matrix, int r, int c) { /* free a matrix with r rows and c columns */ int i; for (i = 0; i < r; i++) Free(matrix[i]); Free(matrix); } void free_int_matrix(int **matrix, int r, int c) { /* free an integer matrix with r rows and c columns */ int i; for (i = 0; i < r; i++) Free(matrix[i]); Free(matrix); } void permute(int *J, int n) { /* permute the first n integers of J if n is length(J), equivalent to R: J <- rev(sample(J, length(J), replace=FALSE)) */ int i, j, j0, m=n; for (i=0; i DBL_EPSILON) { for (i=0; i do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // Btree_sum NumericVector Btree_sum(IntegerVector y, NumericVector z); RcppExport SEXP _energy_Btree_sum(SEXP ySEXP, SEXP zSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< IntegerVector >::type y(ySEXP); Rcpp::traits::input_parameter< NumericVector >::type z(zSEXP); rcpp_result_gen = Rcpp::wrap(Btree_sum(y, z)); return rcpp_result_gen; END_RCPP } // calc_dist NumericMatrix calc_dist(NumericMatrix x); RcppExport SEXP _energy_calc_dist(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(calc_dist(x)); return rcpp_result_gen; END_RCPP } // U_product double U_product(NumericMatrix U, NumericMatrix V); RcppExport SEXP _energy_U_product(SEXP USEXP, SEXP VSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type U(USEXP); Rcpp::traits::input_parameter< NumericMatrix >::type V(VSEXP); rcpp_result_gen = Rcpp::wrap(U_product(U, V)); return rcpp_result_gen; END_RCPP } // D_center NumericMatrix D_center(NumericMatrix Dx); RcppExport SEXP _energy_D_center(SEXP DxSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP); rcpp_result_gen = Rcpp::wrap(D_center(Dx)); return rcpp_result_gen; END_RCPP } // U_center NumericMatrix U_center(NumericMatrix Dx); RcppExport SEXP _energy_U_center(SEXP DxSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix 
>::type Dx(DxSEXP); rcpp_result_gen = Rcpp::wrap(U_center(Dx)); return rcpp_result_gen; END_RCPP } // dcovU_stats NumericVector dcovU_stats(NumericMatrix Dx, NumericMatrix Dy); RcppExport SEXP _energy_dcovU_stats(SEXP DxSEXP, SEXP DySEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP); rcpp_result_gen = Rcpp::wrap(dcovU_stats(Dx, Dy)); return rcpp_result_gen; END_RCPP } // kgroups_start List kgroups_start(NumericMatrix x, int k, IntegerVector clus, int iter_max, bool distance); RcppExport SEXP _energy_kgroups_start(SEXP xSEXP, SEXP kSEXP, SEXP clusSEXP, SEXP iter_maxSEXP, SEXP distanceSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); Rcpp::traits::input_parameter< IntegerVector >::type clus(clusSEXP); Rcpp::traits::input_parameter< int >::type iter_max(iter_maxSEXP); Rcpp::traits::input_parameter< bool >::type distance(distanceSEXP); rcpp_result_gen = Rcpp::wrap(kgroups_start(x, k, clus, iter_max, distance)); return rcpp_result_gen; END_RCPP } // partial_dcor NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz); RcppExport SEXP _energy_partial_dcor(SEXP DxSEXP, SEXP DySEXP, SEXP DzSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP); rcpp_result_gen = Rcpp::wrap(partial_dcor(Dx, Dy, Dz)); return rcpp_result_gen; END_RCPP } // partial_dcov double partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz); RcppExport SEXP _energy_partial_dcov(SEXP DxSEXP, SEXP DySEXP, SEXP DzSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP); rcpp_result_gen = Rcpp::wrap(partial_dcov(Dx, Dy, Dz)); return rcpp_result_gen; END_RCPP } // poisMstat NumericVector poisMstat(IntegerVector x); RcppExport SEXP _energy_poisMstat(SEXP xSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< IntegerVector >::type x(xSEXP); rcpp_result_gen = Rcpp::wrap(poisMstat(x)); return rcpp_result_gen; END_RCPP } // projection NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz); RcppExport SEXP _energy_projection(SEXP DxSEXP, SEXP DzSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP); rcpp_result_gen = Rcpp::wrap(projection(Dx, Dz)); return rcpp_result_gen; END_RCPP } energy/R/0000755000176200001440000000000014173644547011763 5ustar liggesusersenergy/R/centering.R0000644000176200001440000000127614005374454014061 0ustar liggesusers## use the Rcpp exported function U_center or D_center ## the utilities in this file are provided for reference and historical reasons Dcenter <- function(x) { ## x is a dist object or data matrix if (!inherits(x, "dist")) x <- dist(x) d <- as.matrix(x) n <- nrow(d) m <- rowSums(d) M <- sum(m) / n^2 
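  ## M is the grand mean a../n^2 of the distance matrix entries; dividing
  ## the row sums by n gives the row (= column) means, which sweep()
  ## subtracts below before M is added back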
m <- m / n a <- sweep(d, 1, m) b <- sweep(a, 2, m) B <- b + M } Ucenter <- function(x) { ## x is a dist object or data matrix if (!inherits(x, "dist")) x <- dist(x) d <- as.matrix(x) n <- nrow(d) m <- rowSums(d) M <- sum(m) / ((n-1)*(n-2)) m <- m / (n-2) a <- sweep(d, 1, m) b <- sweep(a, 2, m) B <- b + M diag(B) <- 0 B } energy/R/dcorT.R0000644000176200001440000000432014005374454013147 0ustar liggesusers### dcorT.R ### implementation of the distance correlation t-test ### for high dimension Astar <- function(d) { ## d is a distance matrix or distance object ## modified or corrected doubly centered distance matrices ## denoted A* (or B*) in JMVA t-test paper (2013) if (inherits(d, "dist")) d <- as.matrix(d) n <- nrow(d) if (n != ncol(d)) stop("Argument d should be distance") m <- rowMeans(d) M <- mean(d) a <- sweep(d, 1, m) b <- sweep(a, 2, m) A <- b + M #same as plain A #correction to get A^* A <- A - d/n diag(A) <- m - M (n / (n-1)) * A } BCDCOR <- function(x, y) { ## compute bias corrected distance correlation ## internal function not in NAMESPACE (external: use bcdcor) ## revised version from v. 1.7-7 if (!inherits(x, "dist")) { x <- as.matrix(dist(x)) } else { x <- as.matrix(x) } if (!inherits(y, "dist")) { y <- as.matrix(dist(y)) } else { y <- as.matrix(y) } n <- NROW(x) AA <- Astar(x) BB <- Astar(y) XY <- sum(AA*BB) - (n/(n-2)) * sum(diag(AA*BB)) XX <- sum(AA*AA) - (n/(n-2)) * sum(diag(AA*AA)) YY <- sum(BB*BB) - (n/(n-2)) * sum(diag(BB*BB)) list(bcR=XY / sqrt(XX*YY), XY=XY/n^2, XX=XX/n^2, YY=YY/n^2, n=n) } dcorT <- function(x, y) { # computes the t statistic for corrected high-dim dCor # should be approximately student T # x and y are observed samples or distance objects r <- BCDCOR(x, y) Cn <- r$bcR n <- r$n M <- n*(n-3)/2 sqrt(M-1) * Cn / sqrt(1-Cn^2) } dcorT.test <- function(x, y) { # x and y are observed samples or distance objects dname <- paste(deparse(substitute(x)),"and", deparse(substitute(y))) stats <- BCDCOR(x, y) bcR <- stats$bcR n <- stats$n M <- n * (n-3) / 2 df <- M - 1 names(df) <- "df" tstat <- sqrt(M-1) * bcR / sqrt(1-bcR^2) names(tstat) <- "T" estimate <- bcR names(estimate) <- "Bias corrected dcor" pval <- 1 - pt(tstat, df=df) method <- "dcor t-test of independence for high dimension" rval <- list(statistic = tstat, parameter = df, p.value = pval, estimate=estimate, method=method, data.name=dname) class(rval) <- "htest" return(rval) } energy/R/dcovu.R0000644000176200001440000000202214005374454013211 0ustar liggesusers## dcovu.R ## unbiased dcov^2 and bias-corrected dcor^2 ## bcdcor <- function(x, y) { ## compute bias corrected distance correlation dcorU(x, y) } dcovU <- function(x, y) { ## unbiased dcov^2 if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) n <- nrow(x) m <- nrow(y) if (n != m) stop("sample sizes must agree") if (! (all(is.finite(c(x, y))))) stop("data contains missing or infinite values") estimates <- dcovU_stats(x, y) #RcppExports return (estimates[1]) } dcorU <- function(x, y) { ## unbiased dcov^2 if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) n <- nrow(x) m <- nrow(y) if (n != m) stop("sample sizes must agree") if (! 
(all(is.finite(c(x, y))))) stop("data contains missing or infinite values") estimates <- dcovU_stats(x, y) #RcppExports return (estimates[2]) } energy/R/dcov2d.R0000644000176200001440000001224414005374454013261 0ustar liggesusersdcor2d<- function(x, y, type = c("V", "U")) { ## computes dcor^2 or bias-corrected dcor^2 by O(n log n) algorithm ## bivariate data only: (x,y) in R^2 ## should be faster than direct calc. for big n type <- match.arg(type) ## argument checking in dcov2d stat <- dcov2d(x, y, type, all.stats=TRUE) dvarX <- stat[2] dvarY <- stat[3] R2 <- 0.0 if (abs(dvarX*dvarY > 10*.Machine$double.eps)) R2 <- stat[1] / sqrt(dvarX*dvarY) return (R2) } dcov2d<- function(x, y, type=c("V", "U"), all.stats=FALSE) { ## O(n log n) computation of dcovU or dcov^2 (V^2) for (x, y) in R^2 only type <- match.arg(type) if (!is.vector(x) || !is.vector(y)) { if (NCOL(x) > 1 || NCOL(y) > 1) stop("this method is only for univariate x and y") } x <- as.vector(x) y <- as.vector(y) n <- length(x) if (n != length(y)) stop("sample sizes must agree") Sums <- .dcovSums2d(x, y, all.sums=all.stats) if (type =="V") { d1 <- n^2 d2 <- n^3 d3 <- n^4 } else { d1 <- n * (n - 3) d2 <- d1 * (n - 2) d3 <- d2 * (n - 1) } dCov2d <- Sums$S1/d1 - 2*Sums$S2/d2 + Sums$S3/d3 if (all.stats) { dvarX <- Sums$S1a/d1 - 2*Sums$S2a/d2 + Sums$S3a/d3 dvarY <- Sums$S1b/d1 - 2*Sums$S2b/d2 + Sums$S3b/d3 } rval <- ifelse(type=="V", c(V=dCov2d), c(U=dCov2d)) if (all.stats) rval <- c(rval, dvarX=dvarX, dvarY=dvarY) return (rval) } .dcovSums2d <- function(x, y, all.sums = FALSE) { ## compute the sums S1, S2, S3 of distances for dcov^2 ## dCov^2 <- S1/d1 - 2 * S2/d2 + S3/d3 ## denominators differ for U-statistic, V-statisic ## if all.sums==TRUE, also return sums for dVar and kernel if (is.matrix(x) || is.matrix(y)) { if (ncol(x) > 1 || ncol(y) > 1) stop("Found multivariate (x,y) in .dcovSums2d, expecting bivariate") } n <- length(x) SRx <- sortrank(x) SRy <- sortrank(y) ## compute the rowSums of the distance matrices a. <- .rowSumsDist1(x, SRx) b. <- .rowSumsDist1(y, SRy) S2 <- sum(a. * b.) a.. <- sum(a.) b.. <- sum(b.) S3 <- sum(a.) * sum(b.) ## also need order and rank for y[order(x)] in gamma1() x1 <- SRx$x y1 <- y[SRx$ix] SRy1 <- sortrank(y1) ones <- rep(1, n) g_1 <- .gamma1(x1=x1, y1=y1, z1=ones, SRx=SRx, SRy1=SRy1) g_x <- .gamma1(x1=x1, y1=y1, z1=x1, SRx=SRx, SRy1=SRy1) g_y <- .gamma1(x1=x1, y1=y1, z1=y1, SRx=SRx, SRy1=SRy1) g_xy <- .gamma1(x1=x1, y1=y1, z1=x1*y1, SRx=SRx, SRy1=SRy1) S1 <- sum(x * y * g_1 + g_xy - x * g_y - y * g_x) L <- list(S1=S1, S2=S2, S3=S3, S1a=NA, S1b=NA, S2a=NA, S2b=NA, S3a=NA, S3b=NA, rowsumsA=NA, rowsumsB=NA, sumA=NA, sumB=NA) if (all.sums) { L$S1a <- 2 * n * (n-1) * var(x) L$S1b <- 2 * n * (n-1) * var(y) L$S2a <- sum(a.^2) L$S2b <- sum(b.^2) L$S3a <- a..^2 L$S3b <- b..^2 L$rowsumsA <- a. L$rowsumsB <- b. L$sumA <- a.. L$sumB <- b.. } return (L); } .dvarU2 <- function(x, SRx = NULL) { ## O(n log n) computation of dvarU for univariate x only ## this is an internal function that will do a stand-alone dVar calc. ## but it is not faster than dcovU2(x, x) unless we supply ## the precomputed sort + rank results in SRx n <- length(x) ## compute the rowSums of the distance matrices if (is.null(SRx)) SRx <- sortrank(x) a. <- .rowSumsDist1(x, SRx) S2 <- sum(a. * a.) 
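  ## S2 is the sum of squared row sums of the distance matrix (sum_i a_i.^2);
  ## S3 on the next line is the squared grand sum a..^2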
S3 <- sum(a.)^2 ## also need order and rank for y[order(x)] in gamma1() x1 <- SRx$x x2 <- x1 SRx1 <- sortrank(x1) ones <- rep(1, n) g_1 <- .gamma1(x1=x1, y1=x2, z1=ones, SRx, SRx1) g_x <- .gamma1(x1=x1, y1=x2, z1=x1, SRx, SRx1) g_xx <- .gamma1(x1=x1, y1=x2, z1=x1*x2, SRx, SRx1) S1 <- sum(x^2 * g_1 + g_xx - 2 * x * g_x) d1 <- n * (n - 3) d2 <- d1 * (n - 2) d3 <- d2 * (n - 1) dVar <- S1/d1 - 2 * S2/d2 + S3/d3 return(dVar) } .gamma1 <- function(x1, y1, z1, SRx, SRy1) { # computes the terms of the sum (ab) in dcovU # original sample (x_i, y_i, z_i) # triples (x1_i, y1_i, z1_i) are sorted by ix=order(x) # SRx is the result of sortrank(x), original order # SRy1 is the result of sortrank(y1), y1=y[order(x)] # pre-compute SRx, SRy1 to avoid repeated sort and rank # n <- length(x1) ix <- SRx$ix #order(x) rankx <- SRx$r #ranks of original sample x ## ranks and order vector for this permutation of sample y1 iy1 <- SRy1$ix #order(y1) ranky1 <- SRy1$r #rank(y1) ## the partial sums in the formula g_1 psumsy1 <- (cumsum(as.numeric(z1[iy1])) - z1[iy1])[ranky1] psumsx1 <- cumsum(as.numeric(z1)) - z1 gamma1 <- Btree_sum(y=ranky1, z=z1) #y1 replaced by rank(y1) g <- sum(z1) - z1 - 2 * psumsx1 - 2 * psumsy1 + 4 * gamma1 g <- g[rankx] } .rowSumsDist1 <- function(x, Sx = NULL) { ## for univariate samples, equivalent to rowSums(as.matrix(dist(x))) ## but much faster ## Sx is a sortrank object usually pre-computed here ## x is the data vector, Sx$x is sort(x) if (is.null(Sx)) Sx <- sortrank(x) n <- length(x) r <- Sx$r #ranks z <- Sx$x #ordered sample x psums1 <- (cumsum(as.numeric(z)) - z)[r] (2*(r-1)-n)*x + sum(x) - 2*psums1 } energy/R/dcov.R0000644000176200001440000001112014173557601013026 0ustar liggesusersdcov.test <- function(x, y, index=1.0, R=NULL) { ## check for valid number of replicates R method <- "Specify the number of replicates R (R > 0) for an independence test" if (! is.null(R)) { R <- floor(R) if (R < 1) R <- 0 if (R > 0) method <- "dCov independence test (permutation test)" } else { R <- 0 } # distance covariance test for multivariate independence if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) dst <- TRUE n <- nrow(x) m <- nrow(y) if (n != m) stop("Sample sizes must agree") if (! 
(all(is.finite(c(x, y))))) stop("Data contains missing or infinite values") stat <- dcorr <- reps <- 0 dcov <- rep(0, 4) if (R > 0) reps <- rep(0, R) pval <- 1 dims <- c(n, ncol(x), ncol(y), dst, R) # dcov = [dCov,dCor,dVar(x),dVar(y)] a <- .C("dCOVtest", x = as.double(t(x)), y = as.double(t(y)), byrow = as.integer(TRUE), dims = as.integer(dims), index = as.double(index), reps = as.double(reps), DCOV = as.double(dcov), pval = as.double(pval), PACKAGE = "energy") # test statistic is n times the square of dCov statistic stat <- n * a$DCOV[1]^2 dcorr <- a$DCOV V <- dcorr[[1]] names(stat) <- "nV^2" names(V) <- "dCov" dataname <- paste("index ", index, ", replicates ", R, sep="") pval <- ifelse (R < 1, NA, a$pval) e <- list( statistic = stat, method = method, estimate = V, estimates = dcorr, p.value = pval, replicates = n* a$reps^2, n = n, data.name = dataname) class(e) <- "htest" return(e) } dcor.test <- function(x, y, index=1.0, R) { # distance correlation test for multivariate independence # like dcov.test but using dcor as the test statistic if (missing(R)) R <- 0 R <- ifelse(R > 0, floor(R), 0) RESULT <- dcov.test(x, y, index=index, R) # this test statistic is n times the square of dCov statistic DCOVteststat <- RESULT$statistic DCOVreplicates <- RESULT$replicates # RESULT$estimates = [dCov,dCor,dVar(x),dVar(y)] # dVar are invariant under permutation of sample indices estimates = RESULT$estimates names(estimates) <- c("dCov", "dCor", "dVar(X)", "dVar(Y)") DCORteststat <- RESULT$estimates[2] dvarX <- RESULT$estimates[3] dvarY <- RESULT$estimates[4] n <- RESULT$n if (R > 0) { DCORreps <- sqrt(DCOVreplicates / n) / sqrt(dvarX * dvarY) p.value <- (1 + sum(DCORreps >= DCORteststat)) / (1 + R) } else { p.value <- NA DCORreps <- NA } names(DCORteststat) <- "dCor" dataname <- paste("index ", index, ", replicates ", R, sep="") method <- ifelse(R > 0, "dCor independence test (permutation test)", "Specify the number of replicates R>0 for an independence test") e <- list( method = method, statistic = DCORteststat, estimates = estimates, p.value = p.value, replicates = DCORreps, n = n, data.name = dataname) class(e) <- "htest" return(e) } .dcov <- function(x, y, index=1.0) { # distance covariance statistic for independence # dcov = [dCov,dCor,dVar(x),dVar(y)] (vector) # this function provides the fast method for computing dCov # it is called by the dcov and dcor functions if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) dst <- TRUE n <- nrow(x) m <- nrow(y) if (n != m) stop("Sample sizes must agree") if (! 
(all(is.finite(c(x, y))))) stop("Data contains missing or infinite values") dims <- c(n, NCOL(x), NCOL(y), dst) idx <- 1:dims[1] DCOV <- numeric(4) a <- .C("dCOV", x = as.double(t(x)), y = as.double(t(y)), byrow = as.integer(TRUE), dims = as.integer(dims), index = as.double(index), idx = as.double(idx), DCOV = as.double(DCOV), PACKAGE = "energy") return(a$DCOV) } dcov <- function(x, y, index=1.0) { # distance correlation statistic for independence return(.dcov(x, y, index)[1]) } dcor <- function(x, y, index=1.0) { # distance correlation statistic for independence return(.dcov(x, y, index)[2]) } energy/R/util.R0000644000176200001440000000060414014451405013042 0ustar liggesusers## util.R ## ## miscellaneous utilities ## sortrank <- function(x) { ## sort and rank data with one call to order() ## faster than calling sort and rank separately ## returns an object identical to: ## list(x=sort(x), ix=order(x), r=rank(x, ties.method = "first")) o <- order(x) n <- length(o) N <- 1:n N[o] <- N return(list(x=x[o], ix=o, r=N)) } energy/R/Epoisson.R0000644000176200001440000000652014014220304013657 0ustar liggesuserspoisson.tests <- function(x, R, test="all") { # parametric bootstrap tests of Poisson distribution # poisson.e is the energy GOF statistic # poisson.m is the mean distance statistic # (not related to the test stats::poisson.test) if (any(!is.integer(x)) || any(x < 0)) { warning("sample must be non-negative integers") return(NULL) } test <- casefold(test) poisson.stats <- function(x) { c(poisson.m(x), poisson.e(x)) } stat <- switch(test, "m" = poisson.m, "e" = poisson.e, poisson.stats) method <- switch(test, m=c("M-CvM","M-AD"), e="Energy", c("M-CvM","M-AD","Energy")) method <- paste(method, " test", sep="") n <- length(x) lambda <- mean(x) if (missing(R) || is.null(R)) { R <- 0 message("Specify R > 0 replicates for MC test") } bootobj <- boot::boot(x, statistic = stat, R = R, sim = "parametric", ran.gen = function(x, y) {rpois(n, lambda)}) N <- length(bootobj$t0) p <- rep(NA, times=N) if (R > 0) { for (i in 1:N) { p[i] <- 1 - mean(bootobj$t[,i] < bootobj$t0[i]) } } # a data frame, not an htest object # comparable to broom::tidy on an htest object RVAL <- data.frame(estimate=lambda, statistic=bootobj$t0, p.value=p, method=method) return(RVAL) } poisson.mtest <- function(x, R=NULL) { if (is.null(R)) R <- 0 rval <- poisson.tests(x, R, test="M") DNAME <- paste(deparse1(substitute(x)), "replicates: ", R) stat <- rval$statistic[1] names(stat) <- "M-CvM" e <- list( method = paste("Poisson M-test", sep = ""), statistic = stat, p.value = rval$p.value[1], data.name = DNAME, estimate = rval$estimate[1]) class(e) <- "htest" e } poisson.etest <- function(x, R=NULL) { if (is.null(R)) R <- 0 rval <- poisson.tests(x, R, test="E") DNAME <- paste(deparse1(substitute(x)), "replicates: ", R) stat <- rval$statistic names(stat) <- "E" e <- list( method = paste("Poisson E-test", sep = ""), statistic = stat, p.value = rval$p.value, data.name = paste("replicates: ", R, sep=""), estimate = rval$estimate) class(e) <- "htest" e } poisson.m <- function(x) { # mean distance statistic for Poissonity if (any(!is.integer(x)) || any(x < 0)) { warning("sample must be non-negative integers") return(NULL) } stats <- .poisMstat(x) names(stats) <- c("M-CvM", "M-AD") return(stats) } poisson.e <- function(x) { # energy GOF statistic for Poissonity if (any(!is.integer(x)) || any(x < 0)) { warning("sample must be non-negative integers") return(NULL) } lambda <- mean(x) n <- length(x) ## E|y-X| for X Poisson(lambda) (vectorized) Px <- 
ppois(x, lambda) Px1 <- ppois(x-1, lambda) meanvec <- 2*x*Px - 2*lambda*Px1 + lambda - x ## second mean E|X-X'| a <- 2 * lambda EXX <- a * exp(-a) * (besselI(a, 0) + besselI(a, 1)) ## third mean = sum_{i,j} |x_i - x_j| / n^2 K <- seq(1 - n, n - 1, 2) y <- sort(x) meanxx <- 2 * sum(K * y) / n^2 stat <- n * (2 * mean(meanvec) - EXX - meanxx) names(stat) <- "E" return(stat) } energy/R/Ecluster.R0000644000176200001440000000100414005374454013656 0ustar liggesusers energy.hclust <- function(dst, alpha = 1) { if (!inherits(dst, "dist")) stop("The first argument must be a dist object.") d <- dst n <- attr(d, "Size") if (!isTRUE(all.equal(alpha, 1))) { if (alpha > 2) warning("Exponent alpha should be in (0,2]") if (alpha < 0) stop("Cannot use negative exponent on distance.") d <- d^alpha } ## heights of hclust are half of energy; otherwise equivalent return(hclust(d, method = "ward.D")) } energy/R/pdcov-test.R0000644000176200001440000000465114173570013014166 0ustar liggesuserspdcov.test <- function(x, y, z, R) { ## x, y, z must be dist. objects or data matrices (no dist matrix) if (missing(R)) R <- 0 if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) if (!inherits(z, "dist")) z <- dist(z) Dx <- as.matrix(x) Dy <- as.matrix(y) Dz <- as.matrix(z) n <- nrow(Dx) Pxz <- projection(Dx, Dz) #U-center and compute projections Pyz <- projection(Dy, Dz) #PxzU <- U_center(Pxz) #not necessary, because of invariance #PyzU <- U_center(Pyz) teststat <- n * U_product(Pxz, Pyz) ## calc. pdcor den <- sqrt(U_product(Pxz, Pxz) * U_product(Pyz, Pyz)) if (den > 0.0) { estimate <- teststat / (n * den) } else estimate <- 0.0 bootfn <- function(Pxz, i, Pyz) { # generate the permutation replicates of dcovU(Pxz, Pyz) # PxzU and PyzU are the U-centered matrices U_product(Pxz[i, i], Pyz) #RcppExports } if (R > 0 && den > 0.0) { reps <- replicate(R, expr= { i <- sample(1:n) bootfn(Pxz, i, Pyz=Pyz) }) replicates <- n * reps pval <- (1 + sum(replicates > teststat)) / (1 + R) #df <- n * (n-3) / 2 - 2 } else { pval <- NA replicates <- NA } dataname <- paste("replicates ", R, sep="") if (! R>0) dataname <- "Specify R>0 replicates for a test" condition <- (den > 0.0) names(estimate) <- "pdcor" names(teststat) <- "n V^*" e <- list( call = match.call(), method = paste("pdcov test", sep = ""), statistic = teststat, estimate = estimate, p.value = pval, n = n, replicates = replicates, condition = condition, data.name = dataname) class(e) <- "htest" return(e) } pdcor.test <- function(x, y, z, R) { ## x, y, z must be dist. objects or data matrices (no dist matrix) ## all required calc. 
done in pdcov.test if (missing(R)) R <- 0 result <- pdcov.test(x, y, z, R=R) if (result$condition) { ## if (A*A)(B*B) > 0 nRootV <- result$statistic / result$estimate pdcor_reps <- result$replicates / nRootV } else pdcor_reps <- NA e <- list( call = match.call(), method = paste("pdcor test", sep = ""), statistic = result$estimate, estimate = result$estimate, p.value = result$p.value, n = result$n, replicates = pdcor_reps, condition = result$condition, data.name = result$data.name) class(e) <- "htest" return(e) } energy/R/energy-defunct.R0000644000176200001440000000067114005374454015020 0ustar liggesusers## defunct functions from the energy package indep.e<- function(x, y) { # energy statistic for multivariate independence (deprecated) .Defunct(new = "mvI", package = "energy") } indep.etest<- function(x, y, R) { # energy test for multivariate independence (deprecated) .Defunct(new = "indep.test", package = "energy", msg = "indep.etest removed; use indep.test with method mvI.") } energy/R/RcppExports.R0000644000176200001440000000200714173644544014373 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 Btree_sum <- function(y, z) { .Call(`_energy_Btree_sum`, y, z) } calc_dist <- function(x) { .Call(`_energy_calc_dist`, x) } U_product <- function(U, V) { .Call(`_energy_U_product`, U, V) } D_center <- function(Dx) { .Call(`_energy_D_center`, Dx) } U_center <- function(Dx) { .Call(`_energy_U_center`, Dx) } dcovU_stats <- function(Dx, Dy) { .Call(`_energy_dcovU_stats`, Dx, Dy) } kgroups_start <- function(x, k, clus, iter_max, distance) { .Call(`_energy_kgroups_start`, x, k, clus, iter_max, distance) } partial_dcor <- function(Dx, Dy, Dz) { .Call(`_energy_partial_dcor`, Dx, Dy, Dz) } partial_dcov <- function(Dx, Dy, Dz) { .Call(`_energy_partial_dcov`, Dx, Dy, Dz) } .poisMstat <- function(x) { .Call(`_energy_poisMstat`, x) } projection <- function(Dx, Dz) { .Call(`_energy_projection`, Dx, Dz) } energy/R/edist.R0000644000176200001440000000443214005374454013210 0ustar liggesusersedist <- function(x, sizes, distance = FALSE, ix = 1:sum(sizes), alpha = 1, method = c("cluster","discoB")) { # computes the e-dissimilarity matrix between k samples or clusters # x: pooled sample or Euclidean distances # sizes: vector of sample (cluster) sizes # distance: TRUE if x is a distance matrix, otherwise FALSE # ix: a permutation of row indices of x # alpha: distance exponent # method: cluster distances or disco statistics # k <- length(sizes) if (k == 1) return (as.dist(0.0)) if (k < 1) return (NA) e <- matrix(nrow=k, ncol=k) n <- cumsum(sizes) m <- 1 + c(0, n[1:(k-1)]) if (is.vector(x)) x <- matrix(x, ncol=1) if (inherits(x, "dist")) distance <- TRUE if (distance) dst <- as.matrix(x) else dst <- as.matrix(dist(x)) N <- NROW(dst) if (NCOL(dst) != N) stop("distance==TRUE but first argument is not distance") if (alpha != 1) { if (alpha <= 0 || alpha > 2) warning("exponent alpha should be in (0,2]") dst <- dst^alpha } type <- match.arg(method) if (type == "cluster") { for (i in 1:(k - 1)) { e[i, i] <- 0.0 for (j in (i + 1):k) { n1 <- sizes[i] n2 <- sizes[j] ii <- ix[m[i]:n[i]] jj <- ix[m[j]:n[j]] w <- n1 * n2 / (n1 + n2) m11 <- sum(dst[ii, ii]) / (n1 * n1) m22 <- sum(dst[jj, jj]) / (n2 * n2) m12 <- sum(dst[ii, jj]) / (n1 * n2) e[i, j] <- e[j, i] <- w * ((m12 + m12) - (m11 + m22)) } } } if (type == "discoB") { #disco statistics for testing F=G for (i in 1:(k - 1)) { e[i, i] <- 0.0 for (j in (i + 1):k) { n1 <- sizes[i] n2 <- 
sizes[j] ii <- ix[m[i]:n[i]] jj <- ix[m[j]:n[j]] J <- c(ii,jj) d <- dst[J, J] e[i, j] <- eqdist.e(d, sizes=c(n1, n2), distance=TRUE) e[j, i] <- e[i, j] <- e[i, j] * (n1 + n2) } } e <- 0.5 * e / sum(sizes) #discoB formula } e <- as.dist(e) attr(e,"method") <- paste(method,": index= ", alpha) e } energy/R/disco.R0000644000176200001440000001463614005374454013210 0ustar liggesusers ### disco tests - implementation of DIStance COmponents methods in: ### ### Rizzo, M.L. and Szekely, G.J. (2010) "DISCO Analysis: A Nonparametric ### Extension of Analysis of Variance, Annals of Applied Statistics ### Vol. 4, No. 2, 1034-1055. ### ### Sept 2010 parts of disco package merged into energy package ### this release supports one way models ### this version does not use the C library ### ### disco: computes the decomposition and test using F ratio ### disco.between: statistic and test using between component ### .disco1: internal computations for one factor ### .disco1stat, .disco1Bstat: internal for boot function ### ### disco <- function(x, factors, distance = FALSE, index = 1, R, method = c("disco", "discoB", "discoF")) { ## x is response or Euclidean distance matrix or dist() object factors ## is a matrix or data frame of group labels distance=TRUE if x is ## distance, otherwise FALSE index is the exponent on distance, in (0,2] ## R is number of replicates for test method: use F ratio (default) or ## between component (discoB) disco method is currently alias for discoF method <- match.arg(method) factors <- data.frame(factors) if (inherits(x, "dist")) distance <- TRUE if (method == "discoB") return(disco.between(x, factors = factors, distance = distance, index = index, R = R)) nfactors <- NCOL(factors) if (distance || inherits(x, "dist")) dst <- as.matrix(x) else dst <- as.matrix(dist(x)) N <- NROW(dst) if (NCOL(dst) != N) stop("distance==TRUE but first argument is not distance") if (!isTRUE(all.equal(index, 1))) dst <- dst^index stats <- matrix(0, nfactors, 6) colnames(stats) <- c("Trt", "Within", "df1", "df2", "Stat", "p-value") for (j in 1:nfactors) { trt <- factors[, j] stats[j, 1:4] <- .disco1(trt = trt, dst = dst) if (R > 0) { b <- boot::boot(data = dst, statistic = .disco1stat, sim = "permutation", R = R, trt = trt) stats[j, 5] <- b$t0 stats[j, 6] <- (sum(b$t > b$t0) + 1)/(R + 1) } else { stats[j, 5] <- .disco1stat(dst, i = 1:nrow(dst), trt = trt) stats[j, 6] <- NA } } methodname <- "DISCO (F ratio)" dataname <- deparse(substitute(x)) total <- sum(stats[1, 1:2]) within <- total - sum(stats[, 1]) Df.trt <- stats[, 3] factor.names <- names(factors) factor.levels <- sapply(factors, nlevels) sizes <- sapply(factors, tabulate) e <- list(call = match.call(), method = methodname, statistic = stats[, 5], p.value = stats[, 6], k = nfactors, N = N, between = stats[, 1], withins = stats[, 2], within = within, total = total, Df.trt = Df.trt, Df.e = nrow(dst) - sum(Df.trt) - 1, index = index, factor.names = factor.names, factor.levels = factor.levels, sample.sizes = sizes, stats = stats) class(e) <- "disco" e } disco.between <- function(x, factors, distance = FALSE, index = 1, R) { ## disco test based on the between-sample component similar to disco ## except that 'disco' test is based on the F ratio disco.between test ## for one factor (balanced) is asymptotically equivalent to k-sample E ## test (test statistics are proportional in that case but not in ## general). 
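  ## A hypothetical usage sketch (not run; assumes base R's iris data):
  ##   disco.between(iris[, 1:4], factors = iris$Species, R = 199)
  ## would test equality of the three species distributions by permutation.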
x is response or Euclidean distance matrix or dist() ## object factors is a matrix or data frame of group labels ## distance=TRUE if x is distance, otherwise FALSE index is the exponent ## on distance, in (0,2] factors <- data.frame(factors) nfactors <- NCOL(factors) if (nfactors > 1) stop("More than one factor is not implemented in disco.between") if (distance || inherits(x, "dist")) dst <- as.matrix(x) else dst <- as.matrix(dist(x)) N <- NROW(dst) if (NCOL(dst) != N) stop("distance==TRUE but first argument is not distance") if (!isTRUE(all.equal(index, 1))) dst <- dst^index trt <- factors[, 1] if (R > 0) { b <- boot::boot(data = dst, statistic = .disco1Bstat, sim = "permutation", R = R, trt = trt) between <- b$t0 reps <- b$t pval <- mean(reps >= between) } else { between <- .disco1Bstat(dst, i = 1:nrow(dst), trt = trt) pval <- NA } if (R == 0) return(between) methodname <- "DISCO (Between-sample)" dataname <- deparse(substitute(x)) names(between) <- "DISCO between statistic" e <- list(call = match.call(), method = methodname, statistic = between, p.value = pval, data.name = dataname) class(e) <- "htest" e } .disco1 <- function(trt, dst) { ## dst is Euclidean distance matrix or power of it trt is the treatment, ## a factor trt <- factor(trt) k <- nlevels(trt) n <- tabulate(trt) N <- sum(n) total <- sum(dst)/(2 * N) y <- as.vector(dst[, 1]) M <- model.matrix(y ~ 0 + trt) G <- t(M) %*% dst %*% M withins <- diag(G)/(2 * n) W <- sum(withins) B <- total - W c(B, W, k - 1, N - k) } .disco1stat <- function(dst, i, trt) { ## i is permuation vector supplied by bootstrap dst is Euclidean ## distance matrix or power of it trt is the treatment, a factor returns ## the disco 'F' ratio idx <- 1:nrow(dst) d <- .disco1(trt = trt[idx[i]], dst = dst) statistic <- (d[1]/d[3])/(d[2]/d[4]) } .disco1Bstat <- function(dst, i, trt) { ## i is permuation vector supplied by bootstrap dst is Euclidean ## distance matrix or power of it trt is the treatment, a factor returns ## the between-sample component (for one factor) idx <- 1:nrow(dst) .disco1(trt = trt[idx[i]], dst = dst)[1] } print.disco <- function(x, ...) 
{ k <- x$k md1 <- x$between/x$Df.trt md2 <- x$within/x$Df.e f0 <- x$statistic print(x$call) cat(sprintf("\nDistance Components: index %5.2f\n", x$index)) cat(sprintf("%-20s %4s %10s %10s %10s %10s\n", "Source", "Df", "Sum Dist", "Mean Dist", "F-ratio", "p-value")) for (i in 1:k) { fname <- x$factor.names[i] cat(sprintf("%-20s %4d %10.5f %10.5f %10.3f %10s\n", fname, x$Df.trt[i], x$between[i], md1[i], f0[i], format.pval(x$p.value[i]))) } cat(sprintf("%-20s %4d %10.5f %10.5f\n", "Within", x$Df.e, x$within, md2)) cat(sprintf("%-20s %4d %10.5f\n", "Total", x$N - 1, x$total)) } energy/R/pdcor.R0000644000176200001440000000105514005374454013205 0ustar liggesusers## pdcor.R ## ## pdcor <- function(x, y, z) { if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) if (!inherits(z, "dist")) z <- dist(z) x <- as.matrix(x) y <- as.matrix(y) z <- as.matrix(z) partial_dcor(x, y, z)["pdcor"] } pdcov <- function(x, y, z) { if (!inherits(x, "dist")) x <- dist(x) if (!inherits(y, "dist")) y <- dist(y) if (!inherits(z, "dist")) z <- dist(z) x <- as.matrix(x) y <- as.matrix(y) z <- as.matrix(z) partial_dcov(x, y, z) } energy/R/Eindep.R0000644000176200001440000000426214005374454013305 0ustar liggesusersindep.test<- function(x, y, method = c("dcov","mvI"), index = 1, R) { # two energy tests for multivariate independence type <- match.arg(method) if (type == "dcov") return(dcov.test(x, y, index, R)) else if (type == "mvI") return(mvI.test(x, y, R)) } mvI <- function(x, y) { # energy statistic for multivariate independence # returns dependence coefficient I_n x <- as.matrix(x) y <- as.matrix(y) n <- nrow(x) m <- nrow(y) if (n != m || n < 2) stop("Sample sizes must agree") if (! (all(is.finite(c(x, y))))) stop("Data contains missing or infinite values") stat <- 0 dims <- c(n, ncol(x), ncol(y)) e <- .C("indepE", x = as.double(t(x)), y = as.double(t(y)), byrow = as.integer(TRUE), dims = as.integer(dims), stat = as.double(stat), PACKAGE = "energy") sqrt(e$stat) } mvI.test<- function(x, y, R) { # energy test for multivariate independence x <- as.matrix(x) y <- as.matrix(y) n <- nrow(x) m <- nrow(y) if (n != m || n < 2) stop("Sample sizes must agree") if (! 
energy/R/Eindep.R0000644000176200001440000000426214005374454013305 0ustar liggesusersindep.test <- function(x, y, method = c("dcov", "mvI"), index = 1, R) {
  # two energy tests for multivariate independence
  type <- match.arg(method)
  if (type == "dcov")
    return(dcov.test(x, y, index, R))
  else if (type == "mvI")
    return(mvI.test(x, y, R))
}

mvI <- function(x, y) {
  # energy statistic for multivariate independence
  # returns dependence coefficient I_n
  x <- as.matrix(x)
  y <- as.matrix(y)
  n <- nrow(x)
  m <- nrow(y)
  if (n != m || n < 2)
    stop("Sample sizes must agree")
  if (!(all(is.finite(c(x, y)))))
    stop("Data contains missing or infinite values")
  stat <- 0
  dims <- c(n, ncol(x), ncol(y))
  e <- .C("indepE", x = as.double(t(x)), y = as.double(t(y)),
          byrow = as.integer(TRUE), dims = as.integer(dims),
          stat = as.double(stat), PACKAGE = "energy")
  sqrt(e$stat)
}

mvI.test <- function(x, y, R) {
  # energy test for multivariate independence
  x <- as.matrix(x)
  y <- as.matrix(y)
  n <- nrow(x)
  m <- nrow(y)
  if (n != m || n < 2)
    stop("Sample sizes must agree")
  if (!(all(is.finite(c(x, y)))))
    stop("Data contains missing or infinite values")
  stat <- reps <- 0
  if (R > 0)
    reps <- rep(0, R)
  pval <- 1
  dims <- c(n, ncol(x), ncol(y), R)
  a <- .C("indepEtest", x = as.double(t(x)), y = as.double(t(y)),
          byrow = as.integer(TRUE), dims = as.integer(dims),
          stat = as.double(stat), reps = as.double(reps),
          pval = as.double(pval), PACKAGE = "energy")
  stat <- n * a$stat
  est <- sqrt(a$stat)
  names(est) <- "I"
  names(stat) <- "nI^2"
  dataname <- paste("x (", n, " by ", ncol(x), "), y(", n, " by ",
                    ncol(y), "), replicates ", R, sep = "")
  if (R > 0)
    p.value <- a$pval
  else
    p.value <- NA
  e <- list(method = "mvI energy test of independence",
            statistic = stat,
            estimate = est,
            replicates = n * reps,
            p.value = p.value,
            data.name = dataname)
  class(e) <- "htest"
  e
}
energy/R/Eeqdist.R0000644000176200001440000000712614005374454013501 0ustar liggesuserseqdist.e <- function(x, sizes, distance = FALSE,
                     method = c("original", "discoB", "discoF")) {
  ## multivariate E-statistic for testing equal distributions
  ## x: matrix of pooled sample or distance matrix
  ## sizes: vector of sample sizes
  ## distance: logical, TRUE if x is a distance matrix, otherwise FALSE
  ## method: original (default) or disco between components, or disco F ratio
  method <- match.arg(method)
  if (method == "discoB") {
    g <- as.factor(rep(1:length(sizes), sizes))
    RVAL <- disco(x, factors = g, distance = distance, R = 0, method = method)
  } else {
    RVAL <- eqdist.etest(x, sizes, distance = distance, R = 0,
                         method = method)$statistic
  }
  RVAL
}

eqdist.etest <- function(x, sizes, distance = FALSE,
                         method = c("original", "discoB", "discoF"), R) {
  ## multivariate E-test of the multisample hypothesis of equal distributions
  ## x: matrix of pooled sample or distance matrix
  ## sizes: vector of sample sizes
  ## distance: logical, TRUE if x is a distance matrix, otherwise FALSE
  ## method: original (default) or disco components
  ## R: number of replicates
  ##
  method <- match.arg(method)
  if (method == "discoB" || method == "discoF") {
    g <- as.factor(rep(1:length(sizes), sizes))
    # for other index use disco() function directly
    return(disco(x, factors = g, distance = distance, index = 1.0,
                 R = R, method = method))
  }
  nsamples <- length(sizes)
  if (nsamples < 2)
    return(NA)
  if (min(sizes) < 1)
    return(NA)
  if (!is.null(attr(x, "Size")))
    distance <- TRUE
  x <- as.matrix(x)
  if (NROW(x) != sum(sizes))
    stop("nrow(x) should equal sum(sizes)")
  if (distance == FALSE && nrow(x) == ncol(x))
    warning("square data matrix with distance==FALSE")
  d <- NCOL(x)
  if (distance == TRUE)
    d <- 0
  str <- "Multivariate "
  if (d == 1)
    str <- "Univariate "
  if (d == 0)
    str <- ""
  e0 <- 0.0
  repl <- rep(0, R)
  pval <- 1.0
  b <- .C("ksampleEtest", x = as.double(t(x)), byrow = as.integer(1),
          nsamples = as.integer(nsamples), sizes = as.integer(sizes),
          dim = as.integer(d), R = as.integer(R), e0 = as.double(e0),
          e = as.double(repl), pval = as.double(pval), PACKAGE = "energy")
  names(b$e0) <- "E-statistic"
  sz <- paste(sizes, collapse = " ", sep = "")
  methodname <- paste(str, length(sizes),
                      "-sample E-test of equal distributions", sep = "")
  dataname <- paste("sample sizes ", sz, ", replicates ", R, sep = "")
  e <- list(call = match.call(),
            method = methodname,
            statistic = b$e0,
            p.value = b$pval,
            data.name = dataname)
  class(e) <- "htest"
  e
}
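## Illustrative usage sketch (editor's addition, not part of the package
## source): two-sample energy test of equal distributions through the
## pooled-sample interface above.  Assumes the energy package is attached;
## sizes gives the number of rows of x in each sample and R = 199 is an
## arbitrary replicate count.  Wrapped in if (FALSE) so it is never run on
## source().
if (FALSE) {
  library(energy)
  set.seed(1)
  s1 <- matrix(rnorm(40), 20, 2)            # sample 1: N(0, 1) margins
  s2 <- matrix(rnorm(60, mean = 1), 30, 2)  # sample 2: N(1, 1) margins
  x <- rbind(s1, s2)                        # pooled sample
  eqdist.e(x, sizes = c(20, 30))            # E-statistic only
  eqdist.etest(x, sizes = c(20, 30), R = 199)
  ## the same test from a precomputed distance object
  eqdist.etest(dist(x), sizes = c(20, 30), distance = TRUE, R = 199)
}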
ksample.e <- function(x, sizes, distance = FALSE,
                      method = c("original", "discoB", "discoF"),
                      ix = 1:sum(sizes)) {
  ## computes k-sample E-statistics for equal distributions
  ## retained for backward compatibility or use with boot
  ## (this function simply passes arguments to eqdist.e)
  ##
  ## x: pooled sample or distance matrix
  ## sizes: vector of sample sizes
  ## distance: TRUE if x is a distance matrix, otherwise FALSE
  ## method: default (original) or disco between components or disco F ratio
  ## ix: a permutation of row indices of x
  ##
  x <- as.matrix(x)
  method <- match.arg(method)
  eqdist.e(x[ix, ], sizes = sizes, distance = distance, method = method)
}
energy/R/Emvnorm.R0000644000176200001440000001022314172025577013522 0ustar liggesusersmvnorm.test <- function(x, R) {
  # parametric bootstrap E-test for multivariate normality
  if (missing(R)) {
    method <- "Energy test of multivariate normality: (Specify R > 0 for MC test)"
    R <- 0
  } else {
    method <- "Energy test of multivariate normality: estimated parameters"
  }
  if (is.vector(x) || NCOL(x) == 1) {
    n <- NROW(x)
    d <- 1
    bootobj <- boot::boot(x, statistic = normal.e, R = R, sim = "parametric",
                          ran.gen = function(x, y) {
                            return(rnorm(n))
                          })
  } else {
    n <- nrow(x)
    d <- ncol(x)
    bootobj <- boot::boot(x, statistic = mvnorm.e, R = R, sim = "parametric",
                          ran.gen = function(x, y) {
                            return(matrix(rnorm(n * d), nrow = n, ncol = d))
                          })
  }
  if (R > 0)
    p <- 1 - mean(bootobj$t < bootobj$t0)
  else
    p <- NA
  names(bootobj$t0) <- "E-statistic"
  e <- list(statistic = bootobj$t0,
            p.value = p,
            method = method,
            data.name = paste("x, sample size ", n, ", dimension ", d,
                              ", replicates ", R, sep = ""))
  class(e) <- "htest"
  e
}

mvnorm.etest <- function(x, R) {
  return(mvnorm.test(x, R))
}

mvnorm.e <- function(x) {
  # E-statistic for multivariate normality
  if (is.vector(x) || NCOL(x) == 1)
    return(normal.e(x))
  n <- nrow(x)
  d <- ncol(x)
  if (n < 2) {
    warning("sample size must be at least 2")
    return(NA)
  }
  # subtract column means and compute S^(-1/2)
  z <- scale(x, scale = FALSE)
  ev <- eigen(var(x), symmetric = TRUE)
  P <- ev$vectors
  lambda <- ev$values
  D <- diag(d)
  diag(D) <- 1 / sqrt(lambda)
  y <- z %*% (P %*% D %*% t(P))
  if (any(!is.finite(y))) {
    warning("missing or non-finite y")
    return(NA)
  }
  if (requireNamespace("gsl", quietly = TRUE)) {
    const <- exp(lgamma((d + 1)/2) - lgamma(d/2))
    mean2 <- 2 * const
    ysq <- rowSums(y^2)
    mean1 <- sqrt(2) * const * mean(gsl::hyperg_1F1(-1/2, d/2, -ysq/2))
    mean3 <- 2 * sum(dist(y)) / n^2
    return(n * (2 * mean1 - mean2 - mean3))
  } else {
    warning("package gsl required but not found")
    return(NA)
  }
}

normal.e <- function(x) {
  ## Case 4: unknown parameters
  x <- as.vector(x)
  n <- length(x)
  s <- sd(x)
  if (!is.finite(s) || !(s > 0)) {
    warning("sd(x)>0 required")
    return(NA)
  }
  y <- (x - mean(x)) / sd(x)
  y <- sort(y)
  K <- seq(1 - n, n - 1, 2)
  return(2 * (sum(2 * y * pnorm(y) + 2 * dnorm(y)) - n/sqrt(pi) - mean(K * y)))
}
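## Illustrative usage sketch (editor's addition, not part of the package
## source): energy tests of (multivariate) normality built on the statistics
## defined above.  Assumes the energy package is attached; R = 199 is an
## arbitrary replicate count.  Wrapped in if (FALSE) so it is never run on
## source().
if (FALSE) {
  library(energy)
  set.seed(1)
  x <- matrix(rnorm(50 * 3), 50, 3)
  mvnorm.e(x)              # E-statistic only
  mvnorm.test(x, R = 199)  # parametric bootstrap p-value
  y <- rexp(50)            # univariate, clearly non-normal
  normal.e(y)
  mvnorm.test(y, R = 199)  # univariate branch uses normal.e
}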
normal.test <- function(x, method = c("mc", "limit"), R) {
  ## implements the test for d=1
  ## Case 4: composite hypothesis
  method <- match.arg(method)
  estimate <- c(mean(x), sd(x))
  names(estimate) <- c("mean", "sd")
  if (method == "mc") {
    ## Monte Carlo approach
    if (missing(R))
      R <- 0
    e <- energy::mvnorm.etest(x, R = R)
    e$method <- "Energy test of normality"
    e$method <- ifelse(R > 0,
                       paste0(e$method, ": estimated parameters"),
                       paste0(e$method, " (Specify R > 0 for MC test)"))
    e$estimate <- estimate
    return(e)
  }
  ## implement test using asymptotic distribution for p-value
  if (!is.numeric(x) || (!is.vector(x) && NCOL(x) > 1)) {
    warning("x must be a numeric vector")
    return(NA)
  } else {
    x <- as.vector(x, mode = "numeric")
  }
  n <- length(x)
  t0 <- normal.e(x)
  names(t0) <- "statistic"
  ## load pre-computed eigenvalues
  ev <- energy::EVnormal[, "Case4"]
  if (requireNamespace("CompQuadForm", quietly = TRUE)) {
    p <- CompQuadForm::imhof(t0, ev)$Qq
  } else {
    warning("limit distribution method requires CompQuadForm package for p-value")
    p <- NA
  }
  estimate <- c(mean(x), sd(x))
  names(estimate) <- c("mean", "sd")
  e <- list(statistic = t0,
            p.value = p,
            method = paste("Energy test of normality: limit distribution"),
            estimate = estimate,
            data.name = "Case 4: composite hypothesis, estimated parameters")
  class(e) <- "htest"
  e
}
energy/R/energy-deprecated.R0000644000176200001440000000412414173570742015471 0ustar liggesusers## deprecated functions in energy package

dcor.ttest <- function(x, y, distance = FALSE) {
  # x and y are observed samples or distance
  # distance arg is checked in bcdcor
  .Deprecated(new = "dcorT.test", package = "energy",
              msg = "dcor.ttest is deprecated, replaced by dcorT.test")
  if (distance == TRUE) {
    x <- as.dist(x)
    y <- as.dist(y)
  }
  return(dcorT.test(x, y))
}

dcor.t <- function(x, y, distance = FALSE) {
  # computes the t statistic for corrected high-dim dCor
  # should be approximately student T
  # distance arg is checked in bcdcor
  .Deprecated(new = "dcorT", package = "energy",
              msg = "dcor.t is deprecated, replaced by dcorT")
  if (distance == TRUE) {
    x <- as.dist(x)
    y <- as.dist(y)
  }
  return(dcorT(x, y))
}

DCOR <- function(x, y, index = 1.0) {
  # distance covariance and correlation statistics
  # alternate method, implemented in R without .C call
  # this method is usually slower than the C version
  .Deprecated(new = "dcor", package = "energy",
              msg = "DCOR is deprecated, replaced by dcor or dcov")
  if (!inherits(x, "dist")) x <- dist(x)
  if (!inherits(y, "dist")) y <- dist(y)
  x <- as.matrix(x)
  y <- as.matrix(y)
  n <- nrow(x)
  m <- nrow(y)
  if (n != m)
    stop("Sample sizes must agree")
  if (!(all(is.finite(c(x, y)))))
    stop("Data contains missing or infinite values")
  if (index < 0 || index > 2) {
    warning("index must be in (0,2], using default index=1")
    index <- 1.0
  }
  stat <- 0
  dims <- c(n, ncol(x), ncol(y))
  Akl <- function(x) {
    d <- as.matrix(x)^index
    m <- rowMeans(d)
    M <- mean(d)
    a <- sweep(d, 1, m)
    b <- sweep(a, 2, m)
    return(b + M)
  }
  A <- Akl(x)
  B <- Akl(y)
  dCov <- sqrt(mean(A * B))
  dVarX <- sqrt(mean(A * A))
  dVarY <- sqrt(mean(B * B))
  V <- sqrt(dVarX * dVarY)
  if (V > 0)
    dCor <- dCov / V
  else
    dCor <- 0
  return(list(dCov = dCov, dCor = dCor, dVarX = dVarX, dVarY = dVarY))
}
energy/R/kgroups.R0000644000176200001440000000373614005374454013600 0ustar liggesusers
kgroups <- function(x, k, iter.max = 10, nstart = 1, cluster = NULL) {
  distance <- inherits(x, "dist")
  x <- as.matrix(x)
  if (!is.numeric(x))
    stop("x must be numeric")
  n <- nrow(x)
  if (is.null(cluster)) {
    cluster <- sample(0:(k - 1), size = n, replace = TRUE)
  } else {
    ## recode cluster as 0,1,...,k-1
    cluster <- factor(cluster)
    if (length(levels(cluster)) != k)
      stop("cluster vector does not have k clusters")
    cluster <- as.integer(cluster) - 1
    if (length(cluster) != n)
      stop("data and length of cluster vector must match")
  }
  value <- kgroups_start(x, k, cluster, iter.max, distance = distance)
  if (nstart > 1) {
    objective <- rep(0, nstart)
    objective[1] <- value$W
    values <- vector("list", nstart)
    values[[1]] <- value
    for (j in 2:nstart) {
      ## random initialization of cluster labels
      cluster <- sample(0:(k - 1), size = n, replace = TRUE)
      values[[j]] <- kgroups_start(x, k, cluster, iter.max, distance = distance)
      objective[j] <- values[[j]]$W
    }
    best <- which.min(objective)
    value <- values[[best]]
  }
  obj <- structure(list(call = match.call(),
                        cluster = value$cluster + 1,
                        sizes = value$sizes,
                        within = value$within,
                        W = sum(value$within),
                        count = value$count,
                        iterations = value$it,
                        k = k),
                   class = "kgroups")
  return(obj)
}
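## Illustrative usage sketch (editor's addition, not part of the package
## source): energy k-groups clustering of the iris measurements with k = 3,
## chosen to match the number of species.  Assumes the energy package is
## attached; iris comes from the datasets package.  Wrapped in if (FALSE)
## so it is never run on source().
if (FALSE) {
  library(energy)
  x <- as.matrix(iris[, 1:4])
  set.seed(1)
  cl <- kgroups(x, k = 3, iter.max = 20, nstart = 5)
  cl                       # printed by print.kgroups below
  fitted(cl)               # cluster labels, via fitted.kgroups below
  fitted(cl, "groups")     # list of row indices by cluster
  table(iris$Species, fitted(cl))
}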
{ cat("\n"); print(x$call) cat("\nK-groups cluster analysis\n") cat(x$k, " groups of size ", x$sizes, "\n") cat("Within cluster distances:\n", x$within) cat("\nIterations: ", x$iterations, " Count: ", x$count, "\n") } fitted.kgroups <- function(object, method = c("labels", "groups"), ...) { method = match.arg(method) if (method == "groups") { k <- object$k CList <- vector("list", k) for (i in 1:k) CList[[i]] <- which(object$cluster == i) return (CList) } return (object$cluster) } energy/NEWS.md0000644000176200001440000001503714173560672012662 0ustar liggesusers# energy 1.7-9 * bug fix in normal.test * pdcor.test now gives p.value=NA in degenerate case * DCOR is deprecated; use dcor or dcov # energy 1.7-8 * User level changes: - Poisson goodness-of-fit tests - EVnormal (data) issue fixed - gsl package required * Internal changes - mvnorm.e: use gsl::hyperg_1F1 - poisMstat in energy.c moved to Rcpp poisMstat.cpp # energy 1.7-7 * User level changes: - dcorT.test replaces dcor.ttest, now deprecated. - dcorT replaces dcor.t, now deprecated. - edist method "discoF" removed * Internal changes - BCDCOR function (used in the high dim. dcorT test) has been revised. - edist method "discoB" correction - changes for compatibility with R 4.0.0 # energy 1.7-6 * User level changes: - normal.test (new) implements the energy test of univariate normality based on the null limit distribution for the composite hypothesis (estimated parameters). - dataset EVnormal (new) of eigenvalues for energy test of normality. - mvnorm.test replaces mvnorm.etest, and mvnorm.etest now is a wrapper for mvnorm.test. # energy 1.7-5 * User level changes: - kgroups: (new) implements energy clustering for a specified number k classes by energy distance criterion, analogous to the k classes of the k-means algorithm. - dcov2d and dcor2d: (new) O(n log n) methods to compute the U or V statistics for real x and y - sortrank() function added (a utility) * Internal changes: - B-tree.cpp: Btree_sum and other internal functions implement binary tree search for faster O(n log n) calculation of paired distances in dcov2d - kgroups.cpp: Rcpp implementation of k-groups algorithm - energy.hclust implementation: replaced C++ code with call to stats::hclust; since R > 3.0.3 it is now equivalent for alpha = 1 with method = "ward.D". Input and return value unchanged except heights from hclust are half. # energy 1.7-4 * User level changes - disco: handle the case when the user argument x is dist with conflicting argument distance=FALSE - dcor.t and dcor.ttest: handle the cases when class of argument x or y conflicts with the distance argument - Split manual page of dcovU into two files. - indep.etest and indep.e removed now Defunct (were Deprecated since Version 1.1-0, 2008-04-07; replaced by indep.test). * Internal changes - BCDCOR: handle the cases when class of argument x or y conflicts with the distance argument # energy 1.7-2 * User level changes - Provided new dcor.test function, similar to dcov.test but using the distance correlation as the test statistic. - Number of replicates R for Monte Carlo and permutation tests now matches the argument of the boot::boot function (no default value, user must specify). 
  - If user runs a test with 0 replicates, p-value printed is NA
* Internal changes
  - energy_init.c added for registering routines

# energy 1.7-0

* Partial Distance Correlation statistics and tests added
  - pdcov, pdcor, pdcov.test, pdcor.test
  - dcovU: unbiased estimator of distance covariance
  - bcdcor: bias corrected distance correlation
  - Ucenter, Dcenter, U_center, D_center: double-centering and U-centering
    utilities
  - U_product: inner product in U-centered Hilbert space
* updated NAMESPACE and DESCRIPTION imports, etc.
* revised package Title and Description in DESCRIPTION
* package now links to Rcpp
* mvnorm c code ported to c++ (mvnorm.cpp); corresponding changes in Emvnorm.R
* syntax for bcdcor: "distance" argument removed, now argument can optionally
  be a dist object
* syntax for energy.hclust: first argument must now be a dist object
* default number of replicates R in tests: for all tests, R now defaults to 0
  or R has no default value.

# energy 1.6.2

* inserted GetRNGstate() .. PutRNGState around repl. loop in dcov.c.

# energy 1.6.1

* replace Depends with Imports in DESCRIPTION file

# energy 1.6.0

* implementation of high-dim distance correlation t-test introduced in JMVA
  Volume 117, pp. 193-213 (2013).
* new functions dcor.t, dcor.ttest in dcorT.R
* minor changes to tidy other code in dcov.R
* removed unused internal function .dcov.test

# energy 1.5.0

* NAMESPACE: insert UseDynLib; remove zzz.R, .First.Lib()

# energy 1.4-0

* NAMESPACE added.
* (dcov.c, Eindep.c) Unused N was removed.
* (dcov.c) In case dcov=0, bypass the unnecessary loop that generates
  replicates (in dCOVtest and dCovTest). In this case dcor=0 and test is not
  significant. (dcov=0 if one of the samples is constant.)
* (Eqdist.R) in eqdist.e and eqdist.etest, method="disco" is replaced by two
  options: "discoB" (between sample components) and "discoF" (disco F ratio).
* (disco.R) Added disco.between and internal functions that compute the disco
  between-sample component and corresponding test.
* (utilities.c) In permute function replaced rand_unif with runif.
* (energy.c) In ksampleEtest the pval computation changed from ek/B to
  (ek+1)/(B+1) as it should be for a permutation test, and unneeded int* n
  removed.

# energy 1.3-0

* In distance correlation, distance covariance functions (dcov, dcor, DCOR)
  and dcov.test, arguments x and y can now optionally be distance objects
  (result of dist function or as.dist). Matrices x and y will always be
  treated as data.
* Functions in dcov.c and utilities.c were modified to support arguments
  that are distances rather than data. In utilities.c the index_distance
  function changed. In dcov.c there are many changes. Most importantly for
  the exported objects, there is now an extra required parameter in the dims
  argument passed from R. In dCOVtest dims must be a vector
  c(n, p, q, dst, R) where n is sample size, p and q are dimensions of x and
  y, dst is logical (TRUE if distances) and R is number of replicates. For
  dCOV dims must be c(n, p, q, dst).

# energy 1.2-0

* disco (distance components) added for one-way layout.
* A method argument was added to ksample.e, eqdist.e, and eqdist.etest,
  method = c("original", "disco").
* A method argument was added to edist, which summarizes cluster distances in a table: method = c("cluster","discoB","discoF")) energy/MD50000644000176200001440000000610614173755455012076 0ustar liggesusers9250d0ed4dd3ad76ff4fbe64b6044b2b *DESCRIPTION 5e3a3951f6878eef6b08f97fb2d7ac22 *NAMESPACE 35517a591e678a2504f5f596bd9a6a52 *NEWS.md 8e83d2d85a8d5b50f5de2b1328154d6d *R/Ecluster.R 8b428dedf82ffb7e15422ff9f49addf4 *R/Eeqdist.R b7e4b591f66480d8a5a950559e857092 *R/Eindep.R 819cfd13dc83998ba8f8a4676fd450fb *R/Emvnorm.R 064aa4cf5e0190c615a2e28f5b612317 *R/Epoisson.R 590e508b1c6f61837a438d6f67170d8c *R/RcppExports.R 25f5594b3278f42521643dc117844d5e *R/centering.R 1343cc6dfc935ea74a4ebe3168798451 *R/dcorT.R 916ff1c1a363d7952b77e21ec5c69c06 *R/dcov.R 60b99f7f7dc717614490eed5301ec6c5 *R/dcov2d.R 560259a77303b0ba55eba4fbe656bd6a *R/dcovu.R 70c2735dbc069bbc2e689cbc8a97701e *R/disco.R f522f49e669e143bc9c801fc451464b9 *R/edist.R 239b63f2293937538b6c684e0229be21 *R/energy-defunct.R 9f0d5f4032755f17e3d3c691db5b7944 *R/energy-deprecated.R 61dd5005a0b03370beb62c85ef859b22 *R/kgroups.R 32a4ab80b2ae1d1f982cce185cbc6b6c *R/pdcor.R c27ed9486350a5b73ab63bd2b4e0e81e *R/pdcov-test.R dcb8e55a7969ef5ffc554eaa07621836 *R/util.R 8ab90b7760f971c9b444b931fa17bb2e *README.md 29d513c07ce65029d43ed7b9a9aac310 *build/partial.rdb 05994c5a68aed671284220d94539c7e7 *data/EVnormal.rda d81bf02f4825e725cea43b367722b687 *man/U_product.Rd e2fe9d1397b2de9a61698ebbb1a54052 *man/centering.Rd 38f44473d9e3b23903265f93fc08b131 *man/dcorT.Rd 34a1b6cefa49bdae652a5bf121ec616b *man/dcov.Rd b56f78d451a9cda6a079cd95fb4a9377 *man/dcov.test.Rd 28bd294dda2aaa089c2c8266f92156f2 *man/dcov2d.Rd ad72f862eb29a8abb308593b1812eed6 *man/dcovU_stats.Rd a7a854323cfe2ac41c82f3f141157d5d *man/dcovu.Rd 1125cb6b6ab4ff998d236d1be7b5c60e *man/disco.Rd 13ac539822cfc7848d1fc953c013aa96 *man/edist.Rd c9c2f726d35a8d67eb2d0270b1ca5ca3 *man/eigen.Rd 44a2af634c14d9f6fa2151c9a48bae8a *man/energy-defunct.Rd d1e78ed68572e231459f89107a40c058 *man/energy-deprecated.Rd 9251f1e390875abf2135821f00f2c412 *man/energy-package.Rd 74f8b13f4d63e5cd1dea951bd1215f5b *man/energy.hclust.Rd a46fc3cae3e07309dfafadf41df48233 *man/eqdist.etest.Rd 38fe54607647d4371b1a206dd9331d3e *man/indep.test.Rd 805c2039f6e6b8614a07a02517b72f64 *man/kgroups.Rd f737adf88084af29be2adf2ac6eefe05 *man/mvI.test.Rd 214bbec2cc9a5db9c694c57bc699aeb0 *man/mvnorm-test.Rd 64e1b9b4f227eb8994cbd358436dbfb1 *man/normalGOF.Rd 245663728a851663daf9807562a2c1d4 *man/pdcor.Rd b86bcdc256a2eadc5a84903fbff564e4 *man/poisson.Rd 7a6ff6c66dd6fe89fa9ba80df4b45a26 *man/sortrank.Rd f16c05d5e8c9d12f79e4fe6dbac512ce *src/B-tree.cpp 5f678a27d0f4cfcfe0ad6f8ba4418eb8 *src/Eindep.c cc7fb03af3ab17f748f94d0ff201cc8d *src/Rcpp-utilities.cpp e7f9e32793125c2f0bb4d5bda7590c0b *src/RcppExports.cpp c560954ffd313a1b0aeb9ede7f73c4d1 *src/U-product.cpp e3f6b3a98f9d761ccf47c8c9c1bd58a5 *src/centering.cpp da0b2398098bc08ae54f647b29cf60b5 *src/dcov.c fe3940cdfcef45def8b3aec42b66c1cd *src/dcovU.cpp 7bc4cdb3fff8c30f2be574907eab94a5 *src/energy.c cc17a39a1b1d4d3cf71e092e00fbc67b *src/energy_init.c 8fcd338747eba10fdc05dc312b1da750 *src/kgroups.cpp 27372e65c1abd46a9a9ed64819088c76 *src/partial-dcor.cpp 45d455798b73468136033b3b6d035352 *src/poissonM.cpp 222bcfee0087c59f38a77ea339f8f60b *src/projection.cpp 9ad5c89ba30013de49a6cd0fd94c7396 *src/utilities.c