matrixStats/0000755000175100001440000000000012542554541012622 5ustar hornikusersmatrixStats/inst/0000755000175100001440000000000012542546311013573 5ustar hornikusersmatrixStats/inst/benchmarking/0000755000175100001440000000000012542546241016225 5ustar hornikusersmatrixStats/inst/benchmarking/colRowAlls.md.rsp0000644000175100001440000000256212542546241021440 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAlls"%> <%@string rowname="rowAlls"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + all() * colSums() == n or rowSums() == n ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAlls = colAlls(X), "apply+all" = apply(X, MARGIN=2L, FUN=all), "colSums==n" = (colSums(X) == nrow(X)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAlls = rowAlls(X), "apply+all" = apply(X, MARGIN=1L, FUN=all), "rowSums==n" = (rowSums(X) == ncol(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binMeans.md.rsp0000644000175100001440000000372112542546241021111 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binMeans"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * binMeans_R() which is defined as ```r <%=withCapture({ binMeans_R <- function(y, x, bx, na.rm=FALSE, count=TRUE, right=FALSE) { B <- length(bx)-1L res <- double(B) counts <- integer(B) # For each bin... for (kk in seq(length=B)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk+1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk+1L]) } yKK <- y[idxs] muKK <- mean(yKK) res[kk] <- muKK counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # binMeans_R() })%> ``` ## Results ### Non-sorted simulated data ```r <%=withCapture({ nx <- 10e3 # Number of data points set.seed(0xBEEF) x <- runif(nx, min=0, max=1) y <- runif(nx, min=0, max=1) # Uniformely distributed bins nb <- 1e3 # Number of bins bx <- seq(from=0, to=1, length.out=nb+1L) bx <- c(-1, bx, 2) })%> ``` <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% message(dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binMeans = binMeans(x=x, y=y, bx=bx, count=TRUE), binMeans_R = binMeans_R(x=x, y=y, bx=bx, count=TRUE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/x_OP_y.md.rsp0000644000175100001440000000401312542546241020545 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="x_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * x_OP_y_R() as below ```r <%=withCapture({ x_OP_y_R <- function(x, y, OP, na.rm=FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[,1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+"="add", "-"="sub", "*"="mul", "/"="div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "x_OP_y" = x_OP_y(x, y, OP=OP, na.rm=FALSE), "x_OP_y_R" = x_OP_y_R(x, y, OP=OP, na.rm=FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/varDiff.md.rsp0000644000175100001440000000243412542546241020736 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="varDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "varDiff" = varDiff(x), "var" = var(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/meanOver.md.rsp0000644000175100001440000000437512542546241021137 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="meanOver"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * mean() + [() * mean.default() + [() - avoids method dispatching as below ```r <%=withCapture({ meanOver_R_v1 <- function(x, na.rm=FALSE, idxs) { mean(x[idxs], na.rm=na.rm) } })%> ``` and ```r <%=withCapture({ meanOver_R_v2 <- function(x, na.rm=FALSE, idxs) { mean.default(x[idxs], na.rm=na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "meanOver" = meanOver(x, refine=TRUE), "meanOver_no_refine" = meanOver(x, refine=FALSE), "mean" = mean(x), "mean.default" = mean.default(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size=subset*length(x), replace=FALSE)) gc() stats <- microbenchmark( "meanOver" = meanOver(x, idxs=idxs, refine=TRUE), "meanOver_no_refine" = meanOver(x, idxs=idxs, refine=FALSE), "mean+[()" = meanOver_R_v1(x, idxs=idxs), "mean.default+[()" = meanOver_R_v2(x, idxs=idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMedians.md.rsp0000644000175100001440000000272512542546241022126 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMedians"%> <%@string rowname="rowMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + median() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMedians = colMedians(X, na.rm=FALSE), "apply+median" = apply(X, MARGIN=2L, FUN=median, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMedians = rowMedians(X, na.rm=FALSE), "apply+median" = apply(X, MARGIN=1L, FUN=median, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCummins.md.rsp0000644000175100001440000000264112542546241022156 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCummins"%> <%@string rowname="rowCummins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cummin() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCummins = colCummins(X), "apply+cummin" = apply(X, MARGIN=2L, FUN=cummin), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCummins = rowCummins(X), "apply+cummin" = apply(X, MARGIN=1L, FUN=cummin), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumsums.md.rsp0000644000175100001440000000264112542546241022177 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumsums"%> <%@string rowname="rowCumsums"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumsum() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumsums = colCumsums(X), "apply+cumsum" = apply(X, MARGIN=2L, FUN=cumsum), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumsums = rowCumsums(X), "apply+cumsum" = apply(X, MARGIN=1L, FUN=cumsum), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnyMissings.md.rsp0000644000175100001440000000347712542546241023017 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnyMissings"%> <%@string rowname="rowAnyMissings"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colAnyMissings() and rowAnyMissings() * apply() + anyMissing() * colSums() + is.na() and rowSums() + is.na() where ```r <%=withCapture({ colAnyMissings <- function(x, ...) colAnys(x, value=NA) })%> ``` and ```r <%=withCapture({ rowAnyMissings <- function(x, ...) rowAnys(x, value=NA) })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnyMissings = colAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN=2L, FUN=anyMissing), colSums = is.na(colSums(X, na.rm=FALSE)), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnyMissings = rowAnyMissings(X), "apply+anyMissing" = apply(X, MARGIN=1L, FUN=anyMissing), rowSums = is.na(rowSums(X, na.rm=FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMins.md.rsp0000644000175100001440000000355112542546241021452 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMins"%> <%@string rowname="rowMins"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + min() * lapply() + pmin() * lapply() + pmin.int() See also [StackOverflow:colMins?]. <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMins = colMins(X, na.rm=FALSE), "apply+min" = apply(X, MARGIN=2L, FUN=min, na.rm=FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(nrow(X)), function(i) X[i,])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(nrow(X)), function(i) X[i,])), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMins = rowMins(X, na.rm=FALSE), "apply+min" = apply(X, MARGIN=1L, FUN=min, na.rm=FALSE), "lapply+pmin" = do.call(pmin, lapply(seq_len(ncol(X)), function(i) X[,i])), "lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(ncol(X)), function(i) X[,i])), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/includes/0000755000175100001440000000000012542546241020033 5ustar hornikusersmatrixStats/inst/benchmarking/includes/footer.md.rsp0000644000175100001440000000136312542546241022461 0ustar hornikusers<%--------------------------------------------------------------- Page footer ---------------------------------------------------------------%> <%@string appendix="${appendix}" default="false"%> <%@ifeq appendix="true"%> <%@include file="appendix.md.rsp"%> <%@endif%> <%@include file="${references}"%> --------------------------------------- Copyright <%@meta name="author"%>. Last updated on <%=format(Sys.time(), format="%Y-%m-%d %H:%M:%S (%z UTC)")%>. Powered by [RSP]. <%--------------------------------------------------------------- Dynamically insert an HTML favicon ---------------------------------------------------------------%> <%=toFavicon({ plot(1, col="blue", bg="yellow", pch=21, cex=4, lwd=4, axes=FALSE) }, force=FALSE)%> matrixStats/inst/benchmarking/includes/header.md.rsp0000644000175100001440000000011212542546241022402 0ustar hornikusers[matrixStats]: Benchmark report --------------------------------------- matrixStats/inst/benchmarking/includes/results.md.rsp0000644000175100001440000001163412542546241022666 0ustar hornikusers<%-------------------------------------------------------------- BENCHMARK RESULTS --------------------------------------------------------------%> <%-------------------------------------------------------------- Local functions --------------------------------------------------------------%> <% toImage <- function(stats, name=levels(stats$expr)[1L], tags=NULL, ylim="auto", col=NULL, alpha=NULL, ...) { %> ![](<%=toPNG(name, tags=c(tags, "benchmark"), aspectRatio=2/3, { if (identical(ylim, "auto")) { y <- stats$time/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) } if (!is.null(ylim)) { stats$outlier <- (stats$time > ylim[2]*1e6) stats$time[stats$outlier] <- ylim[2]*1e6 } gg <- ggplot(data=stats, aes(x=seq_along(time)/length(levels(expr)), y=time/1e6)) gg <- gg + geom_point(aes(colour=expr, shape=outlier)) gg <- gg + scale_shape_manual(values=c(16,4), guide="none") if (!is.null(col)) gg <- gg + scale_colour_manual(values=col) if (!is.null(alpha)) gg <- gg + scale_alpha_manual(values=alpha) gg <- gg + xlab("iteration") + ylab("time (ms)") if (!is.null(ylim)) gg <- gg + ylim(ylim) print(gg) })%>) <% } # toImage() %> <% toTable <- function(stats, tags=NULL, order="median", ...) { kable({ s <- summary(stats) s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }) kable({ s <- summary(stats, unit="relative") s$neval <- NULL s$cld <- NULL s <- s[order(s[[order]]),] s }) } %> <%-------------------------------------------------------------- Benchmark results for vector functions --------------------------------------------------------------%> <% benchmarkResults <- function(stats, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, tags=tags) %> <% } # benchmarkResults() %> <%-------------------------------------------------------------- Benchmark results for col- and row-specific functions --------------------------------------------------------------%> <% crBenchmarkResults <- function(colStats, rowStats=NULL, tags=NULL, ...) { %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(colStats, tags=tags) %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% if (!is.null(rowStats)) { toTable(rowStats, tags=tags) } %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data <% if (!is.null(rowStats)) { %> as well as <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on the same data transposed<% } # if (!is.null(rowStats)) %>. Outliers are displayed as crosses. Times are in milliseconds._ <% y <- c(colStats$time, rowStats$time)/1e6 ymax <- max(y, na.rm=TRUE) y75 <- quantile(y, probs=0.75, na.rm=TRUE) yupper <- min(c(1.5*y75, ymax), na.rm=TRUE) ylim <- c(0, yupper) %> <% toImage(colStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) toImage(rowStats, tags=tags, ylim=ylim) %> <% if (!is.null(rowStats)) { %> <% # Compare performance or the column- and the row-specific methods # for the "main" function. stats <- list(colStats, rowStats) stats <- lapply(stats, FUN=function(x) { level <- levels(x$expr)[1] x <- subset(x, expr %in% level) x$expr <- factor(as.character(x$expr)) x }) stats <- Reduce(rbind, stats) odd <- seq(from=1L, to=nrow(stats), by=2L) top <- 1:(nrow(stats)/2) stats0 <- stats stats[ odd,] <- stats0[ top,] stats[-odd,] <- stats0[-top,] %> _Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._ <% toTable(stats, tags=tags) %> _Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). Outliers are displayed as crosses. Times are in milliseconds._ <% toImage(stats, name=paste(levels(stats$expr), collapse="_vs_"), tags=tags, col=c("#000000", "#999999")) %> <% } # if (!is.null(rowStats)) %> <% } # crBenchmarkResults() %> matrixStats/inst/benchmarking/includes/setup.md.rsp0000644000175100001440000000236412542546241022325 0ustar hornikusers<%@string header="includes/header.md.rsp"%> <%@string footer="includes/footer.md.rsp"%> <%@string references="../includes/references.md.rsp"%> <%@string appendix="${appendix}" default="true"%> <%@string colname=""%> <%@string rowname=""%> <%@string fcnname=""%> <%@string fcntags=""%> <%@meta author="Henrik Bengtsson"%> <%-------------------------------------------------------------- RSP specific --------------------------------------------------------------%> <% R.utils::use("R.utils, R.devices (>= 2.12.0), knitr, ggplot2") devOptions("png", width=390) options("withCapture/newline"=FALSE) options(deparse.cutoff=100) kable <- function(...) { t <- knitr::kable(..., format="markdown") print(t) } %> <%-------------------------------------------------------------- Report/package specific --------------------------------------------------------------%> <% use("matrixStats") use("microbenchmark") %> <%@include file="results.md.rsp"%> <%-------------------------------------------------------------- Macros --------------------------------------------------------------%> <%-------------------------------------------------------------- Timing --------------------------------------------------------------%> <% rspStartTime <- Sys.time() %> matrixStats/inst/benchmarking/includes/references.md.rsp0000644000175100001440000000121212542546241023275 0ustar hornikusers<%--------------------------------------------------------------- REFERENCES ---------------------------------------------------------------%> [RSP]: http://cran.r-project.org/package=R.rsp [matrixStats]: http://cran.r-project.org/package=matrixStats [StackOverflow:colMins?]: http://stackoverflow.com/questions/13676878 "Stack Overflow: fastest way to get Min from every column in a matrix?" [StackOverflow:colSds?]: http://stackoverflow.com/questions/17549762 "Stack Overflow: Is there such 'colsd' in R?" [StackOverflow:rowProds?]: http://stackoverflow.com/questions/20198801/ "Stack Overflow: Row product of matrix and column sum of matrix" matrixStats/inst/benchmarking/includes/appendix.md.rsp0000644000175100001440000000067512542546241023000 0ustar hornikusers## Appendix ### Session information ```r <% print(sessionInfo()) %> ``` Total processing time was <%=rspDuration <- round(Sys.time()-rspStartTime, digits=2)%> <%=attr(rspDuration, "units")%>. ### Reproducibility To reproduce this report, do: ```r <%@ifeq fcnname=""%><%@string fcnname="${colname}"%><%@endif%> html <- matrixStats:::benchmark('<%@string name="fcnname"%>'<%@ifneq fcntags=""%>, tags='<%@string name="fcntags"%>'<%@endif%>) ``` matrixStats/inst/benchmarking/colRowOrderStats.md.rsp0000644000175100001440000000341612542546241022636 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colOrderStats"%> <%@string rowname="rowOrderStats"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> <% use("Biobase", how="load") rowQ <- Biobase::rowQ %> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile(..., type=3L) * Biobase::rowQ() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- 0.3 which <- round(probs*nrow(X)) colStats <- microbenchmark( colOrderStats = colOrderStats(X, which=which, na.rm=FALSE), "apply+quantile" = apply(X, MARGIN=2L, FUN=quantile, probs=probs, na.rm=FALSE, type=3L), "rowQ(t(X))" = rowQ(t(X), which=which), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowOrderStats = rowOrderStats(X, which=which, na.rm=FALSE), "apply+quantile" = apply(X, MARGIN=1L, FUN=quantile, probs=probs, na.rm=FALSE, type=3L), rowQ = rowQ(X, which=which), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCounts.md.rsp0000644000175100001440000000335012542546241022014 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCounts"%> <%@string rowname="rowCounts"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * colSums() and rowSums() * apply() + sum() <% for (mode in c("logical", "integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> #### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] value <- 42 })%> ``` <% gc() %> ```r <%=withCapture({ colStats <- microbenchmark( colCounts = colCounts(X, value=value, na.rm=FALSE), colSums = colSums(X == value, na.rm=FALSE), "apply+sum" = apply(X, MARGIN=2L, FUN=function(x) sum(x == value, na.rm=FALSE)), unit = "ms" ) })%> ``` ```r <%=withCapture({ X <- t(X) })%> ``` <% gc() %> ```r <%=withCapture({ rowStats <- microbenchmark( rowCounts = rowCounts(X, value=value, na.rm=FALSE), rowSums = rowSums(X == value, na.rm=FALSE), "apply+sum" = apply(X, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=FALSE)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMads.md.rsp0000644000175100001440000000413512542546241021427 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMads"%> <%@string rowname="rowMads"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + mad() * colMads2() and rowMads2() where `rowMads2()` and `colMads2()` are: ```r <%=withCapture({ rowMads2 <- function(x, const=1.4826, na.rm=FALSE) { mu <- rowMedians(x, na.rm=na.rm) x <- abs(x - mu) mad <- rowMedians(x, na.rm=FALSE) const * mad } colMads2 <- function(x, const=1.4826, na.rm=FALSE) { mu <- colMedians(x, na.rm=na.rm) x <- abs(x - mu) mad <- colMedians(x, na.rm=FALSE) const * mad } })%> ``` <% rowMads_R <- function(x, na.rm=FALSE) { apply(x, MARGIN=1L, FUN=mad, na.rm=na.rm) } colMads_R <- function(x, na.rm=FALSE) { apply(x, MARGIN=2L, FUN=mad, na.rm=na.rm) } %> <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMads = colMads(X, na.rm=FALSE), colMads2 = colMads2(X, na.rm=FALSE), "apply+mad" = apply(X, MARGIN=2L, FUN=mad, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMads = rowMads(X, na.rm=FALSE), rowMads2 = rowMads2(X, na.rm=FALSE), "apply+mad" = apply(X, MARGIN=1L, FUN=mad, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/index.md.rsp0000644000175100001440000000362712542546241020471 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@meta title="Benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-05"%> <%@include file="${header}"%> <% use("matrixStats") use("R.utils (>= 1.34.0)") # Simple logging function lenter <- function(...) { s <- mprintf(...) s <- gsub("[.][.][.](\n)*", "...done\\1", s) lexit <<- function() mprintf(s) } %> # <%@meta name="title"%> List of benchmark report for some of the functions available in the [matrixStats] package. <% path <- cmdArg(path=system.file("benchmarking", package="matrixStats")) path <- getAbsolutePath(path) message("Processing benchmark report directory: ", path) pattern <- "[.]md[.]rsp$" filenames <- list.files(path=path, pattern=pattern) filenames <- setdiff(filenames, "index.md.rsp") names <- gsub(pattern, "", filenames) # col- and rowAnyMissing() does not really exist names <- setdiff(names, c("colAnyMissing", "rowAnyMissing")) message("Number of reports found: ", length(names)) mprintf("Report #%d: %s\n", seq_along(names), names) %> <% for (ii in seq_along(names)) { %> <% name <- names[ii] if (regexpr("^colRow", name) != -1L) { label <- gsub("^colRow", "", name) label <- sprintf("col%s() and row%s()", label, label) } else { label <- sprintf("%s()", name) } %> * [<%=label%>](<%={ lenter("%d of %d. Benchmarking %s...\n", ii, length(names), label) html <- sprintf("%s.html", name) if (!file_test("-f", html)) { html <- matrixStats:::benchmark(name, path=path, workdir=".", envir=new.env()) html <- getRelativePath(html) gc() } lexit() html }%>) <% } # for (ii ...) %> ## Appendix To reproduce this page and all of its reports, do: ```r path <- system.file("benchmarking", package="matrixStats") R.rsp::rfile("index.md.rsp", path=path) ``` _Note: Each of the above reports takes up to several minutes to complete._ <%@string appendix="false"%> <%@include file="${footer}"%> <%@string appendix="true"%> matrixStats/inst/benchmarking/sumOver.md.rsp0000644000175100001440000000351312542546241021014 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="sumOver"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-02"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum() + [() as below ```r <%=withCapture({ sumOver_R <- function(x, na.rm=FALSE, idxs) { sum(x[idxs], na.rm=na.rm) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) ##data <- data[1:3] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "sumOver" = sumOver(x), "sum" = sum(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, "all")) %> <% for (subset in c(0.2, 0.4, 0.8)) { %> #### A <%=sprintf("%g", 100*subset)%>% subset ```r <%=withCapture({ x <- data[[.dataLabel.]] subset idxs <- sort(sample(length(x), size=subset*length(x), replace=FALSE)) gc() stats <- microbenchmark( "sumOver" = sumOver(x, idxs=idxs), "sum+[()" = sumOver_R(x, idxs=idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %> <% } # for (subset in ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/product.md.rsp0000644000175100001440000000321712542546241021035 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="product"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-04"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * product_R() * prod() where ```r <%=withCapture({ product_R <- function(x, na.rm=FALSE, ...) { # Nothing todo? if (length(x) == 0L) return(0); # Any missing values? if (na.rm) { x <- x[!is.na(x)]; } # Any zeros? if (is.integer(x) && any(x == 0)) return(0); # Calculate product via logarithmic sum sign <- if (sum(x < 0) %% 2 == 0) +1 else -1; x <- abs(x); x <- log(x); x <- sum(x, na.rm=FALSE); x <- exp(x); y <- sign*x; y; } # product_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode="double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( product = product(x, na.rm=FALSE), product_R = product_R(x, na.rm=FALSE), prod = prod(x, na.rm=FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/indexByRow.md.rsp0000644000175100001440000000551612542546241021453 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="indexByRow"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> <% lfun <- local({ locals <- list() function(txt=NULL) { if (is.null(txt)) return(locals) local <- list(txt) locals <<- c(locals, local) } }) %> # <%@meta name="title"%> This report benchmark the performance of `<%=fcnname%>()` against alternative methods: * `indexByRow_R1()` based in `matrix(..., byrow=TRUE)` * `indexByRow_R2()` is a modified version of `indexByRow_R1()` where `indexByRow_R1()` and `indexByRow_R2()` are defined as in the Appendix. <% lfun(withCapture({ indexByRow_R1 <- function(dim, idxs=NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } # indexByRow_R1() })) lfun(withCapture({ indexByRow_R2 <- function(dim, idxs=NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE) as.vector(x) } else { idxs <- idxs - 1 cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L]*rows + 1L } } # indexByRow_R2() })) %> ## Data <% lfun(withCapture({ <%@include file="R/random-matrices.R"%> })) %> ```r <%=withCapture({ data <- rmatrices(mode="index") })%> ``` where `rmatrices()` is defined in the Appendix. <% # data <- data[1:2] %> ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] dim <- dim(X) idxsList <- list( 'all-by-NULL' = NULL, all = seq_len(prod(dim)), odd = seq(from=1, to=prod(dim), by=2L) ) str(idxsList) })%> ``` <% for (ii in seq_along(idxsList)) { %> #### Index set '<%=names(idxsList)[ii]%>' <% idxs <- idxsList[[ii]] idxsTag <- names(idxsList)[ii] # Validate correctness res <- list( indexByRow = indexByRow(dim, idxs=idxs), indexByRow_R1 = indexByRow_R1(dim, idxs=idxs), indexByRow_R2 = indexByRow_R2(dim, idxs=idxs) ) lapply(res, FUN=function(x) stopifnot(all.equal(x, res[[1]]))) gc() %> ```r <%=withCapture({ stats <- microbenchmark( indexByRow = indexByRow(dim, idxs=idxs), indexByRow_R1 = indexByRow_R1(dim, idxs=idxs), indexByRow_R2 = indexByRow_R2(dim, idxs=idxs), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(dataLabel, idxsTag)) %> <% } # for (ii ...) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> ### Local functions ```r <%=lfun()[[1]]%> ``` ```r <%=lfun()[[2]]%> ``` ```r <%=lfun()[[3]]%> ``` <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/R/0000755000175100001440000000000012542546241016426 5ustar hornikusersmatrixStats/inst/benchmarking/R/random-matrices.R0000644000175100001440000000226212542546241021640 0ustar hornikusersrmatrix <- function(nrow, ncol, mode=c("logical", "double", "integer", "index"), range=c(-100,+100), naProb=0) { mode <- match.arg(mode) n <- nrow*ncol if (mode == "logical") { X <- sample(c(FALSE, TRUE), size=n, replace=TRUE) } else if (mode == "index") { X <- seq_len(n) mode <- "integer"; } else { X <- runif(n, min=range[1], max=range[2]) } storage.mode(X) <- mode if (naProb > 0) X[sample(n, size=naProb*n)] <- NA dim(X) <- c(nrow, ncol) X } # rmatrix() rmatrices <- function(scale=10, seed=1, ...) { set.seed(seed) data <- list() data[[1]] <- rmatrix(nrow=scale* 1, ncol=scale* 1, ...) data[[2]] <- rmatrix(nrow=scale* 10, ncol=scale* 10, ...) data[[3]] <- rmatrix(nrow=scale*100, ncol=scale* 1, ...) data[[4]] <- t(data[[3]]) data[[5]] <- rmatrix(nrow=scale* 10, ncol=scale*100, ...) data[[6]] <- t(data[[5]]) names(data) <- sapply(data, FUN=function(x) paste(dim(x), collapse="x")) data } # rmatrices() ############################################################################ # HISTORY: # 2014-11-09 # o Added 'index' mode. # 2014-06-02 # o Created. ############################################################################ matrixStats/inst/benchmarking/R/random-vectors.R0000644000175100001440000000163012542546241021514 0ustar hornikusersrvector <- function(n, mode=c("logical", "double", "integer"), range=c(-100,+100), naProb=0) { mode <- match.arg(mode) if (mode == "logical") { X <- sample(c(FALSE, TRUE), size=n, replace=TRUE) } else { x <- runif(n, min=range[1], max=range[2]) } storage.mode(x) <- mode if (naProb > 0) x[sample(n, size=naProb*n)] <- NA x } # rvector() rvectors <- function(scale=10, seed=1, ...) { set.seed(seed) data <- list() data[[1]] <- rvector(n=scale*1e2, ...) data[[2]] <- rvector(n=scale*1e3, ...) data[[3]] <- rvector(n=scale*1e4, ...) data[[4]] <- rvector(n=scale*1e5, ...) data[[5]] <- rvector(n=scale*1e6, ...) names(data) <- sprintf("n=%d", sapply(data, FUN=length)) data } # rvectors() ############################################################################ # HISTORY: # 2014-06-04 # o Created. ############################################################################ matrixStats/inst/benchmarking/colRowDiffs.md.rsp0000644000175100001440000000310512542546241021572 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colDiffs"%> <%@string rowname="rowDiffs"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-30"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + diff() * apply() + diff2() * diff() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colDiffs = colDiffs(X), "apply+diff" = apply(X, MARGIN=2L, FUN=diff), "apply+diff2" = apply(X, MARGIN=2L, FUN=diff2), diff = diff(X), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowDiffs = rowDiffs(X), "apply+diff" = apply(X, MARGIN=1L, FUN=diff), "apply+diff2" = apply(X, MARGIN=1L, FUN=diff2), "diff + t" = diff(t(X)), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-17 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/anyMissing.md.rsp0000644000175100001440000000254412542546241021500 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="anyMissing"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * anyNA() * any() + is.na() as below ```r <%=withCapture({ any_is.na <- function(x) { any(is.na(x)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "anyMissing" = anyMissing(x), "anyNA" = anyNA(x), "any_is.na" = any_is.na(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowCumprods.md.rsp0000644000175100001440000000267312542546241022344 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colCumprods"%> <%@string rowname="rowCumprods"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + cumprod() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode, range=c(-1,1)) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colCumprods = colCumprods(X), "apply+cumprod" = apply(X, MARGIN=2L, FUN=cumprod), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowCumprods = rowCumprods(X), "apply+cumprod" = apply(X, MARGIN=1L, FUN=cumprod), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowTabulates.md.rsp0000644000175100001440000000233612542546241022470 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colTabulates"%> <%@string rowname="rowTabulates"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * ??? ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="integer", range=c(-10,10)) })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colTabulates = colTabulates(X, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowTabulates = rowTabulates(X, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanges.md.rsp0000644000175100001440000000272712542546241021767 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanges"%> <%@string rowname="rowRanges"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + range() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanges = colRanges(X, na.rm=FALSE), "apply+range" = apply(X, MARGIN=2L, FUN=range, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanges = rowRanges(X, na.rm=FALSE), "apply+range" = apply(X, MARGIN=1L, FUN=range, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocVector.md.rsp0000644000175100001440000000415412542546241021633 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocVector"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * vector() + assignment * rep() * matrix() + as.vector() where ```r <%=withCapture({ allocVector_R1 <- function(length, value=NA) { x <- vector(mode=typeof(value), length=length) if (!is.finite(value) || value != 0) x[] <- value x } # allocVector_R1() allocVector_R2 <- function(length, value=NA) { x <- matrix(data=value, nrow=length, ncol=1L) as.vector(x) } # allocVector_R2() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) values <- list(zero=0, one=1, "NA"=NA_real_) if (mode != "double") values <- lapply(values, FUN=function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ n <- length(data[[.dataLabel.]]) str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocVector" = allocVector(length=n, value=value), "rep" = rep(value, times=n), "allocVector_R1" = allocVector_R1(length=n, value=value), "allocVector_R2" = allocVector_R2(length=n, value=value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowMeans.md.rsp0000644000175100001440000000313612542546241021606 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colMeans"%> <%@string rowname="rowMeans"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * .colMeans() and .rowMeans() * apply() + mean() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colMeans = colMeans(X, na.rm=FALSE), .colMeans = .colMeans(X, m=nrow(X), n=ncol(X), na.rm=FALSE), "apply+mean" = apply(X, MARGIN=2L, FUN=mean, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowMeans = rowMeans(X, na.rm=FALSE), .rowMeans = .rowMeans(X, m=nrow(X), n=ncol(X), na.rm=FALSE), "apply+mean" = apply(X, MARGIN=1L, FUN=mean, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/t_tx_OP_y.md.rsp0000644000175100001440000000406512542546241021263 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="t_tx_OP_y"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-26"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * t_tx_OP_y_R() as below ```r <%=withCapture({ t_tx_OP_y_R <- function(x, y, OP, na.rm=FALSE) { x <- t(x) if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } t(ans) } # t_tx_OP_y_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] y <- x[,1L] })%> ``` <% for (OP in c("+", "-", "*", "/")) { %> <% OPTag <- c("+"="add", "-"="sub", "*"="mul", "/"="div")[OP] gc() %> ```r <%=withCapture({ OP stats <- microbenchmark( "t_tx_OP_y" = t_tx_OP_y(x, y, OP=OP, na.rm=FALSE), "t_tx_OP_y_R" = t_tx_OP_y_R(x, y, OP=OP, na.rm=FALSE), unit = "ms" ) gc() })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %> <% } # for (OP ...) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-26 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/madDiff.md.rsp0000644000175100001440000000243412542546241020707 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="madDiff"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-10"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * N/A <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) data <- data[1:4] })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector #### All elements ```r <%=withCapture({ x <- data[[.dataLabel.]] stats <- microbenchmark( "madDiff" = madDiff(x), "mad" = mad(x), "diff" = diff(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-10 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowQuantiles.md.rsp0000644000175100001440000000267612542546241022520 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colQuantiles"%> <%@string rowname="rowQuantiles"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + quantile() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() probs <- seq(from=0, to=1, by=0.25) colStats <- microbenchmark( colQuantiles = colQuantiles(X, probs=probs, na.rm=FALSE), "apply+quantile" = apply(X, MARGIN=2L, FUN=quantile, probs=probs, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowQuantiles = rowQuantiles(X, probs=probs, na.rm=FALSE), "apply+quantile" = apply(X, MARGIN=1L, FUN=quantile, probs=probs, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMean.md.rsp0000644000175100001440000000305412542546241021755 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMean"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-09"%> <%@include file="${header}"%> <% library("stats") weighted.mean.default <- stats:::weighted.mean.default %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * stats::weighted.mean() * stats:::weighted.mean.default() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMean" = weightedMean(x, w=w, na.rm=FALSE), "stats::weighted.mean" = weighted.mean(x, w=w, na.rm=FALSE), "stats:::weighted.mean.default" = weighted.mean.default(x, w=w, na.rm=FALSE), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMeans.md.rsp0000644000175100001440000000267412542546241023275 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMeans"%> <%@string rowname="rowWeightedMeans"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weighted.mean() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMeans = colWeightedMeans(X, w=w, na.rm=FALSE), "apply+weigthed.mean" = apply(X, MARGIN=2L, FUN=weighted.mean, w=w, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMeans = rowWeightedMeans(X, w=w, na.rm=FALSE), "apply+weigthed.mean" = apply(X, MARGIN=1L, FUN=weighted.mean, w=w, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/count.md.rsp0000644000175100001440000000240312542546241020501 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="count"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-12-08"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * sum(x == value) <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode=mode) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] mprintf("%s: %s\n", mode, dataLabel) x <- data[[dataLabel]] value <- 42 gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "count" = count(x, value), "sum(x == value)" = sum(x == value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/weightedMedian.md.rsp0000644000175100001440000000431512542546241022273 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="weightedMedian"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-03"%> <%@include file="${header}"%> <%--- 'ergm' could be hard to install, because it imports 'Rglpk', which requires GLPK library on the system. ---%> <%@string test_ergm="FALSE"%> <% use("limma", how="load") limma_weighted.median <- limma::weighted.median use("cwhmisc", how="load") cwhmisc_w.median <- cwhmisc::w.median use("laeken", how="load") laeken_weightedMedian <- laeken::weightedMedian <%@ifeq test_ergm="TRUE"%> use("ergm", how="load") ergm_wtd.median <- ergm::wtd.median <%@endif%> weightedMedian <- matrixStats::weightedMedian %> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * apply() + limma::weighted.median() * apply() + cwhmisc::w.median() * apply() + laeken::weightedMedian() <%@ifeq test_ergm="TRUE"%> * apply() + ergm::wtd.median() --%> <%@endif%> ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode="double") data <- data[1:3] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] w <- runif(length(x)) gc() stats <- microbenchmark( "weightedMedian" = weightedMedian(x, w=w, ties="mean", na.rm=FALSE), "limma::weighted.median" = limma_weighted.median(x, w=w, na.rm=FALSE), "cwhmisc::w.median" = cwhmisc_w.median(x, w=w), "laeken::weightedMedian" = laeken_weightedMedian(x, w=w), <%@ifeq test_ergm="TRUE"%> "ergm::wtd.median" = ergm_wtd.median(x, w=w), <%@endif%> unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-12-17 o Made 'ergm::wtd.median' optional. 2014-06-03 o Created using benchmark snippet in incl/weightedMedian.Rex. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/binCounts.md.rsp0000644000175100001440000000363712542546241021327 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="binCounts"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-05-25"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * hist() as below ```r <%=withCapture({ hist <- graphics::hist binCounts_hist <- function(x, bx, right=FALSE, ...) { hist(x, breaks=bx, right=right, include.lowest=TRUE, plot=FALSE)$counts } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Non-sorted simulated data ```r <%=withCapture({ set.seed(0xBEEF) nx <- 100e3 # Number of data points xmax <- 0.01*nx x <- runif(nx, min=0, max=xmax) storage.mode(x) <- mode str(x) # Uniformely distributed bins nb <- 10e3 # Number of bins bx <- seq(from=0, to=xmax, length.out=nb+1L) bx <- c(-1, bx, xmax+1) })%> ``` ### Results <% benchmark <- function() { %> <% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %> <% mprintf("%s: %s\n", mode, dataLabel) %> ```r <%=withCapture({ gc() stats <- microbenchmark( binCounts = binCounts(x, bx=bx), hist = binCounts_hist(x, bx=bx), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel)) %> <% # Sanity checks n0 <- binCounts_hist(x, bx=bx) n1 <- binCounts(x, bx=bx) stopifnot(identical(n1, n0)) n1r <- rev(binCounts(-x, bx=rev(-bx), right=TRUE)) stopifnot(identical(n1r, n1)) %> <% } # benchmark() %> <% benchmark() %> ### Sorted simulated data ```r <%=withCapture({ x <- sort(x) })%> ``` <% benchmark() %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-02 o Restructured. 2014-05-25 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowLogSumExps.md.rsp0000644000175100001440000000332112542546241022605 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colLogSumExps"%> <%@string rowname="rowLogSumExps"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + matrixStats::logSumExp() * apply() + logSumExp0() where ```r <%=withCapture({ logSumExp0 <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp0() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colLogSumExps = colLogSumExps(X, na.rm=FALSE), "apply+logSumExp" = apply(X, MARGIN=2L, FUN=logSumExp, na.rm=FALSE), "apply+logSumExp0" = apply(X, MARGIN=2L, FUN=logSumExp0, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowLogSumExps = rowLogSumExps(X, na.rm=FALSE), "apply+logSumExp" = apply(X, MARGIN=1L, FUN=logSumExp, na.rm=FALSE), "apply+logSumExp0" = apply(X, MARGIN=1L, FUN=logSumExp0, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/logSumExp.md.rsp0000644000175100001440000000247212542546241021302 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="logSumExp"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-01"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * logSumExp_R() where ```r <%=withCapture({ logSumExp_R <- function(lx, ...) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp_R() })%> ``` ## Data ```r <%=withCapture({ <%@include file="R/random-vectors.R"%> data <- rvectors(mode="double") data <- data[1:4] })%> ``` ## Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] message(dataLabel) x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> vector ```r <%=withCapture({ x <- data[[.dataLabel.]] gc() stats <- microbenchmark( "logSumExp" = logSumExp(x), "logSumExp_R" = logSumExp_R(x), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=dataLabel) %> <% } # for (ii ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowRanks.md.rsp0000644000175100001440000000275612542546241021630 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colRanks"%> <%@string rowname="rowRanks"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + rank() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colRanks = colRanks(X, na.rm=FALSE), "apply+rank" = apply(X, MARGIN=2L, FUN=rank, na.last="keep", ties.method="max"), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowRanks = rowRanks(X, na.rm=FALSE), "apply+rank" = apply(X, MARGIN=1L, FUN=rank, na.last="keep", ties.method="max"), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowWeightedMedians.md.rsp0000644000175100001440000000271312542546241023604 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colWeightedMedians"%> <%@string rowname="rowWeightedMedians"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + weightedMedian() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] w <- runif(nrow(X)) gc() colStats <- microbenchmark( colWeightedMedians = colWeightedMedians(X, w=w, na.rm=FALSE), "apply+weigthedMedian" = apply(X, MARGIN=2L, FUN=weightedMedian, w=w, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowWeightedMedians = rowWeightedMedians(X, w=w, na.rm=FALSE), "apply+weigthedMedian" = apply(X, MARGIN=1L, FUN=weightedMedian, w=w, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowVars.md.rsp0000644000175100001440000000536412542546241021463 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colVars"%> <%@string rowname="rowVars"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2015-01-06"%> <%@include file="${header}"%> <%@string test_genefilter="TRUE"%> <%@ifeq test_genefilter="TRUE"%> <% use("genefilter", how="load") genefilter_rowVars <- genefilter::rowVars genefilter_colVars <- function(x, ...) genefilter_rowVars(t(x), ...) %> <%@endif%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + var() * colVarColMeans() and rowVarColMeans() <%@ifeq test_genefilter="TRUE"%> * genefilter::rowVars(t(.)) and genefilter::rowVars() <%@endif%> where ```r <%=withCapture({ colVarColMeans <- function(x, na.rm=TRUE) { if (na.rm) { n <- colSums(!is.na(x)) } else { n <- nrow(x) } var <- colMeans(x*x, na.rm=na.rm) - (colMeans(x, na.rm=na.rm))^2 var * n/(n-1) } })%> ``` and ```r <%=withCapture({ rowVarRowMeans <- function(x, na.rm=TRUE) { if (na.rm) { n <- rowSums(!is.na(x)) } else { n <- ncol(x) } mu <- rowMeans(x, na.rm=na.rm) var <- rowMeans(x*x, na.rm=na.rm) - mu^2 var * (n/(n-1)) } })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colVars = colVars(X, na.rm=FALSE), colVarColMeans = colVarColMeans(X, na.rm=FALSE), "apply+var" = apply(X, MARGIN=2L, FUN=var, na.rm=FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars(t(.))" = genefilter_colVars(X, na.rm=FALSE), <%@endif%> unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowVars = rowVars(X, na.rm=FALSE), rowVarRowMeans = rowVarRowMeans(X, na.rm=FALSE), "apply+var" = apply(X, MARGIN=1L, FUN=var, na.rm=FALSE), <%@ifeq test_genefilter="TRUE"%> "genefilter::rowVars" = genefilter_rowVars(X, na.rm=FALSE), <%@endif%> unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2015-01-06 o Now benchmarking 'genefilter' functions too. 2014-11-23 o Now benchmarking rowVars() instead of rowSds() since the latter uses the former. 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowSums.md.rsp0000644000175100001440000000311612542546241021470 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colSums"%> <%@string rowname="rowSums"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-06-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * .colSums() and .rowSums() * apply() + sum() <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) })%> ``` ### Results <% for (dataLabel in names(data)) { %> <% mprintf("%s: %s\n", mode, dataLabel) %> #### <%=dataLabel%> <%=mode%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colSums = colSums(X, na.rm=FALSE), .colSums = .colSums(X, m=nrow(X), n=ncol(X), na.rm=FALSE), "apply+sum" = apply(X, MARGIN=2L, FUN=sum, na.rm=FALSE), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowSums = rowSums(X, na.rm=FALSE), .rowSums = .rowSums(X, m=nrow(X), n=ncol(X), na.rm=FALSE), "apply+sum" = apply(X, MARGIN=1L, FUN=sum, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %> <% } # for (dataLabel ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowAnys.md.rsp0000644000175100001440000000255412542546241021460 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colAnys"%> <%@string rowname="rowAnys"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-18"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * apply() + any() * colSums() > 0 or rowSums() > 0 ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="logical") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] gc() colStats <- microbenchmark( colAnys = colAnys(X), "apply+any" = apply(X, MARGIN=2L, FUN=any), "colSums > 0" = (colSums(X) > 0L), unit = "ms" ) X <- t(X) gc() rowStats <- microbenchmark( rowAnys = rowAnys(X), "apply+any" = apply(X, MARGIN=1L, FUN=any), "rowSums > 0" = (rowSums(X) > 0L), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-18 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/allocMatrix.md.rsp0000644000175100001440000000402612542546241021633 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string fcnname="allocMatrix"%> <% fcnname <- "<%@string name="fcnname"%>" %> <%@meta title="${fcnname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-09"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=fcnname%>() against alternative methods. ## Alternative methods * matrix() * matrix() special trick for NA where ```r <%=withCapture({ allocMatrix_R <- function(nrow, ncol, value=NA) { if (is.na(value) && !is.nan(value)) { matrix(data=value[c()], nrow=nrow, ncol=ncol) } else { matrix(data=value, nrow=nrow, ncol=ncol) } } # allocMatrix_R() })%> ``` <% for (mode in c("integer", "double")) { %> ## Data type "<%=mode%>" ### Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode=mode) values <- list(zero=0, one=1, "NA"=NA_real_) if (mode != "double") values <- lapply(values, FUN=function(x) { storage.mode(x) <- mode; x }) })%> ``` ### Results <% for (ii in seq_along(data)) { %> <% dataLabel <- names(data)[ii] x <- data[[dataLabel]] gc() %> ### <%=dataLabel%> matrix <% for (value in values) { %> <% valueLabel <- as.character(value) mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel) %> ```r <%=withCapture({ dim <- dim(data[[.dataLabel.]]) nrow <- dim[1L] ncol <- dim[2L] str(value) })%> ``` <% gc() %> ```r <%=withCapture({ stats <- microbenchmark( "allocMatrix" = allocMatrix(nrow=nrow, ncol=ncol, value=value), "matrix" = matrix(data=value, nrow=nrow, ncol=ncol), "allocMatrix_R" = allocMatrix_R(nrow=nrow, ncol=ncol, value=value), unit = "ms" ) })%> ``` <% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %> <% } # for (value in values) %> <% } # for (ii ...) %> <% } # for (mode ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-11-01 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/benchmarking/colRowProds.md.rsp0000644000175100001440000000350712542546241021634 0ustar hornikusers<%@include file="includes/setup.md.rsp"%> <%@string colname="colProds"%> <%@string rowname="rowProds"%> <%@meta title="${colname}() and ${rowname}() benchmarks"%> <%@meta author="Henrik Bengtsson"%> <%@meta date="2014-11-15"%> <%@include file="${header}"%> # <%@meta name="title"%> This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods. ## Alternative methods * <%=colname%>()/<%=rowname%>() using method="expSumLog" * apply() + prod() * apply() + product() ## Data ```r <%=withCapture({ <%@include file="R/random-matrices.R"%> data <- rmatrices(mode="double") })%> ``` ## Results <% for (dataLabel in names(data)) { %> <% message(dataLabel) %> ### <%=dataLabel%> matrix ```r <%=withCapture({ X <- data[[.dataLabel.]] })%> <% gc() %> <%=withCapture({ colStats <- microbenchmark( "colProds w/ direct" = colProds(X, method="direct", na.rm=FALSE), "colProds w/ expSumLog" = colProds(X, method="expSumLog", na.rm=FALSE), "apply+prod" = apply(X, MARGIN=2L, FUN=prod, na.rm=FALSE), "apply+product" = apply(X, MARGIN=2L, FUN=product, na.rm=FALSE), unit = "ms" ) })%> <%=withCapture({ X <- t(X) })%> <% gc() %> <%=withCapture({ rowStats <- microbenchmark( "rowProds w/ direct" = rowProds(X, method="direct", na.rm=FALSE), "rowProds w/ expSumLog" = rowProds(X, method="expSumLog", na.rm=FALSE), "apply+prod" = apply(X, MARGIN=1L, FUN=prod, na.rm=FALSE), "apply+product" = apply(X, MARGIN=1L, FUN=product, na.rm=FALSE), unit = "ms" ) })%> ``` <% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %> <% } # for (dataLabel ...) %> <%@include file="${footer}"%> <%--------------------------------------------------------------------------- HISTORY: 2014-06-09 o Created. ---------------------------------------------------------------------------%> matrixStats/inst/doc/0000755000175100001440000000000012542546263014346 5ustar hornikusersmatrixStats/inst/doc/matrixStats-methods.md.rsp0000644000175100001440000002060612542546263021463 0ustar hornikusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all element have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](http://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/inst/doc/matrixStats-methods.html0000644000175100001440000002257312542546263021231 0ustar hornikusers matrixStats: Summary of functions

matrixStats: Summary of functions

Henrik Bengtsson on June 23, 2015

Location and scale estimators

Estimator Functions Example
Weighted sample mean weightedMean(), colWeightedMeans(), rowWeightedMeans() weightedMean(x, w); rowWeightedMeans(x, w)
Median median(), colMedians(), rowMedians() median(x); rowMedians(x)
Weighted median weightedMedian(), colWeightedMedians(), rowWeightedMedians() weightedMedian(x, w); rowWeightedMedians(x, w)
Sample variance var(), colVars(), rowVars() var(x); rowVars(x)
Weighted sample variance weightedVar(), colWeightedVars(), rowWeightedVars() weightedVar(x, w), rowWeightedVars(x, w)
Sample variance by n-order differences varDiff(), colVarDiffs(), rowVarDiffs() varDiff(x); rowVarDiffs(x)
Sample standard deviation sd(), colSds(), rowSds() sd(x); rowSds(x)
Weighted sample deviation weightedSd(), colWeightedSds(), rowWeightedSds() weightedSd(x, w), rowWeightedSds(x, w)
Sample standard deviation by n-order differences sdDiff(), colSdDiffs(), rowSdDiffs() sdDiff(x); rowSdDiffs(x)
Median absolute deviation (MAD) mad(), colMads(), rowMads() mad(x); rowMads(x)
Weighted median absolute deviation (MAD) weightedMad(), colWeightedMads(), rowWeightedMads() weightedMad(x, w), rowWeightedMads(x, w)
Median absolute deviation (MAD) by n-order differences madDiff(), colMadDiffs(), rowMadDiffs() madDiff(x); rowMadDiffs()
Quantile quantile(), colQuantiles(), rowQuantiles() quantile(x, probs); rowQuantiles(x, probs)
Interquartile range (IQR) iqr(), colIQRs(), rowIQRs() iqr(x); rowIQRs(x)
Interquartile range (IQR) by n-order differences iqrDiff(), colIQRDiffs(), rowIQRDiffs() iqrDiff(x); rowIQRDiffs(x)
Range range(), colRanges(), rowRanges() range(x); rowRanges(x)
Minimum min(), colMins(), rowMins() min(x); rowMins(x)
Maximum max(), colMaxs(), rowMaxs() max(x); rowMaxs(x)

Testing for and counting values

Operator Functions Example
Are there any missing values? anyMissing(), colAnyMissings(), rowAnyMissings() anyMissing(x); rowAnyMissings(x)
Does TRUE exists? any(), colAnys(), rowAnys() any(x); rowAnys(x)
Are all values TRUE? all(), colAlls(), rowAlls() all(x); rowAlls(x)
Does value exists? anyValue(), colAnys(), rowAnys() anyValue(x, value); rowAnys(x, value)
Do all element have a given value? allValue(), colAlls(), rowAlls() allValue(x, value); rowAlls(x, value)
Number of occurrences of a value? count(), colCounts(), rowCounts() count(x, value); rowCounts(x, value)

Cumulative functions

Operator Functions Example
Cumulative sum cumsum(), colCumsums(), rowCumsums() cumsum(x); rowCumsums(x)
Cumulative product cumprod(), colCumprods(), rowCumprods() cumprod(x); rowCumprods(x)
Cumulative minimum cummin(), colCummins(), rowCummins() cummin(x); rowCummins(x)
Cumulative maximum cummax(), colCummaxs(), rowCummaxs() cummax(x); rowCummaxs(x)

Binning

Estimator Functions Example
Counts in disjoint bins binCounts() binCounts(x, bx)
Sample means (and counts) in disjoint bins binMeans() binMeans(y, x, bx)

Miscellaneous

Operation Functions Example
Lagged differences diff2(), colDiffs(), rowDiffs() diff2(x), rowDiffs(x)

matrixStats v0.14.2. Release: CRAN, Development: GitHub.

matrixStats/tests/0000755000175100001440000000000012542546242013763 5ustar hornikusersmatrixStats/tests/signTabulate.R0000644000175100001440000000200312542546242016523 0ustar hornikuserslibrary("matrixStats") signTabulate0 <- function(x, ...) { nneg <- sum(x < 0, na.rm=TRUE) nzero <- sum(x == 0, na.rm=TRUE) npos <- sum(x > 0, na.rm=TRUE) nna <- sum(is.na(x)) nneginf <- sum(is.infinite(x) & x < 0, na.rm=TRUE) nposinf <- sum(is.infinite(x) & x > 0, na.rm=TRUE) res <- c(nneg, nzero, npos, nna, nneginf, nposinf) res <- as.double(res) names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf") if (is.integer(x)) res <- res[1:4] res } # signTabulate0() # Simulate data set.seed(0xBEEF) n <- 1e3 x <- runif(n) x[sample(n, size=0.1*n)] <- 0 x[sample(n, size=0.1*n)] <- NA_real_ x[sample(n, size=0.1*n)] <- -Inf x[sample(n, size=0.1*n)] <- +Inf # Doubles message("Doubles:") counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) # Integers message("Integers:") x <- suppressWarnings(as.integer(x)) counts0 <- signTabulate0(x) print(counts0) counts1 <- signTabulate(x) print(counts1) stopifnot(identical(counts1, counts0)) matrixStats/tests/diff2.R0000644000175100001440000000105012542546242015074 0ustar hornikuserslibrary("matrixStats") set.seed(0x42) for (mode in c("integer", "double")) { x <- rnorm(10, sd=5) storage.mode(x) <- mode str(x) for (hasNA in c(FALSE, TRUE)) { if (hasNA) { x[sample(1:10, size=3)] <- NA } for (l in 1:3) { for (d in 1:4) { cat(sprintf("%s: NAs=%s, lag=%d, differences=%d\n", mode, hasNA, l, d)) y0 <- diff(x, lag=l, differences=d) str(y0) y1 <- diff2(x, lag=l, differences=d) str(y1) stopifnot(identical(y1, y0)) } } } # for (hasNA ...) } matrixStats/tests/rowRanks.R0000644000175100001440000000263212542546242015717 0ustar hornikuserslibrary("matrixStats") set.seed(1) cat("Consistency checks:\n") for (kk in 1:4) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape dim <- sample(40:80, size=2L) n <- prod(dim) x <- rnorm(n, sd=10) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3,0)) { cat("Adding NAs\n") nna <- sample(n, size=1L) x[sample(length(x), size=nna)] <- NA_real_ } # Integer or double? if ((kk %% 4) %in% c(2,0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } str(x) for (ties in c("max", "min", "average")) { cat(sprintf("ties.method=%s\n", ties)) # rowRanks(): y1 <- matrixStats::rowRanks(x, ties.method=ties) y2 <- t(apply(x, MARGIN=1L, FUN=rank, na.last="keep", ties.method=ties)) stopifnot(identical(y1,y2)) y3 <- matrixStats::colRanks(t(x), ties.method=ties) stopifnot(identical(y1,y3)) # colRanks(): y1 <- matrixStats::colRanks(x, ties.method=ties) y2 <- t(apply(x, MARGIN=2L, FUN=rank, na.last="keep", ties.method=ties)) stopifnot(identical(y1,y2)) y3 <- matrixStats::rowRanks(t(x), ties.method=ties) stopifnot(identical(y1,y3)) } } # for (kk ...) ## Exception handling x <- matrix(1:12, nrow=3L) y <- try(rowRanks(x, ties.method="unknown"), silent=TRUE) stopifnot(inherits(y, "try-error")) y <- try(colRanks(x, ties.method="unknown"), silent=TRUE) stopifnot(inherits(y, "try-error")) matrixStats/tests/rowCollapse.R0000644000175100001440000000124512542546242016402 0ustar hornikuserslibrary("matrixStats") x <- matrix(1:27, ncol=3) idxs <- 1L y <- rowCollapse(x, idxs) stopifnot(identical(y, x[,idxs])) y2 <- colCollapse(t(x), idxs) stopifnot(identical(y2, y)) idxs <- 2L y <- rowCollapse(x, idxs) stopifnot(identical(y, x[,idxs])) y2 <- colCollapse(t(x), idxs) stopifnot(identical(y2, y)) idxs <- c(1,1,1,1,1,3,3,3,3) y <- rowCollapse(x, idxs) stopifnot(identical(y, c(x[1:5,1], x[6:9,3]))) y2 <- colCollapse(t(x), idxs) stopifnot(identical(y2, y)) idxs <- 1:3 y <- rowCollapse(x, idxs) print(y) yT <- c(x[1,1],x[2,2],x[3,3],x[4,1],x[5,2],x[6,3],x[7,1],x[8,2],x[9,3]) stopifnot(identical(y, yT)) y2 <- colCollapse(t(x), idxs) stopifnot(identical(y2, y)) matrixStats/tests/weightedMean.R0000644000175100001440000000425412542546242016514 0ustar hornikuserslibrary("matrixStats") for (mode in c("integer", "double")) { cat("mode: ", mode, "", sep="") n <- 2L x <- runif(n, min=-5, max=5) storage.mode(x) <- mode str(x) cat("All weights are 1\n") w <- rep(1, times=n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times=n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) str(list(m0=m0, m1=m1)) stopifnot(identical(m1,m0)) } # for (mode ...) message("*** Testing for missing values") # NA tests xs <- list(A=c(1, 2, 3), B=c(NA, 2, 3), C=c(NA, 2, 3)) ws <- list(A=c(1, 1, 1), B=c(NA, 1, 1), C=c(1, NA, 1)) ## NOTE: The (B,B) case with na.rm=TRUE is interesting because ## even if NAs in 'w' are not dropped by na.rm=TRUE, this one ## is because 'x' is dropped and therefore that first element ## is skipped in the computation. It basically does ## keep <- !is.na(x); x <- x[keep]; w <- w[keep] ## without looking at 'w'. for (x in xs) { for (mode in c("integer", "double")) { storage.mode(x) <- mode for (w in ws) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode: %s, na.rm=%s\n", mode, na.rm)) str(list(x=x, w=w)) m0 <- weighted.mean(x, w, na.rm=na.rm) m1 <- weightedMean(x, w, na.rm=na.rm) str(list(m0=m0, m1=m1)) stopifnot(all.equal(m1,m0)) } } } } matrixStats/tests/rowProds.R0000644000175100001440000000425212542546242015730 0ustar hornikuserslibrary("matrixStats") rowProds_R <- function(x, na.rm=FALSE) { apply(x, MARGIN=1L, FUN=prod, na.rm=na.rm) } colProds_R <- function(x, na.rm=FALSE) { apply(x, MARGIN=2L, FUN=prod, na.rm=na.rm) } all.equal.na <- function(target, current, ...) { # Computations involving NaN may return NaN or NA, cf. ?is.nan current[is.nan(current)] <- NA_real_ target[is.nan(target)] <- NA_real_ all.equal(target, current, ...) } for (mode in c("integer", "double")) { # Missing values x <- matrix(c(1,NA,NaN,1, 1,0,1,0), nrow=4, ncol=2) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) y0 <- rowProds_R(x, na.rm=TRUE) print(y0) y1 <- rowProds(x, na.rm=TRUE) print(y1) y2 <- colProds(t(x), na.rm=TRUE) print(y2) stopifnot(all.equal(y1, y0)) stopifnot(all.equal(y1, x[,2])) stopifnot(all.equal(y2, y1)) # Missing values y0 <- rowProds_R(x, na.rm=FALSE) print(y0) y1 <- rowProds(x, na.rm=FALSE) print(y1) y2 <- colProds(t(x), na.rm=FALSE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal(y2, y1)) y3 <- x[,1]*x[,2] print(y3) stopifnot(all.equal.na(y1, y3)) # "Empty" rows y0 <- rowProds_R(x[integer(0),,drop=FALSE], na.rm=FALSE) print(y0) y1 <- rowProds(x[integer(0),,drop=FALSE], na.rm=FALSE) print(y1) y2 <- colProds(t(x[integer(0),,drop=FALSE]), na.rm=FALSE) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal(y2, y1)) stopifnot(length(y1) == 0L) # Using product() y1 <- rowProds(x, method="expSumLog", na.rm=FALSE) print(y1) y2 <- colProds(t(x), method="expSumLog", na.rm=FALSE) print(y2) stopifnot(all.equal(y2, y1)) } # for (mode ...) # Bug report 2012-06-25 x <- matrix(c(1,1,1,1, 1,0,1,0), nrow=4, ncol=2) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, x[,1]*x[,2])) stopifnot(all.equal.na(y2, y1)) # Bug report 2014-03-25 ("all rows contains a zero") x <- matrix(c(0,1,1,0), nrow=2, ncol=2) y0 <- rowProds_R(x) print(y0) y1 <- rowProds(x) print(y1) y2 <- colProds(t(x)) print(y2) stopifnot(all.equal.na(y1, y0)) stopifnot(all.equal.na(y1, c(0,0))) stopifnot(all.equal.na(y2, y1)) matrixStats/tests/x_OP_y.R0000644000175100001440000000713012542546242015304 0ustar hornikuserslibrary("matrixStats") x_OP_y_R <- function(x, y, OP, na.rm=FALSE) { if (na.rm) { xnok <- is.na(x) ynok <- is.na(y) anok <- xnok & ynok unit <- switch(OP, "+" = 0, "-" = NA_real_, "*" = 1, "/" = NA_real_, stop("Unknown 'OP' operator: ", OP) ) x[xnok] <- unit y[ynok] <- unit } ans <- switch(OP, "+" = x + y, "-" = x - y, "*" = x * y, "/" = x / y, stop("Unknown 'OP' operator: ", OP) ) if (na.rm) { ans[anok] <- NA_real_ } ans } # x_OP_y_R() t_tx_OP_y_R <- function(x, y, OP, na.rm=FALSE) { t(x_OP_y_R(x=t(x), y=y, OP=OP, na.rm=na.rm)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:16, nrow=4, ncol=4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm)) a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm) a1 <- x_OP_y(x,y, OP, na.rm=na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm) b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values in x, y, or both. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (which in c("x", "y", "both")) { x <- matrix(1:16, nrow=4, ncol=4) y <- 1:nrow(x) storage.mode(y) <- storage.mode(x) if (which == "x") { x[3:6] <- NA_real_ } else if (which == "y") { y[c(1,3)] <- NA_real_ } else if (which == "both") { x[3:6] <- NA_real_ y[c(1,3)] <- NA_real_ } for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm)) a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm) a1 <- x_OP_y(x,y, OP, na.rm=na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm) b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Length differences # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:8, nrow=2, ncol=4) y <- 1:ncol(x) storage.mode(y) <- storage.mode(x) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm)) a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm) a1 <- x_OP_y(x,y, OP, na.rm=na.rm) str(a1) stopifnot(all.equal(a1, a0)) b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm) b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm) str(b1) stopifnot(all.equal(b1, b0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - xs <- list( A=matrix(1:2, nrow=2, ncol=2), B=matrix(NA_integer_, nrow=2, ncol=2) ) ys <- list( A=1L, B=NA_integer_ ) for (x in xs) { for (y in ys) { for (mode in c("integer", "double")) { storage.mode(x) <- mode storage.mode(y) <- mode str(list(x=x, y=y)) for (OP in c("+", "-", "*", "/")) { for (na.rm in c(FALSE, TRUE)) { cat(sprintf("mode='%s', OP='%s', na.rm=%s\n", mode, OP, na.rm)) suppressWarnings({ z0 <- x_OP_y_R(x, y, OP, na.rm=na.rm) z <- x_OP_y(x, y, OP, na.rm=na.rm) }) str(z) stopifnot(all.equal(z, z0)) } } } # for (mode ...) } # for (y ...) } # for (x ...) matrixStats/tests/rowRanges.R0000644000175100001440000001134012542546242016054 0ustar hornikuserslibrary("matrixStats") rowMins_R <- function(x, ...) { suppressWarnings({ apply(x, MARGIN=1L, FUN=min, ...) }) } # rowMins_R() rowMaxs_R <- function(x, ...) { suppressWarnings({ apply(x, MARGIN=1L, FUN=max, ...) }) } # rowMaxs_R() rowRanges_R <- function(x, ...) { suppressWarnings({ t(apply(x, MARGIN=1L, FUN=range, ...)) }) } # rowRanges_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100+0.1, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } storage.mode(x) <- mode str(x) # Row/column extremes for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") # Ranges cat("range:\n") r0 <- rowRanges_R(x, na.rm=na.rm) r1 <- rowRanges(x, na.rm=na.rm) r2 <- colRanges(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) # Min cat("min:\n") m0 <- rowMins_R(x, na.rm=na.rm) m1 <- rowMins(x, na.rm=na.rm) m2 <- colMins(t(x), na.rm=na.rm) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) # Max cat("max:\n") m0 <- rowMaxs_R(x, na.rm=na.rm) m1 <- rowMaxs(x, na.rm=na.rm) m2 <- colMaxs(t(x), na.rm=na.rm) stopifnot(all.equal(m1, m2)) stopifnot(all.equal(m1, m0)) } } # for (addNA ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") x <- matrix(NA_real_, nrow=20, ncol=5) storage.mode(x) <- mode str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowRanges_R(x, na.rm=na.rm) r1 <- rowRanges(x, na.rm=na.rm) r2 <- colRanges(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) } } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Nx0 matrix x <- matrix(double(0L), nrow=5L, ncol=0L) r0 <- rowRanges_R(x) #r1 <- rowRanges(x) #rT <- matrix(c(Inf,-Inf), nrow=nrow(x), ncol=2L, byrow=TRUE) #stopifnot(all.equal(r1,rT)) # 0xN matrix x <- t(x) #r1 <- colRanges(x) #stopifnot(all.equal(r1,rT)) # Nx1 matrix x <- matrix(1:5, nrow=5L, ncol=1L) r1 <- rowRanges(x) rT <- matrix(1:5, nrow=nrow(x), ncol=2L, byrow=FALSE) stopifnot(all.equal(r1,rT)) # 1xN matrix x <- t(x) r1 <- colRanges(x) stopifnot(all.equal(r1,rT)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Additional tests with NA_integer_, NA_real, NaN, -Inf, +Inf # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1:12, nrow=4, ncol=3) naList <- list( "integer" = matrix(1:12, nrow=4, ncol=3), "integer w/ NA" = matrix(NA_integer_, nrow=4, ncol=3), "real" = matrix(as.double(1:12), nrow=4, ncol=3), "real w/ NA" = matrix(NA_real_, nrow=4, ncol=3) ) na <- naList[["real"]] na[2,2] <- NA naList[["real + NA cell"]] <- na na <- naList[["real"]] na[2,] <- NA naList[["real + NA row"]] <- na na <- naList[["real"]] na[2,] <- NaN naList[["real + NaN row"]] <- na na <- naList[["real"]] na[2,2] <- Inf naList[["real + Inf cell"]] <- na na <- naList[["real"]] na[2,] <- Inf naList[["real + Inf row"]] <- na na <- naList[["real"]] na[2,2] <- NaN naList[["real + NaN cell"]] <- na na <- naList[["real w/ NA"]] na[2,2] <- NaN naList[["real w/ NA + NaN cell"]] <- na na <- naList[["real w/ NA"]] na[2,] <- NaN naList[["real w/ NA + NaN row"]] <- na for (na.rm in c(FALSE, TRUE)) { for (name in names(naList)) { na <- naList[[name]] cat(sprintf("%s (%s) w/ na.rm=%s:\n", name, typeof(na), na.rm)) print(na) cat(" min:\n") y0 <- rowMins_R(na, na.rm=na.rm) str(y0) y1 <- rowMins(na, na.rm=na.rm) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMins(t(na), na.rm=na.rm) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" max:\n") y0 <- rowMaxs_R(na, na.rm=na.rm) str(y0) y1 <- rowMaxs(na, na.rm=na.rm) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colMaxs(t(na), na.rm=na.rm) str(y1c) stopifnot(all.equal(y1c, y1)) cat(" range:\n") y0 <- rowRanges_R(na, na.rm=na.rm) str(y0) y1 <- rowRanges(na, na.rm=na.rm) str(y1) stopifnot(all.equal(y1, y0)) y1c <- colRanges(t(na), na.rm=na.rm) str(y1c) stopifnot(all.equal(y1c, y1)) } # for (name ...) } # for (na.rm ...) matrixStats/tests/allocArray.R0000644000175100001440000000103312542546242016174 0ustar hornikuserslibrary("matrixStats") allocArray_R <- function(nrow, ncol, value=NA) { array(data=value, dim=dim) } # allocArray_R() values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) dim <- c(5L, 10L, 4L) for (value in values) { X0 <- allocArray_R(dim, value=value) X <- allocArray(dim, value=value) str(list(dim=dim, value=value, X=X, X0=X0)) stopifnot(identical(X,X0)) } matrixStats/tests/count.R0000644000175100001440000000474712542546242015252 0ustar hornikuserslibrary("matrixStats") count_R <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm=na.rm) } as.integer(counts) } # count_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- runif(20*5, min=-3, max=3) x[sample.int(length(x), size=7)] <- 0 storage.mode(x) <- mode for (na.rm in c(FALSE, TRUE)) { # Count zeros n0 <- count_R(x, value=0, na.rm=na.rm) n1 <- count(x, value=0, na.rm=na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value=0, na.rm=na.rm) any <- anyValue(x, value=0, na.rm=na.rm) # Count NAs n0 <- count_R(x, value=NA, na.rm=na.rm) n1 <- count(x, value=NA, na.rm=na.rm) stopifnot(identical(n1, n0)) all <- allValue(x, value=NA, na.rm=na.rm) any <- anyValue(x, value=NA, na.rm=na.rm) if (mode == "integer") { ux <- unique(as.vector(x)) n0 <- n1 <- integer(length(x)) for (value in ux) { n0 <- n0 + count_R(x, value=value, na.rm=na.rm) n1 <- n1 + count(x, value=value, na.rm=na.rm) stopifnot(identical(n1, n0)) } stopifnot(all(n0 == ncol(x))) } # if (mode == "integer") } # for (na.rm ...) } # for (mode ...) # All NAs naList <- list(NA_integer_, NA_real_, NaN) for (naValue in naList) { x <- rep(naValue, times=100L) for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm=na.rm) n1 <- count(x, na.rm=na.rm) stopifnot(identical(n1, n0)) # Count NAs n0 <- count_R(x, value=NA, na.rm=na.rm) n1 <- count(x, value=NA, na.rm=na.rm) stopifnot(identical(n1, n0)) any <- anyValue(x, value=NA, na.rm=na.rm) all <- allValue(x, value=NA, na.rm=na.rm) stopifnot(any) stopifnot(all) } } # for (naValue ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- logical(length=100L) x[13:17] <- TRUE # Row/column counts for (na.rm in c(FALSE, TRUE)) { n0 <- count_R(x, na.rm=na.rm) n1 <- count(x, na.rm=na.rm) stopifnot(identical(n1, n0)) nT <- count(x, value=TRUE, na.rm=na.rm) nF <- count(x, value=FALSE, na.rm=na.rm) stopifnot(nT + nF == ncol(x)) # Count NAs n0 <- count_R(x, value=NA, na.rm=na.rm) n1 <- count(x, value=NA, na.rm=na.rm) stopifnot(identical(n1, n0)) } matrixStats/tests/sumOver.R0000644000175100001440000001061012542546242015544 0ustar hornikuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) sumOver_R <- function(x, na.rm=FALSE, idxs=NULL) { if (is.null(idxs)) { sum(x, na.rm=na.rm) } else { sum(x[idxs], na.rm=na.rm) } } # sumOver_R() cat("Consistency checks:\n") K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 4 else 20 for (kk in seq_len(K)) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape n <- sample(1e3, size=1L) x <- rnorm(n, sd=100) # Add NAs? if ((kk %% 4) %in% c(3,0)) { cat("Adding NAs\n") nna <- sample(n, size=1L) naValues <- c(NA_real_, NaN) x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE) } # Integer or double? if ((kk %% 4) %in% c(2,0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE,FALSE), size=1L) # Sum over all y0 <- sumOver_R(x, na.rm=na.rm) y1 <- sumOver(x, na.rm=na.rm) stopifnot(all.equal(y1,y0)) # Sum over subset nidxs <- sample(n, size=1L) idxs <- sample(n, size=nidxs) y0 <- sumOver_R(x, na.rm=na.rm, idxs=idxs) y1 <- sumOver(x, na.rm=na.rm, idxs=idxs) stopifnot(all.equal(y1,y0)) } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (n in 0:2) { for (na.rm in c(FALSE, TRUE)) { x <- rep(NA_real_, times=n) y0 <- sum(x, na.rm=na.rm) y <- sumOver(x, na.rm=na.rm) stopifnot(all.equal(y, y0)) x <- rep(NA_integer_, times=n) y0 <- sum(x, na.rm=na.rm) y <- sumOver(x, na.rm=na.rm) stopifnot(all.equal(y, y0)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of zero elements # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- integer(0) s1 <- sum(x) s2 <- sumOver(x) stopifnot(identical(s1, s2)) x <- 1:10 idxs <- integer(0) s1 <- sum(x[idxs]) s2 <- sumOver(x, idxs=idxs) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times=10L) s1 <- sum(x, na.rm=TRUE) s2 <- sumOver(x, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times=10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm=TRUE) s2 <- sumOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- double(0) s1 <- sum(x) s2 <- sumOver(x) stopifnot(identical(s1, s2)) x <- as.double(1:10) idxs <- integer(0) s1 <- sum(x[idxs]) s2 <- sumOver(x, idxs=idxs) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times=10L) s1 <- sum(x, na.rm=TRUE) s2 <- sumOver(x, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times=10L) idxs <- 1:5 s1 <- sum(x[idxs], na.rm=TRUE) s2 <- sumOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large integers # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) # Total gives integer overflow s1 <- sum(x[1:2]) # NA_integer_ s2 <- sumOver(x[1:2]) # NA_integer_ stopifnot(identical(s1, s2)) # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sumOver(as.numeric(x[1:2])) # 2147483648 s3 <- sumOver(x[1:2], mode="double") # 2147483648 stopifnot(identical(s1, s2)) stopifnot(identical(s1, s3)) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sumOver(x) # 1L stopifnot(identical(s1, s2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing of large doubles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Double overflow x <- rep(.Machine$double.xmax, times=2L) y0 <- sum(x) print(y0) y <- sumOver(x) print(y) stopifnot(is.infinite(y) && y > 0) stopifnot(identical(y, y0)) x <- rep(-.Machine$double.xmax, times=2L) y0 <- sum(x) print(y0) y <- sumOver(x) print(y) stopifnot(is.infinite(y) && y < 0) stopifnot(identical(y, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 idxsList <- list( integer=1:5, double=as.double(1:5), logical=(x <= 5) ) for (idxs in idxsList) { cat("idxs:\n") str(idxs) s1 <- sum(x[idxs], na.rm=TRUE) s2 <- sumOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/rowWeightedMeans.R0000644000175100001440000000405612542546242017367 0ustar hornikuserslibrary("matrixStats") set.seed(1) x <- matrix(rnorm(20), nrow=5, ncol=4) print(x) # Non-weighted row averages xM0 <- rowMeans(x) xM1 <- rowWeightedMeans(x) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x)) stopifnot(all.equal(xM2, xM0)) # Weighted row averages (uniform weights) w <- rep(2.5, ncol(x)) xM1 <- rowWeightedMeans(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row averages (excluding some columns) w <- c(1,1,0,1) xM0 <- rowMeans(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMeans(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row averages (excluding some columns) w <- c(0,1,0,0) xM0 <- rowMeans(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMeans(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row averages (all zero weights) w <- c(0,0,0,0) xM0 <- rowMeans(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMeans(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted averages by rows and columns w <- 1:4 xM1 <- rowWeightedMeans(x, w=w) print(xM1) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM1)) x[sample(length(x), size=0.3*length(x))] <- NA print(x) # Non-weighted row averages with missing values xM0 <- rowMeans(x, na.rm=TRUE) xM1 <- rowWeightedMeans(x, na.rm=TRUE) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), na.rm=TRUE) stopifnot(all.equal(xM2, xM0)) # Weighted row averages with missing values xM0 <- apply(x, MARGIN=1, FUN=weighted.mean, w=w, na.rm=TRUE) print(xM0) xM1 <- rowWeightedMeans(x, w=w, na.rm=TRUE) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMeans(t(x), w=w, na.rm=TRUE) stopifnot(all.equal(xM2, xM0)) # Weighted averages by rows and columns w <- 1:4 xM1 <- rowWeightedMeans(x, w=w, na.rm=TRUE) xM2 <- colWeightedMeans(t(x), w=w, na.rm=TRUE) stopifnot(all.equal(xM2, xM1)) matrixStats/tests/rowCumMinMaxs.R0000644000175100001440000000711712542546242016665 0ustar hornikuserslibrary("matrixStats") rowCummins_R <- function(x) { suppressWarnings({ y <- t(apply(x, MARGIN=1L, FUN=cummin)) }) } colCummins_R <- function(x) { suppressWarnings({ y <- apply(x, MARGIN=2L, FUN=cummin) }) } rowCummaxs_R <- function(x) { suppressWarnings({ y <- t(apply(x, MARGIN=1L, FUN=cummax)) }) } colCummaxs_R <- function(x) { suppressWarnings({ y <- apply(x, MARGIN=2L, FUN=cummax) }) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges r0 <- rowCummins_R(x) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x) r1 <- rowCummaxs(x) r2 <- t(colCummaxs(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (addNA ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCummins_R(x) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x) r1 <- rowCummaxs(x) r2 <- t(colCummaxs(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(0, nrow=1, ncol=1) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCummins_R(x) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) r0 <- rowCummaxs_R(x) r1 <- rowCummaxs(x) r2 <- t(colCummaxs(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") value <- 0 storage.mode(value) <- mode # A 0x0 matrix x <- matrix(value, nrow=0L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow=0L, ncol=5L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A Nx0 matrix x <- matrix(value, nrow=5L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCummins(x) r2 <- t(colCummins(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) matrixStats/tests/meanOver.R0000644000175100001440000000530212542546242015662 0ustar hornikuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) meanOver_R <- function(x, na.rm=FALSE, idxs=NULL) { if (is.null(idxs)) { mean(x, na.rm=na.rm) } else { mean(x[idxs], na.rm=na.rm) } } # meanOver_R() cat("Consistency checks:\n") K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 4 else 20 for (kk in seq_len(K)) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape n <- sample(1e3, size=1L) x <- rnorm(n, sd=100) # Add NAs? if ((kk %% 4) %in% c(3,0)) { cat("Adding NAs\n") nna <- sample(n, size=1L) naValues <- c(NA_real_, NaN) x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE) } # Integer or double? if ((kk %% 4) %in% c(2,0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE,FALSE), size=1L) # Sum over all y0 <- meanOver_R(x, na.rm=na.rm) y1 <- meanOver(x, na.rm=na.rm) stopifnot(all.equal(y1,y0)) # Sum over subset nidxs <- sample(n, size=1L) idxs <- sample(n, size=nidxs) y0 <- meanOver_R(x, na.rm=na.rm, idxs=idxs) y1 <- meanOver(x, na.rm=na.rm, idxs=idxs) stopifnot(all.equal(y1,y0)) } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Averaging over zero elements # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- integer(0) s1 <- mean(x) s2 <- meanOver(x) stopifnot(identical(s1, s2)) x <- 1:10 idxs <- integer(0) s1 <- mean(x[idxs]) s2 <- meanOver(x, idxs=idxs) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times=10L) s1 <- mean(x, na.rm=TRUE) s2 <- meanOver(x, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- rep(NA_integer_, times=10L) idxs <- 1:5 s1 <- mean(x[idxs], na.rm=TRUE) s2 <- meanOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- double(0) s1 <- mean(x) s2 <- meanOver(x) stopifnot(identical(s1, s2)) x <- as.double(1:10) idxs <- integer(0) s1 <- mean(x[idxs]) s2 <- meanOver(x, idxs=idxs) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times=10L) s1 <- mean(x, na.rm=TRUE) s2 <- meanOver(x, na.rm=TRUE) stopifnot(identical(s1, s2)) x <- rep(NA_real_, times=10L) idxs <- 1:5 s1 <- mean(x[idxs], na.rm=TRUE) s2 <- meanOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 idxsList <- list( integer=1:5, double=as.double(1:5), logical=(x <= 5) ) for (idxs in idxsList) { cat("idxs:\n") str(idxs) s1 <- mean(x[idxs], na.rm=TRUE) s2 <- meanOver(x, idxs=idxs, na.rm=TRUE) stopifnot(identical(s1, s2)) } matrixStats/tests/indexByRow.R0000644000175100001440000000300212542546242016173 0ustar hornikuserslibrary("matrixStats") indexByRow_R1 <- function(dim, idxs=NULL, ...) { n <- prod(dim) x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE) if (!is.null(idxs)) x <- x[idxs] as.vector(x) } # indexByRow_R1() indexByRow_R2 <- function(dim, idxs=NULL, ...) { n <- prod(dim) if (is.null(idxs)) { x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE) as.vector(x) } else { idxs <- idxs - 1 cols <- idxs %/% dim[2L] rows <- idxs %% dim[2L] cols + dim[1L]*rows + 1L } } # indexByRow_R2() dim <- c(5L, 4L) X <- matrix(NA_integer_, nrow=dim[1L], ncol=dim[2L]) Y <- t(X) idxsByCols <- seq(along=X) # Assign by columns X[idxsByCols] <- idxsByCols print(X) # Truth Y0 <- t(X) idxsByRows <- as.vector(Y0) # Assert idxs <- indexByRow(dim) stopifnot(all.equal(idxs, idxsByRows)) Y <- X Y[idxsByRows] <- idxs print(Y) stopifnot(all(as.vector(Y) == as.vector(X))) idxs_R1 <- indexByRow_R1(dim) stopifnot(all.equal(idxs_R1, idxsByRows)) idxs_R2 <- indexByRow_R2(dim) stopifnot(all.equal(idxs_R2, idxsByRows)) # Assert idxsByCols <- seq(from=1, to=length(X), by=3L) idxsByRows <- as.vector(t(X)[idxsByCols]) idxs <- indexByRow(dim, idxs=idxsByCols) stopifnot(all(idxs == idxsByRows)) idxs_R1 <- indexByRow_R1(dim, idxs=idxsByCols) stopifnot(all(idxs_R1 == idxsByRows)) idxs_R2 <- indexByRow_R2(dim, idxs=idxsByCols) stopifnot(all(idxs_R2 == idxsByRows)) ## DEPRECATED: Backward compatibility idxs0 <- indexByRow(dim) idxs1 <- indexByRow(X) stopifnot(identical(idxs1, idxs0)) matrixStats/tests/binCounts.R0000644000175100001440000000477412542546242016066 0ustar hornikuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binCounts_hist <- function(x, bx, right=FALSE, ...) { n0 <- graphics::hist(x, breaks=bx, right=right, include.lowest=TRUE, plot=FALSE)$counts } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Non-sorted and sorted positions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1e5 # Number of data points nb <- 2e3 # Number of bins # Uniformely distributed bins bx <- seq(from=0, to=1, length.out=nb+1L) bx <- c(-1, bx, 2) # Sample data points set.seed(0xBEEF) x <- runif(nx, min=0, max=1) for (kk in 1:2) { n0 <- binCounts_hist(x, bx=bx) n1 <- binCounts(x, bx=bx) # Sanity check stopifnot(identical(n1, n0)) # Check reversed n1r <- rev(binCounts(-x, bx=rev(-bx), right=TRUE)) stopifnot(identical(n1r, n1)) # Retry with a sorted vector x <- sort(x) } # for (kk in 1:2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:200 x[100] <- NA_integer_ nx <- length(x) # Bins bx <- c(0.5,50.5,100.5,150.5,200.5) yS0 <- binCounts_hist(x, bx=bx) yS <- binCounts(x, bx=bx) # Sanity check stopifnot(all.equal(yS, yS0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Border cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:10 bx <- min(x) - c(10,1) yS <- binCounts(x, bx=bx) stopifnot(all.equal(yS, 0L)) bx <- range(x) yS <- binCounts(x, bx=bx) stopifnot(all.equal(yS, length(x)-1L)) bx <- max(x) + c(1,10) yS <- binCounts(x, bx=bx) stopifnot(all.equal(yS, 0L)) # Every second empty x <- 1:10 bx <- rep(x, each=2L) yS <- binCounts(x, bx=bx) stopifnot(all.equal(yS, rep(c(0L,1L), length.out=length(bx)-1L))) ## NOTE: binCounts_hist() does not give the same last bin count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(yS <- binCounts(1:10, bx=bx), silent=TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(yS <- binCounts(1:10, bx=bx), silent=TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/varDiff_etal.R0000644000175100001440000000516012542546242016476 0ustar hornikuserslibrary("matrixStats") set.seed(1) x <- rnorm(1e4) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Variance estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigma2A <- var(x) cat(sprintf("var(x)=%g\n", sigma2A)) sigma2B <- varDiff(x) cat(sprintf("varDiff(x)=%g\n", sigma2B)) d <- abs(sigma2B - sigma2A) cat(sprintf("Absolute difference=%g\n", d)) stopifnot(d < 0.02) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Standard deviation estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - sigmaA <- sd(x) cat(sprintf("sd(x)=%g\n", sigmaA)) sigmaB <- sdDiff(x) cat(sprintf("sdDiff(x)=%g\n", sigmaB)) d <- abs(sigmaB - sigmaA) cat(sprintf("Absolute difference=%g\n", d)) stopifnot(d < 0.01) # Sanity checks stopifnot(abs(sigma2A - sigmaA^2) < 1e-9) stopifnot(abs(sigma2B - sigmaB^2) < 1e-9) sigmaA2 <- mad(x) cat(sprintf("mad(x)=%g\n", sigmaA2)) sigmaB2 <- madDiff(x) cat(sprintf("madDiff(x)=%g\n", sigmaB2)) d <- abs(sigmaB2 - sigmaA2) cat(sprintf("Absolute difference=%g\n", d)) stopifnot(d < 0.05) sigmaA3 <- IQR(x) cat(sprintf("IQR(x)=%g\n", sigmaA3)) sigmaB3 <- iqrDiff(x) cat(sprintf("iqrDiff(x)=%g\n", sigmaB3)) d <- abs(sigmaB3 - sigmaA3) cat(sprintf("Absolute difference=%g\n", d)) stopifnot(d < 0.05) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Trimmed estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- x outliers <- sample(length(x), size=0.1*length(x)) y[outliers] <- 100*y[outliers] sigmaAo <- sd(y[-outliers]) cat(sprintf("sd(y)=%g\n", sigmaAo)) sigmaBo <- sdDiff(y[-outliers]) cat(sprintf("sdDiff(y)=%g\n", sigmaBo)) d <- abs(sigmaB - sigmaA) cat(sprintf("Absolute difference=%g\n", d)) stopifnot(d < 0.01) sigmaBot <- sdDiff(y, trim=0.05) cat(sprintf("sdDiff(y, trim=0.05)=%g\n", sigmaBot)) d <- abs(sigmaBot - sigmaA) cat(sprintf("Absolute difference=%g\n", d)) #stopifnot(d < 1e-3) sigmaCot <- madDiff(y, trim=0.05) cat(sprintf("madDiff(y, trim=0.05)=%g\n", sigmaCot)) sigmaDot <- iqrDiff(y, trim=0.05) cat(sprintf("iqrDiff(y, trim=0.05)=%g\n", sigmaDot)) FUNs <- list( varDiff=varDiff, sdDiff=sdDiff, madDiff=madDiff, iqrDiff=iqrDiff ) for (fcn in names(FUNs)) { cat(sprintf("%s()...\n", fcn)) FUN <- FUNs[[fcn]] for (mode in c("integer", "double")) { cat("mode: ", mode, "", sep="") for (n in 0:3) { x <- runif(n, min=-5, max=5) storage.mode(x) <- mode str(x) y <- FUN(x) yt <- FUN(x, trim=0.1) str(list("non-trimmed"=y, trimmed=yt)) } # for (mode ...) } cat(sprintf("%s()...DONE\n", fcn)) } # for (fcn ...) matrixStats/tests/rowOrderStats.R0000644000175100001440000000262312542546242016733 0ustar hornikuserslibrary("matrixStats") library("stats") rowOrderStats_R <- function(x, probs) { apply(x, MARGIN=1L, FUN=quantile, probs=probs, type=3L) } # rowOrderStats_R() set.seed(1) K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 5 else 3 # Simulate data in a matrix of any shape nrow <- 300 ncol <- 100 x <- rnorm(nrow*ncol) dim(x) <- c(nrow, ncol) probs <- 0.3 which <- round(probs*ncol) y0 <- rowOrderStats_R(x, probs=probs) y1 <- rowOrderStats(x, which=which) stopifnot(all.equal(y1,y0)) y2 <- colOrderStats(t(x), which=which) stopifnot(all.equal(y2,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("Consistency checks without NAs:\n") for (kk in seq_len(K)) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape nrow <- sample(100, size=1) ncol <- sample(100, size=1) x <- rnorm(nrow*ncol) dim(x) <- c(nrow, ncol) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) probs <- runif(1) which <- round(probs*ncol) y0 <- rowOrderStats_R(x, probs=probs) y1 <- rowOrderStats(x, which=which) stopifnot(all.equal(y1,y0)) y2 <- colOrderStats(t(x), which=which) stopifnot(all.equal(y2,y0)) } # for (kk in ...) } # for (mode ...) matrixStats/tests/rowVarDiffs.R0000644000175100001440000000370712542546242016351 0ustar hornikuserslibrary("matrixStats") FUNs <- list( rowVarDiffs=list(rowVarDiffs, colVarDiffs), rowSdDiffs=list(rowSdDiffs, colSdDiffs), rowMadDiffs=list(rowMadDiffs, colMadDiffs), rowIQRDiffs=list(rowIQRDiffs, colIQRDiffs) ) for (fcn in names(FUNs)) { cat(sprintf("%s()...\n", fcn)) rFUN <- FUNs[[fcn]][[1L]] cFUN <- FUNs[[fcn]][[2L]] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100+0.1, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r1 <- rFUN(x, na.rm=na.rm) r2 <- cFUN(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) } } # for (addNA ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r1 <- rFUN(x, na.rm=na.rm) r2 <- cFUN(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=1, ncol=1) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r1 <- rFUN(x, na.rm=na.rm) r2 <- cFUN(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) } cat(sprintf("%s()...DONE\n", fcn)) } # for (fcn ...) matrixStats/tests/rowCumsums.R0000644000175100001440000000552412542546242016300 0ustar hornikuserslibrary("matrixStats") rowCumsums_R <- function(x) { suppressWarnings({ t(apply(x, MARGIN=1L, FUN=cumsum)) }) } colCumsums_R <- function(x) { suppressWarnings({ apply(x, MARGIN=2L, FUN=cumsum) }) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges r0 <- rowCumsums_R(x) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (addNA ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCumsums_R(x) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(0, nrow=1, ncol=1) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCumsums_R(x) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") value <- 0 storage.mode(value) <- mode # A 0x0 matrix x <- matrix(value, nrow=0L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow=0L, ncol=5L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A Nx0 matrix x <- matrix(value, nrow=5L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumsums(x) r2 <- t(colCumsums(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) matrixStats/tests/benchmark.R0000644000175100001440000000030412542546242016035 0ustar hornikusersif (Sys.getenv("_R_CHECK_FULL_") != "" && Sys.getenv("_R_CHECK_USE_VALGRIND_") == "") { if (require("R.rsp")) { html <- matrixStats:::benchmark('binCounts') print(html) } } # _R_CHECK_FULL_ matrixStats/tests/zzz.package-unload.R0000644000175100001440000000227712542546242017625 0ustar hornikusers## These tests need to be last of all tests, otherwise ## covr::package_coverage() gives an error. cat("1. Loading package\n") requireNamespace("matrixStats") stopifnot("matrixStats" %in% loadedNamespaces()) cat("2. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) if (FALSE) { ## 'covr' gives "Error in library("matrixStats") : ## there is no package called 'matrixStats'" here, cf. ## https://travis-ci.org/HenrikBengtsson/matrixStats/builds/48015577 cat("3. Attaching package\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("4. Detaching package\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) cat("5. Unloading package\n") unloadNamespace("matrixStats") stopifnot(!"matrixStats" %in% loadedNamespaces()) cat("6. Attaching package (again)\n") library("matrixStats") stopifnot("package:matrixStats" %in% search()) cat("7. Detaching package (again)\n") detach("package:matrixStats") stopifnot(!"package:matrixStats" %in% search()) stopifnot("matrixStats" %in% loadedNamespaces()) } cat("7. DONE\n") matrixStats/tests/rowAvgsPerColSet.R0000644000175100001440000000403712542546242017323 0ustar hornikuserslibrary("matrixStats") X <- matrix(rnorm(20*6), nrow=20, ncol=6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 3 S <- matrix(1:ncol(X), ncol=nbrOfSets) colnames(S) <- sprintf("s%d", 1:nbrOfSets) print(S) Z <- rowAvgsPerColSet(X, S=S) print(Z) # Validation Z0 <- cbind(s1=rowMeans(X[,1:2]), s2=rowMeans(X[,3:4]), s3=rowMeans(X[,5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 5 S <- matrix(1:nrow(X), ncol=nbrOfSets) colnames(S) <- sprintf("s%d", 1:nbrOfSets) print(S) Z <- colAvgsPerRowSet(X, S=S) print(Z) # Validation Z0 <- rbind(s1=colMeans(X[1:4,]), s2=colMeans(X[5:8,]), s3=colMeans(X[9:12,]), s4=colMeans(X[13:16,]), s5=colMeans(X[17:20,])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 1 S <- matrix(1:ncol(X), ncol=nbrOfSets) colnames(S) <- sprintf("s%d", 1:nbrOfSets) print(S) Z <- rowAvgsPerColSet(X, S=S, FUN=rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbrOfSets <- 1 S <- matrix(1:nrow(X), ncol=nbrOfSets) colnames(S) <- sprintf("s%d", 1:nbrOfSets) print(S) Z <- colAvgsPerRowSet(X, S=S, FUN=colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Use weights # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 3 S <- matrix(1:ncol(X), ncol=nbrOfSets) colnames(S) <- sprintf("s%d", 1:nbrOfSets) print(S) W <- matrix(runif(length(X)), nrow=nrow(X), ncol=ncol(X)) Z1 <- rowAvgsPerColSet(X, W=W, S=S, FUN=rowWeightedMeans) print(Z1) Z2 <- colAvgsPerRowSet(X, W=W, S=S, FUN=colWeightedMeans) print(Z2) matrixStats/tests/rowWeightedVars.R0000644000175100001440000000513012542546242017231 0ustar hornikuserslibrary("matrixStats") set.seed(1) x <- matrix(rnorm(20), nrow=5L, ncol=4L) print(x) # Non-weighted row variances xM0 <- rowVars(x) w <- rep(1, times=ncol(x)) xM1 <- rowWeightedVars(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row variances (uniform weights) w <- rep(2.5, ncol(x)) xM1 <- rowWeightedVars(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row variances (excluding some columns) w <- c(1,1,0,1) xM0 <- rowVars(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedVars(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row variances (excluding some columns) w <- c(0,1,0,0) xM0 <- rowVars(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedVars(x, w=w) #stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM1)) # Weighted row variances (all zero weights) w <- c(0,0,0,0) xM0 <- rowVars(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedVars(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted variances by rows and columns w <- 1:4 xM1 <- rowWeightedVars(x, w=w) print(xM1) xM2 <- colWeightedVars(t(x), w=w) stopifnot(all.equal(xM2, xM1)) x[sample(length(x), size=0.3*length(x))] <- NA print(x) # Non-weighted row variances with missing values xM0 <- rowVars(x, na.rm=TRUE) xM1 <- rowWeightedVars(x, w=rep(1, times=ncol(x)), na.rm=TRUE) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedVars(t(x), w=rep(1, times=ncol(x)), na.rm=TRUE) stopifnot(all.equal(xM2, xM0)) # Weighted row variances with missing values xM1 <- rowWeightedVars(x, w=w, na.rm=TRUE) print(xM1) xM2 <- colWeightedVars(t(x), w=w, na.rm=TRUE) stopifnot(all.equal(xM2, xM1)) # Weighted variances by rows and columns w <- 1:4 xM1 <- rowWeightedVars(x, w=w, na.rm=TRUE) xM2 <- colWeightedVars(t(x), w=w, na.rm=TRUE) stopifnot(all.equal(xM2, xM1)) # Weighted row standard deviation (excluding some columns) w <- c(1,1,0,1) ## FIXME: rowVars()/rowSds() needs na.rm=FALSE (wrong default) xM0 <- rowSds(x[,(w == 1),drop=FALSE], na.rm=FALSE) xM1 <- rowWeightedSds(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedSds(t(x), w=w, na.rm=FALSE) stopifnot(all.equal(xM2, xM0)) # Weighted row MADs (excluding some columns) w <- c(1,1,0,1) xM0 <- rowMads(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMads(x, w=w) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMads(t(x), w=w) stopifnot(all.equal(xM2, xM0)) matrixStats/tests/allocMatrix.R0000644000175100001440000000111112542546242016357 0ustar hornikuserslibrary("matrixStats") allocMatrix_R <- function(nrow, ncol, value=NA) { matrix(data=value, nrow=nrow, ncol=ncol) } # allocMatrix_R() values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) nrow <- 5L ncol <- 10L for (value in values) { X0 <- allocMatrix_R(nrow, ncol, value=value) X <- allocMatrix(nrow, ncol, value=value) str(list(nrow=nrow, ncol=ncol, value=value, X=X, X0=X0)) stopifnot(identical(X,X0)) } matrixStats/tests/rowWeightedMedians.R0000644000175100001440000000365512542546242017710 0ustar hornikuserslibrary("matrixStats") set.seed(1) x <- matrix(rnorm(20), nrow=5, ncol=4) print(x) # Non-weighted row medians xM0 <- rowMedians(x) xM1 <- rowWeightedMedians(x) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x)) stopifnot(all.equal(xM2, xM0)) # Weighted row medians (uniform weights) w <- rep(2.5, ncol(x)) xM1 <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row medians (excluding some columns) w <- c(1,1,0,1) xM0 <- rowMedians(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row medians (excluding some columns) w <- c(0,1,0,0) xM0 <- rowMedians(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted row medians (all zero weights) w <- c(0,0,0,0) xM0 <- rowMedians(x[,(w == 1),drop=FALSE]) xM1 <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted medians by rows and columns w <- 1:4 xM1 <- rowWeightedMedians(x, w=w) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM1)) # Weighted row medians with missing values xM0 <- apply(x, MARGIN=1, FUN=weightedMedian, w=w, na.rm=TRUE) print(xM0) xM1 <- rowWeightedMedians(x, w=w, na.rm=TRUE) print(xM1) stopifnot(all.equal(xM1, xM0)) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM0)) # Weighted medians by rows and columns w <- 1:4 xM1 <- rowWeightedMedians(x, w=w, na.rm=TRUE) xM2 <- colWeightedMedians(t(x), w=w, na.rm=TRUE) stopifnot(all.equal(xM2, xM1)) # Inf weight x <- matrix(1:2, nrow=1, ncol=2) w <- c(7, Inf) xM1 <- rowWeightedMedians(x, w=w) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(identical(2, xM1)) stopifnot(identical(2, xM2)) matrixStats/tests/rowMedians.R0000644000175100001440000001362412542546242016224 0ustar hornikuserslibrary("matrixStats") rowMedians_R <- function(x, na.rm=FALSE, ...) { apply(x, MARGIN=1L, FUN=median, na.rm=na.rm) } colMedians_R <- function(x, na.rm=FALSE, ...) { apply(x, MARGIN=2L, FUN=median, na.rm=na.rm) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Non-ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Non-ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:9+0.1, nrow=3, ncol=3) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Ties:\n") for (mode in c("integer", "double")) { x <- matrix(1:16+0.1, nrow=4, ncol=4) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Single-element matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Single-element matrix:\n") for (mode in c("integer", "double")) { x <- matrix(1, nrow=1, ncol=1) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Empty matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Empty matrix:\n") for (mode in c("integer", "double")) { x <- matrix(integer(0), nrow=0, ncol=0) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NAs:\n") for (mode in c("integer", "double")) { x <- matrix(NA_integer_, nrow=3, ncol=3) storage.mode(x) <- mode y0 <- rowMedians_R(x, na.rm=TRUE) y1 <- rowMedians(x, na.rm=TRUE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=TRUE) y1 <- colMedians(x, na.rm=TRUE) stopifnot(all.equal(y1,y0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All NaNs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All NaNs:\n") x <- matrix(NA_real_, nrow=3, ncol=3) y0 <- rowMedians_R(x, na.rm=TRUE) y1 <- rowMedians(x, na.rm=TRUE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=TRUE) y1 <- colMedians(x, na.rm=TRUE) stopifnot(all.equal(y1,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All Infs:\n") x <- matrix(Inf, nrow=3, ncol=3) y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: All -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: All -Infs:\n") x <- matrix(-Inf, nrow=3, ncol=3) y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Infs and -Infs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Infs and -Infs:\n") x <- matrix(c(-Inf,+Inf), nrow=4, ncol=4) y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special case: Integer overflow with ties # - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("Special case: Integer overflow with ties:\n") x <- matrix(.Machine$integer.max, nrow=4, ncol=4) y0 <- rowMedians_R(x, na.rm=FALSE) y1 <- rowMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) y0 <- colMedians_R(x, na.rm=FALSE) y1 <- colMedians(x, na.rm=FALSE) stopifnot(all.equal(y1,y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") K <- if (Sys.getenv("_R_CHECK_FULL_") == "" || Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4 else 10 for (kk in seq_len(K)) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape dim <- sample(50:200, size=2) n <- prod(dim) x <- rnorm(n, sd=100) dim(x) <- dim # Add NAs? if ((kk %% 4) %in% c(3,0)) { cat("Adding NAs\n") nna <- sample(n, size=1) naValues <- c(NA_real_, NaN) x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE) } # Integer or double? if ((kk %% 4) %in% c(2,0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } na.rm <- sample(c(TRUE,FALSE), size=1) # rowMedians(): y0 <- rowMedians_R(x, na.rm=na.rm) y1 <- rowMedians(x, na.rm=na.rm) stopifnot(all.equal(y1,y0)) y2 <- colMedians(t(x), na.rm=na.rm) stopifnot(all.equal(y2,y0)) # colMedians(): y0 <- colMedians_R(x, na.rm=na.rm) y1 <- colMedians(x, na.rm=na.rm) stopifnot(all.equal(y1,y0)) y2 <- rowMedians(t(x), na.rm=na.rm) stopifnot(all.equal(y2,y0)) } # for (kk ...) matrixStats/tests/rowLogSumExps.R0000644000175100001440000000707712542546242016717 0ustar hornikusers# Test inspired by the harmonic mean example in R-help # thread '[R] Beyond double-precision?' on May 9, 2009. library("matrixStats") library("stats") logSumExp0 <- function(lx) { iMax <- which.max(lx) log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax] } # logSumExp0() n <- 1e3 set.seed(1) for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") X <- matrix(runif(n, min=1.0, max=3.0), nrow=50L) storage.mode(X) <- mode str(X) # The logarithm of the harmonic mean by rows yH <- log(1/rowMeans(1/X)) str(yH) nlX <- -log(X) y0 <- log(ncol(X)) - apply(nlX, MARGIN=1L, FUN=logSumExp0) stopifnot(all.equal(y0,yH)) y1 <- log(ncol(X)) - apply(nlX, MARGIN=1L, FUN=logSumExp) stopifnot(all.equal(y1,y0)) y2 <- log(ncol(X)) - rowLogSumExps(nlX) stopifnot(all.equal(y2,y0)) y3 <- log(ncol(X)) - colLogSumExps(t(nlX)) stopifnot(all.equal(y3,y0)) # The logarithm of the harmonic mean by columns yH <- log(1/colMeans(1/X)) str(yH) y0 <- log(nrow(X)) - apply(nlX, MARGIN=2L, FUN=logSumExp0) stopifnot(all.equal(y0,yH)) y1 <- log(nrow(X)) - apply(nlX, MARGIN=2L, FUN=logSumExp) stopifnot(all.equal(y1,y0)) y2 <- log(nrow(X)) - colLogSumExps(nlX) stopifnot(all.equal(y2,y0)) y3 <- log(nrow(X)) - rowLogSumExps(t(nlX)) stopifnot(all.equal(y3,y0)) # Testing names rownames(nlX) <- seq_len(nrow(X)) colnames(nlX) <- seq_len(ncol(X)) y2 <- rowLogSumExps(nlX) stopifnot(identical(names(y2), rownames(nlX))) y3 <- colLogSumExps(t(nlX)) stopifnot(identical(names(y3), rownames(nlX))) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-size matrices lX <- matrix(numeric(0L), nrow=0L, ncol=0L) y <- rowLogSumExps(lX) print(y) stopifnot(length(y) == nrow(lX)) y <- colLogSumExps(lX) print(y) stopifnot(length(y) == ncol(lX)) ## Zero-height matrices lX <- matrix(numeric(0L), nrow=0L, ncol=10L) y <- rowLogSumExps(lX) print(y) stopifnot(length(y) == nrow(lX)) y <- colLogSumExps(lX) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(all(y == -Inf)) ## Zero-width matrices lX <- matrix(numeric(0L), nrow=10L, ncol=0L) y <- colLogSumExps(lX) print(y) stopifnot(length(y) == ncol(lX)) y <- rowLogSumExps(lX) print(y) stopifnot(length(y) == nrow(lX)) stopifnot(all(y == -Inf)) ## Matrices with one element lX <- matrix(1.0, nrow=1L, ncol=1L) y <- rowLogSumExps(lX) print(y) stopifnot(length(y) == nrow(lX)) stopifnot(all(y == lX)) y <- colLogSumExps(lX) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(all(y == lX)) ## All missing values lX <- matrix(NA_real_, nrow=1L, ncol=1L) y <- rowLogSumExps(lX, na.rm=TRUE) print(y) stopifnot(length(y) == nrow(lX)) stopifnot(identical(y, -Inf)) lX <- matrix(NA_real_, nrow=1L, ncol=1L) y <- colLogSumExps(lX, na.rm=TRUE) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(identical(y, -Inf)) lX <- matrix(NA_real_, nrow=2L, ncol=2L) y <- rowLogSumExps(lX, na.rm=TRUE) print(y) stopifnot(length(y) == nrow(lX)) stopifnot(all(y == -Inf)) y <- rowLogSumExps(lX, na.rm=FALSE) print(y) stopifnot(length(y) == nrow(lX)) stopifnot(all(is.na(y) & !is.nan(y))) lX <- matrix(NA_real_, nrow=2L, ncol=2L) y <- colLogSumExps(lX, na.rm=TRUE) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(all(y == -Inf)) y <- colLogSumExps(lX, na.rm=FALSE) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(all(is.na(y) & !is.nan(y))) ## +Inf values lX <- matrix(c(1, 2, +Inf), nrow=3L, ncol=2L) y <- colLogSumExps(lX, na.rm=TRUE) print(y) stopifnot(length(y) == ncol(lX)) stopifnot(all(y == +Inf)) matrixStats/tests/rowVars.R0000644000175100001440000000536012542546242015555 0ustar hornikuserslibrary("matrixStats") rowVars_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=1L, FUN=var, na.rm=na.rm) }) } colVars_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=2L, FUN=var, na.rm=na.rm) }) } rowVars_center <- function(x, na.rm=FALSE) { center <- rowMeans(x, na.rm=na.rm) rowVars(x, center=center, na.rm=na.rm) } colVars_center <- function(x, na.rm=FALSE) { center <- colMeans(x, na.rm=na.rm) colVars(x, center=center, na.rm=na.rm) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100+0.1, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowVars_R(x, na.rm=na.rm) r1 <- rowVars(x, na.rm=na.rm) r1b <- rowVars_center(x, na.rm=na.rm) r2 <- colVars(t(x), na.rm=na.rm) r2b <- colVars_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } } # for (addNA ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowVars_R(x, na.rm=na.rm) r1 <- rowVars(x, na.rm=na.rm) r1b <- rowVars_center(x, na.rm=na.rm) r2 <- colVars(t(x), na.rm=na.rm) r2b <- colVars_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=1, ncol=1) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowVars_R(x, na.rm=na.rm) r1 <- rowVars(x, na.rm=na.rm) r1b <- rowVars_center(x, na.rm=na.rm) r2 <- colVars(t(x), na.rm=na.rm) r2b <- colVars_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } matrixStats/tests/product.R0000644000175100001440000000224412542546242015570 0ustar hornikuserslibrary("matrixStats") for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") # Empty vector x <- 0 storage.mode(x) <- mode y <- prod(x, na.rm=TRUE) print(y) z <- product(x, na.rm=TRUE) print(z) stopifnot(all.equal(z, y)) # Test negative values x <- c(1,-4,2) storage.mode(x) <- mode y <- prod(x, na.rm=TRUE) print(y) z <- product(x, na.rm=TRUE) print(z) stopifnot(all.equal(z, y)) # Test missing values x <- c(1,NA,NaN,2) storage.mode(x) <- mode y <- prod(x, na.rm=TRUE) print(y) z <- product(x, na.rm=TRUE) print(z) stopifnot(all.equal(z, y)) x <- c(1,NA,NaN,2) storage.mode(x) <- mode y <- prod(x, na.rm=FALSE) print(y) z <- product(x, na.rm=FALSE) print(z) stopifnot(all(is.na(z), is.na(y))) x <- c(1,NaN,2) storage.mode(x) <- mode y <- prod(x, na.rm=FALSE) print(y) stopifnot(is.na(y)) z <- product(x, na.rm=FALSE) print(z) stopifnot(is.na(z)) # NAs following 0s should return NA x <- c(0L, NA_integer_) storage.mode(x) <- mode y <- prod(x, na.rm=FALSE) print(y) stopifnot(is.na(y)) z <- product(x, na.rm=FALSE) print(z) stopifnot(is.na(z)) } # for (mode ...) matrixStats/tests/binMeans,binCounts.R0000644000175100001440000000672312542546242017613 0ustar hornikuserslibrary("matrixStats") library("stats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Naive R implementation of binMeans() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - binMeans0 <- function(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE) { B <- length(bx)-1L res <- double(B) counts <- rep(NaN, times=B) if (na.rm) { keep <- !is.na(x) & !is.na(y) x <- x[keep] y <- y[keep] } # For each bin... for (kk in seq(length=B)) { if (right) { idxs <- which(bx[kk] < x & x <= bx[kk+1L]) } else { idxs <- which(bx[kk] <= x & x < bx[kk+1L]) } yKK <- y[idxs] muKK <- mean(yKK) res[kk] <- muKK counts[kk] <- length(idxs) } # for (kk ...) if (count) attr(res, "count") <- counts res } # binMeans0() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #1 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:200 nx <- length(x) y <- double(nx) y[1:50] <- 5 y[101:150] <- -5 y <- y + rnorm(nx) # Bins bx <- c(0.5,50.5,100.5,150.5,200.5) yS0 <- binMeans0(y, x=x, bx=bx) yS <- binMeans(y, x=x, bx=bx) nS <- binCounts(x, bx=bx) # Sanity check stopifnot(all.equal(yS, yS0)) stopifnot(all.equal(attr(yS, "count"), nS)) yS0r <- rev(binMeans0(y, x=-x, bx=rev(-bx), count=FALSE, right=TRUE)) ySr <- rev(binMeans(y, x=-x, bx=rev(-bx), count=FALSE, right=TRUE)) # Sanity check stopifnot(all.equal(yS0r, yS0, check.attributes=FALSE)) stopifnot(all.equal(ySr, yS0r)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Case #2 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nx <- 1e4 x <- runif(nx) y <- runif(nx) nb <- 20 bx <- do.call(seq, c(as.list(range(x)), length.out=nb)) bx1 <- c(bx[-1], bx[nb] + 1) yS0 <- binMeans0(y, x=x, bx=bx1) yS <- binMeans(y, x=x, bx=bx1) nS <- binCounts(x, bx=bx1) ySr <- rev(binMeans(y, x=-x, bx=rev(-bx1), right=TRUE)) # Sanity check stopifnot(all.equal(yS, yS0)) stopifnot(all.equal(attr(yS, "count"), nS)) stopifnot(all.equal(ySr, yS, check.attributes=FALSE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty bins # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- c(6:8, 16:19) nx <- length(x) y <- runif(nx) bx <- c(0,5,10,15,20,25) yS0 <- binMeans0(y, x=x, bx=bx) yS <- binMeans(y, x=x, bx=bx) nS <- binCounts(x, bx=bx) stopifnot(all.equal(attr(yS, "count"), nS)) stopifnot(all.equal(yS, yS0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- 1:200 x[100] <- NA_integer_ nx <- length(x) y <- double(nx) y[1:50] <- 5 y[101:150] <- -5 y[123:125] <- NA_real_ y <- y + rnorm(nx) # Bins bx <- c(0.5,50.5,100.5,150.5,200.5) yS0 <- binMeans0(y, x=x, bx=bx) yS <- binMeans(y, x=x, bx=bx) # Sanity check stopifnot(all.equal(yS, yS0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Exception handling # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Zero bin bounderies (invalid bin definition) bx <- double(0L) res <- try(yS <- binMeans(x=1:10, y=1:10, bx=bx), silent=TRUE) stopifnot(inherits(res, "try-error")) # One bin boundery (invalid bin definition) bx <- double(1L) res <- try(yS <- binMeans(x=1:10, y=1:10, bx=bx), silent=TRUE) stopifnot(inherits(res, "try-error")) matrixStats/tests/allocVector.R0000644000175100001440000000112212542546242016357 0ustar hornikuserslibrary("matrixStats") allocVector_R <- function(length, value=NA) { x <- vector(mode=typeof(value), length=length) if (!is.finite(value) || value != 0) x[] <- value x } # allocVector_R() values <- list( -1L, 0L, +1L, NA_integer_, .Machine$integer.max, -1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps, FALSE, TRUE, NA ) n <- 1e3 for (value in values) { x0 <- allocVector_R(n, value=value) x <- allocVector(n, value=value) str(list(n=n, value=value, x=x, x0=x0)) stopifnot(identical(x,x0)) } matrixStats/tests/weightedMedian.R0000644000175100001440000000414712542546242017032 0ustar hornikuserslibrary("matrixStats") x <- 1:5 y <- weightedMedian(x) y <- weightedMedian(x, w=c(NA,Inf,NA,Inf,NA), na.rm=TRUE) print(y) y <- weightedMedian(x, w=c(NA,Inf,NA,Inf,NA), na.rm=FALSE) print(y) stopifnot(is.na(y)) x <- 1:10 n <- length(x) y1 <- median(x) # 5.5 y2 <- weightedMedian(x) # 5.5 stopifnot(all.equal(y1, y2)) w <- rep(1, n) y1 <- weightedMedian(x, w) # 5.5 (default) y2a <- weightedMedian(x, ties="weighted") # 5.5 (default) y2b <- weightedMedian(x, ties="min") # 5 y2c <- weightedMedian(x, ties="max") # 6 stopifnot(all.equal(y2a,y1)) y3 <- weightedMedian(x, w) # 5.5 (default) # Pull the median towards zero w[1] <- 5 y1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0,w[1]), x[-1]) # Only possible for integer weights y2 <- median(y) # 3.5 stopifnot(all.equal(y1,y2)) # Put even more weight on the zero w[1] <- 8.5 y <- weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf y <- weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf y <- weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, n) y <- weightedMedian(x, w) # NA x <- 1:4 w <- rep(1, times=4) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { cat(sprintf("ties=%s\n", ties)) y <- weightedMedian(x, w, ties=ties) } } set.seed(0x42) y <- weightedMedian(x=double(0L)) print(y) stopifnot(length(y) == 1L) stopifnot(is.na(y)) y <- weightedMedian(x=x[1]) print(y) stopifnot(length(y) == 1L) stopifnot(all.equal(y, x[1])) n <- 1e3 x <- runif(n) w <- runif(n, min=0, max=1) for (mode in c("integer", "double")) { storage.mode(x) <- mode for (ties in c("weighted", "mean", "min", "max")) { y <- weightedMedian(x, w, ties=ties) cat(sprintf("mode=%s, ties=%s, result=%g\n", mode, ties, y)) } } # A large vector n <- 1e5 x <- runif(n) w <- runif(n, min=0, max=1) y <- weightedMedian(x, w) y <- weightedMedian(x, w, ties="min") matrixStats/tests/rowMads.R0000644000175100001440000001240612542546242015525 0ustar hornikuserslibrary("matrixStats") rowMads_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=1L, FUN=mad, na.rm=na.rm) }) } colMads_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=2L, FUN=mad, na.rm=na.rm) }) } rowMads_center <- function(x, na.rm=FALSE) { center <- rowMedians(x, na.rm=na.rm) rowMads(x, center=center, na.rm=na.rm) } colMads_center <- function(x, na.rm=FALSE) { center <- colMedians(x, na.rm=na.rm) colMads(x, center=center, na.rm=na.rm) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 3x3 matrix (no ties) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1,2,3,2,3,4,3,4,5)+0.1, nrow=3, ncol=3) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) cat("rowMads():\n") r0 <- rowMads_R(x, na.rm=TRUE) r1 <- rowMads(x, na.rm=TRUE) r1b <- rowMads_center(x, na.rm=TRUE) r2 <- colMads(t(x), na.rm=TRUE) r2b <- colMads_center(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) cat("colMads():\n") r0 <- colMads_R(x, na.rm=TRUE) r1 <- colMads(x, na.rm=TRUE) r1b <- colMads_center(x, na.rm=TRUE) r2 <- rowMads(t(x), na.rm=TRUE) r2b <- rowMads_center(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Ties: a 4x4 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1:4,2:5,3:6,4:7)+0.1, nrow=4, ncol=4) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) cat("rowMads():\n") r0 <- rowMads_R(x, na.rm=TRUE) r1 <- rowMads(x, na.rm=TRUE) r2 <- colMads(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm=TRUE) r1 <- colMads(x, na.rm=TRUE) r2 <- rowMads(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # No ties: a 3x3 matrix with an NA value # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(c(1,2,3,2,3,4,3,4,5)+0.1, nrow=3, ncol=3) x[2,2] <- NA_real_ cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) cat("rowMads():\n") r0 <- rowMads_R(x, na.rm=TRUE) r1 <- rowMads(x, na.rm=TRUE) r2 <- colMads(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) cat("colMads():\n") r0 <- colMads_R(x, na.rm=TRUE) r1 <- colMads(x, na.rm=TRUE) r2 <- rowMads(t(x), na.rm=TRUE) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } # Row/column ranges for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") cat("rowMads():\n") r0 <- rowMads_R(x, na.rm=na.rm) r1 <- rowMads(x, na.rm=na.rm) r2 <- colMads(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) cat("colMads():\n") r0 <- colMads_R(x, na.rm=na.rm) r1 <- colMads(x, na.rm=na.rm) r2 <- rowMads(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } # for (addNA ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(NA_real_, nrow=20, ncol=5) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowMads_R(x, na.rm=na.rm) if (na.rm) r0[is.na(r0)] <- NaN r1 <- rowMads(x, na.rm=na.rm) r2 <- colMads(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=1, ncol=1) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowMads_R(x, na.rm=na.rm) r1 <- rowMads(x, na.rm=na.rm) r2 <- colMads(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 0x0 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(double(0), nrow=0, ncol=0) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowMads_R(x, na.rm=na.rm) r1 <- rowMads(x, na.rm=na.rm) r2 <- colMads(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } matrixStats/tests/rowCumprods.R0000644000175100001440000000665312542546242016444 0ustar hornikuserslibrary("matrixStats") rowCumprods_R <- function(x) { suppressWarnings({ t(apply(x, MARGIN=1L, FUN=cumprod)) }) } colCumprods_R <- function(x) { suppressWarnings({ apply(x, MARGIN=2L, FUN=cumprod) }) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges r0 <- rowCumprods_R(x) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (addNA ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCumprods_R(x) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(0, nrow=1, ncol=1) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) r0 <- rowCumprods_R(x) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # BUG FIX TEST: Assert zeros don't trump NAs in integer matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=3, ncol=2) x[1,2] <- 0 x[2,2] <- 1 x[3,1] <- 0 storage.mode(x) <- mode cat("mode: ", mode, "\n", sep="") str(x) r0 <- rowCumprods_R(x) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") value <- 0 storage.mode(value) <- mode # A 0x0 matrix x <- matrix(value, nrow=0L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A 0xK matrix x <- matrix(value, nrow=0L, ncol=5L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) # A Nx0 matrix x <- matrix(value, nrow=5L, ncol=0L) str(x) r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x)) r1 <- rowCumprods(x) r2 <- t(colCumprods(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) matrixStats/tests/rowTabulates.R0000644000175100001440000000155112542546242016564 0ustar hornikuserslibrary("matrixStats") N <- 6L K <- 5L J <- 5L data <- matrix(1:J, nrow=N, ncol=K) modes <- c("integer", "raw") for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) x <- data storage.mode(x) <- mode print(x) y <- rowTabulates(x) print(y) stopifnot(identical(dim(y), c(N,J))) y <- colTabulates(x) print(y) stopifnot(identical(dim(y), c(K,J))) # Count only certain values y <- rowTabulates(x, values=1:3) print(y) stopifnot(identical(dim(y), c(N,3L))) y <- colTabulates(x, values=1:3) print(y) stopifnot(identical(dim(y), c(K,3L))) # Raw y <- rowTabulates(x, values=as.raw(1:3)) print(y) stopifnot(identical(dim(y), c(N,3L))) y2 <- colTabulates(t(x), values=as.raw(1:3)) print(y2) stopifnot(identical(dim(y2), c(N,3L))) stopifnot(identical(y2, y)) cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) matrixStats/tests/rowCounts.R0000644000175100001440000001002712542546242016111 0ustar hornikuserslibrary("matrixStats") rowCounts_R <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.na(value)) { counts <- apply(x, MARGIN=1L, FUN=function(x) sum(is.na(x))) } else { counts <- apply(x, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=na.rm)) } as.integer(counts) } # rowCounts_R() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer and numeric # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(runif(20*5, min=-3, max=3), nrow=20, ncol=5) x[sample.int(length(x), size=7)] <- 0 storage.mode(x) <- mode for (na.rm in c(FALSE, TRUE)) { # Count zeros r0 <- rowCounts_R(x, value=0, na.rm=na.rm) r1 <- rowCounts(x, value=0, na.rm=na.rm) r2 <- colCounts(t(x), value=0, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value=NA, na.rm=na.rm) r1 <- rowCounts(x, value=NA, na.rm=na.rm) r2 <- colCounts(t(x), value=NA, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) if (mode == "integer") { ux <- unique(as.vector(x)) r0 <- r1 <- r2 <- integer(nrow(x)) for (value in ux) { r0 <- r0 + rowCounts_R(x, value=value, na.rm=na.rm) r1 <- r1 + rowCounts(x, value=value, na.rm=na.rm) r2 <- r2 + colCounts(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } stopifnot(all(r0 == ncol(x))) } # if (mode == "integer") } # for (na.rm ...) } # for (mode ...) # All NAs naList <- list(NA_integer_, NA_real_, NaN) for (naValue in naList) { x <- matrix(naValue, nrow=20, ncol=5) for (na.rm in c(FALSE, TRUE)) { r0 <- rowCounts_R(x, na.rm=na.rm) r1 <- rowCounts(x, na.rm=na.rm) r2 <- colCounts(t(x), na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) # Count NAs r0 <- rowCounts_R(x, value=NA, na.rm=na.rm) r1 <- rowCounts(x, value=NA, na.rm=na.rm) r2 <- colCounts(t(x), value=NA, na.rm=na.rm) stopifnot(all(r0 == ncol(x))) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } } # for (naValue ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow=20, ncol=5) x[13:17,c(2,4)] <- TRUE x[2:4,] <- TRUE x[,1] <- TRUE x[5,] <- FALSE x[,5] <- FALSE # Row/column counts for (na.rm in c(FALSE, TRUE)) { r0 <- rowCounts_R(x, na.rm=na.rm) r1 <- rowCounts(x, na.rm=na.rm) r2 <- colCounts(t(x), na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) rT <- rowCounts(x, value=TRUE, na.rm=na.rm) rF <- rowCounts(x, value=FALSE, na.rm=na.rm) stopifnot(rT + rF == ncol(x)) cT <- colCounts(x, value=TRUE, na.rm=na.rm) cF <- colCounts(x, value=FALSE, na.rm=na.rm) stopifnot(cT + cF == nrow(x)) # Count NAs r0 <- rowCounts_R(x, value=NA, na.rm=na.rm) r1 <- rowCounts(x, value=NA, na.rm=na.rm) r2 <- colCounts(t(x), value=NA, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(letters, length.out=20*5), nrow=20, ncol=5) x[2:3,3:4] <- NA_character_ # Row/column counts for (na.rm in c(FALSE, TRUE)) { for (value in c("g", NA_character_)) { r0 <- rowCounts_R(x, value=value, na.rm=na.rm) r1 <- rowCounts(x, value=value, na.rm=na.rm) r2 <- colCounts(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r0)) c <- count(x[1,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[1])) c <- count(x[2,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[2])) } } # NA row x <- matrix(0, nrow=2, ncol=2) x[1,] <- NA_integer_ r0 <- rowCounts(x, value=0) r1 <- rowCounts_R(x, value=0) stopifnot(identical(r0,r1)) matrixStats/tests/psortKM.R0000644000175100001440000000242412542546242015507 0ustar hornikuserslibrary("matrixStats") library("utils") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - psortKM_R <- function(x, k, m) { x <- sort(x) x[(k-m+1):k] } # psortKM_R() psortKM_R2 <- function(x, k, m) { partial <- (k-m+1):k x <- sort.int(x, partial=partial) x[partial] } # psortKM_R2() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) cat("Consistency checks:\n") x <- 1:500 x[298:300] <- 300 y <- sample(x) cat("x:\n") str(x) cat("sample(x):\n") str(y) for (k in c(1, 2, 300, 301, length(x))) { for (m in 1:min(5,k)) { px0 <- psortKM_R(x, k=k, m=m) px0b <- psortKM_R2(x, k=k, m=m) stopifnot(identical(px0b, px0)) px1 <- matrixStats:::.psortKM(x, k=k, m=m) cat(sprintf(".psortKM(x, k=%d, m=%d):\n", k, m)) print(px1) stopifnot(identical(px1, px0)) py0 <- psortKM_R(y, k=k, m=m) py0b <- psortKM_R2(y, k=k, m=m) stopifnot(identical(py0b, py0)) py1 <- matrixStats:::.psortKM(y, k=k, m=m) cat(sprintf(".psortKM(y, k=%d, m=%d):\n", k, m)) print(py1) stopifnot(identical(py1, py0)) stopifnot(identical(py1, px1)) } # for (m ...) } # for (k ...) matrixStats/tests/rowDiffs.R0000644000175100001440000000414412542546242015674 0ustar hornikuserslibrary("matrixStats") rowDiffs_R <- function(x, lag=1L, differences=1L, ...) { ncol2 <- ncol(x) - lag*differences if (ncol2 <= 0) { return(matrix(x[integer(0L)], nrow=nrow(x), ncol=0L)) } suppressWarnings({ y <- apply(x, MARGIN=1L, FUN=diff, lag=lag, differences=differences) }) y <- t(y) dim(y) <- c(nrow(x), ncol2) y } set.seed(0x42) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(sample(20*8)+0.1, nrow=20, ncol=8) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } storage.mode(x) <- mode str(x) for (lag in 1:4) { for (differences in 1:3) { cat(sprintf("mode: %s, lag=%d, differences=%d\n", mode, lag, differences)) # Row/column ranges r0 <- rowDiffs_R(x, lag=lag, differences=differences) r1 <- rowDiffs(x, lag=lag, differences=differences) r2 <- t(colDiffs(t(x), lag=lag, differences=differences)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1, r2)) } } } # for (addNA ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") x <- matrix(NA_real_, nrow=20, ncol=5) storage.mode(x) <- mode str(x) r0 <- rowDiffs_R(x) r1 <- rowDiffs(x) r2 <- t(colDiffs(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=1, ncol=1) r0 <- rowDiffs_R(x) r1 <- rowDiffs(x) r2 <- t(colDiffs(t(x))) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) matrixStats/tests/logSumExp.R0000644000175100001440000000545012542546242016035 0ustar hornikuserslibrary("matrixStats") library("stats") logSumExp_R <- function(lx, na.rm=FALSE) { log(sum(exp(lx), na.rm=na.rm)) } ## R-help thread \emph{'[R] Beyond double-precision?'} on May 9, 2009. for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") set.seed(1) x <- runif(20, min=1.0, max=3.0) storage.mode(x) <- mode str(x) ## The logarithm of the harmonic mean y0 <- log(1/mean(1/x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) y2 <- log(length(x)) - logSumExp_R(-lx) # Sanity check stopifnot(all.equal(y2, y0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## NA values lx <- c(1,2,3) lx[2] <- NA_real_ y0 <- logSumExp_R(lx, na.rm=FALSE) y <- logSumExp(lx, na.rm=FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm=TRUE) y <- logSumExp(lx, na.rm=TRUE) print(y) stopifnot(all.equal(y, y0)) ## NaN values lx <- c(1,2,3) lx[2] <- NaN y0 <- logSumExp_R(lx, na.rm=FALSE) y <- logSumExp(lx, na.rm=FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm=TRUE) y <- logSumExp(lx, na.rm=TRUE) print(y) stopifnot(all.equal(y, y0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## Zero-length vectors lx <- numeric(0L) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## Vectors of length one lx <- 1.0 y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, lx)) stopifnot(all.equal(y, y0)) lx <- NA_real_ y0 <- logSumExp_R(lx, na.rm=TRUE) y <- logSumExp(lx, na.rm=TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) ## All missing values lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm=TRUE) y <- logSumExp(lx, na.rm=TRUE) print(y) stopifnot(identical(y, -Inf)) stopifnot(all.equal(y, y0)) lx <- c(NA_real_, NA_real_) y0 <- logSumExp_R(lx, na.rm=FALSE) y <- logSumExp(lx, na.rm=FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) ## +Inf values lx <- c(1, 2, +Inf) y0 <- logSumExp_R(lx) y <- logSumExp(lx) print(y) stopifnot(identical(y, +Inf)) stopifnot(all.equal(y, y0)) ## First element is a missing value, cf. PR #33 lx <- c(NA_real_, 1) y0 <- logSumExp_R(lx) print(y0) y <- logSumExp(lx, na.rm=FALSE) print(y) stopifnot(identical(y, NA_real_)) stopifnot(all.equal(y, y0)) y0 <- logSumExp_R(lx, na.rm=TRUE) print(y0) y <- logSumExp(lx, na.rm=TRUE) print(y) stopifnot(identical(y, 1)) stopifnot(all.equal(y, y0)) matrixStats/tests/rowSds.R0000644000175100001440000000533112542546242015371 0ustar hornikuserslibrary("matrixStats") rowSds_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=1L, FUN=sd, na.rm=na.rm) }) } colSds_R <- function(x, na.rm=FALSE) { suppressWarnings({ apply(x, MARGIN=2L, FUN=sd, na.rm=na.rm) }) } rowSds_center <- function(x, na.rm=FALSE) { center <- rowMeans(x, na.rm=na.rm) rowSds(x, center=center, na.rm=na.rm) } colSds_center <- function(x, na.rm=FALSE) { center <- colMeans(x, na.rm=na.rm) colSds(x, center=center, na.rm=na.rm) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # With and without some NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { for (addNA in c(FALSE, TRUE)) { cat("addNA=", addNA, "\n", sep="") x <- matrix(1:100+0.1, nrow=20, ncol=5) if (addNA) { x[13:17,c(2,4)] <- NA_real_ } cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) # Row/column ranges for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowSds_R(x, na.rm=na.rm) r1 <- rowSds(x, na.rm=na.rm) r1b <- rowSds_center(x, na.rm=na.rm) r2 <- colSds(t(x), na.rm=na.rm) r2b <- colSds_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } } # for (addNA ...) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # All NAs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { x <- matrix(NA_real_, nrow=20, ncol=5) cat("mode: ", mode, "\n", sep="") storage.mode(x) <- mode str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowSds_R(x, na.rm=na.rm) r1 <- rowSds(x, na.rm=na.rm) r1b <- rowSds_center(x, na.rm=na.rm) r2 <- colSds(t(x), na.rm=na.rm) r2b <- colSds_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # A 1x1 matrix # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=1, ncol=1) for (na.rm in c(FALSE, TRUE)) { cat("na.rm=", na.rm, "\n", sep="") r0 <- rowSds_R(x, na.rm=na.rm) r1 <- rowSds(x, na.rm=na.rm) r1b <- rowSds_center(x, na.rm=na.rm) r2 <- colSds(t(x), na.rm=na.rm) r2b <- colSds_center(t(x), na.rm=na.rm) stopifnot(all.equal(r1, r2)) stopifnot(all.equal(r1, r0)) stopifnot(all.equal(r2, r0)) stopifnot(all.equal(r1b, r1)) stopifnot(all.equal(r2b, r2)) } matrixStats/tests/anyMissing.R0000644000175100001440000000505112542546242016230 0ustar hornikuserslibrary("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- data.frame( logical=c(TRUE, FALSE, TRUE, FALSE), integer=1:4, double=seq(from=1.0, to=4.0, by=1.0), complex=seq(from=1.0, to=4.0, by=1.0) + 1.0i, character=I(letters[1:4]) ) modes <- names(data) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Special cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("NULL...\n") stopifnot(identical(anyMissing(NULL), FALSE)) cat("NULL...done\n") cat("raw...\n") stopifnot(identical(anyMissing(as.raw(0:3)), FALSE)) cat("raw...done\n") cat("list(NULL)...\n") stopifnot(identical(anyMissing(list(NULL)), FALSE)) cat("list(NULL)...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Scalars, vectors, and matrices of various modes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in modes) { cat(sprintf("Mode: %s...\n", mode)) values <- data[[mode]] # Scalars cat(" scalar\n") x <- values[1L] print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x) <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Vectors cat(" vector\n") x <- values print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) # Matrices cat(" matrix\n") x <- matrix(c(values, values), ncol=2L) print(x) stopifnot(identical(anyMissing(x), FALSE)) is.na(x)[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) cat(sprintf("Mode: %s...done\n", mode)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data frames # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("data.frame...\n") x <- data stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- data is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("data.frame...done\n") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Lists # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - cat("list...\n") x <- as.list(data) stopifnot(identical(anyMissing(x), FALSE)) for (mode in modes) { x <- as.list(data) is.na(x[[mode]])[2L] <- TRUE print(x) stopifnot(identical(anyMissing(x), TRUE)) } # for (mode ...) cat("list...done\n") matrixStats/tests/weightedVar_etal.R0000644000175100001440000000342512542546242017370 0ustar hornikuserslibrary("matrixStats") FUNs <- list( weightedVar=weightedVar, weightedSd=weightedSd, weightedMad=weightedMad ) for (fcn in names(FUNs)) { cat(sprintf("%s()...\n", fcn)) FUN <- FUNs[[fcn]] for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") n <- 15L x <- runif(n, min=-5, max=5) storage.mode(x) <- mode str(x) for (addNA in c(FALSE, TRUE)) { cat("addNA: ", addNA, "\n", sep="") if (addNA) { x[c(5,7)] <- NA } str(x) for (na.rm in c(FALSE, TRUE)) { cat("na.rm: ", na.rm, "\n", sep="") cat("Weights are specified (all are 1)\n") w <- rep(1, times=n) m1 <- FUN(x, na.rm=na.rm) str(list(m1=m1)) cat("All weights are 1\n") w <- rep(1, times=n) m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) cat("First weight is 5\n") # Pull the mean towards zero w[1] <- 5 str(w) m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) cat("All weights are 0\n") # All weights set to zero w <- rep(0, times=n) m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) cat("First weight is 8.5\n") # Put even more weight on the zero w[1] <- 8.5 m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) cat("First weight is Inf\n") # All weight on the first value w[1] <- Inf m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) cat("Last weight is Inf\n") # All weight on the last value w[1] <- 1 w[n] <- Inf m1 <- FUN(x, w, na.rm=na.rm) str(list(m1=m1)) } # for (na.rm ...) } # for (addNA ...) } # for (mode ...) cat(sprintf("%s()...DONE\n", fcn)) } # for (fcn ...) matrixStats/tests/rowIQRs.R0000644000175100001440000000350512542546242015457 0ustar hornikuserslibrary("matrixStats") rowIQRs_R <- function(x, na.rm=FALSE) { quantileNA <- function(x, ..., na.rm=FALSE) { if (!na.rm && anyMissing(x)) return(c(NA_real_, NA_real_)) quantile(x, ..., na.rm=na.rm) } Q <- apply(x, MARGIN=1L, FUN=quantileNA, probs=c(0.25, 0.75), na.rm=na.rm) Q[2L,,drop=TRUE] - Q[1L,,drop=TRUE] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") x <- matrix(1:100+0.1, nrow=10, ncol=10) storage.mode(x) <- mode str(x) for (addNA in c(FALSE, TRUE)) { if (addNA) { x[3:5,6:9] <- NA } for (na.rm in c(FALSE, TRUE)) { probs <- c(0,0.5,1) q0 <- rowIQRs_R(x, na.rm=na.rm) print(q0) q1 <- rowIQRs(x, na.rm=na.rm) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colIQRs(t(x), na.rm=na.rm) stopifnot(all.equal(q2, q0)) q <- iqr(x[3,], na.rm=na.rm) print(q) } # for (na.rm ...) } # for (addNA ...) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test corner cases # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") # Empty vectors x <- integer(0L) storage.mode(x) <- mode str(x) q <- iqr(x) print(q) stopifnot(identical(q, NA_real_)) # Scalar x <- 1L storage.mode(x) <- mode str(x) q <- iqr(x) str(q) stopifnot(identical(q, 0)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single row matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1, nrow=1L, ncol=2L) q <- rowIQRs(x) stopifnot(identical(q, 0)) x <- matrix(1, nrow=2L, ncol=1L) q <- colIQRs(x) stopifnot(identical(q, 0)) matrixStats/tests/rowAllAnys.R0000644000175100001440000001116412542546242016204 0ustar hornikuserslibrary("matrixStats") rowAlls_R <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.na(value)) { apply(is.na(x), MARGIN=1L, FUN=all, na.rm=na.rm) } else { apply((x == value), MARGIN=1L, FUN=all, na.rm=na.rm) } } rowAnys_R <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.na(value)) { apply(is.na(x), MARGIN=1L, FUN=any, na.rm=na.rm) } else { apply((x == value), MARGIN=1L, FUN=any, na.rm=na.rm) } } rowAnyMissings_R <- function(x, ...) { apply(x, MARGIN=1L, FUN=anyMissing) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: logical # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(FALSE, nrow=20, ncol=5) x[13:17,c(2,4)] <- TRUE x[2:4,] <- TRUE x[,1] <- TRUE x[5,] <- FALSE x[,5] <- FALSE x[3,] <- FALSE x[4,] <- TRUE for (kk in 1:3) { if (kk == 2) { x[2,2] <- NA } else if (kk == 3) { x[,2] <- NA x[2,] <- NA } # Row/column all for (na.rm in c(FALSE, TRUE)) { m0 <- rowAlls_R(x, na.rm=na.rm) m1 <- rowAlls(x, na.rm=na.rm) m2 <- colAlls(t(x), na.rm=na.rm) str(list("all()", m0=m0, m1=m1, m2=m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) } # Row/column any for (na.rm in c(FALSE, TRUE)) { m0 <- rowAnys_R(x, na.rm=na.rm) m1 <- rowAnys(x, na.rm=na.rm) m2 <- colAnys(t(x), na.rm=na.rm) str(list("any()", m0=m0, m1=m1, m2=m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) m0 <- rowAnyMissings_R(x) m1 <- rowAnyMissings(x) m2 <- colAnyMissings(t(x)) str(list("anyMissing()", m0=m0, m1=m1, m2=m2)) stopifnot(identical(m1, m0)) stopifnot(identical(m2, m0)) } } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: integer # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(1:28, length.out=20*5), nrow=20, ncol=5) x[2,] <- 7L x[3,1] <- 7L x[2:3,3:4] <- NA_integer_ # Row/column counts value <- 7L for (na.rm in c(FALSE, TRUE)) { ## All r0 <- rowAlls_R(x, value=value, na.rm=na.rm) r1 <- rowAlls(x, value=value, na.rm=na.rm) r2 <- colAlls(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) for (rr in seq_len(nrow(x))) { c <- allValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[rr])) c <- allValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[rr])) } ## Any r0 <- rowAnys_R(x, value=value, na.rm=na.rm) r1 <- rowAnys(x, value=value, na.rm=na.rm) r2 <- colAnys(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) for (rr in seq_len(nrow(x))) { c <- anyValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[rr])) c <- anyValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[rr])) } } all_R <- function(x, value=TRUE, ...) { if (is.na(value)) { all(is.na(x), ...) } else { all(x == value, ...) } } any_R <- function(x, value=TRUE, ...) { if (is.na(value)) { any(is.na(x), ...) } else { any(x == value, ...) } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Data type: character (not sure if this should be supported) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(rep(letters, length.out=20*5), nrow=20, ncol=5) x[2,] <- "g" x[2:4,3:4] <- NA_character_ # Row/column counts for (value in c("g", NA_character_)) { for (na.rm in c(FALSE, TRUE)) { ## All r0 <- rowAlls_R(x, value=value, na.rm=na.rm) r1 <- rowAlls(x, value=value, na.rm=na.rm) r2 <- colAlls(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) for (rr in seq_len(nrow(x))) { c0 <- all_R(x[rr,], value, na.rm=na.rm) c <- allValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,r1[rr])) stopifnot(identical(c,c0)) } ## Any r0 <- rowAnys_R(x, value=value, na.rm=na.rm) r1 <- rowAnys(x, value=value, na.rm=na.rm) r2 <- colAnys(t(x), value=value, na.rm=na.rm) stopifnot(identical(r1, r0)) stopifnot(identical(r2, r1)) for (rr in seq_len(nrow(x))) { c0 <- any_R(x[rr,], value, na.rm=na.rm) c <- anyValue(x[rr,], value=value, na.rm=na.rm) stopifnot(identical(c,c0)) stopifnot(identical(c,r1[rr])) } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # NA 0 test # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(0, nrow=3, ncol=3) x[1,] <- c(NA_real_, NA_real_, 0) x[3,] <- c(1, 0, 1) r0 <- rowAnys_R(x, value=0) r1 <- rowAnys(x, value=0) stopifnot(identical(r0, r1)) matrixStats/tests/rowQuantiles.R0000644000175100001440000000641712542546242016613 0ustar hornikuserslibrary("matrixStats") rowQuantiles_R <- function(x, probs, na.rm=FALSE) { q <- apply(x, MARGIN=1L, FUN=quantile, probs=probs, na.rm=na.rm) if (!is.null(dim(q))) q <- t(q) q } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with multiple quantiles # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") x <- matrix(1:40+0.1, nrow=8, ncol=5) storage.mode(x) <- mode str(x) probs <- c(0,0.5,1) q0 <- rowQuantiles_R(x, probs=probs) print(q0) q1 <- rowQuantiles(x, probs=probs) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs=probs) stopifnot(all.equal(q2, q0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Test with a single quantile # - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (mode in c("integer", "double")) { cat("mode: ", mode, "\n", sep="") x <- matrix(1:40, nrow=8, ncol=5) storage.mode(x) <- mode str(x) probs <- c(0.5) q0 <- rowQuantiles_R(x, probs=probs) print(q0) q1 <- rowQuantiles(x, probs=probs) print(q1) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs=probs) stopifnot(all.equal(q2, q0)) } # for (mode ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Consistency checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - set.seed(1) probs <- seq(from=0, to=1, by=0.25) cat("Consistency checks:\n") K <- if (Sys.getenv("_R_CHECK_FULL_") == "" || Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4 else 20 for (kk in seq_len(K)) { cat("Random test #", kk, "\n", sep="") # Simulate data in a matrix of any shape dim <- sample(20:60, size=2L) n <- prod(dim) x <- rnorm(n, sd=100) dim(x) <- dim # Add NAs? hasNA <- (kk %% 4) %in% c(3,0); if (hasNA) { cat("Adding NAs\n") nna <- sample(n, size=1) naValues <- c(NA_real_, NaN) x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE) } # Integer or double? if ((kk %% 4) %in% c(2,0)) { cat("Coercing to integers\n") storage.mode(x) <- "integer" } str(x) # rowQuantiles(): q0 <- rowQuantiles_R(x, probs=probs, na.rm=hasNA) q1 <- rowQuantiles(x, probs=probs, na.rm=hasNA) stopifnot(all.equal(q1, q0)) q2 <- colQuantiles(t(x), probs=probs, na.rm=hasNA) stopifnot(all.equal(q2, q0)) } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Empty matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(NA_real_, nrow=0L, ncol=0L) probs <- c(0, 0.25, 0.75, 1) q <- rowQuantiles(x, probs=probs) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) q <- colQuantiles(x, probs=probs) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) x <- matrix(NA_real_, nrow=2L, ncol=0L) q <- rowQuantiles(x, probs=probs) stopifnot(identical(dim(q), c(nrow(x), length(probs)))) x <- matrix(NA_real_, nrow=0L, ncol=2L) q <- colQuantiles(x, probs=probs) stopifnot(identical(dim(q), c(ncol(x), length(probs)))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Single column matrices # - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- matrix(1, nrow=2L, ncol=1L) q <- rowQuantiles(x, probs=probs) print(q) x <- matrix(1, nrow=1L, ncol=2L) q <- colQuantiles(x, probs=probs) print(q) matrixStats/src/0000755000175100001440000000000012542546311013405 5ustar hornikusersmatrixStats/src/productExpSumLog.c0000644000175100001440000000260312542546311017036 0ustar hornikusers/*************************************************************************** Public methods: SEXP productExpSumLog(SEXP x, SEXP naRm, SEXP hasNA) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD productExpSumLog #define X_TYPE 'i' #include "productExpSumLog_TYPE-template.h" #define X_TYPE 'r' #include "productExpSumLog_TYPE-template.h" #undef METHOD SEXP productExpSumLog(SEXP x, SEXP naRm, SEXP hasNA) { SEXP ans = NILSXP; double res = NA_REAL; int narm, hasna; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Double matrices are more common to use. */ if (isReal(x)) { res = productExpSumLog_Real(REAL(x), xlength(x), narm, hasna); } else if (isInteger(x)) { res = productExpSumLog_Integer(INTEGER(x), xlength(x), narm, hasna); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = res; UNPROTECT(1); return(ans); } // productExpSumLog() /*************************************************************************** HISTORY: 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/weightedMean.c0000644000175100001440000000317212542546311016155 0ustar hornikusers/*************************************************************************** Public methods: SEXP weightedMean(SEXP x, SEXP w, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #include #define METHOD weightedMean #define X_TYPE 'i' #include "weightedMean_TYPE-template.h" #define X_TYPE 'r' #include "weightedMean_TYPE-template.h" #undef METHOD SEXP weightedMean(SEXP x, SEXP w, SEXP naRm, SEXP refine) { SEXP ans; int narm, refine2; double avg = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Double matrices are more common to use. */ if (isReal(x)) { avg = weightedMean_Real(REAL(x), nx, REAL(w), nw, narm, refine2); } else if (isInteger(x)) { avg = weightedMean_Integer(INTEGER(x), nx, REAL(w), nw, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // weightedMean() /*************************************************************************** HISTORY: 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/rowCumprods_TYPE-template.h0000644000175100001440000000725112542546311020561 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowCumprods_(...) GENERATES: void rowCumprods_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) void rowCumprods_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev; LDOUBLE value; #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (nrow == 0 || ncol == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrow, sizeof(int)); #endif for (kk=0; kk < nrow; kk++) { ans[kk] = (ANS_C_TYPE) x[kk]; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(x[kk]); #endif } kk_prev = 0; for (jj=1; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { #if ANS_TYPE == 'i' if (oks[ii]) { /* Missing value? */ if (X_ISNA(x[kk])) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] * (LDOUBLE) x[kk]; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] * (LDOUBLE) x[kk]); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncol; jj++) { value = 1; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrow; ii++) { #if ANS_TYPE == 'i' if (ok) { /* Missing value? */ if (X_ISNA(x[kk])) { ok = 0; ans[kk] = ANS_NA; } else { value *= (LDOUBLE) x[kk]; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value *= x[kk]; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/x_OP_y.c0000644000175100001440000001417612542546311014757 0ustar hornikusers#include #include "types.h" #include "utils.h" #define METHOD x_OP_y /* Addition */ #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '+' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '+' #include "x_OP_y_TYPE-template.h" /* Subtraction */ #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '-' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '-' #include "x_OP_y_TYPE-template.h" /* Multiplication */ #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'i' #define OP '*' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '*' #include "x_OP_y_TYPE-template.h" /* Division */ #define X_TYPE 'i' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'i' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'i' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_TYPE-template.h" #define X_TYPE 'r' #define Y_TYPE 'r' #define ANS_TYPE 'r' #define OP '/' #include "x_OP_y_TYPE-template.h" #undef METHOD SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow) { SEXP ans = NILSXP; int narm, hasna, byrow, commute2; int op; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'y': */ assertArgVector(y, (R_TYPE_INT | R_TYPE_REAL), "y"); /* Argument 'byRow': */ byrow = asLogicalNoNA(byRow, "byrow"); /* Argument 'commute2': */ commute2 = asLogicalNoNA(commute, "commute"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'operator': */ op = asInteger(operator); if (op == 1) { /* Addition */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); if (isReal(x) && isReal(y)) { x_OP_y_Real_Real_Add(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && isInteger(y)) { x_OP_y_Real_Integer_Add(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isInteger(x) && isReal(y)) { x_OP_y_Integer_Real_Add(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); x_OP_y_Integer_Integer_Add(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } if (op == 2) { /* Subtraction */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); if (isReal(x) && isReal(y)) { x_OP_y_Real_Real_Sub(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && isInteger(y)) { x_OP_y_Real_Integer_Sub(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isInteger(x) && isReal(y)) { x_OP_y_Integer_Real_Sub(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); x_OP_y_Integer_Integer_Sub(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 3) { /* Multiplication */ if (isReal(x) || isReal(y)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); if (isReal(x) && isReal(y)) { x_OP_y_Real_Real_Mul(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && isInteger(y)) { x_OP_y_Real_Integer_Mul(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isInteger(x) && isReal(y)) { x_OP_y_Integer_Real_Mul(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } else { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); x_OP_y_Integer_Integer_Mul(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans)); UNPROTECT(1); } } else if (op == 4) { /* Division */ PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); if (isReal(x) && isReal(y)) { x_OP_y_Real_Real_Div(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isReal(x) && isInteger(y)) { x_OP_y_Real_Integer_Div(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isInteger(x) && isReal(y)) { x_OP_y_Integer_Real_Div(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } else if (isInteger(x) && isInteger(y)) { x_OP_y_Integer_Integer_Div(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans)); } UNPROTECT(1); } return(ans); } /* x_OP_y() */ matrixStats/src/rowOrderStats.c0000644000175100001440000000440612542546311016377 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowOrderStats(SEXP x, SEXP which) Authors: Henrik Bengtsson. Adopted from rowQ() by R. Gentleman. To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowOrderStats #define X_TYPE 'i' #include "rowOrderStats_TYPE-template.h" #define X_TYPE 'r' #include "rowOrderStats_TYPE-template.h" #undef METHOD SEXP rowOrderStats(SEXP x, SEXP dim, SEXP which) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= ncol) { error("Argument 'which' is out of range."); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nrow)); rowOrderStats_Real(REAL(x), nrow, ncol, qq, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nrow)); rowOrderStats_Integer(INTEGER(x), nrow, ncol, qq, INTEGER(ans)); UNPROTECT(1); } return(ans); } // rowOrderStats() /*************************************************************************** HISTORY: 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/binMeans.c0000644000175100001440000000551712542546311015315 0ustar hornikusers/*************************************************************************** Public methods: binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "types.h" #include "utils.h" #include #define BIN_BY 'L' #include "binMeans-BINBY-template.h" #define BIN_BY 'R' #include "binMeans-BINBY-template.h" SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) { SEXP ans = NILSXP, count = NILSXP; R_xlen_t nx, ny, nbins; int closedRight, retcount; int *count_ptr = NULL; /* Argument 'y': */ assertArgVector(y, (R_TYPE_REAL), "y"); ny = xlength(y); /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx != ny) { error("Argument 'y' and 'x' are of different lengths: %d != %d", ny, nx); } /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); /* Argument 'retCount': */ retcount = asLogicalNoNA(retCount, "retCount"); PROTECT(ans = allocVector(REALSXP, nbins)); if (retcount) { PROTECT(count = allocVector(INTSXP, nbins)); count_ptr = INTEGER(count); } if (closedRight) { binMeans_R(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } else { binMeans_L(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr); } if (retcount) { setAttrib(ans, install("count"), count); UNPROTECT(1); // 'count' } UNPROTECT(1); // 'ans' return ans; return(ans); } // binMeans() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-10-06 [HB] o CLEANUP: All argument validation is now done by the high-level C API. 2014-06-02 [HB] o CLEANUP: Removed unused variable in binMeans(). 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/sumOver.c0000644000175100001440000000511712542546311015215 0ustar hornikusers/*************************************************************************** Public methods: SEXP sumOver(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "types.h" #include "utils.h" #define METHOD sumOver #define X_TYPE 'i' #include "sumOver_TYPE-template.h" #define X_TYPE 'r' #include "sumOver_TYPE-template.h" #undef METHOD SEXP sumOver(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) { SEXP ans = NILSXP; int *idxsp; R_xlen_t nidxs; int narm, mode2; double sum; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'idxs': */ if (isNull(idxs)) { idxsp = NULL; nidxs = 0; } else if (isVectorAtomic(idxs)) { idxsp = INTEGER(idxs); nidxs = xlength(idxs); } else { /* To please compiler */ idxsp = NULL; nidxs = 0; error("Argument 'idxs' must be NULL or a vector."); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'mode': */ if (!isInteger(mode)) { error("Argument 'mode' must be a single integer."); } mode2 = asInteger(mode); /* Dispatch to low-level C function */ if (isReal(x)) { sum = sumOver_Real(REAL(x), xlength(x), idxsp, nidxs, narm, mode2); } else if (isInteger(x)) { sum = sumOver_Integer(INTEGER(x), xlength(x), idxsp, nidxs, narm, mode2); } else { error("Argument 'x' must be numeric."); } /* Return results */ switch (mode2) { case 1: /* integer */ PROTECT(ans = allocVector(INTSXP, 1)); if (ISNAN(sum)) { INTEGER(ans)[0] = NA_INTEGER; } else if (sum > R_INT_MAX || sum < R_INT_MIN) { Rf_warning("Integer overflow. Use sumOver(..., mode=\"numeric\") to avoid this."); INTEGER(ans)[0] = NA_INTEGER; } else { INTEGER(ans)[0] = (int)sum; } UNPROTECT(1); break; case 2: /* numeric */ PROTECT(ans = allocVector(REALSXP, 1)); if (sum > DOUBLE_XMAX) { REAL(ans)[0] = R_PosInf; } else if (sum < -DOUBLE_XMAX) { REAL(ans)[0] = R_NegInf; } else { REAL(ans)[0] = sum; } UNPROTECT(1); break; default: /* To please compiler */ ans = NILSXP; break; } return(ans); } // sumOver() /*************************************************************************** HISTORY: 2014-11-06 [HB] o Moved validation of arguments and construction of return object to this function. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/allocMatrix2.c0000644000175100001440000001064612542546311016121 0ustar hornikusers#include #include "types.h" #include /* Checks whether setting bytes of an int/double to all zeroes corresponds to assigning a zero value. Note that the bit representation of int's and double's may not be the same on all architectures. */ int memset_zero_ok_int() { int t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } int memset_zero_ok_double() { double t = 1; memset(&t, 0, sizeof(t)); return (t == 0); } /* For debugging purposes */ /* SEXP memsetZeroable() { SEXP ans; PROTECT(ans = allocVector(LGLSXP, 2)); LOGICAL(ans)[1] = memset_zero_ok_int(); LOGICAL(ans)[2] = memset_zero_ok_double(); UNPROTECT(1); return(ans); } */ void fillWithValue(SEXP ans, SEXP value) { R_xlen_t i, n; SEXPTYPE type; double *ans_ptr_d, value_d; int *ans_ptr_i, value_i; int *ans_ptr_l, value_l; /* Argument 'ans': */ if (!isVectorAtomic(ans)) { error("Argument 'ans' must be a vector."); } n = xlength(ans); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); switch (type) { case INTSXP: value_i = asInteger(value); ans_ptr_i = INTEGER(ans); if (value_i == 0 && memset_zero_ok_int()) { memset(ans_ptr_i, 0, n*sizeof(value_i)); } else { for (i=0; i < n; i++) ans_ptr_i[i] = value_i; } break; case REALSXP: value_d = asReal(value); ans_ptr_d = REAL(ans); if (value_d == 0 && memset_zero_ok_double()) { memset(ans_ptr_d, 0, n*sizeof(value_d)); } else { for (i=0; i < n; i++) ans_ptr_d[i] = value_d; } break; case LGLSXP: value_l = asLogical(value); ans_ptr_l = LOGICAL(ans); if (value_l == 0 && memset_zero_ok_int()) { memset(ans_ptr_l, 0, n*sizeof(value_l)); } else { for (i=0; i < n; i++) ans_ptr_l[i] = value_l; } break; default: error("Argument 'value' must be either of type integer, numeric or logical."); break; } } /* fillWithValue() */ SEXP allocVector2(SEXP length, SEXP value, SEXP set) { SEXP ans; SEXPTYPE type; R_xlen_t n = 0; /* Argument 'length': */ if (isInteger(length) && xlength(length) == 1) { n = (R_xlen_t)asInteger(length); } else if (isReal(length) && xlength(length) == 1) { n = (R_xlen_t)asReal(length); } else { error("Argument 'length' must be a single numeric."); } if (n < 0) error("Argument 'length' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocVector2() */ SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value, SEXP set) { SEXP ans; SEXPTYPE type; int nc, nr; /* Argument 'nrow' & 'ncol': */ if (!isInteger(nrow) || xlength(nrow) != 1) { error("Argument 'nrow' must be a single integer."); } if (!isInteger(ncol) || xlength(ncol) != 1) { error("Argument 'ncol' must be a single integer."); } nr = asInteger(nrow); nc = asInteger(ncol); if (nr < 0) error("Argument 'nrow' is negative."); if (nr < 0) error("Argument 'ncol' is negative."); /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(ans = allocMatrix(type, nr, nc)); fillWithValue(ans, value); UNPROTECT(1); return(ans); } /* allocMatrix2() */ SEXP allocArray2(SEXP dim, SEXP value, SEXP set) { SEXP ans; SEXPTYPE type; int i, d; double nd = 1.0; R_xlen_t n; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) == 0) { error("Argument 'dim' must be an integer vector of at least length one."); } for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; nd *= d; #ifndef LONG_VECTOR_SUPPORT if (nd > R_INT_MAX) { error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX); } #endif } n = (int)nd; /* Argument 'value': */ if (!isVectorAtomic(value) || xlength(value) != 1) { error("Argument 'value' must be a scalar."); } type = TYPEOF(value); PROTECT(dim = duplicate(dim)); PROTECT(ans = allocVector(type, n)); fillWithValue(ans, value); setAttrib(ans, R_DimSymbol, dim); UNPROTECT(2); return(ans); } /* allocArray2() */ matrixStats/src/rowDiffs.c0000644000175100001440000000411512542546311015335 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowDiffs(SEXP x, ...) SEXP colDiffs(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowDiffs #define X_TYPE 'i' #include "rowDiffs_TYPE-template.h" #define X_TYPE 'r' #include "rowDiffs_TYPE-template.h" #undef METHOD SEXP rowDiffs(SEXP x, SEXP dim, SEXP lag, SEXP differences, SEXP byRow) { int byrow; SEXP ans = NILSXP; R_xlen_t lagg, diff; R_xlen_t nrow, ncol; R_xlen_t nrow_ans, ncol_ans; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Argument 'byRow': */ byrow = asLogical(byRow); /* Dimension of result matrix */ if (byrow) { nrow_ans = nrow; ncol_ans = (R_xlen_t)((double)ncol - ((double)diff*(double)lagg)); if (ncol_ans < 0) ncol_ans = 0; } else { nrow_ans = (R_xlen_t)((double)nrow - ((double)diff*(double)lagg)); if (nrow_ans < 0) nrow_ans = 0; ncol_ans = ncol; } if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow_ans, ncol_ans)); rowDiffs_Real(REAL(x), nrow, ncol, byrow, lagg, diff, REAL(ans), nrow_ans, ncol_ans); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow_ans, ncol_ans)); rowDiffs_Integer(INTEGER(x), nrow, ncol, byrow, lagg, diff, INTEGER(ans), nrow_ans, ncol_ans); UNPROTECT(1); } return(ans); } /* rowDiffs() */ /*************************************************************************** HISTORY: 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/rowVars_TYPE-template.h0000644000175100001440000000562212542546311017700 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowVars_(...) GENERATES: void rowVars_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) void rowVars_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) { R_xlen_t ii, jj, kk; R_xlen_t *colOffset; X_C_TYPE *values, value; double value_d, mu_d, sigma2_d; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t)); if (byrow) { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj*nrow; } else { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj; } for (ii=0; ii < nrow; ii++) { R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ kk = 0; for (jj=0; jj < ncol; jj++) { value = x[rowIdx+colOffset[jj]]; if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk <= 1) { ans[ii] = NA_REAL; } else { /* (a) Calculate mu = sum(x)/length(x) */ mu_d = 0; for (jj=0; jj < kk; jj++) { mu_d += (double)values[jj]; } mu_d /= (double)kk; /* (b) Calculate sigma^2 */ sigma2_d = 0; for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); value_d *= value_d; sigma2_d += value_d; } sigma2_d /= (double)(kk-1); ans[ii] = sigma2_d; } /* if (kk <= 1) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-18 [HB] o Created from rowMads_TYPE-template.h. **************************************************************************/ matrixStats/src/rowVars.c0000644000175100001440000000336112542546311015217 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowVars(SEXP x, SEXP naRm, SEXP hasNA) SEXP colVars(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowVars #define X_TYPE 'i' #include "rowVars_TYPE-template.h" #define X_TYPE 'r' #include "rowVars_TYPE-template.h" #undef METHOD SEXP rowVars(SEXP x, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) { int narm, hasna, byrow; SEXP ans; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Get dimensions of 'x'. */ if (byrow) { nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; } else { nrow = INTEGER(dim)[1]; ncol = INTEGER(dim)[0]; } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrow)); /* Double matrices are more common to use. */ if (isReal(x)) { rowVars_Real(REAL(x), nrow, ncol, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowVars_Integer(INTEGER(x), nrow, ncol, narm, hasna, byrow, REAL(ans)); } UNPROTECT(1); return(ans); } /* rowVars() */ /*************************************************************************** HISTORY: 2014-11-18 [HB] o Created from rowMads.c. **************************************************************************/ matrixStats/src/psortKM.c0000644000175100001440000000534512542546311015157 0ustar hornikusers/*************************************************************************** Public methods: SEXP psortKM(SEXP x, SEXP k, SEXP nk) Arguments: x: numeric vector k: integer scalar in [1,length(x)] m: integer scalar in [1,k] and not too large if k is large. Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2012 **************************************************************************/ #include #include #include "types.h" #include "utils.h" void psortKM_C(double *x, R_xlen_t nx, R_xlen_t k, R_xlen_t m, double *ans) { R_xlen_t ii, ll; double *xx; /* R allocate memory for the 'xx'. This will be taken care of by the R garbage collector later on. */ xx = (double *) R_alloc(nx, sizeof(double)); /* Create a local copy 'xx' of 'x'. */ for (ii=0; ii < nx; ii++) { xx[ii] = x[ii]; } /* Permute xx[0:partial] so that xx[partial+1] is in the correct place with smaller values to the left, ... Example: psortKM(x, k=50, m=2) with length(x) = 1000 rPsort(xx, 1000, 50); We know x[50] and that x[1:49] <= x[50] rPsort(xx, 50, 49); x[49] and that x[1:48] <= x[49] rPsort(xx, 49, 48); x[48] and that x[1:47] <= x[48] */ ll = nx; for (ii=0; ii < m; ii++) { rPsort(xx, ll, k-1-ii); ll = (k-1)-ii; } for (ii=0; ii < m; ii++) { ans[ii] = xx[(k-m)+ii]; } } /* psortKM_C() */ SEXP psortKM(SEXP x, SEXP k, SEXP m) { SEXP ans; R_xlen_t nx, kk, mm; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); nx = xlength(x); if (nx == 0) { error("Argument 'x' must not be empty."); } /* Argument 'k': */ if (!isInteger(k)) { error("Argument 'k' must be an integer."); } if (length(k) != 1) { error("Argument 'k' must be a single integer."); } kk = asInteger(k); if (kk <= 0) { error("Argument 'k' must be a positive integer."); } if (kk > nx) { error("Argument 'k' must not be greater than number of elements in 'x'."); } /* Argument 'm': */ if (!isInteger(m)) { error("Argument 'm' must be an integer."); } if (length(m) != 1) { error("Argument 'm' must be a single integer."); } mm = asInteger(m); if (mm <= 0) { error("Argument 'm' must be a positive integer."); } else if (mm > kk) { error("Argument 'm' must not be greater than argument 'k'."); } /* R allocate a double vector of length 'partial' */ PROTECT(ans = allocVector(REALSXP, mm)); psortKM_C(REAL(x), nx, kk, mm, REAL(ans)); UNPROTECT(1); return(ans); } /* psortKM() */ /*************************************************************************** HISTORY: 2012-09-10 [HB] o Added psortKM(). o Created. **************************************************************************/ matrixStats/src/rowMedians.c0000644000175100001440000000540712542546311015667 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowMedians(SEXP x, SEXP naRm, SEXP hasNA) SEXP colMedians(SEXP x, SEXP naRm, SEXP hasNA) Authors: Adopted from rowQuantiles.c by R. Gentleman. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowMedians #define X_TYPE 'i' #include "rowMedians_TYPE-template.h" #define X_TYPE 'r' #include "rowMedians_TYPE-template.h" #undef METHOD SEXP rowMedians(SEXP x, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) { int narm, hasna, byrow; SEXP ans; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Get dimensions of 'x'. */ if (byrow) { nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; } else { nrow = INTEGER(dim)[1]; ncol = INTEGER(dim)[0]; } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrow)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMedians_Real(REAL(x), nrow, ncol, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMedians_Integer(INTEGER(x), nrow, ncol, narm, hasna, byrow, REAL(ans)); } UNPROTECT(1); return(ans); } /* rowMedians() */ /*************************************************************************** HISTORY: 2013-01-13 [HB] o Added argument 'byRow' to rowMedians() and dropped colMedians(). o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/rowLogSumExp.c0000644000175100001440000000416112542546311016166 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013-2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #include "logSumExp_internal.h" SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) { SEXP ans; int narm, hasna, byrow; R_xlen_t nrow, ncol, len, ii; double *x, *xx, *ans_ptr; /* Argument 'lx' and 'dim': */ assertArgMatrix(lx, dim, (R_TYPE_REAL), "lx"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'byRow': */ byrow = asLogical(byRow); /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ if (byrow) { len = nrow; } else { len = ncol; } PROTECT(ans = allocVector(REALSXP, len)); ans_ptr = REAL(ans); /* Get the values */ x = REAL(lx); if (byrow) { /* R allocate memory for row-vector 'xx' of length 'ncol'. This will be taken care of by the R garbage collector later on. */ xx = (double *) R_alloc(ncol, sizeof(double)); for (ii=0; ii < nrow; ii++) { ans_ptr[ii] = logSumExp_double_by(x, ncol, narm, hasna, nrow, xx); /* Move to the beginning next row */ x++; } } else { for (ii=0; ii < ncol; ii++) { ans_ptr[ii] = logSumExp_double(x, nrow, narm, hasna); /* Move to the beginning next column */ x += nrow; } } UNPROTECT(1); /* PROTECT(ans = ...) */ return(ans); } /* rowLogSumExps() */ /*************************************************************************** HISTORY: 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts.c0000644000175100001440000000574412542546311015534 0ustar hornikusers/*************************************************************************** Public methods: SEXP colCounts(SEXP x, SEXP value, SEXP naRm, SEXP hasNA) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD colCounts #define X_TYPE 'i' #include "colCounts_TYPE-template.h" #define X_TYPE 'r' #include "colCounts_TYPE-template.h" #define X_TYPE 'l' #include "colCounts_TYPE-template.h" #undef METHOD SEXP colCounts(SEXP x, SEXP dim, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans; int narm, hasna, what2; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric value."); /* Argument 'what': */ what2 = asInteger(what); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* R allocate an integer vector of length 'ncol' */ PROTECT(ans = allocVector(INTSXP, ncol)); if (isReal(x)) { colCounts_Real(REAL(x), nrow, ncol, asReal(value), what2, narm, hasna, INTEGER(ans)); } else if (isInteger(x)) { colCounts_Integer(INTEGER(x), nrow, ncol, asInteger(value), what2, narm, hasna, INTEGER(ans)); } else if (isLogical(x)) { colCounts_Logical(LOGICAL(x), nrow, ncol, asLogical(value), what2, narm, hasna, INTEGER(ans)); } UNPROTECT(1); return(ans); } // colCounts() SEXP count(SEXP x, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans; int narm, hasna, what2; R_xlen_t nx; /* Argument 'x' and 'dim': */ assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric value."); /* Argument 'what': */ what2 = asInteger(what); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* R allocate a integer scalar */ PROTECT(ans = allocVector(INTSXP, 1)); if (isReal(x)) { colCounts_Real(REAL(x), nx, 1, asReal(value), what2, narm, hasna, INTEGER(ans)); } else if (isInteger(x)) { colCounts_Integer(INTEGER(x), nx, 1, asInteger(value), what2, narm, hasna, INTEGER(ans)); } else if (isLogical(x)) { colCounts_Logical(LOGICAL(x), nx, 1, asLogical(value), what2, narm, hasna, INTEGER(ans)); } UNPROTECT(1); return(ans); } // count() /*************************************************************************** HISTORY: 2014-11-14 [HB] o Created from rowCounts.c. **************************************************************************/ matrixStats/src/rowMedians_TYPE-template.h0000644000175100001440000001326212542546311020344 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowMedians_(...) GENERATES: void rowMedians_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) void rowMedians_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq; R_xlen_t *colOffset; X_C_TYPE *values, value; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncol % 2 == 1); qq = (R_xlen_t)(ncol/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t)); // HJ begin if (byrow) { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj*nrow; } else { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj; } // HJ end if (hasna == TRUE) { for (ii=0; ii < nrow; ii++) { R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncol; jj++) { value = x[rowIdx+colOffset[jj]]; //HJ if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = R_NaN; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrow; ii++) { R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ for (jj=0; jj < ncol; jj++) values[jj] = x[rowIdx+colOffset[jj]]; //HJ /* Permute x[0:ncol-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncol, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + (double)value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using 'ansp = REAL(ans)' once and then assigning to 'ansp' instead of to 'REAL(ans)'. 2013-04-23 [HB] o BUG FIX: The integer template of rowMedians_() would not handle ties properly. This was because ties were calculated as '(double)((rowData[qq] + value)/2)' instead of '((double)(rowData[qq] + value))/2'. 2013-01-13 [HB] o Merged rowMedians_Integer() and rowMedians_Read() into template rowMedians_(). 2013-01-13 [HB] o Using internal arguments 'by_row' instead of 'by_column'. 2011-12-11 [HB] o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s. Note that NaN:s does not exist for integers. 2011-10-12 [HJ] o Added colMedians(). o Now rowMediansInteger/Real() can operate also by columns, cf. argument 'by_column'. 2007-08-14 [HB] o Added checks for user interrupts every 1000 line. o Added argument 'hasNA' to rowMedians(). 2005-12-07 [HB] o BUG FIX: When calculating the median of an even number (non-NA) values, the length of the second sort was one element too short, which made the method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq). 2005-11-24 [HB] o By implementing a special version for integers, there is no need to coerce to double in R, which would take up twice the amount of memory. o rowMedians() now handles NAs too. o Adopted from rowQuantiles.c in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/rowDiffs_TYPE-template.h0000644000175100001440000000654212542546311020022 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowDiffs_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include #if X_TYPE == 'i' static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define X_DIFF diff_int #define DIFF_X_MATRIX diff_matrix_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #define DIFF_X_MATRIX diff_matrix_double #endif static R_INLINE void DIFF_X_MATRIX(X_C_TYPE *x, int nrow_x, int ncol_x, int byrow, int lag, X_C_TYPE *y, int nrow_y, int ncol_y) { int ii, jj, ss, tt, uu; if (byrow) { uu = lag * nrow_x; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { y[ss++] = X_DIFF(x[uu++], x[tt++]); } } } else { uu = lag; tt = 0; ss = 0; for (jj=0; jj < ncol_y; jj++) { for (ii=0; ii < nrow_y; ii++) { /* Rprintf("y[%d] = x[%d] - x[%d] = %g - %g = %g\n", ss, uu, tt, (double)x[uu], (double)x[tt], (double)X_DIFF(x[uu], x[tt])); */ y[ss++] = X_DIFF(x[uu++], x[tt++]); } tt += lag; uu += lag; } } } void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) { R_xlen_t nrow_tmp, ncol_tmp; X_C_TYPE *tmp = NULL; /* Nothing to do? */ if ((byrow && ncol_ans <= 0) || (!byrow && nrow_ans <= 0)) return; /* Special case (difference == 1) */ if (differences == 1) { DIFF_X_MATRIX(x, nrow, ncol, byrow, lag, ans, nrow_ans, ncol_ans); } else { /* Allocate temporary work matrix (to hold intermediate differences) */ if (byrow) { nrow_tmp = nrow; ncol_tmp = ncol - lag; } else { nrow_tmp = nrow - lag; ncol_tmp = ncol; } tmp = Calloc(nrow_tmp*ncol_tmp, X_C_TYPE); /* (a) First order of differences */ DIFF_X_MATRIX(x, nrow, ncol, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } /* (a) Intermediate orders of differences */ while (--differences > 1) { DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, tmp, nrow_tmp, ncol_tmp); if (byrow) { ncol_tmp = ncol_tmp - lag; } else { nrow_tmp = nrow_tmp - lag; } } /* (c) Last order of differences */ DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, ans, nrow_ans, ncol_ans); /* Deallocate temporary work matrix */ Free(tmp); } /* if (differences ...) */ } #undef X_DIFF #undef DIFF_X_MATRIX /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/indexByRow.c0000644000175100001440000000431212542546311015643 0ustar hornikusers/*************************************************************************** Public methods: SEXP indexByRow(SEXP dim, SEXP idxs) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" SEXP indexByRow(SEXP dim, SEXP idxs) { SEXP ans; int nr, nc; int *ans_ptr, *idxs_ptr; R_xlen_t i, idx, n, nidxs; int col, row; int d; double nd = 1.0; /* Argument 'dim': */ if (!isInteger(dim) || xlength(dim) != 2) { error("Argument 'dim' must be an integer vector of length two."); } for (i = 0; i < xlength(dim); i++) { d = INTEGER(dim)[i]; if (d < 0) { error("Argument 'dim' specifies a negative value: %d", d); } nd *= d; #ifndef LONG_VECTOR_SUPPORT if (nd > R_INT_MAX) { error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX); } #endif } n = (R_xlen_t)nd; /* Argument 'idxs': */ if (isNull(idxs)) { idxs_ptr = NULL; nidxs = 0; } else if (isVectorAtomic(idxs)) { idxs_ptr = INTEGER(idxs); nidxs = xlength(idxs); } else { /* To please compiler */ idxs_ptr = NULL; nidxs = 0; error("Argument 'idxs' must be NULL or a vector."); } nr = INTEGER(dim)[0]; nc = INTEGER(dim)[1]; if (idxs_ptr == NULL) { PROTECT(ans = allocVector(INTSXP, n)); ans_ptr = INTEGER(ans); row = 1; col = 0; for (i = 0; i < n; i++) { ans_ptr[i] = row + col*nr; col++; if (col == nc) { row++; col = 0; } } UNPROTECT(1); } else { PROTECT(ans = allocVector(INTSXP, nidxs)); ans_ptr = INTEGER(ans); for (i = 0; i < nidxs; i++) { // idxs <- idxs - 1 // cols <- idxs %/% dim[2L] // rows <- idxs %% dim[2L] // cols + dim[1L]*rows + 1L idx = idxs_ptr[i] - 1; col = idx / nc; row = idx - nc*col; row = idx % nc; idx = col + nr*row + 1; ans_ptr[i] = idx; } UNPROTECT(1); } return(ans); } // indexByRow() /*************************************************************************** HISTORY: 2014-11-09 [HB] o Created. **************************************************************************/ matrixStats/src/rowCounts.c0000644000175100001440000000366712542546311015570 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowCounts(SEXP x, SEXP value, SEXP naRm, SEXP hasNA) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowCounts #define X_TYPE 'i' #include "rowCounts_TYPE-template.h" #define X_TYPE 'r' #include "rowCounts_TYPE-template.h" #define X_TYPE 'l' #include "rowCounts_TYPE-template.h" #undef METHOD SEXP rowCounts(SEXP x, SEXP dim, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans; int narm, hasna, what2; R_xlen_t nrow, ncol; /* Argument 'x' & 'dim': */ assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'value': */ if (length(value) != 1) error("Argument 'value' must be a single value."); if (!isNumeric(value)) error("Argument 'value' must be a numeric value."); /* Argument 'what': */ what2 = asInteger(what); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* R allocate a double vector of length 'nrow' */ PROTECT(ans = allocVector(INTSXP, nrow)); /* Double matrices are more common to use. */ if (isReal(x)) { rowCounts_Real(REAL(x), nrow, ncol, asReal(value), what2, narm, hasna, INTEGER(ans)); } else if (isInteger(x)) { rowCounts_Integer(INTEGER(x), nrow, ncol, asInteger(value), what2, narm, hasna, INTEGER(ans)); } else if (isLogical(x)) { rowCounts_Logical(LOGICAL(x), nrow, ncol, asLogical(value), what2, narm, hasna, INTEGER(ans)); } UNPROTECT(1); return(ans); } // rowCounts() /*************************************************************************** HISTORY: 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/templates-types_undef.h0000644000175100001440000000066112542546311020102 0ustar hornikusers#undef CONCAT #undef CONCAT_MACROS #undef METHOD_NAME #undef X_C_TYPE #undef X_IN_C #undef X_ISNAN #undef X_ISNA #undef X_ABS #undef X_PSORT #undef X_QSORT_I #undef Y_C_TYPE #undef Y_IN_C #undef Y_ISNAN #undef Y_ISNA #undef Y_ABS #undef Y_PSORT #undef Y_QSORT_I #undef ANS_SXP #undef ANS_NA #undef ANS_ISNAN #undef ANS_ISNA #undef ANS_C_TYPE #undef ANS_IN_C #undef X_TYPE #undef Y_TYPE #undef ANS_TYPE #undef MARGIN #undef OP matrixStats/src/templates-types.h0000644000175100001440000000716112542546311016723 0ustar hornikusers#include /* * Sets type-specific macros */ #define CONCAT(x,y) x ##_## y #define CONCAT_MACROS(x,y) CONCAT(x,y) /* Data type macros for argument 'x' */ #if X_TYPE == 'i' #define X_C_TYPE int #define X_IN_C INTEGER #define X_ISNAN(x) (x == NA_INTEGER) #define X_ISNA(x) (x == NA_INTEGER) #define X_ABS(x) abs(x) #define X_PSORT iPsort #define X_QSORT_I R_qsort_int_I #elif X_TYPE == 'r' #define X_C_TYPE double #define X_IN_C REAL #define X_ISNAN(x) ISNAN(x) /* NA or NaN */ #define X_ISNA(x) ISNA(x) /* NA only */ #define X_ABS(x) fabs(x) #define X_PSORT rPsort #define X_QSORT_I R_qsort_I #elif X_TYPE == 'l' #define X_C_TYPE int #define X_IN_C LOGICAL #define X_ISNAN(x) (x == NA_LOGICAL) #else #error "INTERNAL ERROR: Failed to set C macro X_C_TYPE etc.: Unknown X_TYPE" #endif /* Data type macros for argument 'y' */ #ifdef Y_TYPE #if Y_TYPE == 'i' #define Y_C_TYPE int #define Y_IN_C INTEGER #define Y_ISNAN(x) (x == NA_INTEGER) #define Y_ISNA(x) (x == NA_INTEGER) #define Y_ABS(x) abs(x) #define Y_PSORT iPsort #define Y_QSORT_I R_qsort_int_I #elif Y_TYPE == 'r' #define Y_C_TYPE double #define Y_IN_C REAL #define Y_ISNAN(x) ISNAN(x) /* NA or NaN */ #define Y_ISNA(x) ISNA(x) /* NA only */ #define Y_ABS(x) fabs(x) #define Y_PSORT rPsort #define Y_QSORT_I R_qsort_I #elif Y_TYPE == 'l' #define Y_C_TYPE int #define Y_IN_C LOGICAL #define Y_ISNAN(x) (x == NA_LOGICAL) #else #error "INTERNAL ERROR: Failed to set C macro Y_C_TYPE etc.: Unknown Y_TYPE" #endif #else #define Y_TYPE '.' #endif /* Data type macros for result ('ans') */ #ifndef ANS_TYPE /* Default to same as 'x' */ #define ANS_TYPE X_TYPE #endif #if ANS_TYPE == 'i' #define ANS_SXP INTSXP #define ANS_NA NA_INTEGER #define ANS_ISNAN(x) (x == NA_INTEGER) #define ANS_ISNA(x) (x == NA_INTEGER) #define ANS_C_TYPE int #define ANS_IN_C INTEGER #elif ANS_TYPE == 'r' #define ANS_SXP REALSXP #define ANS_NA NA_REAL #define ANS_ISNAN(x) ISNAN(x) /* NA or NaN */ #define ANS_ISNA(x) ISNA(x) /* NA only */ #define ANS_C_TYPE double #define ANS_IN_C REAL #elif ANS_TYPE == 'l' #define ANS_SXP LGLSXP #define ANS_NA NA_LOGICAL #define ANS_ISNAN(x) (x == NA_LOGICAL) #define ANS_C_TYPE int #define ANS_IN_C LOGICAL #else #error "INTERNAL ERROR: Failed to set C macro ANS_C_TYPE: Unknown ANS_TYPE" #endif /* Method name based on 'x' (and 'y') types */ #ifndef METHOD_NAME #if X_TYPE == 'i' #if Y_TYPE == 'i' #define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Integer) #elif Y_TYPE == 'r' #define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Real) #elif Y_TYPE == 'l' #define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Logical) #else #define METHOD_NAME CONCAT_MACROS(METHOD, Integer) #endif #elif X_TYPE == 'r' #if Y_TYPE == 'i' #define METHOD_NAME CONCAT_MACROS(METHOD, Real_Integer) #elif Y_TYPE == 'r' #define METHOD_NAME CONCAT_MACROS(METHOD, Real_Real) #elif Y_TYPE == 'l' #define METHOD_NAME CONCAT_MACROS(METHOD, Real_Logical) #else #define METHOD_NAME CONCAT_MACROS(METHOD, Real) #endif #elif X_TYPE == 'l' #if Y_TYPE == 'i' #define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Integer) #elif Y_TYPE == 'r' #define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Real) #elif Y_TYPE == 'l' #define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Logical) #else #define METHOD_NAME CONCAT_MACROS(METHOD, Logical) #endif #else #error "INTERNAL ERROR: Failed to set C macro METHOD_NAME: Unknown X_TYPE" #endif #endif matrixStats/src/rowMads_TYPE-template.h0000644000175100001440000001604112542546311017646 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowMads_(...) GENERATES: void rowMads_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans) void rowMads_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Adopted from rowQuantiles.c by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2013 ***********************************************************************/ #include #include #include "types.h" #include /* abs() and fabs() */ /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans) { int isOdd; R_xlen_t ii, jj, kk, qq; R_xlen_t *colOffset; X_C_TYPE *values, value, mu; double *values_d, value_d, mu_d; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE)); values_d = (double *) R_alloc(ncol, sizeof(double)); /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; /* When narm == FALSE, isOdd and qq are the same for all rows */ if (narm == FALSE) { isOdd = (ncol % 2 == 1); qq = (R_xlen_t)(ncol/2) - 1; } else { isOdd = FALSE; qq = 0; } value = 0; /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t)); // HJ begin if (byrow) { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj*nrow; } else { for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj; } // HJ end hasna = TRUE; if (hasna == TRUE) { for (ii=0; ii < nrow; ii++) { R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ kk = 0; /* The index of the last non-NA value detected */ for (jj=0; jj < ncol; jj++) { value = x[rowIdx+colOffset[jj]]; //HJ if (X_ISNAN(value)) { if (narm == FALSE) { kk = -1; break; } } else { values[kk] = value; kk = kk + 1; } } /* for (jj ...) */ /* Note that 'values' will never contain NA/NaNs */ if (kk == 0) { ans[ii] = NA_REAL; } else if (kk == 1) { ans[ii] = 0; } else if (kk == -1) { ans[ii] = R_NaReal; } else { /* When narm == TRUE, isOdd and qq may change with row */ if (narm == TRUE) { isOdd = (kk % 2 == 1); qq = (R_xlen_t)(kk/2) - 1; } /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; /* Calculate mu and sigma */ if (isOdd == TRUE) { /* Since there are an odd number of values, then we also know that 'mu' is one of the values in 'x', which in turn mean we don't have to coerce integers to doubles, if 'x' is an integer. Simple benchmarking shows that it significantly faster to avoid coercion. */ mu = value; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); value = values[qq+1]; ans[ii] = scale * (double)value; } else { /* Here we have to coerce to doubles since 'mu' is an average. */ /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); #if X_TYPE == 'i' /* If the difference between two integers is an even number, then their means is also an integer, and then we can avoid coercion to double also here. This should happen roughly half the time we end up here which is worth optimizing for. Simple benchmarking show a significant difference in speed, particular for the column-based version. */ if ((values[qq] - value) % 2 == 0) { /* No need to coerce */ mu = (values[qq] + value)/2; /* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */ for (jj=0; jj < kk; jj++) { value = (values[jj] - mu); values[jj] = X_ABS(value); } /* (b) Calculate median of |x-mu| */ /* Permute x[0:kk-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, kk, qq+1); X_PSORT(values, qq+1, qq); ans[ii] = scale * ((double)values[qq] + (double)values[qq+1])/2; /* Done, continue to next vector */ continue; } #endif mu_d = ((double)values[qq] + (double)value)/2; /* (a) Subtract mu and square, i.e. x <- (x-mu)^2 */ for (jj=0; jj < kk; jj++) { value_d = ((double)values[jj] - mu_d); values_d[jj] = fabs(value_d); } /* (b) Calculate median */ /* Permute x[0:kk-1] so that x[qq-1] and x[qq] are in the correct places with smaller values to the left, ... */ rPsort(values_d, kk, qq+1); rPsort(values_d, qq+1, qq); ans[ii] = scale * (values_d[qq] + values_d[qq+1])/2; } } /* if (kk == 0) */ R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } else { for (ii=0; ii < nrow; ii++) { R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ for (jj=0; jj < ncol; jj++) values[jj] = x[rowIdx+colOffset[jj]]; //HJ /* Permute x[0:ncol-1] so that x[qq] is in the correct place with smaller values to the left, ... */ X_PSORT(values, ncol, qq+1); value = values[qq+1]; if (isOdd == TRUE) { ans[ii] = (double)value; } else { /* Permute x[0:qq-2] so that x[qq-1] is in the correct place with smaller values to the left, ... */ X_PSORT(values, qq+1, qq); ans[ii] = ((double)values[qq] + value)/2; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ } /* if (hasna ...) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-17 [HB] o Created from rowMedians_TYPE-template.h. **************************************************************************/ matrixStats/src/productExpSumLog_TYPE-template.h0000644000175100001440000000511712542546311021520 0ustar hornikusers/*********************************************************************** TEMPLATE: LDOUBLE productExpSumLog_(X_C_TYPE *x, R_xlen_t nx, int narm, int hasna) GENERATES: LDOUBLE productExpSumLog_Real(double *x, R_xlen_t nx, int narm, int hasna) LDOUBLE productExpSumLog_Integer(int *x, R_xlen_t nx, int narm, int hasna) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int narm, int hasna) { LDOUBLE y = 0.0, t; R_xlen_t ii; int isneg = 0; int hasZero = 0; /* Calculate sum(log(abs(x))) */ for (ii = 0 ; ii < nx; ii++) { t = x[ii]; /* Missing values? */ if (narm) { if (X_ISNAN(t)) continue; } #if X_TYPE == 'i' /* Early stopping? */ if (X_ISNAN(t)) { y = NA_REAL; break; } else if (t < 0) { isneg = !isneg; t = -t; } else if (t == 0) { hasZero = 1; } #elif X_TYPE == 'r' if (t < 0) { isneg = !isneg; t = -t; } #endif t = log(t); y += t; /* Rprintf("#%d: x=%g, is.nan(x)=%d, abs(x)=%g, is.nan(abs(x))=%d, log(abs(x))=%g, is.nan(log(abs(x)))=%d, sum=%g, is.nan(sum)=%d\n", ii, x[ii], R_IsNaN(x[ii]), X_ABS(x[ii]), R_IsNaN(abs(x[ii])), t, R_IsNaN(y), y, R_IsNaN(y)); */ } if (ISNAN(y)) { /* If there where NA and/or NaN elements, then 'y' will at this point be NaN. The information on an NA value is lost when calculating fabs(NA), which returns NaN. For consistency with integers, we return NA in all cases. */ y = NA_REAL; } else if (hasZero) { /* no NA in 'x' and 'x' contains zero */ y = 0; } else { y = exp(y); /* Update sign */ if (isneg) { y = -y; } /* Overflow or underflow? */ if (y > DOUBLE_XMAX) { y = R_PosInf; } else if (y < -DOUBLE_XMAX) { y = R_NegInf; } } return (double)y; } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/anyMissing.c0000644000175100001440000000402012542546311015666 0ustar hornikusers/*************************************************************************** Public methods: anyMissing(SEXP x) TO DO: Support list():s too. Copyright Henrik Bengtsson, 2007 **************************************************************************/ #include #include "types.h" SEXP anyMissing(SEXP x) { SEXP ans; R_xlen_t nx, ii; double *xdp; int *xip, *xlp; Rcomplex *xcp; PROTECT(ans = allocVector(LGLSXP, 1)); LOGICAL(ans)[0] = 0; nx = xlength(x); /* anyMissing() on zero-length objects should always return FALSE, just like any(double(0)). */ if (nx == 0) { UNPROTECT(1); return(ans); } switch (TYPEOF(x)) { case REALSXP: xdp = REAL(x); for (ii=0; ii < nx; ii++) { if ISNAN(xdp[ii]) { LOGICAL(ans)[0] = 1; break; } } break; case INTSXP: xip = INTEGER(x); for (ii=0; ii < nx; ii++) { if (xip[ii] == NA_INTEGER) { LOGICAL(ans)[0] = 1; break; } } break; case LGLSXP: xlp = LOGICAL(x); for (ii=0; ii < nx; ii++) { if (xlp[ii] == NA_LOGICAL) { LOGICAL(ans)[0] = 1; break; } } break; case CPLXSXP: xcp = COMPLEX(x); for (ii=0; ii < nx; ii++) { if (ISNAN(xcp[ii].r) || ISNAN(xcp[ii].i)) { LOGICAL(ans)[0] = 1; break; } } break; case STRSXP: for (ii=0; ii < nx; ii++) { if (STRING_ELT(x, ii) == NA_STRING) { LOGICAL(ans)[0] = 1; break; } } break; case RAWSXP: /* no such thing as a raw NA; always FALSE */ break; default: break; } /* switch() */ UNPROTECT(1); /* ans */ return(ans); } // anyMissing() /*************************************************************************** HISTORY: 2007-08-14 [HB] o Created using do_isna() in src/main/coerce.c as a template. **************************************************************************/ matrixStats/src/binCounts-BINBY-template.h0000644000175100001440000000640012542546311020174 0ustar hornikusers/*************************************************************************** TEMPLATE: void binCounts_(...) GENERATES: void binCounts_L(double *x, int nx, double *bx, int nbins, int *count) void binCounts_R(double *x, int nx, double *bx, int nbins, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2014 **************************************************************************/ #include #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binCounts_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binCounts_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) { R_xlen_t ii = 0, jj = 0, iStart = 0; int n = 0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { count[jj++] = n; // No more bins? if (jj >= nbins) { ii = nx; // Cause outer for-loop to exit break; } n = 0; } /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n == R_INT_MAX) { warn = 1; // No point in keep counting for this bin break; } // Count ++n; } // Update count of the last bin? if (jj < nbins) { count[jj] = n; // Assign the remaining bins to zero counts while (++jj < nbins) { count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-10-08 [HB] o Created template for binCounts_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/utils.h0000644000175100001440000000523712542546311014725 0ustar hornikusers#include #include "types.h" #define R_TYPE_LGL 1 /* 0b0001 */ #define R_TYPE_INT 2 /* 0b0010 */ #define R_TYPE_REAL 4 /* 0b0100 */ static R_INLINE void assertArgVector(SEXP x, int type, char *xlabel) { /* Argument 'x': */ if (!isVectorAtomic(x)) { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; } /* switch */ } /* assertArgVector() */ static R_INLINE void assertArgDim(SEXP dim, double max, char *maxlabel) { double nrow, ncol; /* Argument 'dim': */ if (!isVectorAtomic(dim) || xlength(dim) != 2 || !isInteger(dim)) { error("Argument 'dim' must be an integer vector of length two."); } nrow = (double)INTEGER(dim)[0]; ncol = (double)INTEGER(dim)[1]; if (nrow < 0) { error("Argument 'dim' specifies a negative number of rows (dim[1]): %d", nrow); } else if (ncol < 0) { error("Argument 'dim' specifies a negative number of columns (dim[2]): %d", ncol); } else if (nrow * ncol != max) { error("Argument 'dim' does not match length of argument '%s': %g * %g != %g", maxlabel, nrow, ncol, max); } } /* assertArgDim() */ static R_INLINE void assertArgMatrix(SEXP x, SEXP dim, int type, char *xlabel) { /* Argument 'x': */ if (isMatrix(x)) { } else if (isVectorAtomic(x)) { } else { error("Argument '%s' must be a matrix or a vector.", xlabel); } switch (TYPEOF(x)) { case LGLSXP: if (!(type & R_TYPE_LGL)) error("Argument '%s' cannot be logical.", xlabel); break; case INTSXP: if (!(type & R_TYPE_INT)) error("Argument '%s' cannot be integer.", xlabel); break; case REALSXP: if (!(type & R_TYPE_REAL)) error("Argument '%s' cannot be numeric.", xlabel); break; } /* switch */ /* Argument 'dim': */ assertArgDim(dim, xlength(x), "x"); } /* assertArgMatrix() */ static R_INLINE int asLogicalNoNA(SEXP x, char *xlabel) { int value = 0; if (length(x) != 1) error("Argument '%s' must be a single value.", xlabel); if (isLogical(x)) { value = asLogical(x); } else if (isInteger(x)) { value = asInteger(x); } else { error("Argument '%s' must be a logical.", xlabel); } if (value != TRUE && value != FALSE) error("Argument '%s' must be either TRUE or FALSE.", xlabel); return value; } /* asLogicalNoNA() */ matrixStats/src/colOrderStats.c0000644000175100001440000000343512542546311016346 0ustar hornikusers/*************************************************************************** Public methods: SEXP colOrderStats(SEXP x, SEXP which) Authors: Henrik Bengtsson To do: Add support for missing values. Copyright Henrik Bengtsson, 2007-2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD colOrderStats #define X_TYPE 'i' #include "colOrderStats_TYPE-template.h" #define X_TYPE 'r' #include "colOrderStats_TYPE-template.h" #undef METHOD SEXP colOrderStats(SEXP x, SEXP dim, SEXP which) { SEXP ans = NILSXP; R_xlen_t nrow, ncol, qq; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'which': */ if (length(which) != 1) error("Argument 'which' must be a single number."); if (!isNumeric(which)) error("Argument 'which' must be a numeric number."); /* Subtract one here, since rPsort does zero based addressing */ qq = asInteger(which) - 1; /* Assert that 'qq' is a valid index */ if (qq < 0 || qq >= nrow) { error("Argument 'which' is out of range."); } /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, ncol)); colOrderStats_Real(REAL(x), nrow, ncol, qq, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, ncol)); colOrderStats_Integer(INTEGER(x), nrow, ncol, qq, INTEGER(ans)); UNPROTECT(1); } return(ans); } // colOrderStats() /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created from rowOrderStats.c. **************************************************************************/ matrixStats/src/colRanges.c0000644000175100001440000000713712542546311015476 0ustar hornikusers/*************************************************************************** Public methods: SEXP colRanges(SEXP x, SEXP what) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD colRanges #define X_TYPE 'i' #include "colRanges_TYPE-template.h" #define X_TYPE 'r' #include "colRanges_TYPE-template.h" #undef METHOD SEXP colRanges(SEXP x, SEXP dim, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna; R_xlen_t nrow, ncol, jj; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); is_counted = (int *) R_alloc(ncol, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, ncol, 2)); } else { PROTECT(ans = allocVector(REALSXP, ncol)); } colRanges_Real(REAL(x), nrow, ncol, what2, narm, hasna, REAL(ans), is_counted); UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, ncol, 2)); } else { PROTECT(ans = allocVector(INTSXP, ncol)); } colRanges_Integer(INTEGER(x), nrow, ncol, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (jj=0; jj < ncol; jj++) { if (!is_counted[jj]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, ncol)); mins = INTEGER(ans); mins2 = REAL(ans2); for (jj=0; jj < ncol; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; } else { mins2[jj] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, ncol)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (jj=0; jj < ncol; jj++) { if (is_counted[jj]) { maxs2[jj] = (double)maxs[jj]; } else { maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, ncol, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[ncol]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[ncol]; for (jj=0; jj < ncol; jj++) { if (is_counted[jj]) { mins2[jj] = (double)mins[jj]; maxs2[jj] = (double)maxs[jj]; } else { mins2[jj] = R_PosInf; maxs2[jj] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } UNPROTECT(1); /* ans */ } return(ans); } // rowRanges() /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/signTabulate_TYPE-template.h0000644000175100001440000000330612542546311020654 0ustar hornikusers/*********************************************************************** TEMPLATE: void signTabulate_(X_C_TYPE *x, R_xlen_t nx, double *ans) GENERATES: void signTabulate_Real(double *x, R_xlen_t nx, double *ans) void signTabulate_Integer(int *x, R_xlen_t nx, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *ans) { X_C_TYPE xi; R_xlen_t ii; R_xlen_t nNeg = 0, nZero = 0, nPos = 0, nNA=0; #if X_TYPE == 'r' R_xlen_t nPosInf=0, nNegInf=0; #endif for (ii = 0; ii < nx; ii++) { xi = x[ii]; if (X_ISNAN(xi)) { nNA++; } else if (xi > 0) { nPos++; #if X_TYPE == 'r' if (xi == R_PosInf) nPosInf++; #endif } else if (xi < 0) { nNeg++; #if X_TYPE == 'r' if (xi == R_NegInf) nNegInf++; #endif } else if (xi == 0) { nZero++; } } ans[0] = nNeg; ans[1] = nZero; ans[2] = nPos; ans[3] = nNA; #if X_TYPE == 'r' ans[4] = nNegInf; ans[5] = nPosInf; #endif } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/colOrderStats_TYPE-template.h0000644000175100001440000000370312542546311021023 0ustar hornikusers/*********************************************************************** TEMPLATE: void colOrderStats_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int qq, X_C_TYPE *ans) GENERATES: void colOrderStats_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int qq, double *ans) void colOrderStats_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int qq, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from ditto for rows. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t offset; X_C_TYPE *values; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(nrow, sizeof(X_C_TYPE)); for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) values[ii] = x[ii+offset]; /* Sort vector of length 'nrow' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, nrow, qq); ans[jj] = values[qq]; } } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created from rowOrderStats() ditto. **************************************************************************/ matrixStats/src/signTabulate.c0000644000175100001440000000221012542546311016166 0ustar hornikusers/*************************************************************************** Public methods: SEXP signTabulate(SEXP x) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD signTabulate #define X_TYPE 'i' #include "signTabulate_TYPE-template.h" #define X_TYPE 'r' #include "signTabulate_TYPE-template.h" #undef METHOD SEXP signTabulate(SEXP x) { SEXP ans = NILSXP; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, 6)); signTabulate_Real(REAL(x), xlength(x), REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(REALSXP, 4)); signTabulate_Integer(INTEGER(x), xlength(x), REAL(ans)); UNPROTECT(1); } return(ans); } // signTabulate() /*************************************************************************** HISTORY: 2014-06-04 [HB] o Created. **************************************************************************/ matrixStats/src/binCounts.c0000644000175100001440000000360712542546311015523 0ustar hornikusers/*************************************************************************** Public methods: binCounts(SEXP x, SEXP bx, SEXP right) Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include #include "types.h" #include "utils.h" #include #define BIN_BY 'L' #include "binCounts-BINBY-template.h" #define BIN_BY 'R' #include "binCounts-BINBY-template.h" SEXP binCounts(SEXP x, SEXP bx, SEXP right) { SEXP counts = NILSXP; R_xlen_t nbins; int closedRight; /* Argument 'x': */ assertArgVector(x, (R_TYPE_REAL), "x"); /* Argument 'bx': */ assertArgVector(bx, (R_TYPE_REAL), "bx"); nbins = xlength(bx)-1; if (nbins <= 0) { error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx)); } /* Argument 'right': */ closedRight = asLogicalNoNA(right, "right"); PROTECT(counts = allocVector(INTSXP, nbins)); if (closedRight) { binCounts_R(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } else { binCounts_L(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts)); } UNPROTECT(1); return(counts); } // binCounts() /*************************************************************************** HISTORY: 2015-05-30 [HB] o Added protected against 'bx' too short. 2014-06-03 [HB] o Dropped unused variable 'count'. 2013-10-08 [HB] o Now binCounts() calls binCounts_(). 2013-05-10 [HB] o SPEEDUP: binCounts() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binCounts() would return random/garbage counts for bins that were beyond the last data point. o BUG FIX: In some cases binCounts() could try to go past the last bin. 2012-10-03 [HB] o Created. **************************************************************************/ matrixStats/src/rowCumprods.c0000644000175100001440000000271212542546311016077 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowCumprods(SEXP x, SEXP naRm, SEXP hasNA) SEXP colCumprods(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowCumprods #define X_TYPE 'i' #include "rowCumprods_TYPE-template.h" #define X_TYPE 'r' #include "rowCumprods_TYPE-template.h" #undef METHOD SEXP rowCumprods(SEXP x, SEXP dim, SEXP byRow) { int byrow; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'byRow': */ byrow = asLogical(byRow); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowCumprods_Real(REAL(x), nrow, ncol, byrow, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowCumprods_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans)); UNPROTECT(1); } return(ans); } /* rowCumprods() */ /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/rowRanges_TYPE-template.h0000644000175100001440000001375412542546311020211 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowRanges_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, X_C_TYPE *ans) GENERATES: void rowRanges_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int what, double *ans) void rowRanges_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int what, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t offset; X_C_TYPE value, *mins = NULL, *maxs = NULL; int *skip = NULL; /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { skip = (int *) R_alloc(nrow, sizeof(int)); for (ii=0; ii < nrow; ii++) { is_counted[ii] = 0; skip[ii] = 0; } /* Missing values */ if (what == 0) { /* rowMins() */ mins = ans; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { if (!narm && skip[ii]) continue; value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrow; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; } } #endif } else if (what == 1) { /* rowMaxs() */ maxs = ans; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { if (!narm && skip[ii]) continue; value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { maxs[ii] = value; is_counted[ii] = 1; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrow; ii++) { if (!is_counted[ii]) { maxs[ii] = R_NegInf; } } #endif } else if (what == 2) { /* rowRanges() */ mins = ans; maxs = &ans[nrow]; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { if (!narm && skip[ii]) continue; value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; /* Early stopping? */ #if X_TYPE == 'i' skip[ii] = 1; #elif X_TYPE == 'r' if (X_ISNA(value)) skip[ii] = 1; #endif } } else if (!is_counted[ii]) { mins[ii] = value; maxs[ii] = value; is_counted[ii] = 1; } else if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (ii=0; ii < nrow; ii++) { if (!is_counted[ii]) { mins[ii] = R_PosInf; maxs[ii] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* rowMins() */ mins = ans; /* Initiate results */ for (ii=0; ii < nrow; ii++) { mins[ii] = x[ii]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value < mins[ii]) mins[ii] = value; } } } else if (what == 1) { /* rowMax() */ maxs = ans; /* Initiate results */ for (ii=0; ii < nrow; ii++) { maxs[ii] = x[ii]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value > maxs[ii]) maxs[ii] = value; } } } else if (what == 2) { /* rowRanges()*/ mins = ans; maxs = &ans[nrow]; /* Initiate results */ for (ii=0; ii < nrow; ii++) { mins[ii] = x[ii]; maxs[ii] = x[ii]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value < mins[ii]) { mins[ii] = value; } else if (value > maxs[ii]) { maxs[ii] = value; } } } } /* if (what ...) */ } /* if (narm) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/diff2.c0000644000175100001440000000326712542546311014553 0ustar hornikusers/*************************************************************************** Public methods: SEXP diff2(SEXP x, SEXP lag, SEXP differences) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include #include "types.h" #include "utils.h" #define METHOD diff2 #define X_TYPE 'i' #include "diff2_TYPE-template.h" #define X_TYPE 'r' #include "diff2_TYPE-template.h" #undef METHOD SEXP diff2(SEXP x, SEXP lag, SEXP differences) { SEXP ans = NILSXP; R_xlen_t nx, nans, lagg, diff; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'lag': */ lagg = asInteger(lag); if (lagg < 1) { error("Argument 'lag' must be a positive integer."); } /* Argument 'differences': */ diff = asInteger(differences); if (diff < 1) { error("Argument 'differences' must be a positive integer."); } /* Length of result vector */ nans = (R_xlen_t)((double)nx - ((double)diff*(double)lagg)); if (nans < 0) nans = 0; /* Dispatch to low-level C function */ if (isReal(x)) { PROTECT(ans = allocVector(REALSXP, nans)); diff2_Real(REAL(x), nx, lagg, diff, REAL(ans), nans); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocVector(INTSXP, nans)); diff2_Integer(INTEGER(x), nx, lagg, diff, INTEGER(ans), nans); UNPROTECT(1); } else { error("Argument 'x' must be numeric."); } return ans; } // diff2() /*************************************************************************** HISTORY: 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/binMeans-BINBY-template.h0000644000175100001440000001173412542546311017772 0ustar hornikusers/*************************************************************************** TEMPLATE: binMeans_(...) GENERATES: void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - BIN_BY: 'L' or 'R' Copyright Henrik Bengtsson, 2012-2013 **************************************************************************/ #include "types.h" #if BIN_BY == 'L' /* [u,v) */ #define METHOD_NAME binMeans_L #define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1) #elif BIN_BY == 'R' /* (u,v] */ #define METHOD_NAME binMeans_R #define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0) #define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1) #endif void METHOD_NAME(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) { R_xlen_t ii = 0, jj = 0, iStart=0; R_xlen_t n = 0; LDOUBLE sum = 0.0; int warn = 0; // Count? if (nbins > 0) { // Skip to the first bin while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) { ++iStart; } // For each x... for (ii = iStart; ii < nx; ++ii) { // Skip to a new bin? while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) { // Update statistic of current bin? if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn = 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; sum = 0.0; n = 0; // ...and move to next ++jj; // No more bins? if (jj >= nbins) { // Make the outer for-loop to exit... ii = nx - 1; // ...but correct for the fact that the y[nx-1] point will // be incorrectly added to the sum. Doing the correction // here avoids an if (ii < nx) sum += y[ii] below. sum -= y[ii]; break; } } // Sum and count sum += y[ii]; ++n; } // Update statistic of the last bin? if (jj < nbins) { if (count) { /* Although unlikely, with long vectors the count for a bin can become greater than what is possible to represent by an integer. Detect and warn about this. */ if (n > R_INT_MAX) { warn= 1; count[jj] = R_INT_MAX; } else { count[jj] = n; } } ans[jj] = n > 0 ? sum / n : R_NaN; // Assign the remaining bins to zero counts and missing mean values while (++jj < nbins) { ans[jj] = R_NaN; if (count) count[jj] = 0; } } } // if (nbins > 0) if (warn) { warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX); } } /* Undo template macros */ #undef BIN_BY #undef IS_PART_OF_FIRST_BIN #undef IS_PART_OF_NEXT_BIN #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-07 [HB] o ROBUSTNESS: Added protection for integer overflow in bin counts. 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-10-01 [HB] o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins. 2014-04-04 [HB] o BUG FIX: The native code of binMeans(x, bx) would try to access an out-of-bounds value of argument 'y' iff 'x' contained elements that are left of all bins in 'bx'. This bug had no impact on the results and since no assignment was done it should also not crash/ core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. 2013-10-08 [HB] o Created template for binMeans_() to create functions that bin either by [u,v) or (u,v]. 2013-05-10 [HB] o SPEEDUP: binMeans() no longer tests in every iteration (=for every data point) whether the last bin has been reached or not. 2012-10-10 [HB] o BUG FIX: binMeans() would return random/garbage means/counts for bins that were beyond the last data point. o BUG FIX: In some cases binMeans() could try to go past the last bin. 2012-10-03 [HB] o Created binMeans(), which was adopted from from code proposed by Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as a reply to HB's R-devel thread 'Fastest non-overlapping binning mean function out there?' on Oct 3, 2012. **************************************************************************/ matrixStats/src/types.h0000644000175100001440000000120312542546311014716 0ustar hornikusers#include /* R_xlen_t, ... */ #ifndef R_INT_MIN #define R_INT_MIN -INT_MAX #endif #ifndef R_INT_MAX #define R_INT_MAX INT_MAX #endif /* As in /src/include/Defn.h */ #ifdef HAVE_LONG_DOUBLE #define LDOUBLE long double #else #define LDOUBLE double #endif /* Backward compatibility with R (< 3.0.0) As in /src/include/Rinternals.h */ #ifndef R_XLEN_T_MAX typedef int R_xlen_t; #define R_XLEN_T_MAX R_LEN_T_MAX #ifndef xlength #define xlength length #endif #endif /* Macro to check for user interrupts every 2^20 iteration */ #define R_CHECK_USER_INTERRUPT(i) if (i % 1048576 == 0) R_CheckUserInterrupt() matrixStats/src/weightedMedian_TYPE-template.h0000644000175100001440000002035512542546311021153 0ustar hornikusers/*********************************************************************** TEMPLATE: double weightedMedian_(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int interpolate, int ties) Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int interpolate, int ties) { X_C_TYPE *xtmp; double res; double dx, dy, Dy; double *wtmp, *wcum, wtotal, wlow, whigh, tmp_d, tmp_d2; R_xlen_t nxt, ii, jj, half; int *idxs; int equalweights = 0; /* Quick results? */ if (nx == 0) { return NA_REAL; } else if (nx == 1) { return (double)x[0]; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Weights */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ wtmp = Calloc(nx, double); /* Check for missing, negative, and infite weights */ nxt = 0; for (ii=0; ii < nx; ii++) { /* Assume negative or missing weight by default or that the signals is missing and should be dropped */ wtmp[ii] = 0; if (ISNAN(w[ii])) { if (!narm) { Free(wtmp); return NA_REAL; } } else if (w[ii] <= 0) { /* Drop non-positive weights */ } else if (isinf(w[ii])) { /* Detected a +Inf. From now on, treat all +Inf weights equal and drop everything else */ nxt = 0; for (jj=0; jj < nx; jj++) { /* Assume non-infinite weight by default */ wtmp[jj] = 0; if (isinf(w[jj])) { if (X_ISNAN(x[ii])) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* Infinite weight, i.e. use data point */ wtmp[jj] = 1; nxt++; } } else if (ISNAN(w[jj])) { if (!narm) { Free(wtmp); return NA_REAL; } } } equalweights = 1; break; } else { /* A data points with a finite positive weight */ if (X_ISNAN(x[ii])) { if (!narm) { Free(wtmp); return NA_REAL; } } else { /* A data point with a non-missing value */ wtmp[ii] = w[ii]; nxt++; } } } /* printf("nx=%d, nxt=%d\n", nx, nxt); for (ii=0; ii < nx; ii++) printf("w[%d]=%g, wtmp[%d]=%g\n", (int)ii, (double)w[ii], (int)ii, wtmp[ii]); */ /* Nothing to do? */ if (nxt == 0) { Free(wtmp); return NA_REAL; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Copy (x,w) to work with and calculate total weight */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ xtmp = Calloc(nxt, X_C_TYPE); jj = 0; wtotal = 0; for (ii=0; ii < nx; ii++) { if (wtmp[ii] > 0) { /* printf("ii=%d, jj=%d, wtmp[%d]=%g\n", (int)ii, (int)jj, (int)ii, wtmp[ii]); */ xtmp[jj] = x[ii]; wtmp[jj] = wtmp[ii]; wtotal += wtmp[jj]; jj++; } } x = xtmp; w = wtmp; nx = nxt; nw = nx; /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g, w[%d]=%g\n", (int)ii, (double)x[ii], (int)ii, w[ii]); */ /* Early stopping? */ if (nx == 1) { res = (double)x[0]; Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* All weights equal? Happens if +Inf were detected. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (equalweights) { half = (nx+1) / 2; /* printf("half=%d\n", (int)half); */ X_PSORT(x, nx, half); /* for (ii=0; ii < nx; ii++) printf("x[%d]=%g\n", (int)ii, (double)x[ii]); */ /* FIXME: Add support for ties here too */ if (nx % 2 == 1) { res = (double)x[half-1]; } else { X_PSORT(x, half, half-1); res = ((double)x[half-1] + (double)x[half]) / 2; } Free(xtmp); Free(wtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Sort x and calculated the cumulative sum of weights (normalize to */ /* one) according to the reordered vector. */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* (a) Sort x */ idxs = Calloc(nx, int); for (ii = 0; ii < nx; ii++) idxs[ii] = ii; X_QSORT_I(x, idxs, 1, nx); /* (b) Normalized cumulative weights */ wcum = Calloc(nx, double); tmp_d2 = 0; /* Index where cumulative weight passed 1/2 */ half = nx+1; /* Default is last */ if (interpolate) { /* Adjust */ for (ii = 0; ii < nx; ii++) { tmp_d = w[idxs[ii]] / wtotal; tmp_d2 += tmp_d; wcum[ii] = tmp_d2 - (tmp_d/2); if (wcum[ii] >= 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } else { for (ii = 0; ii < nx; ii++) { tmp_d2 += w[idxs[ii]] / wtotal; wcum[ii] = tmp_d2; if (tmp_d2 > 0.5) { half = ii; /* Early stopping - no need to continue */ break; } } } Free(wtmp); Free(idxs); /* Two special cases where more than half of the total weight is at a) the first, or b) the last value */ if (half == 0 || half == nx) { res = (double)x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 1: Linearly interpolated weighted median */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ if (interpolate) { /* The width and the height of the "rectangle". */ dx = (double)(x[half] - x[half-1]); Dy = wcum[half] - wcum[half-1]; /* printf("dx=%g, Dy=%g\n", dx, Dy); */ /* The width and the height of the triangle which upper corner touches the level where the cumulative sum of weights *equals* half the total weight. */ dy = 0.5 - wcum[half]; dx = (dy/Dy) * dx; /* printf("dx=%g, dy=%g\n", dx, dy); */ /* The corresponding x value */ res = dx + x[half]; Free(wcum); Free(xtmp); return res; } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* Alt 2: Classical weighted median (tied or not) */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ /* At this point we know that: 1) at most half the total weight is in the set x[1:half], 2) that the set x[(half+2):n] contains less than half the total weight The question is whether x[(half+1):n] contains *more* than half the total weight (try x=c(1,2,3), w=c(1,1,1)). If it is then we can be sure that x[half+1] is the weighted median we are looking for, otherwise it is any function of x[k:(half+1)]. */ wlow = wcum[half-1]; whigh = 1 - wlow; /* printf("half=%d, wtotal=%g, wlow=%g, whigh=%g, ties=%d\n", half, (double)wtotal, (double)wlow, (double)whigh, ties); printf("x[half+(-1:1)]=c(%g, %g, %g)\n", x[half-1-1], x[half-1], x[half-1+1]); */ if (whigh > 0.5) { /* printf("matrixStats2: Not a tie!\n"); */ /* Not a tie */ res = x[half]; } else { /* printf("matrixStats2: A tie!\n"); */ /* A tie! */ if (ties == 1) { /* weighted */ /* printf("ties=%d, half=%d, wlow*x[half]=%g, whigh*x[half+1]=%g\n", ties, half, wlow*x[half-1], whigh*x[half]); */ res = wlow*(double)x[half-1] + whigh*(double)x[half]; } else if (ties == 2) { /* min */ res = (double)x[half-1]; } else if (ties == 4) { /* max */ res = (double)x[half]; } else if (ties == 8) { /* mean */ res = ((double)x[half-1] + (double)x[half]) / 2; } else { error("Unknown value of argument 'ties': %d", ties); } } Free(wcum); Free(xtmp); return res; } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/colRanges_TYPE-template.h0000644000175100001440000001333112542546311020146 0ustar hornikusers/*********************************************************************** TEMPLATE: void colRanges_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, X_C_TYPE *ans) GENERATES: void colRanges_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int what, double *ans) void colRanges_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int what, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) { R_xlen_t ii, jj; R_xlen_t offset; X_C_TYPE value, *mins = NULL, *maxs = NULL; /* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */ /* If there are no missing values, don't try to remove them. */ if (hasna == FALSE) narm = FALSE; if (hasna) { for (jj=0; jj < ncol; jj++) is_counted[jj] = 0; /* Missing values */ if (what == 0) { /* colMins() */ mins = ans; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { mins[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = value; is_counted[jj] = 1; } else if (value < mins[jj]) { mins[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncol; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; } } #endif } else if (what == 1) { /* colMaxs() */ maxs = ans; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { maxs[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { maxs[jj] = value; is_counted[jj] = 1; } else if (value > maxs[jj]) { maxs[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncol; jj++) { if (!is_counted[jj]) { maxs[jj] = R_NegInf; } } #endif } else if (what == 2) { /* colRanges() */ mins = ans; maxs = &ans[ncol]; for (jj=0; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (X_ISNAN(value)) { if (!narm) { mins[jj] = value; maxs[jj] = value; is_counted[jj] = 1; /* Early stopping? */ #if X_TYPE == 'i' break; #elif X_TYPE == 'r' if (X_ISNA(value)) break; #endif } } else if (!is_counted[jj]) { mins[jj] = value; maxs[jj] = value; is_counted[jj] = 1; } else if (value < mins[jj]) { mins[jj] = value; } else if (value > maxs[jj]) { maxs[jj] = value; } } } /* for (jj ...) */ #if X_TYPE == 'r' /* Handle zero non-missing values */ for (jj=0; jj < ncol; jj++) { if (!is_counted[jj]) { mins[jj] = R_PosInf; maxs[jj] = R_NegInf; } } #endif } /* if (what ...) */ } else { /* No missing values */ if (what == 0) { /* colMins() */ mins = ans; /* Initiate results */ for (jj=0; jj < ncol; jj++) { mins[jj] = x[jj]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value < mins[jj]) mins[jj] = value; } } } else if (what == 1) { /* colMax() */ maxs = ans; /* Initiate results */ for (jj=0; jj < ncol; jj++) { maxs[jj] = x[jj]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value > maxs[jj]) maxs[jj] = value; } } } else if (what == 2) { /* colRanges()*/ mins = ans; maxs = &ans[ncol]; /* Initiate results */ for (jj=0; jj < ncol; jj++) { mins[jj] = x[jj]; maxs[jj] = x[jj]; } for (jj=1; jj < ncol; jj++) { offset = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { value = x[ii+offset]; if (value < mins[jj]) { mins[jj] = value; } else if (value > maxs[jj]) { maxs[jj] = value; } } } } /* if (what ...) */ } /* if (narm) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanksWithTies.c0000644000175100001440000001466712542546311017056 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowRanksWithTies(SEXP x, SEXP tiesMethod, SEXP byRow) Authors: Hector Corrada Bravo, Peter Langfelder and Henrik Bengtsson TO DO: Add support for missing values. **************************************************************************/ #include #include "utils.h" /* Template Ranks__ties() */ /***************************************************************** * ties.method = "min" *****************************************************************/ #define METHOD_NAME rowRanks_Real_tiesMin #define MARGIN 'r' #define X_TYPE 'r' #define TIESMETHOD '0' /* min */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME rowRanks_Integer_tiesMin #define MARGIN 'r' #define X_TYPE 'i' #define TIESMETHOD '0' /* min */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Real_tiesMin #define MARGIN 'c' #define X_TYPE 'r' #define TIESMETHOD '0' /* min */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Integer_tiesMin #define MARGIN 'c' #define X_TYPE 'i' #define TIESMETHOD '0' /* min */ #include "rowRanksWithTies_TYPE_TIES-template.h" /***************************************************************** * ties.method = "max" *****************************************************************/ #define METHOD_NAME rowRanks_Real_tiesMax #define MARGIN 'r' #define X_TYPE 'r' #define TIESMETHOD '1' /* max */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME rowRanks_Integer_tiesMax #define MARGIN 'r' #define X_TYPE 'i' #define TIESMETHOD '1' /* max */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Real_tiesMax #define MARGIN 'c' #define X_TYPE 'r' #define TIESMETHOD '1' /* max */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Integer_tiesMax #define MARGIN 'c' #define X_TYPE 'i' #define TIESMETHOD '1' /* max */ #include "rowRanksWithTies_TYPE_TIES-template.h" /***************************************************************** * ties.method = "average" *****************************************************************/ #define METHOD_NAME rowRanks_Real_tiesAverage #define MARGIN 'r' #define X_TYPE 'r' #define TIESMETHOD 'a' /* average */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME rowRanks_Integer_tiesAverage #define MARGIN 'r' #define X_TYPE 'i' #define TIESMETHOD 'a' /* average */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Real_tiesAverage #define MARGIN 'c' #define X_TYPE 'r' #define TIESMETHOD 'a' /* average */ #include "rowRanksWithTies_TYPE_TIES-template.h" #define METHOD_NAME colRanks_Integer_tiesAverage #define MARGIN 'c' #define X_TYPE 'i' #define TIESMETHOD 'a' /* average */ #include "rowRanksWithTies_TYPE_TIES-template.h" /* Peter Langfelder's modifications: * byrow: 0 => rank columns, !0 => rank rows * tiesMethod: 1: maximum, 2: average, 3:minimum * The returned rank is a REAL matrix to accomodate average ranks */ SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP tiesMethod, SEXP byRow) { int tiesmethod, byrow; SEXP ans = NILSXP; int nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'tiesMethod': */ tiesmethod = asInteger(tiesMethod); if (tiesmethod < 1 || tiesmethod > 3) { error("Argument 'tiesMethod' is out of range [1,3]: %d", tiesmethod); } /* Argument 'byRow': */ byrow = asLogical(byRow); /* Double matrices are more common to use. */ if (isReal(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowRanks_Real_tiesMax(REAL(x), nrow, ncol, 1, INTEGER(ans)); UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowRanks_Real_tiesAverage(REAL(x), nrow, ncol, 1, REAL(ans)); UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowRanks_Real_tiesMin(REAL(x), nrow, ncol, 1, INTEGER(ans)); UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); colRanks_Real_tiesMax(REAL(x), nrow, ncol, 0, INTEGER(ans)); UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); colRanks_Real_tiesAverage(REAL(x), nrow, ncol, 0, REAL(ans)); UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); colRanks_Real_tiesMin(REAL(x), nrow, ncol, 0, INTEGER(ans)); UNPROTECT(1); break; } /* switch */ } } else if (isInteger(x)) { if (byrow) { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowRanks_Integer_tiesMax(INTEGER(x), nrow, ncol, 1, INTEGER(ans)); UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowRanks_Integer_tiesAverage(INTEGER(x), nrow, ncol, 1, REAL(ans)); UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowRanks_Integer_tiesMin(INTEGER(x), nrow, ncol, 1, INTEGER(ans)); UNPROTECT(1); break; } /* switch */ } else { switch (tiesmethod) { case 1: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); colRanks_Integer_tiesMax(INTEGER(x), nrow, ncol, 0, INTEGER(ans)); UNPROTECT(1); break; case 2: PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); colRanks_Integer_tiesAverage(INTEGER(x), nrow, ncol, 0, REAL(ans)); UNPROTECT(1); break; case 3: PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); colRanks_Integer_tiesMin(INTEGER(x), nrow, ncol, 0, INTEGER(ans)); UNPROTECT(1); break; } /* switch */ } } return(ans); } // rowRanksWithTies() /*************************************************************************** HISTORY: 2013-01-13 [HB] o Added argument 'tiesMethod' to rowRanks(). **************************************************************************/ matrixStats/src/rowRanges.c0000644000175100001440000000713712542546311015530 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowRanges(SEXP x, SEXP what) Authors: Henrik Bengtsson. Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowRanges #define X_TYPE 'i' #include "rowRanges_TYPE-template.h" #define X_TYPE 'r' #include "rowRanges_TYPE-template.h" #undef METHOD SEXP rowRanges(SEXP x, SEXP dim, SEXP what, SEXP naRm, SEXP hasNA) { SEXP ans = NILSXP, ans2 = NILSXP; int *mins, *maxs; double *mins2, *maxs2; int *is_counted, all_counted = 0; int what2, narm, hasna; R_xlen_t nrow, ncol, ii; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'what': */ if (length(what) != 1) error("Argument 'what' must be a single number."); if (!isNumeric(what)) error("Argument 'what' must be a numeric number."); what2 = asInteger(what); if (what2 < 0 || what2 > 2) error("Invalid value of 'what': %d", what2); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); is_counted = (int *) R_alloc(nrow, sizeof(int)); if (isReal(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(REALSXP, nrow, 2)); } else { PROTECT(ans = allocVector(REALSXP, nrow)); } rowRanges_Real(REAL(x), nrow, ncol, what2, narm, hasna, REAL(ans), is_counted); UNPROTECT(1); } else if (isInteger(x)) { if (what2 == 2) { PROTECT(ans = allocMatrix(INTSXP, nrow, 2)); } else { PROTECT(ans = allocVector(INTSXP, nrow)); } rowRanges_Integer(INTEGER(x), nrow, ncol, what2, narm, hasna, INTEGER(ans), is_counted); /* Any entries with zero non-missing values? */ all_counted = 1; for (ii=0; ii < nrow; ii++) { if (!is_counted[ii]) { all_counted = 0; break; } } if (!all_counted) { /* Handle zero non-missing values */ /* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */ if (what2 == 0) { PROTECT(ans2 = allocVector(REALSXP, nrow)); mins = INTEGER(ans); mins2 = REAL(ans2); for (ii=0; ii < nrow; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; } else { mins2[ii] = R_PosInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 1) { PROTECT(ans2 = allocVector(REALSXP, nrow)); maxs = INTEGER(ans); maxs2 = REAL(ans2); for (ii=0; ii < nrow; ii++) { if (is_counted[ii]) { maxs2[ii] = (double)maxs[ii]; } else { maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } else if (what2 == 2) { PROTECT(ans2 = allocMatrix(REALSXP, nrow, 2)); mins = INTEGER(ans); maxs = &INTEGER(ans)[nrow]; mins2 = REAL(ans2); maxs2 = &REAL(ans2)[nrow]; for (ii=0; ii < nrow; ii++) { if (is_counted[ii]) { mins2[ii] = (double)mins[ii]; maxs2[ii] = (double)maxs[ii]; } else { mins2[ii] = R_PosInf; maxs2[ii] = R_NegInf; } } UNPROTECT(1); /* ans2 */ } ans = ans2; } UNPROTECT(1); /* ans */ } return(ans); } // rowRanges() /*************************************************************************** HISTORY: 2014-11-16 [HB] o Created. **************************************************************************/ matrixStats/src/meanOver.c0000644000175100001440000000333512542546311015331 0ustar hornikusers/*************************************************************************** Public methods: SEXP meanOver(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD meanOver #define X_TYPE 'i' #include "meanOver_TYPE-template.h" #define X_TYPE 'r' #include "meanOver_TYPE-template.h" #undef METHOD SEXP meanOver(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) { SEXP ans; int *idxs_ptr; R_xlen_t nidxs; int narm, refine2; double avg = NA_REAL; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'idxs': */ if (isNull(idxs)) { idxs_ptr = NULL; nidxs = 0; } else if (isVectorAtomic(idxs)) { idxs_ptr = INTEGER(idxs); nidxs = xlength(idxs); } else { /* To please compiler */ idxs_ptr = NULL; nidxs = 0; error("Argument 'idxs' must be NULL or a vector."); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'refine': */ refine2 = asLogicalNoNA(refine, "refine"); /* Double matrices are more common to use. */ if (isReal(x)) { avg = meanOver_Real(REAL(x), xlength(x), idxs_ptr, nidxs, narm, refine2); } else if (isInteger(x)) { avg = meanOver_Integer(INTEGER(x), xlength(x), idxs_ptr, nidxs, narm, refine2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = avg; UNPROTECT(1); return(ans); } // meanOver() /*************************************************************************** HISTORY: 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowCumsums.c0000644000175100001440000000263512542546311015743 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowCumsums(SEXP x, ...) SEXP colCumsums(SEXP x, ...) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowCumsums #define X_TYPE 'i' #include "rowCumsums_TYPE-template.h" #define X_TYPE 'r' #include "rowCumsums_TYPE-template.h" #undef METHOD SEXP rowCumsums(SEXP x, SEXP dim, SEXP byRow) { int byrow; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'byRow': */ byrow = asLogical(byRow); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowCumsums_Real(REAL(x), nrow, ncol, byrow, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowCumsums_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans)); UNPROTECT(1); } return(ans); } /* rowCumsums() */ /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/rowOrderStats_TYPE-template.h0000644000175100001440000000544312542546311021060 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowOrderStats_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int qq, X_C_TYPE *ans) GENERATES: void rowOrderStats_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int qq, double *ans) void rowOrderStats_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int qq, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' Authors: Adopted from rowQ() by R. Gentleman. Template by Henrik Bengtsson. Copyright: Henrik Bengtsson, 2007-2014 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t qq, X_C_TYPE *ans) { R_xlen_t ii, jj; R_xlen_t *colOffset; X_C_TYPE *values; /* R allocate memory for the 'values'. This will be taken care of by the R garbage collector later on. */ values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE)); /* Pre-calculate the column offsets */ colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t)); for (jj=0; jj < ncol; jj++) colOffset[jj] = (R_xlen_t)jj*nrow; for (ii=0; ii < nrow; ii++) { for (jj=0; jj < ncol; jj++) values[jj] = x[ii+colOffset[jj]]; /* Sort vector of length 'ncol' up to position 'qq'. "...partial sorting: they permute x so that x[qq] is in the correct place with smaller values to the left, larger ones to the right." */ X_PSORT(values, ncol, qq); ans[ii] = values[qq]; } } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-01-13 [HB] o Merged rowOrderStatsReal() and rowOrderStatsInteger() into one rowOrderStats_() templated function. 2009-02-04 [HB] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. 2008-03-25 [HB] o Renamed from 'rowQuantiles' to 'rowOrderStats'. 2007-08-10 [HB] o Removed arguments for NAs since rowOrderStats() still don't support it. 2005-11-24 [HB] o Cool, it works and compiles nicely. o Preallocate colOffset to speed up things even more. o Added more comments and error checking. o Adopted from rowQ() in Biobase of Bioconductor. **************************************************************************/ matrixStats/src/logSumExp_internal.h0000644000175100001440000000060012542546311017371 0ustar hornikusers/*************************************************************************** Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013-2014 **************************************************************************/ double logSumExp_double(double *x, R_xlen_t nx, int narm, int hasna); double logSumExp_double_by(double *x, R_xlen_t nx, int narm, int hasna, int by, double *xx); matrixStats/src/rowCumsums_TYPE-template.h0000644000175100001440000000717312542546311020424 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowCumsums_(...) GENERATES: void rowCumsums_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) void rowCumsums_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev; LDOUBLE value; #if ANS_TYPE == 'i' double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; /* OK, i.e. no integer overflow yet? */ int warn = 0, ok, *oks = NULL; #endif if (nrow == 0 || ncol == 0) return; if (byrow) { #if ANS_TYPE == 'i' oks = (int *) R_alloc(nrow, sizeof(int)); #endif for (kk=0; kk < nrow; kk++) { ans[kk] = (ANS_C_TYPE) x[kk]; #if ANS_TYPE == 'i' oks[kk] = !X_ISNA(x[kk]); #endif } kk_prev = 0; for (jj=1; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { #if ANS_TYPE == 'i' if (oks[ii]) { /* Missing value? */ if (X_ISNA(x[kk])) { oks[ii] = 0; ans[kk] = ANS_NA; } else { value = (LDOUBLE) ans[kk_prev] + (LDOUBLE) x[kk]; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { oks[ii] = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] + (LDOUBLE) x[kk]); #endif kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncol; jj++) { value = 0; #if ANS_TYPE == 'i' ok = 1; #endif for (ii=0; ii < nrow; ii++) { #if ANS_TYPE == 'i' if (ok) { /* Missing value? */ if (X_ISNA(x[kk])) { ok = 0; ans[kk] = ANS_NA; } else { value += (LDOUBLE) x[kk]; /* Integer overflow? */ if (value < R_INT_MIN_d || value > R_INT_MAX_d) { ok = 0; warn = 1; ans[kk] = ANS_NA; } else { ans[kk] = (ANS_C_TYPE) value; } } } else { ans[kk] = ANS_NA; } #else value += x[kk]; ans[kk] = (ANS_C_TYPE) value; #endif kk++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (warn) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/logSumExp.c0000644000175100001440000000230412542546311015473 0ustar hornikusers/*************************************************************************** Public methods: SEXP logSumExp(SEXP lx, SEXP naRm, SEXP hasNA) Arguments: lx : numeric vector naRm : a logical scalar hasNA: a logical scalar Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2013 **************************************************************************/ #include #include #include "types.h" #include "utils.h" #include "logSumExp_internal.h" SEXP logSumExp(SEXP lx, SEXP naRm, SEXP hasNA) { int narm, hasna; /* Argument 'lx': */ assertArgVector(lx, (R_TYPE_REAL), "lx"); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); return(Rf_ScalarReal(logSumExp_double(REAL(lx), xlength(lx), narm, hasna))); } /* logSumExp() */ /*************************************************************************** HISTORY: 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/logSumExp_internal.c0000644000175100001440000001074212542546311017374 0ustar hornikusers#include #include #include "types.h" #include "utils.h" /* logSumExp_double(x): 1. Scans for the maximum value of x=(x[0], x[1], ..., x[n-1]) 2. Computes result from 'x'. NOTE: The above sweeps the "contiguous" 'x' vector twice. */ double logSumExp_double(double *x, R_xlen_t nx, int narm, int hasna) { R_xlen_t ii, iMax; double xii, xMax; LDOUBLE sum; int hasna2 = FALSE; /* Indicates whether NAs where detected or not */ /* Quick return? */ if (nx == 0) { return(R_NegInf); } else if (nx == 1) { if (narm && ISNAN(x[0])) { return(R_NegInf); } else { return(x[0]); } } /* Find the maximum value */ iMax = 0; xMax = x[0]; if (ISNAN(xMax)) hasna2 = TRUE; for (ii=1; ii < nx; ii++) { /* Get the ii:th value */ xii = x[ii]; if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && ISNAN(xMax))) { iMax = ii; xMax = xii; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ /* Early stopping? */ if (ISNAN(xMax)) { /* Found only missing values? */ return narm ? R_NegInf : R_NaReal; } else if (xMax == R_PosInf) { /* Found +Inf? */ return(R_PosInf); } /* Sum differences */ sum = 0.0; for (ii=0; ii < nx; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ xii = x[ii]; if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ sum = xMax + log1p(sum); return(sum); } /* logSumExp_double() */ /* logSumExp_double_by(x): 1. Scans for the maximum value of x=(x[0], x[by], ..., x[(n-1)*by]) and copies the values to xx = (xx[0], xx[1], xx[2], ..., xx[n-1]), which *must* be preallocated. 2. Computes result from 'xx'. NOTE: The above sweeps the "scattered" 'x' vector only once, and then the "contigous" 'xx' vector once. This is more likely to create cache hits. */ double logSumExp_double_by(double *x, R_xlen_t nx, int narm, int hasna, int by, double *xx) { R_xlen_t ii, iMax, idx; double xii, xMax; LDOUBLE sum; int hasna2 = FALSE; /* Indicates whether NAs where detected or not */ /* Quick return? */ if (nx == 0) { return(R_NegInf); } else if (nx == 1) { if (narm && ISNAN(x[0])) { return(R_NegInf); } else { return(x[0]); } } /* To increase the chances for cache hits below, which sweeps through the data twice, we copy data into a temporary contigous vector while scanning for the maximum value. */ /* Find the maximum value (and copy) */ iMax = 0; xMax = x[0]; if (ISNAN(xMax)) hasna2 = TRUE; xx[0] = xMax; idx = 0; for (ii=1; ii < nx; ii++) { /* Get the ii:th value */ idx = idx + by; xii = x[idx]; /* Copy */ xx[ii] = xii; if (hasna && ISNAN(xii)) { if (narm) { hasna2 = TRUE; continue; } else { return(R_NaReal); } } if (xii > xMax || (narm && ISNAN(xMax))) { iMax = ii; xMax = xii; } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ /* Early stopping? */ if (ISNAN(xMax)) { /* Found only missing values? */ return narm ? R_NegInf : R_NaReal; } else if (xMax == R_PosInf) { /* Found +Inf? */ return(R_PosInf); } /* Sum differences */ sum = 0.0; for (ii=0; ii < nx; ii++) { if (ii == iMax) { continue; } /* Get the ii:th value */ xii = xx[ii]; if (!hasna2 || !ISNAN(xii)) { sum += exp(xii - xMax); } R_CHECK_USER_INTERRUPT(ii); } /* for (ii ...) */ sum = xMax + log1p(sum); return(sum); } /* logSumExp_double_by() */ /*************************************************************************** HISTORY: 2015-01-26 [HB] o SPEEDUP: Now step 2 ("summing") only checks where NAs if NAs were detected in step 1 ("max value"), which should be noticibly faster since testing for NA is expensive for double values. o SPEEDUP: Now function returns early after step 1 ("max value") if the maximum value found is +Inf, or if all values where NAs. o BUG FIX: Now logSumExp(, na.rm=TRUE) also returns -Inf. 2013-05-02 [HB] o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the 'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission. 2013-04-30 [HB] o Created. **************************************************************************/ matrixStats/src/colCounts_TYPE-template.h0000644000175100001440000001163312542546311020205 0ustar hornikusers/*********************************************************************** TEMPLATE: void colCounts_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int narm, int hasna, int *ans) GENERATES: void colCounts_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, double value, int narm, int hasna, int *ans) void colCounts_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans) void colCounts_Logical(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int what, int narm, int hasna, int *ans) { R_xlen_t ii, jj, kk; int count; X_C_TYPE xvalue; if (what == 0L) { /* all */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { count = 1; for (ii=0; ii < nrow; ii++) { if (!X_ISNAN(x[kk++])) { count = 0; /* Found another value! Early stopping */ kk += nrow - ii - 1; break; } } ans[jj] = count; } } else { kk = 0; for (jj=0; jj < ncol; jj++) { count = 1; for (ii=0; ii < nrow; ii++) { xvalue = x[kk++]; if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ count = NA_INTEGER; } else { count = 0; /* Found another value! Early stopping */ kk += nrow - ii - 1; break; } } /* for (ii ...) */ ans[jj] = count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 1L) { /* any */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { count = 0; for (ii=0; ii < nrow; ii++) { if (X_ISNAN(x[kk++])) { count = 1; /* Found value! Early stopping */ kk += nrow - ii - 1; break; } } ans[jj] = count; } } else { kk = 0; for (jj=0; jj < ncol; jj++) { count = 0; for (ii=0; ii < nrow; ii++) { xvalue = x[kk++]; if (xvalue == value) { count = 1; /* Found value! Early stopping */ kk += nrow - ii - 1; break; } else if (narm && X_ISNAN(xvalue)) { /* Skipping */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ count = NA_INTEGER; } } /* for (ii ...) */ ans[jj] = count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else if (what == 2L) { /* count */ /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { count = 0; for (ii=0; ii < nrow; ii++) { if (X_ISNAN(x[kk++])) { ++count; } } ans[jj] = count; } } else { kk = 0; for (jj=0; jj < ncol; jj++) { count = 0; for (ii=0; ii < nrow; ii++) { xvalue = x[kk++]; if (xvalue == value) { ++count; } else if (!narm && X_ISNAN(xvalue)) { count = NA_INTEGER; /* Early stopping */ kk += nrow - ii - 1; break; } } /* for (ii ...) */ ans[jj] = count; } /* for (jj ...) */ } /* if (X_ISNAN(value)) */ } else { error("INTERNAL ERROR: Unknown value of 'what' for colCounts: %d", what); } /* if (what) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-14 [HB] o Created colCounts() templates from rowCounts() templates. **************************************************************************/ matrixStats/src/rowCumMinMaxs_TYPE-template.h0000644000175100001440000000613412542546311021005 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowCummins_(...) GENERATES: void rowCummins_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) void rowCummins_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD: the name of the resulting function - X_TYPE: 'i' or 'r' Authors: Henrik Bengtsson. Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) */ #include "templates-types.h" #if COMP == '<' #define OP < #elif COMP == '>' #define OP > #endif void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) { R_xlen_t ii, jj, kk, kk_prev; ANS_C_TYPE value; int ok; int *oks = NULL; if (nrow == 0 || ncol == 0) return; if (byrow) { oks = (int *) R_alloc(nrow, sizeof(int)); for (kk=0; kk < nrow; kk++) { value = (ANS_C_TYPE) x[kk]; if (ANS_ISNAN(value)) { oks[kk] = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { oks[kk] = 1; ans[kk] = value; } } kk_prev = 0; for (jj=1; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { if (oks[ii]) { value = (ANS_C_TYPE) x[kk]; if (ANS_ISNAN(value)) { oks[ii] = 0; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } } else { ans[kk] = ANS_NA; } kk++; kk_prev++; R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } else { kk = 0; for (jj=0; jj < ncol; jj++) { value = (ANS_C_TYPE) x[kk]; if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { ok = 1; ans[kk] = value; } kk_prev = kk; kk++; for (ii=1; ii < nrow; ii++) { if (ok) { value = (ANS_C_TYPE) x[kk]; if (ANS_ISNAN(value)) { ok = 0; value = ANS_NA; ans[kk] = ANS_NA; } else { if (value OP ans[kk_prev]) { ans[kk] = value; } else { ans[kk] = (ANS_C_TYPE) ans[kk_prev]; } } kk++; kk_prev++; } else { ans[kk] = ANS_NA; kk++; } R_CHECK_USER_INTERRUPT(kk); } /* for (ii ...) */ } /* for (jj ...) */ } /* if (byrow) */ } #undef OP /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars_TYPE-template.h. **************************************************************************/ matrixStats/src/weightedMedian.c0000644000175100001440000000342312542546311016471 0ustar hornikusers/*************************************************************************** Public methods: SEXP weightedMedian(SEXP x, SEXP w, SEXP naRm, SEXP interpolate, SEXP ties) Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #include #define METHOD weightedMedian #define X_TYPE 'i' #include "weightedMedian_TYPE-template.h" #define X_TYPE 'r' #include "weightedMedian_TYPE-template.h" #undef METHOD SEXP weightedMedian(SEXP x, SEXP w, SEXP naRm, SEXP interpolate, SEXP ties) { SEXP ans; int narm, interpolate2, ties2; double mu = NA_REAL; R_xlen_t nx, nw; /* Argument 'x': */ assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x"); nx = xlength(x); /* Argument 'x': */ assertArgVector(w, (R_TYPE_REAL), "w"); nw = xlength(w); if (nx != nw) { error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw); } /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'interpolate': */ interpolate2 = asLogicalNoNA(interpolate, "interpolate"); /* Argument 'ties': */ ties2 = asInteger(ties); /* Double matrices are more common to use. */ if (isReal(x)) { mu = weightedMedian_Real(REAL(x), nx, REAL(w), nw, narm, interpolate2, ties2); } else if (isInteger(x)) { mu = weightedMedian_Integer(INTEGER(x), nx, REAL(w), nw, narm, interpolate2, ties2); } /* Return results */ PROTECT(ans = allocVector(REALSXP, 1)); REAL(ans)[0] = mu; UNPROTECT(1); return(ans); } // weightedMedian() /*************************************************************************** HISTORY: 2015-01-01 [HB] o Created. **************************************************************************/ matrixStats/src/weightedMean_TYPE-template.h0000644000175100001440000000420512542546311020632 0ustar hornikusers/*********************************************************************** TEMPLATE: double weightedMean_(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int refine) Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int refine) { X_C_TYPE value; double weight; R_xlen_t i; LDOUBLE sum = 0, wtotal = 0; LDOUBLE avg = R_NaN; for (i=0; i < nx; i++) { weight = w[i]; /* Skip or early stopping? */ if (weight == 0) { continue; } value = x[i]; #if X_TYPE == 'i' if (X_ISNAN(value)) { /* Skip or early stopping? */ if (narm) { continue; } else { sum = R_NaReal; break; } } else { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #elif X_TYPE == 'r' if (!narm || !X_ISNAN(value)) { sum += (LDOUBLE)weight * (LDOUBLE)value; wtotal += weight; } #endif } /* for (i ...) */ if (wtotal > DOUBLE_XMAX || wtotal < -DOUBLE_XMAX) { avg = R_NaN; } else if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / wtotal; #if X_TYPE == 'r' /* Extra precision by summing over residuals? */ if (refine && R_FINITE(avg)) { sum = 0; for (i=0; i < nx; i++) { weight = w[i]; /* Skip? */ if (weight == 0) { continue; } value = (LDOUBLE)x[i]; if (!narm || !ISNAN(value)) { sum += (LDOUBLE)weight * (value - avg); } } avg += (sum / wtotal); } #endif } return (double)avg; } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-12-08 [HB] o Created. **************************************************************************/ matrixStats/src/rowCounts_TYPE-template.h0000644000175100001440000001253312542546311020237 0ustar hornikusers/*********************************************************************** TEMPLATE: void rowCounts_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int narm, int hasna, int *ans) GENERATES: void rowCounts_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, double value, int narm, int hasna, int *ans) void rowCounts_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans) void rowCounts_Logical(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i', 'r', or 'l' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int what, int narm, int hasna, int *ans) { R_xlen_t ii, jj, kk; int count; X_C_TYPE xvalue; if (what == 0) { /* all */ for (ii=0; ii < nrow; ii++) ans[ii] = 1; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { /* Skip? */ if (ans[ii]) { xvalue = x[kk++]; if (!X_ISNAN(xvalue)) { ans[ii] = 0; /* Found another value! Skip from now on */ } } else { kk++; } } } } else { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { /* Skip? */ if (ans[ii]) { xvalue = x[kk++]; if (xvalue == value) { } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is not 'value' later, then we know for sure that all = FALSE regardless of missing values. In other words, at this point the answer can be either NA or FALSE.*/ ans[ii] = NA_INTEGER; } else { /* Found another value! Skip from now on */ ans[ii] = 0; } } else { kk++; } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 1) { /* any */ for (ii=0; ii < nrow; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { /* Skip? */ if (ans[ii]) { kk++; } else { xvalue = x[kk++]; if (X_ISNAN(xvalue)) { ans[ii] = 1; /* Found value! Skip from now on */ } } } } } else { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { /* Skip? */ if (ans[ii] && ans[ii] != NA_INTEGER) { kk++; } else { xvalue = x[kk++]; if (xvalue == value) { /* Found value! Skip from now on */ ans[ii] = 1; } else if (narm && X_ISNAN(xvalue)) { /* Skip */ } else if (!narm && X_ISNAN(xvalue)) { /* Early stopping is not possible, because if we do find an element that is 'value' later, then we know for sure that any = TRUE regardless of missing values. In other words, at this point the answer can be either NA or TRUE.*/ ans[ii] = NA_INTEGER; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else if (what == 2) { /* count */ for (ii=0; ii < nrow; ii++) ans[ii] = 0; /* Count missing values? [sic!] */ if (X_ISNAN(value)) { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { xvalue = x[kk++]; if (X_ISNAN(xvalue)) ans[ii] = ans[ii] + 1; } } } else { kk = 0; for (jj=0; jj < ncol; jj++) { for (ii=0; ii < nrow; ii++) { count = ans[ii]; /* Nothing more to do on this row? */ if (count == NA_INTEGER) { kk++; continue; } xvalue = x[kk++]; if (xvalue == value) { ans[ii] = count + 1; } else { if (!narm && X_ISNAN(xvalue)) { ans[ii] = NA_INTEGER; continue; } } } /* for (ii ...) */ } /* for (jj ...) */ } } else { error("INTERNAL ERROR: Unknown value of 'what' for colCounts: %d", what); } /* if (what ...) */ } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2014-11-01 [HB] o SPEEDUP: Now using ansp = INTEGER(ans) once and then querying/assigning 'ansp[i]' instead of INTEGER(ans)[i]. 2014-06-02 [HB] o Created. **************************************************************************/ matrixStats/src/meanOver_TYPE-template.h0000644000175100001440000000664112542546311020013 0ustar hornikusers/*********************************************************************** TEMPLATE: double meanOver_(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine) GENERATES: double meanOver_Integer(int *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine) double meanOver_Real(double *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine) { X_C_TYPE value; R_xlen_t i, idx; LDOUBLE sum = 0, avg = R_NaN; #if X_TYPE == 'r' LDOUBLE rsum = 0; #endif int count = 0; /* Sum over all element? */ if (!idxs) { for (i=0; i < nx; i++) { value = x[i]; #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm || !ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (i ...) */ if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / count; /* Extra precision by summing over residuals? */ #if X_TYPE == 'r' if (refine && R_FINITE(avg)) { for (i=0; i < nx; i++) { value = x[i]; if (!narm || !ISNAN(value)) { rsum += (LDOUBLE)(value - avg); } } avg += (rsum / count); } #endif } } else { for (i=0; i < nidxs; i++) { idx = idxs[i]; if (idx <= 0) { Rf_error("Argument \'idxs\' contains a non-positive index: %d", idx); } else if (idx > nx) { Rf_error("Argument \'idxs\' contains an index out of range [1,%d]: %d", nx, idx); } value = x[idx-1]; #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; ++count; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm || !ISNAN(value)) { sum += (LDOUBLE)value; ++count; } #endif } /* for (i ...) */ if (sum > DOUBLE_XMAX) { avg = R_PosInf; } else if (sum < -DOUBLE_XMAX) { avg = R_NegInf; } else { avg = sum / count; /* Extra precision by summing over residuals? */ #if X_TYPE == 'r' if (refine && R_FINITE(avg)) { for (i=0; i < nidxs; i++) { idx = idxs[i]; value = x[idx-1]; if (!narm || !ISNAN(value)) { rsum += (LDOUBLE)(value - avg); } } avg += (rsum / count); } #endif } } return (double)avg; } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Now meanOver_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/sumOver_TYPE-template.h0000644000175100001440000000442712542546311017677 0ustar hornikusers/*********************************************************************** TEMPLATE: double sumOver_(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode) GENERATES: double sumOver_Integer(int *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode) double sumOver_Real(double *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode) { X_C_TYPE value; R_xlen_t i, idx; LDOUBLE sum = 0; /* Sum over all element? */ if (!idxs) { for (i=0; i < nx; i++) { value = x[i]; #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm || !X_ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (i ...) */ } else { for (i=0; i < nidxs; i++) { idx = idxs[i]; if (idx <= 0) { Rf_error("Argument \'idxs\' contains a non-positive index: %d", idx); } else if (idx > nx) { Rf_error("Argument \'idxs\' contains an index out of range [1,%d]: %d", nx, idx); } value = x[idx-1]; #if X_TYPE == 'i' if (!X_ISNAN(value)) { sum += (LDOUBLE)value; } else if (!narm) { sum = R_NaReal; break; } #elif X_TYPE == 'r' if (!narm || !X_ISNAN(value)) { sum += (LDOUBLE)value; } #endif } /* for (i ...) */ } return (double)sum; } /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Now sumOver_() uses only basic C types. 2014-11-02 [HB] o Created. **************************************************************************/ matrixStats/src/rowRanksWithTies_TYPE_TIES-template.h0000644000175100001440000001354712542546311022355 0ustar hornikusers/*********************************************************************** TEMPLATE: Ranks_Real_ties(...) GENERATES: void colRanks_Real_tiesMin(double *x, int nrow, int ncol, int byrow, double *ans) void rowRanks_Real_tiesMin(double *x, int nrow, int ncol, int byrow, double *ans) void colRanks_Real_tiesMax(double *x, int nrow, int ncol, int byrow, double *ans) void rowRanks_Real_tiesMax(double *x, int nrow, int ncol, int byrow, double *ans) void colRanks_Real_tiesAverage(double *x, int nrow, int ncol, int byrow, double *ans) void rowRanks_Real_tiesAverage(double *x, int nrow, int ncol, int byrow, double *ans) void colRanks_Integer_tiesMin(int *x, int nrow, int ncol, int byrow, int *ans) void rowRanks_Integer_tiesMin(int *x, int nrow, int ncol, int byrow, int *ans) void colRanks_Integer_tiesMax(int *x, int nrow, int ncol, int byrow, int *ans) void rowRanks_Integer_tiesMax(int *x, int nrow, int ncol, int byrow, int *ans) void colRanks_Integer_tiesAverage(int *x, int nrow, int ncol, int byrow, int *ans) void rowRanks_Integer_tiesAverage(int *x, int nrow, int ncol, int byrow, int *ans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - MARGIN: 'r' (rows) or 'c' (columns). - X_TYPE: 'i' or 'r' - ANS_TYPE: 'i' or 'r' - TIESMETHOD: '0' (min), '1' (max), 'a' (average) Authors: Hector Corrada Bravo [HCB] Peter Langfelder [PL] Henrik Bengtsson [HB] ***********************************************************************/ #include #if TIESMETHOD == '0' /* min */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) firstTie + 1 #elif TIESMETHOD == '1' /* max */ #define ANS_TYPE 'i' #define RANK(firstTie, aboveTie) aboveTie #elif TIESMETHOD == 'a' /* average */ #define ANS_TYPE 'r' #define RANK(firstTie, aboveTie) ((double) (firstTie + aboveTie + 1))/2 #endif /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME]) ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C) */ #include "templates-types.h" /* Indexing formula to compute the vector index of element j of vector i. Should take arguments element, vector, nElements, nVectors. */ #if MARGIN == 'r' /* rows */ #define INDEX_OF(element, vector, nElements, nVectors) \ vector + element*nVectors #elif MARGIN == 'c' /* columns */ #define INDEX_OF(element, vector, nElements, nVectors) \ element + vector*nElements #endif void METHOD_NAME(X_C_TYPE *x, int nrow, int ncol, int byrow, ANS_C_TYPE *ans) { ANS_C_TYPE rank; X_C_TYPE *values, current, tmp; int ii, jj, kk; int *I; int lastFinite, firstTie, aboveTie; int nvalues, nVec; if (byrow) { nvalues = ncol; nVec = nrow; } else { nvalues = nrow; nVec = ncol; } values = (X_C_TYPE *) R_alloc(nvalues, sizeof(X_C_TYPE)); I = (int *) R_alloc(nvalues, sizeof(int)); for (ii=0; ii < nVec; ii++) { lastFinite = nvalues-1; /* Put the NA/NaN elements at the end of the vector and update the index vector appropriately. This may be a bit faster since it only uses one loop over the length of the vector, plus it shortens the sort in case there are missing values. /PL (2012-12-14) */ for (jj = 0; jj <= lastFinite; jj++) { tmp = x[ INDEX_OF(jj, ii, nvalues, nVec) ]; if (X_ISNAN(tmp)) { while (lastFinite > jj && X_ISNAN(x[ INDEX_OF(lastFinite, ii, nvalues, nVec) ])) { I[lastFinite] = lastFinite; lastFinite--; } I[lastFinite] = jj; I[jj] = lastFinite; values[ jj ] = x[ INDEX_OF(lastFinite, ii, nvalues, nVec) ]; values[ lastFinite ] = tmp; lastFinite--; } else { I[jj] = jj; values[ jj ] = tmp; } } /* for (jj ...) */ // Diagnostic print-outs /* Rprintf("Swapped vector:\n"); for (jj=0; jj < nvalues; jj++) { Rprintf(" %8.4f,", values[jj]); if (((jj+1) % 5==0) || (jj==nvalues-1)) Rprintf("\n"); } Rprintf("Index vector:\n"); for (jj=0; jj 0) X_QSORT_I(values, I, 1, lastFinite + 1); // Calculate the ranks. for (jj=0; jj <= lastFinite;) { firstTie = jj; current = values[jj]; while ((jj <= lastFinite) && (values[jj] == current)) jj++; aboveTie = jj; // Depending on rank method, get maximum, average, or minimum rank rank = RANK(firstTie, aboveTie); for (kk=firstTie; kk < aboveTie; kk++) { ans[ INDEX_OF(I[kk], ii, nvalues, nVec) ] = rank; } } // At this point jj = lastFinite + 1, no need to re-initialize again. for (; jj < nvalues; jj++) { ans[ INDEX_OF(I[jj], ii, nvalues, nVec) ] = ANS_NA; } // Rprintf("\n"); } } /* Undo template macros */ #undef RANK #undef INDEX_OF #undef TIESMETHOD #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-11-06 [HB] o CLEANUP: Moving away from R data types in low-level C functions. 2013-04-23 [HB] o BUG FIX: Ranks did not work for integers with NAs; now using X_ISNAN(). 2013-01-13 [HB] o Template cleanup. Extened tempate to integer matrices. o Added argument 'tiesMethod' to rowRanks(). 2012-12-14 [PL] o Added internal support for "min", "max" and "average" ties. Using template to generate the various versions of the functions. 2013-01-13 [HCB] o Created. Using "max" ties. **************************************************************************/ matrixStats/src/diff2_TYPE-template.h0000644000175100001440000000452412542546311017227 0ustar hornikusers/*********************************************************************** TEMPLATE: void diff2_(X_C_TYPE *x, R_xlen_t nx, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans) Arguments: The following macros ("arguments") should be defined for the template to work as intended. - METHOD_NAME: the name of the resulting function - X_TYPE: 'i' or 'r' Copyright: Henrik Bengtsson, 2014 ***********************************************************************/ #include "types.h" /* Expand arguments: X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME]) */ #include "templates-types.h" #include #if X_TYPE == 'i' static R_INLINE int diff_int(int a, int b) { if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER); return a-b; } #define X_DIFF diff_int #elif X_TYPE == 'r' #define X_DIFF(a,b) a-b #endif void METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans) { int ii, tt, uu; X_C_TYPE *tmp = NULL; /* Nothing to do? */ if (nans <= 0) return; /* Special case (difference == 1) */ if (differences == 1) { uu = lag; tt = 0; for (ii=0; ii < nans; ii++) { ans[ii] = X_DIFF(x[uu++], x[tt++]); } } else { /* Allocate temporary work vector (to hold intermediate differences) */ tmp = Calloc(nx - lag, X_C_TYPE); /* (a) First order of differences */ uu = lag; tt = 0; for (ii=0; ii < nx-lag; ii++) { tmp[ii] = X_DIFF(x[uu++], x[tt++]); } nx -= lag; /* (b) All other orders of differences but the last */ while (--differences > 1) { uu = lag; tt = 0; for (ii=0; ii < nx-lag; ii++) { tmp[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } nx -= lag; } /* Sanity check */ /* if (nx-lag != nans) error("nx != nans: %d != %d\n", nx, nans); */ /* (c) Last order of differences */ uu = lag; tt = 0; for (ii=0; ii < nans; ii++) { ans[ii] = X_DIFF(tmp[uu++], tmp[tt++]); } /* Deallocate temorary work vector */ Free(tmp); } /* if (differences ...) */ } #undef X_DIFF /* Undo template macros */ #include "templates-types_undef.h" /*************************************************************************** HISTORY: 2014-12-29 [HB] o Created. **************************************************************************/ matrixStats/src/rowMads.c0000644000175100001440000000372112542546311015170 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowMads(SEXP x, SEXP constant, SEXP naRm, SEXP hasNA) SEXP colMads(SEXP x, SEXP constant, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowMads #define X_TYPE 'i' #include "rowMads_TYPE-template.h" #define X_TYPE 'r' #include "rowMads_TYPE-template.h" #undef METHOD SEXP rowMads(SEXP x, SEXP dim, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow) { int narm, hasna, byrow; SEXP ans; R_xlen_t nrow, ncol; double scale; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); /* Argument 'constant': */ if (!isNumeric(constant)) error("Argument 'constant' must be a numeric scale."); scale = asReal(constant); /* Argument 'naRm': */ narm = asLogicalNoNA(naRm, "na.rm"); /* Argument 'hasNA': */ hasna = asLogicalNoNA(hasNA, "hasNA"); /* Argument 'byRow': */ byrow = asLogical(byRow); /* Get dimensions of 'x'. */ if (byrow) { nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; } else { nrow = INTEGER(dim)[1]; ncol = INTEGER(dim)[0]; } /* R allocate a double vector of length 'nrow' Note that 'nrow' means 'ncol' if byrow=FALSE. */ PROTECT(ans = allocVector(REALSXP, nrow)); /* Double matrices are more common to use. */ if (isReal(x)) { rowMads_Real(REAL(x), nrow, ncol, scale, narm, hasna, byrow, REAL(ans)); } else if (isInteger(x)) { rowMads_Integer(INTEGER(x), nrow, ncol, scale, narm, hasna, byrow, REAL(ans)); } UNPROTECT(1); return(ans); } /* rowMads() */ /*************************************************************************** HISTORY: 2014-11-17 [HB] o Created from rowMedians.c. **************************************************************************/ matrixStats/src/rowCumMinMaxs.c0000644000175100001440000000456112542546311016330 0ustar hornikusers/*************************************************************************** Public methods: SEXP rowCummins(SEXP x, SEXP naRm, SEXP hasNA) SEXP colCummins(SEXP x, SEXP naRm, SEXP hasNA) Authors: Henrik Bengtsson Copyright Henrik Bengtsson, 2014 **************************************************************************/ #include #include "types.h" #include "utils.h" #define METHOD rowCummins #define COMP '<' #define X_TYPE 'i' #include "rowCumMinMaxs_TYPE-template.h" #define X_TYPE 'r' #include "rowCumMinMaxs_TYPE-template.h" #undef COMP #undef METHOD SEXP rowCummins(SEXP x, SEXP dim, SEXP byRow) { int byrow; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'byRow': */ byrow = asLogical(byRow); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowCummins_Real(REAL(x), nrow, ncol, byrow, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowCummins_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans)); UNPROTECT(1); } return(ans); } /* rowCummins() */ #define METHOD rowCummaxs #define COMP '>' #define X_TYPE 'i' #include "rowCumMinMaxs_TYPE-template.h" #define X_TYPE 'r' #include "rowCumMinMaxs_TYPE-template.h" #undef COMP #undef METHOD SEXP rowCummaxs(SEXP x, SEXP dim, SEXP byRow) { int byrow; SEXP ans = NILSXP; R_xlen_t nrow, ncol; /* Argument 'x' and 'dim': */ assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x"); nrow = INTEGER(dim)[0]; ncol = INTEGER(dim)[1]; /* Argument 'byRow': */ byrow = asLogical(byRow); /* Double matrices are more common to use. */ if (isReal(x)) { PROTECT(ans = allocMatrix(REALSXP, nrow, ncol)); rowCummaxs_Real(REAL(x), nrow, ncol, byrow, REAL(ans)); UNPROTECT(1); } else if (isInteger(x)) { PROTECT(ans = allocMatrix(INTSXP, nrow, ncol)); rowCummaxs_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans)); UNPROTECT(1); } return(ans); } /* rowCummaxs() */ /*************************************************************************** HISTORY: 2014-11-26 [HB] o Created from rowVars.c. **************************************************************************/ matrixStats/src/x_OP_y_TYPE-template.h0000644000175100001440000001761212542546311017434 0ustar hornikusers#include "types.h" #include "templates-types.h" #if OP == '+' #define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Add) #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x + (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME_T) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x + (double)y; } } #elif OP == '-' #define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Sub) #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x - (double)y; } #define FUN_narm FUN_no_NA #elif OP == '*' #define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Mul) #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x * (double)y; } #define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME_T) static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) { if (X_ISNAN(x)) { return (double)y; } else if (Y_ISNAN(y)) { return (double)x; } else { return (double)x * (double)y; } } #elif OP == '/' #define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Div) #define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T) static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) { #if X_TYPE == 'i' if (X_ISNAN(x)) return NA_REAL; #endif #if Y_TYPE == 'i' if (Y_ISNAN(y)) return NA_REAL; #endif return (double)x / (double)y; } #define FUN_narm FUN_no_NA #else #error "INTERNAL ERROR: Failed to set C inline function FUN(x, y): Unknown OP" #endif void METHOD_NAME_T(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, Y_C_TYPE *y, R_xlen_t ny, int byrow, int commute, int narm, int hasna, ANS_C_TYPE *ans, R_xlen_t n) { R_xlen_t kk, xi, yi, nx = nrow * ncol; R_xlen_t row, col, txi; double value; #if ANS_TYPE == 'i' int ok = 1; /* OK, i.e. no integer overflow yet? */ double R_INT_MIN_d = (double)R_INT_MIN, R_INT_MAX_d = (double)R_INT_MAX; #endif xi = 0; txi = row = col = 0; yi = 0; if (byrow) { if (commute) { if (narm) { for (kk=0; kk < n; kk++) { value = FUN_narm(y[yi], x[xi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++row >= nrow) { /* Current index in t(x): */ row = 0; /* col = xi / nrow; */ col++; /* row = xi % nrow; */ txi = col; /* txi = row * ncol + col; */ } else { txi += ncol; } yi = txi % ny; } } else { for (kk=0; kk < n; kk++) { value = FUN_no_NA(y[yi], x[xi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++row >= nrow) { /* Current index in t(x): */ row = 0; /* col = xi / nrow; */ col++; /* row = xi % nrow; */ txi = col; /* txi = row * ncol + col; */ } else { txi += ncol; } yi = txi % ny; } } } else { if (narm) { for (kk=0; kk < n; kk++) { value = FUN_narm(x[xi], y[yi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++row >= nrow) { /* Current index in t(x): */ row = 0; /* col = xi / nrow; */ col++; /* row = xi % nrow; */ txi = col; /* txi = row * ncol + col; */ } else { txi += ncol; } yi = txi % ny; } } else { for (kk=0; kk < n; kk++) { value = FUN_no_NA(x[xi], y[yi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++row >= nrow) { /* Current index in t(x): */ row = 0; /* col = xi / nrow; */ col++; /* row = xi % nrow; */ txi = col; /* txi = row * ncol + col; */ } else { txi += ncol; } yi = txi % ny; } } } } else { if (commute) { if (narm) { for (kk=0; kk < n; kk++) { value = FUN_narm(y[yi], x[xi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++yi >= ny) yi = 0; } } else { for (kk=0; kk < n; kk++) { value = FUN_no_NA(y[yi], x[xi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++yi >= ny) yi = 0; } } } else { if (narm) { for (kk=0; kk < n; kk++) { value = FUN_narm(x[xi], y[yi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++yi >= ny) yi = 0; } } else { for (kk=0; kk < n; kk++) { value = FUN_no_NA(x[xi], y[yi]); #if ANS_TYPE == 'i' if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) { ok = 0; value = NA_REAL; } ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value; #else ans[kk] = (ANS_C_TYPE) value; #endif if (++xi >= nx) xi = 0; if (++yi >= ny) yi = 0; } } } } /* if (byrow) */ #if ANS_TYPE == 'i' /* Warn on integer overflow? */ if (!ok) { warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX); } #endif } #undef FUN #undef FUN_narm #undef METHOD_NAME_T /* Undo template macros */ #include "templates-types_undef.h" matrixStats/NAMESPACE0000644000175100001440000000466712542546241014054 0ustar hornikusersuseDynLib("matrixStats") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # IMPORTS # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - importFrom("methods", "setMethod") importFrom("methods", "setGeneric") importFrom("methods", "loadMethod") importFrom("methods", "signature") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # EXPORTS # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - export("allocVector") export("allocMatrix") export("allocArray") export("allValue") export("anyMissing") export("anyValue") export("binCounts") export("binMeans") export("colAlls") export("colAnys") export("colAnyMissings") export("colAvgsPerRowSet") export("colCollapse") export("colCounts") export("colCummins") export("colCummaxs") export("colCumprods") export("colCumsums") export("colDiffs") export("colIQRs") export("colIQRDiffs") export("colLogSumExps") export("colMadDiffs") export("colMads") export("colMaxs") export("colMedians") export("colMins") export("colOrderStats") export("colProds") export("colQuantiles") export("colRanges") export("colRanks") export("colSdDiffs") export("colSds") export("colTabulates") export("colVarDiffs") export("colVars") export("colWeightedMads") export("colWeightedMeans") export("colWeightedMedians") export("colWeightedSds") export("colWeightedVars") export("count") export("diff2") export("indexByRow") export("iqr") export("iqrDiff") export("logSumExp") export("madDiff") export("meanOver") export("product") export("rowAlls") export("rowAnys") export("rowAnyMissings") export("rowAvgsPerColSet") export("rowCollapse") export("rowCounts") export("rowCummins") export("rowCummaxs") export("rowCumprods") export("rowCumsums") export("rowDiffs") export("rowIQRs") export("rowIQRDiffs") export("rowLogSumExps") export("rowMadDiffs") export("rowMads") export("rowMaxs") export("rowMedians") export("rowMins") export("rowOrderStats") export("rowProds") export("rowQuantiles") export("rowRanges") export("rowRanks") export("rowSdDiffs") export("rowSds") export("rowTabulates") export("rowVarDiffs") export("rowVars") export("rowWeightedMads") export("rowWeightedMeans") export("rowWeightedMedians") export("rowWeightedSds") export("rowWeightedVars") export("sdDiff") export("signTabulate") export("sumOver") export("x_OP_y") export("t_tx_OP_y") export("varDiff") export("weightedMad") export("weightedMean") export("weightedMedian") export("weightedSd") export("weightedVar") matrixStats/NEWS0000644000175100001440000006726612542546241013340 0ustar hornikusersPackage: matrixStats ==================== Version: 0.14.2 [2015-06-23] o BUG FIX: x_OP_y() and t_tx_OP_y() would return garbage on Solaris SPARC (and possibly other architectures as well) when input was integer and had missing values. Version: 0.14.1 [2015-06-17] o BUG FIX: product(x, na.rm=FALSE) for integer 'x' with both zeros and NAs returned zero rather than NA. o BUG FIX: weightedMean(x, w, na.rm=TRUE) did not handle missing values in 'x' properly, if it was an integer. It would also return NaN if there were weights 'w' with missing values, whereas stats::weighted.mean() would skip such data points. Now weightedMean() does the same. o BUG FIX: (col|row)WeightedMedians() did not handle infinite weights as weightedMedian() does. o BUG FIX: x_OP_y(x, y, OP, na.rm=FALSE) returned garbage iff 'x' or 'y' had missing values of type integer. o BUG FIX: rowQuantiles() and rowIQRs() did not work for single-row matrices. Analogously for the corresponding column functions. o BUG FIX: rowCumsums(), rowCumprods() rowCummins(), and rowCummaxs(), accessed out-of-bound elements for Nx0 matrices where N > 0. The corresponding column methods has similar memory errors for 0xK matrices where K > 0. o BUG FIX: anyMissing(list(NULL)) returned NULL; now FALSE. o BUG FIX: rowCounts() resulted in garbage if a previous column had NAs (because forgot to update index kk in such cases). o BUG FIX: rowCumprods(x) handled missing values and zeros incorrectly for integer 'x (not double); a zero would trump an existing missing value causing the following cumulative products to become zero. It was only a zero that trumped NAs; any other integer would work as expected. Also, this bug was not in colCumprods(). o BUG FIX: rowAnys(x, value, na.rm=FALSE) did not handle missing values in a numeric 'x' properly. Similarly, for non-numeric and non-logical 'x', row- and colAnys(), row- and colAlls(), anyValue() and allValue() did not handle when 'value' was a missing value. o All of the above bugs were identified and fixed by Dongcan Jiang (Peking University, China), who also added corresponding unit tests. Version: 0.14.0 [2015-02-13] o ROBUSTNESS/TESTS: Package tests cover 96% of the code (was 91%). o CONSISTENCY: Renamed argument 'centers' of col- and rowMads() to 'center'. This is consistent with col- and rowVars(). o CONSISTENCY: col- and rowVars() now using na.rm=FALSE as the default (na.rm=TRUE was mistakenly introduced as the default in v0.9.7). o SPEEDUP: The check for user interrupts at the C level is now done less frequently of the functions. It does every k:th iteration, where k = 2^20, which is tested for using (iter % k == 0). It turns out, at least with the default compiler optimization settings that I use, that this test is 3 times faster if k = 2^n where n is an integer. The following functions checks for user interrupts: logSumExp(), (col|row)LogSumExps(), (col|row)Medians(),, (col|row)Mads(), (col|row)Vars(), and (col|row)Cum(Min|Max|prod|sum)s(). o SPEEDUP: logSumExp(x) is now faster if 'x' does not contain any missing values. It is also faster if all values are missing or the maximum value is +Inf - in both cases it can skip the actual summation step. o BUG FIX: all() and any() flavored methods on non-numeric and non-logical (e.g. character) vectors and matrices with na.rm=FALSE did not give results consistent with all() and any() if there were missing values. For example, with x <- c("a", NA, "b") we have all(x == "a") == FALSE and any(x == "a") == TRUE whereas our corresponding methods would return NA in those cases. The methods fixed are allValue(), anyValue(), col- and rowAlls(), and col- and rowAnys(). Added more package tests to cover these cases. o BUG FIX: Now logSumExp(x, na.rm=TRUE) would return NA if all values were NA and length(x) > 1. Now it returns -Inf for all length(x):s. o CLEANUP: anyMissing() is no longer an S4 generic. This was done as part of the migration of making all functions of matrixStats plain R functions, which minimizes calling overhead and it will also allow us to drop 'methods' from the package dependencies. I've scanned all CRAN and Bioconductor packages depending on matrixStats and none of them relied on anyMissing() dispatching on class, so hopefully this move has little impact. The only remaining S4 methods are now colMedians() and rowMedians(). o CLEANUP: Package no longer depends on R.methodsS3. Version: 0.13.1 [2015-01-21] o BUG FIX: diff2() with differences >= 3 would *read* spurious values beyond the allocated memory. This error, introduced in 0.13.0, was harmless in the sense that the returned value was unaffected and still correct. Thanks to Brian Ripley and the CRAN check tools for catching this. I could reproduce it locally with 'valgrind'. Version: 0.13.0 [2015-01-20] o Added iqrDiff() and (col|row)IqrDiffs(). o CONSISTENCY: Now rowQuantiles(x, na.rm=TRUE) returns all NAs for rows with missing values. Analogously for colQuantiles(), colIQRs(), rowIQRs() and iqr(). Previously, all these functions gave an error saying missing values are not allowed. o SPEEDUP: (col|row)Diffs() are now implemented in native code and notably faster than diff() for matrices. o SPEEDUP: Added diff2(), which is notably faster than base::diff() for vectors, which it is designed for. o DOCUMENTATION: Added vignette summarizing available functions. o COMPLETENESS: Added corresponding "missing" vector functions for already existing column and row functions. Similarly, added "missing" column and row functions for already existing vector functions, e.g. added iqr() and count() to complement already existing (col|row)IQRs() and (col|row)Counts() functions. o SPEEDUP: Made binCounts() and binMeans() a bit faster. o SPEEDUP: Added count(x, value) which is a notably faster than sum(x == value). This can also be used to count missing values etc. Also, added allValue() and anyValue() for all(x == value) and any(x == value). o SPEEDUP: Implemented weightedMedian() in native code, which made it ~3-10 times faster. Dropped support for ties="both", because it would have to return two values in case of ties, which made the API unnecessarily complicated. If really needed, then call the function twice with ties="min" and ties="max". o SPEEDUP: Added weightedMean(), which is ~10 times faster than stats::weighted.mean(). o SPEEDUP: (col|row)Anys() and (col|row)Alls() is now notably faster compared to previous versions. o SPEEDUP/CLEANUP: Turned several S3 and S4 methods into plain R functions, which decreases the overhead of calling the functions. After this there are no longer any S3 methods. Remaining S4 methods are anyMissing() and rowMedians(). o ROBUSTNESS: Now column and row methods give slightly more informative error messages if a data.frame is passed instead of a matrix. o CLEANUP: In the effort of migrating anyMissing() into a plain R function, the specific anyMissing() implementations for data.frame:s and and list:s were dropped and is now handled by anyMissing() for "ANY", which is the only S4 method remaining now. In a near future release, this remaining "ANY" method will turned into a plain R function and the current S4 generic will be dropped. We know of know CRAN and Bioconductor packages that relies on it being a generic function. Note also that since R (>= 3.1.0) there is a base::anyNA() function that does the exact same thing making anyMissing() obsolete. o BUG FIX: weightedMedian(..., ties="both") would give an error if there was a tie. Added package test for this case. Version: 0.12.2 [2014-12-07] o CODE FIX: The native code for product() on integer vector incorrectly used C-level abs() on intermediate values despite those being doubles requiring fabs(). Despite this, the calculated product would still be correct (at least when validated on several local setups as well as on the CRAN servers). Again, thanks to Brian Ripley for pointing out another invalid integer-double coersion at the C level. Version: 0.12.1 [2014-12-06] o ROBUSTNESS: Updated package tests to check methods in more scenarios, especially with both integer and numeric input data. o BUG FIX: (col|row)Cumsums(x) where 'x' is integer would return garbage for columns (rows) containing missing values. o BUG FIX: rowMads(x) where 'x' is numeric (not integer) would give incorrect results for rows that had an *odd* number of values (no ties). Analogously issues with colMads(). Added package tests for such cases too. Thanks to Brian Ripley and the CRAN check tools for (yet again) catching another coding mistake. Details: This was because the C-level calculation of the absolute value of residuals toward the median would use integer-based abs() rather than double- based fabs(). Now it fabs() is used when the values are double and abs() when they are integers. Version: 0.12.0 [2014-12-05] o Submitted to CRAN. Version: 0.11.9 [2014-11-26] o Added (col|row)Cumsums(), (col|row)Cumprods(), (col|row)Cummins(), and (col|row)Cummaxs(). o BUG FIX: (col|row)WeightedMeans() with all zero weights gave mean estimates with values 0 instead of NaN. Version: 0.11.8 [2014-11-25] o SPEEDUP: Implemented (col|row)Mads(), (col|row)Sds() and (col|row)Vars() in native code. o SPEEDUP: Made (col|row)Quantiles(x) faster for 'x' without missing values (and default type=7L quantiles). It should still be implemented in native code. o SPEEDUP: Made rowWeightedMeans() faster. o BUG FIX: (col|row)Medians(x) when 'x' is integer would give invalid median values in case (a) it was calculated as the mean of two values ("ties"), and (b) the sum of those values where greater than .Machine$integer.max. Now such ties are calculated using floating point precision. Add lots of package tests. Version: 0.11.6 [2014-11-16] o SPEEDUP: Now (col|row)Mins(), (col|row)Maxs() and (col|row)Ranges() are implemented in native code providing a significant speedup. o SPEEDUP: Now colOrderStats() also is implemented in native code, which indirectly makes colMins(), colMaxs() and colRanges() faster. o SPEEDUP: colTabulates(x) no longer uses rowTabulates(t(x)). o SPEEDUP: colQuantiles(x) no longer uses rowQuantiles(t(x)). o CLEANUP: Argument 'flavor' of (col|row)Ranks() is now ignored. Version: 0.11.5 [2014-11-15] o SPEEDUP: Now colCollapse(x) no longer utilizes rowCollapse(t(x)). Added package tests for (col|row)Collapse(). o SPEEDUP: Now colDiffs(x) no longer uses rowDiffs(t(x)). Added package tests for (col|row)Diffs(). o SPEEDUP: Package no longer utilizes match.arg() due to its overhead; methods sumOver(), (col|row)Prods() and (col|row)Ranks() were updated. o (col|row)Prods() now uses default method="direct" (was "expSumLog"). Version: 0.11.4 [2014-11-14] o Added support for vector input to several of the row- and column methods as long as the "intended" matrix dimension is specified via argument 'dim'. For instance, rowCounts(x, dim=c(nrow, ncol)) is the same as rowCounts(matrix(x, nrow, ncol)), but more efficient since it avoids creating/allocating a temporary matrix. o SPEEDUP: Now colCounts() is implemented in native code. Moreover, (col|row)Counts() are now also implemented in native code for logical input (previously only for integer and double input). Added more package tests and benchmarks for these functions. Version: 0.11.3 [2014-11-11] o Turned sdDiff(), madDiff(), varDiff(), weightedSd(), weightedVar() and weightedMad() into plain functions (were generic functions). o Removed unnecessary usage of '::'. Version: 0.11.2 [2014-11-09] o SPEEDUP: Implemented indexByRow() in native code and it is no longer a generic function, but a regular function, which is also faster to call. The first argument of indexByRow() has been changed to 'dim' such that one should use indexByRow(dim(X)) instead of indexByRow(X) as in the past. The latter form is still supported, but deprecated. o Added allocVector(), allocMatrix() and allocArray() for faster allocation numeric vectors, matrices and arrays, particularly when filled with non-missing values. Version: 0.11.1 [2014-11-07] o Better support for long vectors. o ROBUSTNESS: Although unlikely, with long vectors support for binCounts() and binMeans() it is possible that a bin gets a higher count than what can be represented by an R integer (.Machine$integer.max=2^31-1). If that happens, an informative warning is generated and the bin count is set to .Machine$integer.max. If this happens for binMeans(), the corresponding mean is still properly calculated and valid. o PRECISION: Using greater floating-point precision in more internal intermediate calculations, where possible. o CLEANUP: Cleanup and harmonized the internal C API such there are two well defined API levels. The high-level API is called by R via .Call() and takes care of most of the argument validation and construction of the return value. This function dispatch to functions in the low-level API based on data type(s) and other arguments. The low-level API is written to work with basic C data types only. o BUG FIX: Package incorrectly redefined R_xlen_t on R (>= 3.0.0) systems where LONG_VECTOR_SUPPORT is not supported. Version: 0.11.0 [2014-11-02] o Added sumOver() and meanOver(), which are notably faster versions of sum(x[idxs]) and mean(x[idxs]). Moreover, instead of having to do sum(as.numeric(x)) to avoid integer overflow when 'x' is an integer vector, one can do sumOver(x, mode="numeric"), which avoids the extra copy created when coercing to numeric (this numeric copy is also twice as large as the integer vector). Added package tests and benchmark reports for these functions. Version: 0.10.4 [2014-11-01] o SPEEDUP: Made anyMissing(), logSumExp(), (col|row)Medians(), (col|row)Counts() slightly faster by making the native code assign the results directly to the native vector instead of to the R vector, e.g. ansp[i] = v where ansp=REAL(ans) instead of REAL(ans)[i] = v. o Added benchmark reports for anyMissing() and logSumExp(). Version: 0.10.3 [2014-10-01] o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins. Version: 0.10.2 [2014-09-01] o BUG FIX: On some systems, the package failed to build on R (<= 2.15.3) with compilation error: "redefinition of typedef 'R_xlen_t'". Version: 0.10.1 [2014-06-09] o Added benchmark reports for also non-matrixStats functions col/rowSums() and col/rowMeans(). o Now all colNnn() and rowNnn() methods are benchmarked in a combined report making it possible to also compare colNnn(x) with rowNnn(t(x)). Version: 0.10.0 [2014-06-07] o BUG FIX: The package tests for product() incorrectly assumed that the value of prod(c(NaN, NA)) is uniquely defined. However, as documented in help("is.nan"), it may be NA or NaN depending on R system/platform. o Relaxed some packages tests such that they assert numerical correctness via all.equal() rather than identical(). o Submitted to CRAN. Version: 0.9.7 [2014-06-05] o BUG FIX: Introduced a bug in v0.9.5 causing col- and rowVars() and hence also col- and rowSds() to return garbage. Add package tests for these now. o Submitted to CRAN. Version: 0.9.6 [2014-06-04] o SPEEDUP: Now col- and rowProds() utilizes new product() function. o SPEEDUP: Added product() for calculating the product of a numeric vector via the logarithm. o Added signTabulate() for tabulating the number of negatives, zeros, positives and missing values. For doubles, the number of negative and positive infinite values are also counted. Version: 0.9.5 [2014-06-04] o Added argument 'method' to col- and rowProds() for controlling how the product is calculated. o SPEEDUP: Package is now byte compiled. o SPEEDUP: Made weightedMedian() a plain function (was an S3 method). o SPEEDUP: Made rowProds() and rowTabulates() notably faster. o SPEEDUP: Now rowCounts(), rowAnys(), rowAlls() and corresponding column methods can search for any value in addition to the default TRUE. The search for a matching integer or double value is done in native code, which is notably faster (and more memory efficient because it avoids creating any new objects). o SPEEDUP: Made colVars() and colSds() notably faster and rowVars() and rowSds() a slightly bit faster. o SPEEDUP: Turned more S4 methods into S3 methods, e.g. rowCounts(), rowAlls(), rowAnys(), rowTabulates() and rowCollapse(). o Added benchmark reports, e.g. matrixStats:::benchmark('colMins'). o CLEANUP: Now only exporting plain functions and generic functions. Version: 0.9.4 [2014-05-23] o SPEEDUP: Turned several S4 methods into S3 methods, e.g. indexByRow(), madDiff(), sdDiff() and varDiff(). Version: 0.9.3 [2014-04-26] o Added argument 'trim' to madDiff(), sdDiff() and varDiff(). Version: 0.9.2 [2014-04-04] o BUG FIX: The native code of binMeans(x, bx) would try to access an out-of-bounds value of argument 'y' iff 'x' contained elements that are left of all bins in 'bx'. This bug had no impact on the results and since no assignment was done it should also not crash/ core dump R. This was discovered thanks to new memtests (ASAN and valgrind) provided by CRAN. Version: 0.9.1 [2014-03-31] o BUG FIX: rowProds() would throw "Error in rowSums(isNeg) : 'x' must be an array of at least two dimensions" on matrices where all rows contained at least on zero. Thanks to Roel Verbelen at KU Leuven for the report. Version: 0.9.0 [2014-03-26] o Added weighedVar() and weightedSd(). Version: 0.8.14 [2013-11-23] o MEMORY: Updated all functions to do a better job of cleaning out temporarily allocated objects as soon as possible such that the garbage collector can remove them sooner, iff wanted. This increase the chance for a smaller memory footprint. o Submitted to CRAN. Version: 0.8.13 [2013-10-08] o Added argument 'right' to binCounts() and binMeans() to specify whether binning should be done by (u,v] or [u,v). Added system tests validating the correctness of the two cases. o Bumped up package dependencies. Version: 0.8.12 [2013-09-26] o SPEEDUP: Now utilizing anyMissing() everywhere possible. Version: 0.8.11 [2013-09-21] o ROBUSTNESS: Now importing 'loadMethod' from 'methods' package such that 'matrixStats' S4-based methods also work when 'methods' is not loaded, e.g. when 'Rscript' is used, cf. Section 'Default packages' in 'R Installation and Administration'. o ROBUSTNESS: Updates package system tests such that the can run with only the 'base' package loaded. Version: 0.8.10 [2013-09-15] o CLEANUP: Now only importing two functions from the 'methods' package. o Bumped up package dependencies. Version: 0.8.9 [2013-08-29] o CLEANUP: Now the package startup message acknowledges argument 'quietly' of library()/require(). Version: 0.8.8 [2013-07-29] o DOCUMENTATION: The dimension of the return value was swapped in help("rowQuantiles"). Version: 0.8.7 [2013-07-28] o SPEEDUP: Made (col|row)Mins() and (col|row)Maxs() much faster. o BUG FIX: rowRanges(x) on an Nx0 matrix would give an error. Same for colRanges(x) on an 0xN matrix. Added system tests for these and other special cases. Version: 0.8.6 [2013-07-20] o Forgot to declare S3 methods (col|row)WeightedMedians(). o Bumped up package dependencies. Version: 0.8.5 [2013-05-25] o Minor speedup of (col|row)Tabulates() by replacing rm() calls with NULL assignments. Version: 0.8.4 [2013-05-20] o CRAN POLICY: Now all Rd \usage{} lines are at most 90 characters long. Version: 0.8.3 [2013-05-10] o SPEEDUP: binCounts() and binMeans() now uses Hoare's Quicksort for presorting 'x' before counting/averaging. They also no longer test in every iteration (=for every data point) whether the last bin has been reached or not, but only after completing a bin. Version: 0.8.2 [2013-05-02] o DOCUMENTATION: Minor corrections and updates to help pages. Version: 0.8.1 [2013-05-02] o BUG FIX: Native code of logSumExp() used an invalid check for missing value of an integer argument. Detected by Brian Ripley upon CRAN submission. Version: 0.8.0 [2013-05-01] o Added logSumExp(lx) and (col|row)LogSumExps(lx) for accurately computing of log(sum(exp(lx))) for standalone vectors, and row and column vectors of matrices. Thanks to Nakayama (Japan) for the suggestion and contributing a draft in R. Version: 0.7.1 [2013-04-23] o Added argument 'preserveShape' to colRanks(). For backward compatibility the default is preserveShape=FALSE, but it may change in the future. o BUG FIX: Since v0.6.4, (col|row)Ranks() gave the incorrect results for integer matrices with missing values. o BUG FIX: Since v0.6.4, (col|row)Medians() for integers would calculate ties as floor(tieAvg). Version: 0.7.0 [2013-01-14] o Now (col|row)Ranks() support "max" (default), "min" and "average" for argument 'ties.method'. Added system tests validation these cases. Thanks Peter Langfelder (UCLA) for contributing this. Version: 0.6.4 [2013-01-13] o Added argument 'ties.method' to rowRanks() and colRanks(), but still only support for "max" (as before). o ROBUSTNESS: Lots of cleanup of the internal/native code. Native code for integer and double cases have been harmonized and are now generated from a common code template. This was inspired by code contributions from Peter Langfelder (UCLA). Version: 0.6.3 [2013-01-13] o Added anyMissing() for data type 'raw', which always returns FALSE. o ROBUSTNESS: Added system test for anyMissing(). o ROBUSTNESS: Now S3 methods are declared in the namespace. Version: 0.6.2 [2012-11-15] o CRAN POLICY: Made example(weightedMedian) faster. Version: 0.6.1 [2012-10-10] o BUG FIX: In some cases binCounts() and binMeans() could try to go past the last bin resulting a core dump. o BUG FIX: binCounts() and binMeans() would return random/garbage values for bins that were beyond the last data point. Version: 0.6.0 [2012-10-04] o Added binMeans() for fast sample-mean calculation in bins. Thanks to Martin Morgan at the Fred Hutchinson Cancer Research Center, Seattle, for contributing the core code for this. o Added binCounts() for fast element counting in bins. Version: 0.5.3 [2012-09-10] o CRAN POLICY: Replaced the .Internal(psort(...)) call with a call to a new internal partial sorting function, which utilizes the native rPsort() part of the R internals. Version: 0.5.2 [2012-07-02] o Updated package dependencies to match CRAN. Version: 0.5.1 [2012-06-25] o GENERALIZATION: Now (col|row)Prods() handle missing values. o BUG FIX: In certain cases, (col|row)Prods() would return NA instead of 0 for some elements. Added a redundancy test for the case. Thanks Brenton Kenkel at University of Rochester for reporting on this. o Now this package only imports methods. Version: 0.5.0 [2012-04-16] o Added weightedMad() from aroma.core v2.5.0. o Added weightedMedian() from aroma.light v1.25.2. o This package no longer depends on the aroma.light package for any of its functions. o Now this package only imports R.methodsS3, meaning it no longer loads R.methodsS3 when it is loaded. Version: 0.4.5 [2012-03-19] o Updated the default argument 'centers' of rowMads()/colMads() to explicitly be (col|row)Medians(x,...). The default behavior has not changed. Version: 0.4.4 [2012-03-05] o BUG FIX: colMads() would return the incorrect estimates. This bug was introduced in matrixStats v0.4.0 (2011-11-11). o ROBUSTNESS: Added system/redundancy tests for rowMads()/colMads(). o CRAN: Made the system tests "lighter" by default, but full tests can still be run, cf. tests/*.R scripts. Version: 0.4.3 [2011-12-11] o BUG FIX: rowMedians(..., na.rm=TRUE) did not handle NaN (only NA). The reason for this was the the native code used ISNA() to test for NA and NaN, but it should have been ISNAN(), which is opposite to how is.na() and is.nan() at the R level work. Added system tests for this case. Version: 0.4.2 [2011-11-29] o Added rowAvgsPerColSet() and colAvgsPerRowSet(). Version: 0.4.1 [2011-11-25] o Added help pages with an example to rowIQRs() and colIQRs(). o Added example to rowQuantiles(). o BUG FIX: rowIQRs() and colIQRs() would return the 25% and the 75% quantiles, not the difference between them. Thanks Pierre Neuvial at CNRS, Evry, France for the report. Version: 0.4.0 [2011-11-11] o Added rowRanks() and colRanks(). Thanks Hector Corrada Bravo (University of Maryland) and Harris Jaffee (John Hopkins). o Dropped the previously introduced expansion of 'center' in rowMads() and colMads(). It added unnecessary overhead if not needed. Version: 0.3.0 [2011-10-13] o SPEEDUP/LESS MEMORY: colMedians(x) no longer uses rowMedians(t(x)); instead there is now an optimized native-code implementation. Also, colMads() utilizes the new colMedians() directly. This improvement was kindly contributed by Harris Jaffee at Biostatistics of John Hopkins, USA. o Added additional unit tests for colMedians() and rowMedians(). Version: 0.2.2 [2010-10-06] o Now the result of (col|row)Quantiles() contains column names. Version: 0.2.1 [2010-04-05] o Added a startup message when package is loaded. o CLEAN UP: Removed obsolete internal .First.lib() and .Last.lib(). Version: 0.2.0 [2010-03-30] o DOCUMENTATION: Fixed some incorrect cross references. Version: 0.1.9 [2010-02-03] o BUG FIX: (col|row)WeightedMeans(..., na.rm=TRUE) would incorrectly treat missing values as zeros. Added corresponding redundancy tests (also for the median case). Thanks Pierre Neuvial for reporting this. Version: 0.1.8 [2009-11-13] o BUG FIX: colRanges(x) would return a matrix of wrong dimension if 'x' did not have any missing values. This would affect all functions relying on colRanges(), e.g. colMins() and colMaxs(). Added a redundancy test for this case. Thanks Pierre Neuvial at UC Berkeley for reporting this. o BUG FIX: (col|row)Ranges() return a matrix with dimension names. Version: 0.1.7 [2009-06-20] WORKAROUND: Cannot use "%#x" in rowTabulates() when creating the column names of the result matrix. It gave an error OSX with R v2.9.0 devel (2009-01-13 r47593b) current the OSX server at R-forge. Version: 0.1.6 [2009-06-17] o Updated the Rdoc example for rowWeightedMedians() to run conditionally on aroma.light, which is only a suggested package - not a required one. This in order to prevent R CMD check to fail on CRAN, which prevents it for building binaries (as it currently happens on their OSX servers). Version: 0.1.5 [2009-02-04] o BUG FIX: For some errors in rowOrderStats(), the stack would not become UNPROTECTED before calling error. Version: 0.1.4 [2009-02-02] o Added methods (col|row)Weighted(Mean|Median)s() for weighted averaging. o Added more Rdoc comments. o Package passes R CMD check flawlessly. Version: 0.1.3 [2008-07-30] o Added (col|row)Tabulates() for integer and raw matrices. o BUG FIX: rowCollapse(x) was broken and returned the wrong elements. Version: 0.1.2 [2008-04-13] o Added (col|row)Collapse(). o Added varDiff(), sdDiff() and madDiff(). o Added indexByRow(). Version: 0.1.1 [2008-03-25] o Added (col|row)OrderStats(). o Added (col|row)Ranges() and (col|row)(Min|Max)s(). o Added colMedians(). o Now anyMissing() support most data types as structures. Version: 0.1.0 [2007-11-26] o Imported the rowNnn() methods from Biobase. o Created. matrixStats/R/0000755000175100001440000000000012542546242013022 5ustar hornikusersmatrixStats/R/signTabulate.R0000644000175100001440000000205512542546242015571 0ustar hornikusers############################################################################/** # @RdocFunction signTabulate # @alias signTabulate # # @title "Calculates the number of negative, zero, positive and missing values" # # @synopsis # # \description{ # @get "title" in a @numeric vector. For @double vectors, the number of # negative and positive infinite values are also counted. # } # # \arguments{ # \item{x}{a @numeric @vector.} # \item{...}{Not used.} # } # # \value{ # Returns a @named @numeric @vector. # } # # \seealso{ # @see "base::sign". # } # # @author "HB" # # @keyword internal #*/############################################################################ signTabulate <- function(x, ...) { res <- .Call("signTabulate", x, PACKAGE="matrixStats"); names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf")[1:length(res)]; res; } # signTabulate() ############################################################################ # HISTORY: # 2014-06-04 [HB] # o Created. ############################################################################ matrixStats/R/pkgStartupMessage.R0000644000175100001440000000332512542546242016621 0ustar hornikusers## covr: skip=all pkgStartupMessage <- function(..., quietly=NA) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Infer 'quietly' from argument 'argument' in library() call? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.na(quietly)) { quietly <- FALSE # Just in case the below won't work one day due to R updates... tryCatch({ # The default, if not found quietly <- formals(base::library)$quietly # Identify the environment/frame of interest by making sure # it at least contains all the arguments of source(). argsToFind <- names(formals(base::library)) # Scan the call frames/environments backwards... srcfileList <- list() for (ff in sys.nframe():0) { env <- sys.frame(ff) # Does the environment look like a library() environment? exist <- sapply(argsToFind, FUN=exists, envir=env, inherits=FALSE) if (!all(exist)) { # Nope, then skip to the next one next } # Was argument 'quietly' specified? missing <- eval(expression(missing(quietly)), envir=env) if (!missing) { quietly <- get("quietly", envir=env, inherits=FALSE) break } # ...otherwise keep searching due to nested library() calls. } # for (ff ...) }, error = function() {}) } # if (is.na(quietly) # Output message? if (!quietly) { packageStartupMessage(...) } } ############################################################################ # HISTORY: # 2015-01-27 # o Copied from R.methodsS3. Here it will only be used internally. ############################################################################ matrixStats/R/diff2.R0000644000175100001440000000300112542546242014131 0ustar hornikusers############################################################################/** # @RdocFunction diff2 # # @title "Fast lagged differences" # # @synopsis # # \description{ # Computes the lagged and iterated differences. # } # # \arguments{ # \item{x}{A @numeric @vector of length N.} # \item{lag}{An @integer specifying the lag.} # \item{differences}{An @integer specifying the order of difference.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length N - \code{differences}. # } # # @examples "../incl/diff2.Rex" # # \seealso{ # @see "base::diff". # } # # @author # # @keyword univar # @keyword internal #*/############################################################################ diff2 <- function(x, lag=1L, differences=1L, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'lag': if (length(lag) != 1L) { stop("Argument 'lag' is not a scalar: ", length(lag)) } # Argument 'differences': if (length(differences) != 1L) { stop("Argument 'differences' is not a scalar: ", length(differences)) } lag <- as.integer(lag) differences <- as.integer(differences) .Call("diff2", x, lag, differences, PACKAGE="matrixStats"); } # diff2() ############################################################################ # HISTORY: # 2014-12-29 # o Created. ############################################################################ matrixStats/R/rowRanks.R0000644000175100001440000001205612542546242014757 0ustar hornikusers###########################################################################/** # @RdocFunction rowRanks # @alias colRanks # # @title "Gets the rank of each row (column) of a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowRanks # @usage colRanks # } # # \arguments{ # \item{x}{A @numeric or @integer NxK @matrix.} # \item{ties.method}{A @character string specifying how ties are treated. # For details, see below.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{preserveShape}{A @logical specifying whether the @matrix # returned should preserve the input shape of \code{x}, or not.} # \item{...}{Not used.} # } # # \value{ # An @integer @matrix is returned. # The \code{rowRanks()} function always returns an NxK @matrix, # where N (K) is the number of rows (columns) whose ranks are calculated. # # The \code{colRanks()} function returns an NxK @matrix, # if \code{preserveShape = TRUE}, otherwise a KxN @matrix. # # %% The mode of the returned matrix is @integer, except for # %% \code{ties.method == "average"} when it is @double. # } # # \details{ # The row ranks of \code{x} are collected as \emph{rows} # of the result matrix. # # The column ranks of \code{x} are collected as \emph{rows} # if \code{preserveShape = FALSE}, otherwise as \emph{columns}. # # The implementation is optimized for both speed and memory. # To avoid coercing to @doubles (and hence memory allocation), there # is a unique implementation for @integer matrices. # It is more memory efficient to do # \code{colRanks(x, preserveShape=TRUE)} than # \code{t(colRanks(x, preserveShape=FALSE))}. # # Any @names of \code{x} are ignored and absent in the result. # } # # \section{Missing and non- values}{ # These are ranked as \code{NA}, as with \code{na.last="keep"} # in the @see "base::rank" function. # } # # \section{Ties}{ # When some values are equal ("ties"), argument \code{ties.method} # specifies what their ranks should be. # If \code{ties.method} is \code{"max"}, ties # are ranked as the maximum value. # If \code{ties.method} is \code{"average"}, ties are ranked # by their average. # If \code{ties.method} is \code{"max"} (\code{"min"}), ties # are ranked as the maximum (minimum) value. # If \code{ties.method} is \code{"average"}, ties are ranked # by their average. # For further details, see @see "base::rank". # } # # \author{ # Hector Corrada Bravo and Harris Jaffee. # Peter Langfelder for adding 'ties.method' support. # Henrik Bengtsson adapted the original native implementation # of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} # in the \pkg{Biobase} package. # } # # \seealso{ # @see "base::rank". # For developers, see also Section 'Utility functions' in # 'Writing R Extensions manual', particularly the native functions # \code{R_qsort_I()} and \code{R_qsort_int_I()}. # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowRanks <- function(x, ties.method=c("max", "average", "min"), dim.=dim(x), ...) { # Argument 'ties.method': ties.method <- ties.method[1L] if (is.element("flavor", names(list(...)))) { .Deprecated(old="Argument 'flavor' of rowRanks()", package="matrixStats") } tiesMethod <- charmatch(ties.method, c("max", "average", "min"), nomatch=0L) if (tiesMethod == 0L) { stop("Unknown value of argument 'ties.method': ", ties.method) } dim. <- as.integer(dim.) # byrow=TRUE .Call("rowRanksWithTies", x, dim., tiesMethod, TRUE, PACKAGE="matrixStats") } colRanks <- function(x, ties.method=c("max", "average", "min"), dim.=dim(x), preserveShape=FALSE, ...) { # Argument 'ties.method': ties.method <- ties.method[1L] if (is.element("flavor", names(list(...)))) { .Deprecated(old="Argument 'flavor' of rowRanks()", package="matrixStats") } # Argument 'preserveShape' preserveShape <- as.logical(preserveShape) tiesMethod <- charmatch(ties.method, c("max", "average", "min"), nomatch=0L) if (tiesMethod == 0L) { stop("Unknown value of argument 'ties.method': ", ties.method) } dim. <- as.integer(dim.) # byrow=FALSE y <- .Call("rowRanksWithTies", x, dim., tiesMethod, FALSE, PACKAGE="matrixStats") if (!preserveShape) y <- t(y) y } ############################################################################ # HISTORY: # 2014-12-17 [HB] # o CLEANUP: Made col- and rowRanks() plain R functions. # 2014-11-15 [HB] # o SPEEDUP: No longer using match.arg() due to its overhead. # 2013-04-23 [HB] # o Added argument 'preserveShape' to colRanks(), cf. private email # 'row- and colRanks in package matrixStats' on 2012-10-05 until # 2013-02-28. # 2013-01-14 [HB] # o Added internal support for rowRanks() with ties "max", "min" and # "average". # 2011-11-11 [HB] # o Added '...' to generic functions rowRanks() and colRanks(). # 2011-10-17 [HJ] # o Added rowRanks and colRanks(). ############################################################################ matrixStats/R/rowCollapse.R0000644000175100001440000000456312542546242015447 0ustar hornikusers###########################################################################/** # @RdocFunction rowCollapse # @alias colCollapse # # @title "Extracts one cell per row (column) from a matrix" # # \description{ # @get "title". # The implementation is optimized for memory and speed. # } # # \usage{ # @usage rowCollapse # @usage colCollapse # } # # \arguments{ # \item{x}{An NxK @matrix.} # \item{idxs}{An index @vector of (maximum) length N (K) specifying the # columns (rows) to be extracted.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # Returns a @vector of length N (K). # } # # @examples "../incl/rowCollapse.Rex" # # @author "HB" # # \seealso{ # \emph{Matrix indexing} to index elements in matrices and arrays, # cf. @see "base::[". # } # # @keyword utilities #*/########################################################################### rowCollapse <- function(x, idxs, dim.=dim(x), ...) { # Argument 'idxs': idxs <- rep(idxs, length.out=dim.[1L]) # Columns of interest cols <- 0:(dim.[2L]-1L) cols <- cols[idxs] # Calculate column-based indices idxs <- dim.[1L] * cols + seq_len(dim.[1L]) cols <- NULL # Not needed anymore x[idxs] } colCollapse <- function(x, idxs, dim.=dim(x), ...) { # Argument 'idxs': idxs <- rep(idxs, length.out=dim.[2L]) # Rows of interest rows <- seq_len(dim.[1L]) rows <- rows[idxs] # Calculate column-based indices idxs <- dim.[1L] * 0:(dim.[2L]-1L) + rows rows <- NULL # Not needed anymore x[idxs] } ############################################################################ # HISTORY: # 2014-12-19 [HB] # o CLEANUP: Made col- and rowCollapse() plain R functions. # 2014-11-15 # o SPEEDUP: Made calculation of colOffsets faster. # o SPEEDUP: Now colCollapse(x) no longer utilizes rowCollapse(t(x)). # 2014-06-02 # o Made rowCollapse() an S3 method (was S4). # 2013-11-23 # o MEMORY: rowCollapse() does a better job cleaning out allocated # objects sooner. # 2008-06-13 # o BUG FIX: rowCollapse(x) was broken and returned the wrong elements. # 2008-04-13 # o Added Rdocs. # o Added colCollapse(). # 2007-10-21 # o Created. ############################################################################ matrixStats/R/weightedMean.R0000644000175100001440000000474012542546242015553 0ustar hornikusers############################################################################/** # @RdocFunction weightedMean # # \encoding{latin1} # # @title "Weighted Arithmetic Mean" # # @synopsis # # \description{ # Computes the weighted sample mean of a numeric vector. # } # # \arguments{ # \item{x}{a @numeric @vector containing the values whose weighted mean is # to be computed.} # \item{w}{a vector of weights the same length as \code{x} giving the weights # to use for each element of \code{x}. Negative weights are treated # as zero weights. Default value is equal weight to all values.} # \item{na.rm}{a logical value indicating whether @NA values in # \code{x} should be stripped before the computation proceeds, # or not. If @NA, no check at all for @NAs is done. # Default value is @NA (for efficiency).} # \item{refine}{If @TRUE and \code{x} is @numeric, then extra effort is # used to calculate the average with greater numerical precision, # otherwise not.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # If \code{x} is of zero length, then \code{NaN} is returned, # which is consistent with @see "base::mean". # } # # @examples "../incl/weightedMean.Rex" # # \section{Missing values}{ # This function handles missing values consistently # @see "stats::weighted.mean". More precisely, # if \code{na.rm=FALSE}, then any missing values in either \code{x} # or \code{w} will give result \code{NA_real_}. # If \code{na.rm=TRUE}, then all \code{(x,w)} data points for which # \code{x} is missing are skipped. Note that if both \code{x} and # \code{w} are missing for a data points, then it is also skipped # (by the same rule). However, if only \code{w} is missing, then # the final results will always be \code{NA_real_} regardless of # \code{na.rm}. # } # # \seealso{ # @see "base::mean" and @see "stats::weighted.mean". # } # # @author # # @keyword "univar" # @keyword "robust" #*/############################################################################ weightedMean <- function(x, w, na.rm=FALSE, refine=FALSE, ...) { # Argument 'refine': refine <- as.logical(refine) w <- as.numeric(w) .Call("weightedMean", x, w, na.rm, refine, PACKAGE="matrixStats") } # weightedMean() ############################################################################### # HISTORY: # 2014-12-08 # o Created. ############################################################################### matrixStats/R/rowProds.R0000644000175100001440000000764312542546242014776 0ustar hornikusers###########################################################################/** # @RdocFunction rowProds # @alias rowProds # @alias colProds # @alias product # # @title "Calculates the product for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowProds # @usage colProds # @usage product # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{na.rm}{If @TRUE, missing values are ignored, otherwise not.} # \item{method}{A @character string specifying how each product # is calculated.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \details{ # If \code{method="expSumLog"}, then then @see "product" function # is used, which calculates the produce via the logarithmic transform # (treating negative values specially). This improves the precision # and lowers the risk for numeric overflow. # If \code{method="direct"}, the direct product is calculated via # the @see "base::prod" function. # } # # \section{Missing values}{ # Note, if \code{method="expSumLog"}, \code{na.rm=FALSE}, and \code{x} # contains missing values (@NA or @NaN), then the calculated value # is also missing value. # Note that it depends on platform whether @NaN or @NA is returned # when an @NaN exists, cf. @see "base::is.nan". # } # # @author "HB" # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowProds <- function(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) { # Preallocate result (zero:ed by default) n <- nrow(x) y <- double(length=n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop("Unknown value of argument 'method': ", method) } for (ii in seq_len(n)) { y[ii] <- prod(x[ii,,drop=TRUE], na.rm=na.rm) } y; } # rowProds() colProds <- function(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) { # Preallocate result (zero:ed by default) n <- ncol(x) y <- double(length=n) # Nothing todo? if (n == 0L) return(y) # Argument 'method': method <- method[1L] # How to calculate product? if (method == "expSumLog") { prod <- product } else if (method == "direct") { } else { stop("Unknown value of argument 'method': ", method) } for (ii in seq_len(n)) { y[ii] <- prod(x[,ii,drop=TRUE], na.rm=na.rm) } y } # colProds() ############################################################################ # HISTORY: # 2014-11-15 [HB] # o SPEEDUP: No longer using match.arg() due to its overhead. # 2014-06-04 [HB] # o Now col- and rowProds() utilizes new product() function. # o Added argument 'method' to col- and rowProds(). # 2014-06-02 [HB] # o Now rowProds() uses rowCounts(x) when 'x' is logical. # o Now rowProds() avoids subsetting rows unless needed. # 2014-03-31 [HB] # o BUG FIX: rowProds() would throw "Error in rowSums(isNeg) : 'x' must # be an array of at least two dimensions" on matrices where all rows # contained at least on zero. Thanks to Roel Verbelen at KU Leuven # for the report. # 2013-11-23 [HB] # o MEMORY: rowProbs() does a better job cleaning out allocated # objects sooner. # 2012-06-25 [HB] # o GENERALIZATION: Now row- and colProds() handles missing values. # o BUG FIX: In certain cases, row- and colProds() would return NA instead # of 0 for some elements. Thanks Brenton Kenkel at University of # Rochester for reporting on this. # 2008-07-30 [HB] # o Now it is only rows without zeros for which the calculation is # actually performed. # 2008-03-26 [HB] # o Created. ############################################################################ matrixStats/R/x_OP_y.R0000644000175100001440000000431612542546242014346 0ustar hornikusers############################################################################/** # @RdocFunction x_OP_y # @alias x_OP_y # @alias t_tx_OP_y # # @title "Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'" # # \usage{ # @usage x_OP_y # @usage t_tx_OP_y # } # # \description{ # @get "title", where OP can be +, -, *, and /. # For + and *, na.rm=TRUE will drop missing values first. # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{y}{A @numeric @vector of length L.} # \item{OP}{A @character specifying which operator to use.} # \item{commute}{If @TRUE, 'y OP x' ('t(y OP t(x))') is calculated, # otherwise 'x OP y' ('t(t(x) OP y)').} # \item{na.rm}{If @TRUE, missing values are ignored, otherwise not.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric NxK @matrix. # } # # \section{Missing values}{ # If \code{na.rm=TRUE}, then missing values are "dropped" before applying # the operator to each pair of values. For instance, if \code{x[1,1]} is # a missing value, then the result of \code{x[1,1] + y[1]} equals # \code{y[1]}. If also \code{y[1]} is a missing value, then the result # is a missing value. This only applies to additions and multiplications. # For subtractions and divisions, argument \code{na.rm} is ignored. # } # # @examples "../incl/x_OP_y.Rex" # # @author # # @keyword internal #*/############################################################################ x_OP_y <- function(x, y, OP, commute=FALSE, na.rm=FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) op <- charmatch(OP, c("+", "-", "*", "/"), nomatch=0L) stopifnot(op > 0L) .Call("x_OP_y", x, y, dim(x), op, commute, na.rm, TRUE, FALSE, package="matrixStats") } # x_OP_y() t_tx_OP_y <- function(x, y, OP, commute=FALSE, na.rm=FALSE) { commute <- as.logical(commute) na.rm <- as.logical(na.rm) op <- charmatch(OP, c("+", "-", "*", "/"), nomatch=0L) stopifnot(op > 0L) .Call("x_OP_y", x, y, dim(x), op, commute, na.rm, TRUE, TRUE, package="matrixStats") } # t_tx_OP_y() ############################################################################ # HISTORY: # 2014-11-24 [HB] # o Created. ############################################################################ matrixStats/R/rowRanges.R0000644000175100001440000000610012542546242015111 0ustar hornikusers###########################################################################/** # @RdocFunction rowRanges # @alias colRanges # @alias rowMins # @alias rowMaxs # @alias colMins # @alias colMaxs # # @title "Gets the range of values in each row (column) of a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowRanges # @usage colRanges # @usage rowMins # @usage colMins # @usage rowMaxs # @usage colMaxs # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # \code{rowRanges()} (\code{colRanges()}) returns a # @numeric Nx2 (Kx2) @matrix, where # N (K) is the number of rows (columns) for which the ranges are # calculated. # # \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a # @numeric @vector of length N (K). # } # # @author "HB" # # \seealso{ # @see "rowOrderStats" and @see "base::pmin.int". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowRanges <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("rowRanges", x, dim., 2L, na.rm, TRUE, PACKAGE="matrixStats") } rowMins <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("rowRanges", x, dim., 0L, na.rm, TRUE, PACKAGE="matrixStats") } rowMaxs <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("rowRanges", x, dim., 1L, na.rm, TRUE, PACKAGE="matrixStats") } colRanges <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("colRanges", x, dim., 2L, na.rm, TRUE, PACKAGE="matrixStats") } colMins <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("colRanges", x, dim., 0L, na.rm, TRUE, PACKAGE="matrixStats") } colMaxs <- function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) .Call("colRanges", x, dim., 1L, na.rm, TRUE, PACKAGE="matrixStats") } ############################################################################ # HISTORY: # 2014-12-17 [HB] # o CLEANUP: Made col- and rowRanges() plain R functions. # 2014-11-16 # o SPEEDUP: Implemented in native code. # 2013-07-28 # o SPEEDUP: Made (col|row)Mins() and (col|row)Maxs() faster. # o BUG FIX: rowRanges(x) on an Nx0 matrix 'x' would give an error. # Ditto for colRanges(x). # 2009-02-01 # o BUG FIX: colRanges(x) would give an error if nrow(x) == 0. # 2008-03-25 # o Since colOrderStats() cannot handle missing values we use the slower # colRanges() for the case when na.rm=TRUE. # o Added {row|col}{Min|Max}s(). # o Created {row|col}Ranges() for scratch. Handles NAs. ############################################################################ matrixStats/R/sumOver.R0000644000175100001440000000602612542546242014611 0ustar hornikusers############################################################################/** # @RdocFunction sumOver # # @title "Fast sum over subset of vector elements" # # @synopsis # # \description{ # Computes the sum of all or a subset of values. # } # # \arguments{ # \item{x}{A @numeric @vector of length N.} # \item{idxs}{A @numeric index @vector in [1,N] of elements to sum over. # If @NULL, all elements are considered.} # \item{na.rm}{If @TRUE, missing values are skipped, otherwise not.} # \item{mode}{A @character string specifying the data type of the # return value. Default is to use the same mode as argument # \code{x}.} # \item{...}{Not used.} # } # # \value{ # Returns a scalar of the data type specified by argument \code{mode}. # If \code{mode="integer"}, then integer overflow occurs if the # \emph{sum} is outside the range of defined integer values. # } # # \details{ # \code{sumOver(x, idxs)} gives equivalent results as # \code{sum(x[idxs])}, but is faster and more memory efficient # since it avoids the actual subsetting which requires copying # of elements and garbage collection thereof. # # Furthermore, \code{sumOver(x, mode="double")} is equivalent to # \code{sum(as.numeric(x))}, but is much more memory efficient when # \code{x} is an @integer vector. # } # # @examples "../incl/sumOver.Rex" # # \seealso{ # @see "base::sum". # To efficiently average over a subset, see @see "meanOver". # } # # @author # # @keyword univar # @keyword internal #*/############################################################################ sumOver <- function(x, idxs=NULL, na.rm=FALSE, mode=typeof(x), ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x)) { stop("Argument 'x' is not numeric: ", mode(x)); } n <- length(x); # Argument 'na.rm': if (!is.logical(na.rm)) { stop("Argument 'na.rm' is not logical: ", mode(na.rm)); } # Argument 'idxs': if (is.null(idxs)) { } else if (is.integer(idxs)) { } else if (is.logical(idxs)) { if (length(idxs) != n) { stop(sprintf("Lengths of arguments 'idxs' and 'x' do not match: %d != %d", length(idxs), n)); } idxs <- which(idxs); } else { idxs <- as.integer(idxs); } # Argument 'mode': mode <- mode[1L] modeI <- charmatch(mode, c("integer", "double"), nomatch=0L) if (modeI == 0L) { stop("Unknown value of argument 'mode': ", mode) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summing # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call("sumOver", x, idxs, na.rm, modeI, PACKAGE="matrixStats"); } # sumOver() ############################################################################ # HISTORY: # 2014-11-15 [HB] # o SPEEDUP: No longer using match.arg() due to its overhead. # 2014-11-02 [HB] # o Created. ############################################################################ matrixStats/R/rowWeightedMeans.R0000644000175100001440000001430012542546242016417 0ustar hornikusers###########################################################################/** # @RdocFunction rowWeightedMeans # @alias colWeightedMeans # # @title "Calculates the weighted means for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowWeightedMeans # @usage colWeightedMeans # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{w}{A @numeric @vector of length K (N).} # \item{na.rm}{If @TRUE, missing values are excluded from the calculation, # otherwise not.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \details{ # The implementations of these methods are optimized for both speed # and memory. # If no weights are given, the corresponding # \code{rowMeans()}/\code{colMeans()} is used. # } # # @examples "../incl/rowWeightedMeans.Rex" # # @author "HB" # # \seealso{ # See \code{rowMeans()} and \code{colMeans()} in @see "base::colSums" # for non-weighted means. # See also @see "stats::weighted.mean". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowWeightedMeans <- function(x, w=NULL, na.rm=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'w': hasWeights <- !is.null(w); if (hasWeights) { n <- ncol(x); if (length(w) != n) { stop("The length of argument 'w' is does not match the number of column in 'x': ", length(w), " != ", n); } if (!is.numeric(w)) { stop("Argument 'w' is not numeric: ", mode(w)); } if (any(!is.na(w) & w < 0)) { stop("Argument 'w' has negative weights."); } } if (hasWeights) { # Allocate results m <- nrow(x); if (m == 0L) return(double(0L)); # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0); nw <- length(idxs); if (nw == 0L) { return(rep(NaN, times=m)); } else if (nw < n) { w <- w[idxs]; x <- x[,idxs,drop=FALSE]; } idxs <- NULL; # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x); } if (na.rm) { # Indices of missing values nas <- which(is.na(x)); # Weight matrix W <- matrix(w, nrow=nrow(x), ncol=ncol(x), byrow=TRUE); w <- NULL; # Not needed anymore W[nas] <- NA; wS <- rowSums(W, na.rm=TRUE); # Standarized weights summing to one w/out missing values W[nas] <- 0; W <- W / wS; x[nas] <- 0; nas <- NULL; # Not needed anymore x <- W * x; W <- NULL; # Not needed anymore } else { wS <- sum(w); # Standardize weights summing to one. w <- w / wS; # Weighted values ## SLOW: for (rr in 1:m) x[rr,] <- w * x[rr,,drop=TRUE]; ## FAST: x <- t_tx_OP_y(x, w, OP="*", na.rm=FALSE) w <- NULL; # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- rowSums(x, na.rm=FALSE); } else { res <- rowMeans(x, na.rm=na.rm); } res; } # rowWeightedMeans() colWeightedMeans <- function(x, w=NULL, na.rm=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'w': hasWeights <- !is.null(w); if (hasWeights) { n <- nrow(x); if (length(w) != n) { stop("The length of argument 'w' is does not match the number of rows in 'x': ", length(w), " != ", n); } if (!is.numeric(w)) { stop("Argument 'w' is not numeric: ", mode(w)); } if (any(!is.na(w) & w < 0)) { stop("Argument 'w' has negative weights."); } } if (hasWeights) { # Allocate results m <- ncol(x); if (m == 0L) return(double(0L)); # Drop entries with zero weight? ...but keep NAs idxs <- which(is.na(w) | w != 0); nw <- length(idxs); if (nw == 0L) { return(rep(NaN, times=m)); } else if (nw < n) { w <- w[idxs]; x <- x[idxs,,drop=FALSE]; } idxs <- NULL; # Not needed anymore # Has missing values? if (na.rm) { # Really? na.rm <- anyMissing(x); } if (na.rm) { # Indices of missing values nas <- which(is.na(x)); # Weight matrix W <- matrix(w, nrow=nrow(x), ncol=ncol(x), byrow=FALSE); w <- NULL; # Not needed anymore W[nas] <- NA; wS <- colSums(W, na.rm=TRUE); # Standarized weights summing to one w/out missing values W[nas] <- 0; for (cc in 1:m) { W[,cc] <- W[,cc,drop=TRUE] / wS[cc]; } x[nas] <- 0; nas <- NULL; # Not needed anymore x <- W * x; W <- NULL; # Not needed anymore } else { wS <- sum(w); # Standardize weights summing to one. w <- w / wS; # Weighted values x <- w*x; ## SLIGHTLY SLOWER: x <- x_OP_y(x, w, OP="*"); w <- NULL; # Not needed anymore } # Here we know there are no missing value in the new 'x' res <- colSums(x, na.rm=FALSE); } else { res <- colMeans(x, na.rm=na.rm); } res; } # colWeightedMeans() ############################################################################## # HISTORY: # 2014-12-19 [HB] # o CLEANUP: Made col- and rowWeightedMeans() plain R functions. # 2013-11-29 # o BUG FIX: (col|row)WeightedMeans() with all zero weights gave an # invalid result. # 2013-11-23 # o MEMORY: Now (col|row)WeightedMeans() clean out allocated objects sooner. # 2010-02-03 # o BUG FIX: (col|row)WeightedMeans(..., na.rm=TRUE) would incorrectly treat # missing values as zeros. Thanks Pierre Neuvial for reporting this. # 2008-02-01 # o Added special implementation for column version. # o Added Rdoc comments. # o Created. ############################################################################## matrixStats/R/meanOver.R0000644000175100001440000000603512542546242014725 0ustar hornikusers############################################################################/** # @RdocFunction meanOver # # @title "Fast averaging over subset of vector elements" # # @synopsis # # \description{ # Computes the sample mean of all or a subset of values. # } # # \arguments{ # \item{x}{A @numeric @vector of length N.} # \item{idxs}{A @numeric index @vector in [1,N] of elements to mean over. # If @NULL, all elements are considered.} # \item{na.rm}{If @TRUE, missing values are skipped, otherwise not.} # \item{refine}{If @TRUE and \code{x} is @numeric, then extra effort is # used to calculate the average with greater numerical precision, # otherwise not.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # } # # \details{ # \code{meanOver(x, idxs)} gives equivalent results as # \code{mean(x[idxs])}, but is faster and more memory efficient # since it avoids the actual subsetting which requires copying # of elements and garbage collection thereof. # # If \code{x} is @numeric and \code{refine=TRUE}, then a two-pass scan # is used to calculate the average. The first scan calculates the total # sum and divides by the number of (non-missing) values. In the second # scan, this average is refined by adding the residuals towards the first # average. The @see "base::mean" uses this approach. # \code{meanOver(..., refine=FALSE)} is almost twice as fast as # \code{meanOver(..., refine=TRUE)}. # } # # @examples "../incl/meanOver.Rex" # # \seealso{ # @see "base::mean". # To efficiently sum over a subset, see @see "sumOver". # } # # @author # # @keyword univar # @keyword internal #*/############################################################################ meanOver <- function(x, idxs=NULL, na.rm=FALSE, refine=TRUE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x)) { stop("Argument 'x' is not numeric: ", mode(x)); } n <- length(x); # Argument 'na.rm': if (!is.logical(na.rm)) { stop("Argument 'na.rm' is not logical: ", mode(na.rm)); } # Argument 'idxs': if (is.null(idxs)) { } else if (is.integer(idxs)) { } else if (is.logical(idxs)) { if (length(idxs) != n) { stop(sprintf("Lengths of arguments 'idxs' and 'x' do not match: %d != %d", length(idxs), n)); } idxs <- which(idxs); } else { idxs <- as.integer(idxs); } # Argument 'refine': if (!is.logical(refine)) { stop("Argument 'refine' is not logical: ", mode(refine)); } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Averaging # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - .Call("meanOver", x, idxs, na.rm, refine, PACKAGE="matrixStats"); } # meanOver() ############################################################################ # HISTORY: # 2014-11-02 [HB] # o Created. ############################################################################ matrixStats/R/indexByRow.R0000644000175100001440000000257312542546242015246 0ustar hornikusers###########################################################################/** # @RdocFunction indexByRow # # @title "Translates matrix indices by rows into indices by columns" # # \description{ # @get "title". # } # # \usage{ # @usage indexByRow # } # # \arguments{ # \item{dim}{A @numeric @vector of length two specifying the length # of the "template" matrix.} # \item{idxs}{A @vector of indices. If @NULL, all indices are returned.} # \item{...}{Not use.} # } # # \value{ # Returns an @integer @vector of indices. # } # # @examples "../incl/indexByRow.Rex" # # @author "HB" # # @keyword iteration # @keyword logic #*/########################################################################### indexByRow <- function(dim, idxs=NULL, ...) { if (is.matrix(dim)) { # BACKWARD COMPATIBILITY: Keep for a while, but deprecate # in the future. dim <- dim(dim) } else { dim <- as.integer(dim) } if (!is.null(idxs)) idxs <- as.integer(idxs) .Call("indexByRow", dim, idxs, package="matrixStats") } ############################################################################## # HISTORY: # 2014-11-09 # o Now indexByRow() is a plain R function (was a generic function). # o Implemented in C. # 2014-05-23 # o CLEANUP: Made indexByRow() an S3 rather than S4 generic. # 2007-04-12 # o Created. ############################################################################## matrixStats/R/binCounts.R0000644000175100001440000001010512542546242015106 0ustar hornikusers############################################################################/** # @RdocFunction binCounts # # @title "Fast element counting in non-overlapping bins" # # @synopsis # # \description{ # Counts the number of elements in non-overlapping bins # } # # \arguments{ # \item{x}{A @numeric @vector of K positions for to be binned and counted.} # \item{bx}{A @numeric @vector of B+1 ordered positions specifying # the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ..., # \code{[bx[B],bx[B+1])}.} # \item{right}{If @TRUE, the bins are right-closed (left open), # otherwise left-closed (right open).} # \item{...}{Not used.} # } # # \value{ # Returns an @integer @vector of length B with non-negative integers. # } # # \details{ # \code{binCounts(x, bx, right=TRUE)} gives equivalent results as # \code{rev(binCounts(-x, bx=rev(-bx), right=FALSE))}, but is # faster and more memory efficient. # } # # \section{Missing and non-finite values}{ # Missing values in \code{x} are ignored/dropped. # Missing values in \code{bx} are not allowed and gives an error. # } # # \seealso{ # An alternative for counting occurrences within bins is # @see "graphics::hist", e.g. \code{hist(x, breaks=bx, plot=FALSE)$counts}. # That approach is ~30-60\% slower than \code{binCounts(..., right=TRUE)}. # # To count occurrences of indices \code{x} (positive @integers) in # \code{[1,B]}, use \code{tabulate(x, nbins=B)}, where \code{x} does # \emph{not} have to be sorted first. # For details, see @see "base::tabulate". # # To average values within bins, see @see "binMeans". # } # # @author "HB" # # @keyword "univar" #*/############################################################################ binCounts <- function(x, bx, right=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (!is.numeric(x)) { stop("Argument 'x' is not numeric: ", mode(x)); } # Argument 'bx': if (!is.numeric(bx)) { stop("Argument 'bx' is not numeric: ", mode(bx)); } if (any(is.infinite(bx))) { stop("Argument 'bx' must not contain Inf values."); } if (is.unsorted(bx)) { stop("Argument 'bx' is not ordered."); } # Argument 'right': right <- as.logical(right); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values keep <- which(!is.na(x)); if (length(keep) < length(x)) { x <- x[keep]; } keep <- NULL; # Not needed anymore # Order x (by increasing x). # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method="quick"); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x <- as.numeric(x); bx <- as.numeric(bx); .Call("binCounts", x, bx, right, PACKAGE="matrixStats"); } # binCounts() ############################################################################ # HISTORY: # 2014-12-29 [HB] # o SPEEDUP: Now binCounts() and binMeans() uses is.unsorted() instead # of o <- order(); any(diff(o) != 1L). # 2014-12-17 [HB] # o CLEANUP: Made binCounts() and binMeans() plain R functions. # 2013-11-24 [HB] # o DOCUMENTATION: Added reference to base::tabulate(). # 2013-11-23 [HB] # o MEMORY: binCounts() cleans out more temporary variables as soon as # possible such that the garbage collector can remove them sooner. # 2012-05-10 [HB] # o DOCUMENTATION: Now help(binCounts) cross references hist(), which is # almost as fast. Thanks Ilari Scheinin (Finland) for pointing this out. # o SPEEDUP: Now binMeans() and binCounts() use Hoare's Quicksort # method for sorting 'x'. # 2012-10-03 [HB] # o Created. ############################################################################ matrixStats/R/rowOrderStats.R0000644000175100001440000000455312542546242015776 0ustar hornikusers###########################################################################/** # @RdocFunction rowOrderStats # @alias colOrderStats # # @title "Gets an order statistic for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowOrderStats # @usage colOrderStats # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{which}{An @integer index in [1,K] ([1,N]) indicating which # order statistic to be returned.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \details{ # The implementation of \code{rowOrderStats()} is optimized for both # speed and memory. # To avoid coercing to @doubles (and hence memory allocation), there # is a unique implementation for @integer matrices. # } # # \section{Missing values}{ # This method does \emph{not} handle missing values, that is, the result # corresponds to having \code{na.rm=FALSE} (if such an argument would # be available). # } # # \author{ # The native implementation of \code{rowOrderStats()} was adopted # by Henrik Bengtsson from Robert Gentleman's \code{rowQ()} # in the \pkg{Biobase} package. # } # # \seealso{ # See \code{rowMeans()} in @see "base::colSums". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowOrderStats <- function(x, which, dim.=dim(x), ...) { dim. <- as.integer(dim.) which <- as.integer(which) .Call("rowOrderStats", x, dim., which, PACKAGE="matrixStats"); } colOrderStats <- function(x, which, dim.=dim(x), ...) { dim. <- as.integer(dim.) which <- as.integer(which) .Call("colOrderStats", x, dim., which, PACKAGE="matrixStats"); } ############################################################################ # HISTORY: # 2014-12-19 [HB] # o CLEANUP: Made col- and rowOrderStats() plain R functions. # 2014-11-16 # o SPEEDUP: Now colOrderStats() also is implemented in native code. # 2008-03-25 # o Added colOrderStats(). # o Renamed from rowQuantiles() to rowOrderStats(), especially because it # is not returning quantiles like quantile(). # o Created (again?) ############################################################################ matrixStats/R/rowCumsums.R0000644000175100001440000000466012542546242015337 0ustar hornikusers###########################################################################/** # @RdocFunction rowCumsums # @alias rowCumsums # @alias colCumsums # @alias rowCumprods # @alias colCumprods # @alias rowCummins # @alias colCummins # @alias rowCummaxs # @alias colCummaxs # # @title "Cumulative sums, products, minima and maxima for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowCumsums # @usage colCumsums # @usage rowCumprods # @usage colCumprods # @usage rowCummins # @usage colCummins # @usage rowCummaxs # @usage colCummaxs # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric NxK @matrix of the same mode as \code{x}. # } # # @examples "../incl/rowCumsums.Rex" # # @author "HB" # # \seealso{ # See @see "base::cumsum", @see "base::cumprod", # @see "base::cummin", and @see "base::cummax". # } # # @keyword array # @keyword iteration # @keyword univar #*/########################################################################### rowCumsums <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCumsums", x, dim, TRUE, PACKAGE="matrixStats") } colCumsums <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCumsums", x, dim, FALSE, PACKAGE="matrixStats") } rowCumprods <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCumprods", x, dim, TRUE, PACKAGE="matrixStats") } colCumprods <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCumprods", x, dim, FALSE, PACKAGE="matrixStats") } rowCummins <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCummins", x, dim, TRUE, PACKAGE="matrixStats") } colCummins <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCummins", x, dim, FALSE, PACKAGE="matrixStats") } rowCummaxs <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCummaxs", x, dim, TRUE, PACKAGE="matrixStats") } colCummaxs <- function(x, dim.=dim(x), ...) { dim <- as.integer(dim.); .Call("rowCummaxs", x, dim, FALSE, PACKAGE="matrixStats") } ############################################################################ # HISTORY: # 2014-11-26 [HB] # o Created. ############################################################################ matrixStats/R/benchmark.R0000644000175100001440000000131612542546242015100 0ustar hornikusersbenchmark <- function(fcn, tags=NULL, path=NULL, workdir="reports", envir=parent.frame(), ...) { requireNamespace("R.rsp") || stop("R.rsp not installed.") if (is.function(fcn)) { fcn <- deparse(substitute(fcn)) } if (is.null(path)) { path <- system.file("benchmarking", package="matrixStats") } fullname <- paste(c(fcn, tags), collapse=",") filename <- sprintf("%s.md.rsp", fullname) pathname <- file.path(path, filename) R.rsp::rfile(pathname, workdir=workdir, envir=envir, ...) } # benchmark() ############################################################################ # HISTORY: # 2014-06-02 # o Created. ############################################################################ matrixStats/R/rowMedians.S4.R0000644000175100001440000000536412542546242015552 0ustar hornikusers###########################################################################/** # @RdocFunction rowMedians # @alias colMedians # \alias{rowMedians,matrix-method} # \alias{colMedians,matrix-method} # # @title "Calculates the median for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowMedians # @usage colMedians # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \details{ # The implementation of \code{rowMedians()} and \code{colMedians()} # is optimized for both speed and memory. # To avoid coercing to @doubles (and hence memory allocation), there # is a special implementation for @integer matrices. # That is, if \code{x} is an @integer @matrix, then # \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) # would require three times the memory of \code{rowMedians(x)} # (\code{colMedians(x)}), but all this is avoided. # } # # @author "HB, HJ" # # \seealso{ # See @see "rowMedians" and \code{colMedians()} for weighted medians. # For mean estimates, see \code{rowMeans()} in @see "base::colSums". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### setGeneric("rowMedians", function(x, na.rm=FALSE, dim.=dim(x), ...) { standardGeneric("rowMedians"); }) setMethod("rowMedians", signature(x="matrix"), function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm); hasNAs <- TRUE; # Add as an argument? /2007-08-24 .Call("rowMedians", x, dim., na.rm, hasNAs, TRUE, PACKAGE="matrixStats"); }) setGeneric("colMedians", function(x, na.rm=FALSE, dim.=dim(x), ...) { standardGeneric("colMedians"); }) setMethod("colMedians", signature(x="matrix"), function(x, na.rm=FALSE, dim.=dim(x), ...) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm); hasNAs <- TRUE; # Add as an argument? /2007-08-24 .Call("rowMedians", x, dim., na.rm, hasNAs, FALSE, PACKAGE="matrixStats"); }) ############################################################################ # HISTORY: # 2011-10-13 [HJ] # o In the past, colMedians(x) was accomplished as rowMedians(t(x)); # it is now done directly. # 2008-03-25 # o Added colMedians() - a wrapper around rowMedians() for now. # o Turned into a S4 method as it used to be in Biobase. # 2007-08-14 # o Added argument 'hasNA'. # 2005-11-25 # o Created. ############################################################################ matrixStats/R/rowAvgsPerColSet.R0000644000175100001440000001310312542546242016354 0ustar hornikusers###########################################################################/** # @RdocFunction rowAvgsPerColSet # @alias colAvgsPerRowSet # # @title "Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)" # # \description{ # @get "title". # Each subset is averaged independently of the others. # } # # @synopsis # # \arguments{ # \item{X}{A @numeric NxM @matrix.} # \item{W}{An optional @numeric NxM @matrix of weights.} # \item{S}{An @integer KxJ @matrix specifying the J subsets. Each # column holds K column (row) indices for the corresponding subset.} # \item{FUN}{The row-by-row (column-by-column) @function used to average # over each subset of \code{X}. This function must accept a @numeric # NxK (KxM) @matrix and the @logical argument \code{na.rm} (which is # automatically set), and return a @numeric @vector of length N (M).} # \item{...}{Additional arguments passed to then \code{FUN} @function.} # \item{tFUN}{If @TRUE, the NxK (KxM) @matrix passed to \code{FUN()} # is transposed first.} # } # # \value{ # Returns a @numeric JxN (MxJ) @matrix, # where row names equal \code{rownames(X)} (\code{colnames(S)}) # and column names \code{colnames(S)} (\code{colnames(X)}). # } # # \details{ # If argument \code{S} is a single column vector with indices # \code{1:N}, then \code{rowAvgsPerColSet(X, S=S, FUN=rowMeans)} # gives the same result as \code{rowMeans(X)}. # Analogously, for \code{rowAvgsPerColSet()}. # } # # @examples "../incl/rowAvgsPerColSet.Rex" # # @author "HB" # # @keyword internal # @keyword utilities #*/########################################################################### rowAvgsPerColSet <- function(X, W=NULL, S, FUN=rowMeans, ..., tFUN=FALSE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop("Argument 'X' is not a matrix: ", class(X)[1L]); } dimX <- dim(X); # Argument 'W': hasW <- !is.null(W); if (hasW) { if (!is.matrix(W)) { stop("Argument 'W' is not a matrix: ", class(W)[1L]); } if (any(dim(W) != dimX)) { stop("Argument 'W' does not have the same dimension as 'X': ", paste(dim(W), collapse="x"), " != ", paste(dimX, collapse="x")); } if (!is.numeric(W)) { stop("Argument 'W' is not numeric: ", mode(W)); } } # Argument 'S': if (!is.matrix(S)) { stop("Argument 'S' is not a matrix: ", class(S)[1L]); } nbrOfSets <- ncol(S); setNames <- colnames(S); # Argument 'FUN': if (!is.function(FUN)) { stop("Argument 'FUN' is not a function: ", mode(S)); } # Argument 'tFUN': tFUN <- as.logical(tFUN); # Check if missing values have to be excluded while averaging na.rm <- (anyMissing(X) || anyMissing(S)); # Record names of dimension rownamesX <- rownames(X); dimnames(X) <- NULL; # Average in sets of columns of X. Z <- apply(S, MARGIN=2L, FUN=function(jj) { # Extract set of columns from X jj <- jj[is.finite(jj)]; Zjj <- X[,jj,drop=FALSE]; jj <- NULL; # Not needed anymore if (tFUN) { Zjj <- t(Zjj); } # Average by weights if (hasW) { Wjj <- W[,jj,drop=FALSE]; Zjj <- FUN(Zjj, W=Wjj, ..., na.rm=na.rm); Wjj <- NULL; # Not needed anymore } else { Zjj <- FUN(Zjj, ..., na.rm=na.rm); } # Sanity check stopifnot(length(Zjj) == dimX[1L]); # Return set average Zjj; }); # Sanity check stopifnot(dim(Z) == c(dimX[1L], nbrOfSets)); # Set names rownames(Z) <- rownamesX; colnames(Z) <- setNames; Z; } # rowAvgsPerColSet() colAvgsPerRowSet <- function(X, W=NULL, S, FUN=colMeans, tFUN=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'X': if (!is.matrix(X)) { stop("Argument 'X' is not a matrix: ", class(X)[1L]); } # Argument 'W': # Argument 'S': if (!is.matrix(S)) { stop("Argument 'S' is not a matrix: ", class(S)[1L]); } # Argument 'FUN': if (!is.function(FUN)) { stop("Argument 'FUN' is not a function: ", mode(S)); } # Argument 'tFUN': tFUN <- as.logical(tFUN); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tX <- t(X); if (is.null(W)) { tW <- NULL; } else { tW <- t(W); } # ... tZ <- rowAvgsPerColSet(X=tX, W=tW, S=S, FUN=FUN, tFUN=!tFUN, ...); tX <- tW <- NULL; # Not needed anymore # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Transpose back # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Z <- t(tZ); tZ <- NULL; # Not needed anymore Z; } # colAvgsPerRowSet() ############################################################################## # HISTORY: # 2014-12-17 [HB] # o CLEANUP: Made col- and rowAvgsPerColSet() plain R functions. # 2013-11-23 # o MEMORY: rowAvgsPerColSet() and colAvgsPerRowSet() do a better job # cleaning out allocated objects sooner. # 2011-11-29 # o Added rowAvgsPerColSet() and colAvgsPerRowSet(). # o Created from blockAvg() in the aroma.cn.eval package. ############################################################################## matrixStats/R/allocMatrix.R0000644000175100001440000000313012542546242015421 0ustar hornikusers############################################################################/** # @RdocFunction allocMatrix # @alias allocVector # @alias allocArray # # @title "Allocates an empty vector, matrix or array" # # \usage{ # @usage allocVector # @usage allocMatrix # @usage allocArray # } # # \description{ # @get title faster than the corresponding function in R. # } # # \arguments{ # \item{length, nrow, ncol, dim}{@numerics specifying the dimension of # the created @vector, @matrix or @array.} # \item{value}{A @numeric scalar that all elements will have as value.} # \item{...}{Not used.} # } # # \value{ # Returns a @vector, @matrix and @array respectively of the same data # type as \code{value}. # } # # @author "HB" # # \seealso{ # See also @vector, @matrix and @array. # } # # @keyword programming # @keyword internal #*/############################################################################ allocVector <- function(length, value=0.0, ...) { length <- as.integer(length) .Call("allocVector2", length, value, PACKAGE="matrixStats") } # allocVector() allocMatrix <- function(nrow, ncol, value=0.0, ...) { nrow <- as.integer(nrow) ncol <- as.integer(ncol) .Call("allocMatrix2", nrow, ncol, value, PACKAGE="matrixStats") } # allocMatrix() allocArray <- function(dim, value=0.0, ...) { dim <- as.integer(dim) .Call("allocArray2", dim, value, PACKAGE="matrixStats") } # allocArray() ############################################################################ # HISTORY: # 2014-11-08 [HB] # o Created. ############################################################################ matrixStats/R/rowWeightedMedians.R0000644000175100001440000001002712542546242016736 0ustar hornikusers###########################################################################/** # @RdocFunction rowWeightedMedians # @alias colWeightedMedians # # @title "Calculates the weighted medians for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowWeightedMedians # @usage colWeightedMedians # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{w}{A @numeric @vector of length K (N).} # \item{na.rm}{If @TRUE, missing values are excluded from the calculation, # otherwise not.} # \item{...}{Additional arguments passed to @see "weightedMedian".} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \details{ # The implementations of these methods are optimized for both speed # and memory. # If no weights are given, the corresponding # @see "rowMedians"/\code{colMedians()} is used. # } # # \examples{ # @include "../incl/rowWeightedMedians.Rex" # } # # @author "HB" # # \seealso{ # See @see "rowMedians" and \code{colMedians()} for non-weighted medians. # Internally, @see "weightedMedian" is used. # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowWeightedMedians <- function(x, w=NULL, na.rm=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'w': hasWeights <- !is.null(w); if (hasWeights) { n <- ncol(x); if (length(w) != n) { stop("The length of argument 'w' is does not match the number of column in 'x': ", length(w), " != ", n); } if (!is.numeric(w)) { stop("Argument 'w' is not numeric: ", mode(w)); } if (any(!is.na(w) & w < 0)) { stop("Argument 'w' has negative weights."); } } if (hasWeights) { # Allocate results m <- nrow(x); if (m == 0L) return(double(0L)); res <- apply(x, MARGIN=1L, FUN=function(x) { weightedMedian(x, w=w, na.rm=na.rm, ...); }); w <- NULL; # Not needed anymore } else { res <- rowMedians(x, na.rm=na.rm); } res; } # rowWeightedMedians() colWeightedMedians <- function(x, w=NULL, na.rm=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'w': hasWeights <- !is.null(w); if (hasWeights) { n <- nrow(x); if (length(w) != n) { stop("The length of argument 'w' is does not match the number of rows in 'x': ", length(w), " != ", n); } if (!is.numeric(w)) { stop("Argument 'w' is not numeric: ", mode(w)); } if (any(!is.na(w) & w < 0)) { stop("Argument 'w' has negative weights."); } } if (hasWeights) { # Allocate results m <- ncol(x); if (m == 0L) return(double(0L)); res <- apply(x, MARGIN=2L, FUN=function(x) { weightedMedian(x, w=w, na.rm=na.rm, ...); }); w <- NULL; # Not needed anymore } else { res <- colMedians(x, na.rm=na.rm); } res; } ############################################################################## # HISTORY: # 2014-12-19 [HB] # o CLEANUP: Made col- and rowWeightedMedians() plain R functions. # 2013-11-23 # o MEMORY: Now (col|row)WeightedMedians() clean out allocated objects sooner. # 2012-04-16 # o Now {col|row}WeightedMedians() no longer require aroma.light, because # weightedMedian() is now in this package. # 2009-06-17 # o Updated the Rdoc example to run conditionally on aroma.light, which is # only a suggested package - not a required one. This in order to prevent # R CMD check to fail on CRAN (as currently done on their OSX servers). # 2008-02-02 # o Created from rowWeightedMeans.matrix.R. ############################################################################## matrixStats/R/rowLogSumExps.R0000644000175100001440000000511212542546242015742 0ustar hornikusers###########################################################################/** # @RdocFunction rowLogSumExps # @alias colLogSumExps # \alias{rowLogSumExps,matrix-method} # \alias{colLogSumExps,matrix-method} # # @title "Accurately computes the logarithm of the sum of exponentials across rows or columns" # # \description{ # @get "title". # } # # \usage{ # @usage rowLogSumExps # @usage colLogSumExps # } # # \arguments{ # \item{lx}{A @numeric NxK @matrix. # Typically \code{lx} are \eqn{log(x)} values.} # \item{na.rm}{If @TRUE, any missing values are ignored, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # A @numeric @vector of length N (K). # } # # \section{Benchmarking}{ # These methods are implemented in native code and have been optimized # for speed and memory. # } # # \author{ # Native implementation by Henrik Bengtsson. # Original R code by Nakayama ??? (Japan). # } # # \seealso{ # To calculate the same on vectors, @see "logSumExp". # } # # @keyword array #*/########################################################################### rowLogSumExps <- function(lx, na.rm=FALSE, dim.=dim(lx), ...) { dim. <- as.integer(dim.) hasNA <- TRUE; res <- .Call("rowLogSumExps", lx, dim., as.logical(na.rm), as.logical(hasNA), TRUE, PACKAGE="matrixStats"); # Preserve names names <- rownames(lx); if (!is.null(names)) { names(res) <- names; } res; } # rowLogSumExps() colLogSumExps <- function(lx, na.rm=FALSE, dim.=dim(lx), ...) { dim. <- as.integer(dim.) hasNA <- TRUE; res <- .Call("rowLogSumExps", lx, dim., as.logical(na.rm), as.logical(hasNA), FALSE, PACKAGE="matrixStats"); # Preserve names names <- colnames(lx); if (!is.null(names)) { names(res) <- names; } res; } # rowLogSumExps() ############################################################################ # HISTORY: # 2013-04-30 [HB] # o SPEEDUP: (col|row)LogSumExps() are now implemented natively. # o Renamed to (col|row)LogSumExps(). # 2013-04-29 [HB] # o Added rowSumsInLogspace(). # o Renamed to colSumsInLogspace() which utilizes logSumExp(). # 2013-04-24 [HB] # o Added colSumsP() adopted from log.colSums.exp() code contributed # by Nakayama ??? (Japan) on 2013-01-08. # o Created. ############################################################################ matrixStats/R/rowVars.R0000644000175100001440000000742712542546242014622 0ustar hornikusers###########################################################################/** # @RdocFunction rowVars # @alias rowVars # @alias colVars # \alias{rowVars,matrix-method} # \alias{colVars,matrix-method} # # @title "Variance estimates for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowVars # @usage colVars # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{center}{(optional) The center, defaults to the row means.} # \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Additional arguments passed to \code{rowMeans()} and # \code{rowSums()}.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # @examples "../incl/rowMethods.Rex" # # @author "HB" # # \seealso{ # See \code{rowMeans()} and \code{rowSums()} in @see "base::colSums". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowVars <- function(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) { if (is.null(center)) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) hasNAs <- TRUE sigma2 <- .Call("rowVars", x, dim., na.rm, hasNAs, TRUE, PACKAGE="matrixStats"); return(sigma2) } ncol <- ncol(x); # Nothing to do? if (ncol <= 1L) { x <- rep(NA_real_, times=nrow(x)); return(x); } if (na.rm) { # Count number of missing values in each row nNA <- rowCounts(x, value=NA_real_, na.rm=FALSE); # Number of non-missing values n <- ncol - nNA; hasNA <- any(nNA > 0L); if (hasNA) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_; } else { # No need to check for missing values below na.rm <- FALSE; } } else { # Assuming no missing values n <- ncol; } # Spread x <- x*x; x <- rowMeans(x, na.rm=na.rm); # Variance x <- (x - center^2); x * (n/(n-1)); } colVars <- function(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) { if (is.null(center)) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) hasNAs <- TRUE sigma2 <- .Call("rowVars", x, dim., na.rm, hasNAs, FALSE, PACKAGE="matrixStats"); return(sigma2) } nrow <- nrow(x); # Nothing to do? if (nrow <= 1L) { x <- rep(NA_real_, times=ncol(x)); return(x); } if (na.rm) { # Count number of missing values in each column nNA <- colCounts(x, value=NA_real_, na.rm=FALSE); # Number of non-missing values n <- nrow - nNA; hasNA <- any(nNA > 0L); if (hasNA) { # Set NA estimates for rows with less than two observations n[n <= 1L] <- NA_integer_; } else { # No need to check for missing values below na.rm <- FALSE; } } else { # Assuming no missing values n <- nrow; } # Spread x <- x*x; x <- colMeans(x, na.rm=na.rm); # Variance x <- (x - center^2); x * (n/(n-1)); } ############################################################################ # HISTORY: # 2015-02-09 [HB] # o Now using na.rm=FALSE as the default. # 2014-06-02 [HB] # o Now rowVars() are utilizing rowCounts() instead of rowSums(). # o SPEEDUP: Made colVars() and colSds() significantly faster and # rowVars() and rowSds() a slightly bit faster. # o Now using NA_integer_ instead of NA. # 2008-03-26 [HB] # o Added argument 'center=NULL', cf. base::mad(). # o Created from genefilter::rowVars() by Wolfgang Huber. ############################################################################ matrixStats/R/product.R0000644000175100001440000000051712542546242014630 0ustar hornikusersproduct <- function(x, na.rm=FALSE, ...) { .Call("productExpSumLog", x, as.logical(na.rm), TRUE, PACKAGE="matrixStats"); } # product() ############################################################################ # HISTORY: # 2014-06-04 [HB] # o Created. ############################################################################ matrixStats/R/weightedMedian.R0000644000175100001440000001655312542546242016075 0ustar hornikusers############################################################################/** # @RdocFunction weightedMedian # # \encoding{latin1} # # @title "Weighted Median Value" # # @synopsis # # \description{ # Computes a weighted median of a numeric vector. # } # # \arguments{ # \item{x}{a @numeric @vector containing the values whose weighted median is # to be computed.} # \item{w}{a vector of weights the same length as \code{x} giving the weights # to use for each element of \code{x}. Negative weights are treated # as zero weights. Default value is equal weight to all values.} # \item{na.rm}{a logical value indicating whether @NA values in # \code{x} should be stripped before the computation proceeds, # or not. If @NA, no check at all for @NAs is done. # Default value is @NA (for efficiency).} # \item{interpolate}{If @TRUE, linear interpolation is used to get a # consistent estimate of the weighted median.} # \item{ties}{If \code{interpolate == FALSE}, # a character string specifying how to solve ties between two # \code{x}'s that are satisfying the weighted median criteria. # Note that at most two values can satisfy the criteria. # When \code{ties} is \code{"min"}, the smaller value of the two # is returned and when it is \code{"max"}, the larger value is # returned. # If \code{ties} is \code{"mean"}, the mean of the two values is # returned. # Finally, if \code{ties} is \code{"weighted"} (or @NULL) a # weighted average of the two are returned, where the weights are # weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, # respectively.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # } # # \details{ # For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive # weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, # the \emph{weighted median} is defined as the element \code{x[k]} for which # the total weight of all elements \code{x[i] < x[k]} is less or equal to # \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} # is less or equal to \code{S/2} (c.f. [1]). # # If \code{w} is missing then all elements of \code{x} are given the same # positive weight. If all weights are zero, @NA_real_ is returned. # # If one or more weights are \code{Inf}, it is the same as these weights # have the same weight and the others has zero. This makes things easier for # cases where the weights are result of a division with zero. # # The weighted median solves the following optimization problem: # # \deqn{\alpha^* = \arg_\alpha \min \sum_{k=1}{K} w_k |x_k-\alpha|} # where \eqn{x=(x_1,x_2,\ldots,x_K)} are scalars and # \eqn{w=(w_1,w_2,\ldots,w_K)} are the corresponding "weights" for # each individual \eqn{x} value. # } # # @examples "../incl/weightedMedian.Rex" # # \seealso{ # @see "stats::median", @see "base::mean" and @see "weightedMean". # } # # \references{ # [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms, # The MIT Press, Massachusetts Institute of Technology, 1989. # } # # \author{ # Henrik Bengtsson and Ola Hossjer, Centre for Mathematical # Sciences, Lund University. # Thanks to Roger Koenker, Econometrics, University of Illinois, for # the initial ideas. # } # # @keyword "univar" # @keyword "robust" #*/############################################################################ weightedMedian <- function(x, w=rep(1, times=length(x)), na.rm=FALSE, interpolate=is.null(ties), ties=NULL, ...) { # Argument 'x': # Argument 'w': w <- as.double(w) # Argument 'na.rm': na.rm <- as.logical(na.rm) if (is.na(na.rm)) na.rm <- FALSE # Argument 'interpolate': interpolate <- as.logical(interpolate) # Argument 'ties': if (is.null(ties)) { tiesC <- 1L } else { if (ties == "weighted") { tiesC <- 1L } else if (ties == "min") { tiesC <- 2L } else if (ties == "max") { tiesC <- 4L } else if (ties == "mean") { tiesC <- 8L } else if (ties == "both") { .Defunct("As of matrixStats (> 0.12.2), weightedMedian(..., interpolate=FALSE, ties=\"both\") is no longer supported. Use ties=\"min\" and then ties=\"max\" to achieve the same result.") } else { stop("Unknown value on 'ties': ", ties) } } .Call("weightedMedian", x, w, na.rm, interpolate, tiesC, package="matrixStats") } # weightedMedian() ############################################################################### # HISTORY: # 2015-01-26 # o CLEANUP: Drop old internally-renamed .weightedMedian(). # 2015-01-01 # o Dropped support for weightedMedian(..., ties="both"). # o BUG FIX: weightedMedian(..., ties="both") would give "Error in # .subset(x, k, k + 1L) : incorrect number of dimensions" if there # was a tie. # 2014-06-03 # o SPEEDUP: Made weightedMedian() a plain function (was an S3 method). # 2013-11-23 # o MEMORY: Now weightedMad() cleans out allocated objects sooner. # 2013-09-26 # o Now utilizing anyMissing(). # 2012-09-10 # o Replaced an .Internal(psort(...)) call with new .psortKM(). # 2012-04-16 # o Added local function qsort() to weightedMedian(), which was adopted # from calculateResidualSet() for ProbeLevelModel in aroma.affymatrix 2.5.0. # o Added local function psortGet() to weightedMedian(). # 2011-04-08 # o Now weightedMedian() returns NA:s of the same mode as argument 'x'. # 2006-04-21 # o Now negative weights are not check for, but instead treated as zero # weights. This was done to minimize the overhead of the function. # o Replace all "[[" and "[" with .subset2() and .subset() to minimize # overhead from method dispatching. # o Remove all calls to rm(). # o It is now possible to specify via na.rm=NA that there are no NAs. # o Small speed up, especially when all weights were the same. # 2006-01-31 # o Rdoc bug fix: 'reference' to 'references'. # 2005-07-26 # o Argument 'interpolate' defaults to TRUE only if 'ties' is NULL. # 2005-06-03 # o Renamed from weighted. median() to weightedMedian(). # o Made into a default function. # o Now using setMethodS3(). # 2003-02-01 # o Update the Rdoc with comments about the method and interpolate argument. # 2002-06-27 # * Reduced memory usage a little bit by calling rm() when possible; minimized # the risk for automatic garbage collecting. # * Improved speed when looking for largest index k s.t. wcum[k] < wmid. # * Improved speed by making use of the R v1.5.0 internal quick sort. # * Made na.rm=FALSE by default for efficiency. # 2002-02-28 # * Calls plain median(x) in two cases: i) all weights are equal, ii) some of # the weights are Inf's. See code for more information. # 2002-02-14 # * BUG FIX: If interpolate==TRUE and sum(lows) == 0 then use k <- 1. # * Added the interpolation version of the weighted median for consistent # estimates. # 2002-02-07 # * Optimized the code for speed. # * Added support for zero and Inf weights. # * Added the 'ties' argument. # * Created! # * Thanks to the following people for helping me out with this one: # - David Brahm, brahm@alum.mit.edu # - David Eppstein, eppstein@ics.uci.edu # - Frank E Harrell Jr, fharrell@virginia.edu # - Markus Jantti, markus.jantti@iki.fi # - Roger Koenker, roger@ysidro.econ.uiuc.edu ############################################################################### matrixStats/R/rowMads.R0000644000175100001440000000545212542546242014567 0ustar hornikusersrowMads <- function(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) { ## BACKWARD COMPATIBILITY: ## - Added to matrixStats 0.14.0. ## - Remove in matrixStats (>= 0.15.0) if (!is.null(centers)) { center <- centers .Deprecated(msg="Argument 'centers' for matrixStats::rowMads() has been renamed to 'center'. Please update code accordingly.") } if (is.null(center)) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) constant = as.numeric(constant) hasNAs <- TRUE x <- .Call("rowMads", x, dim., constant, na.rm, hasNAs, TRUE, PACKAGE="matrixStats") } else { x <- x - center x <- abs(x) x <- rowMedians(x, na.rm=na.rm, ...) x <- constant*x } x } # rowMads() colMads <- function(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) { ## BACKWARD COMPATIBILITY: ## - Added to matrixStats 0.14.0. ## - Remove in matrixStats (>= 0.15.0) if (!is.null(centers)) { center <- centers .Deprecated(msg="Argument 'centers' for matrixStats::colMads() has been renamed to 'center'. Please update code accordingly.") } if (is.null(center)) { dim. <- as.integer(dim.) na.rm <- as.logical(na.rm) constant = as.numeric(constant) hasNAs <- TRUE x <- .Call("rowMads", x, dim., constant, na.rm, hasNAs, FALSE, PACKAGE="matrixStats") } else { ## SLOW: # for (cc in seq(length=ncol(x))) { # x[,cc] <- x[,cc] - center[cc] # } ## FAST: x <- t_tx_OP_y(x, center, OP="-", na.rm=FALSE) x <- abs(x) x <- colMedians(x, na.rm=na.rm, ...) x <- constant*x } x } # colMads() ############################################################################ # HISTORY: # 2015-02-10 [HB] # o CONSISTENCY: Renamed argument 'centers' of col- and rowMads() to # 'center'. This is consistent with col- and rowVars(). Added # backward compatibility code/test for the migration. # 2014-11-17 [HB] # o SPEEDUP: Implemented (col|row)Mads(..., centers=NULL) in native code. # 2012-03-19 [HJ] # o Changed default value of centers to row/colMedians(x,...) # 2012-03-04 [HC] # o BUG FIX: colMads() would return the incorrect estimates. This bug # was introduced in matrixStats v0.4.0 (2011-11-11). # 2011-11-11 [HB] # o Dropped the previously introduced expansion of 'center' in rowMads() # and colMads(). It added unnecessary overhead if not needed. # 2011-10-13 [HJ] # o Implemented colMads() as rowMads() by using the improved colMedians(). # o Now rowMads() expands 'center' to a matrix of the same dimensions as # 'x'. This is not actually necessary, but it enforces that 'x' must be # a matrix. # 2008-03-26 [HB] # o Created. ############################################################################ matrixStats/R/varDiff.R0000644000175100001440000001672712542546242014543 0ustar hornikusers###########################################################################/** # @RdocFunction varDiff # @alias sdDiff # @alias madDiff # @alias iqrDiff # @alias colVarDiffs # @alias rowVarDiffs # @alias colSdDiffs # @alias rowSdDiffs # @alias colMadDiffs # @alias rowMadDiffs # @alias colIQRDiffs # @alias rowIQRDiffs # # @title "Estimation of scale based on sequential-order differences" # # \description{ # @get "title", corresponding to the scale estimates provided by # @see "stats::var", @see "stats::sd", @see "stats::mad" and # @see "stats::IQR". # } # # \usage{ # @usage varDiff # @usage colVarDiffs # @usage rowVarDiffs # # @usage sdDiff # @usage colSdDiffs # @usage rowSdDiffs # # @usage madDiff # @usage colMadDiffs # @usage rowMadDiffs # # @usage iqrDiff # @usage colIQRDiffs # @usage rowIQRDiffs # } # # \arguments{ # \item{x}{A @numeric @vector of length N or a @numeric NxK @matrix.} # \item{na.rm}{If @TRUE, @NAs are excluded, otherwise not.} # \item{diff}{The positional distance of elements for which the # difference should be calculated.} # \item{trim}{A @double in [0,1/2] specifying the fraction of # observations to be trimmed from each end of (sorted) \code{x} # before estimation.} # \item{constant}{A scale factor adjusting for asymptotically # normal consistency.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length 1, length N, or length K. # } # # @author "HB" # # \seealso{ # For the corresponding non-differentiated estimates, see # @see "stats::var", @see "stats::sd", @see "stats::mad" and # @see "stats::IQR". # Internally, @see "diff2" is used which is a faster version # of @see "base::diff". # } # # \details{ # Note that n-order difference MAD estimates, just like the ordinary # MAD estimate by @see "stats::mad", apply a correction factor such # that the estimates are consistent with the standard deviation # under Gaussian distributions. # # The interquartile range (IQR) estimates does \emph{not} apply such # a correction factor. If asymptotically normal consistency is wanted, # the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, # which is half of that used for MAD estimates, which is # \code{1 / qnorm(3/4)}. This correction factor needs to be applied # manually, i.e. there is no \code{constant} argument for the IQR # functions. # } # # \references{ # [1] J. von Neumann et al., \emph{The mean square successive difference}. # Annals of Mathematical Statistics, 1941, 12, 153-162.\cr # } # # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### varDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences=diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n*trim)+1 hi <- (n+1)-lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial=partial) x <- x[lo:hi] } # Estimate var <- var(x, na.rm=FALSE) x <- NULL # Not needed anymore # Correction for the differentiation var / (2^diff) } # varDiff() sdDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 1L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences=diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n*trim)+1 hi <- (n+1)-lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial=partial) x <- x[lo:hi] } # Estimate sd <- sd(x, na.rm=FALSE) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2)^diff) } # sdDiff() madDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, constant=1.4826, ...) { if (na.rm) x <- x[!is.na(x)] # Nothing to do? n <- length(x) if (n <= 0L) return(NA_real_) # Calculate differences? if (diff > 0L) { x <- diff2(x, differences=diff) # Nothing to do? n <- length(x) if (n == 1L) return(NA_real_) } # Trim? if (trim > 0 && n > 0L) { if (anyMissing(x)) return(NA_real_) lo <- floor(n*trim)+1 hi <- (n+1)-lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial=partial) x <- x[lo:hi] } # Estimate sd <- mad(x, na.rm=FALSE, constant=constant, ...) x <- NULL # Not needed anymore # Correction for the differentiation sd / (sqrt(2)^diff) } # madDiff() iqrDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { if(na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } # Calculate differences? if (diff > 0L) { x <- diff2(x, differences=diff) # Nothing to do? n <- length(x) if (n == 1L) return(0) } # Trim? if (trim > 0 && n > 0L) { lo <- floor(n*trim)+1 hi <- (n+1)-lo partial <- unique(c(lo, hi)) x <- sort.int(x, partial=partial) x <- x[lo:hi] } # Estimate qs <- quantile(x, probs=c(0.25, 0.75), na.rm=FALSE, names=FALSE, ...) x <- NULL # Not needed anymore iqr <- (qs[2L] - qs[1L]) # Correction for the differentiation iqr / (sqrt(2)^diff) } # iqrDiff() rowVarDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=1L, FUN=varDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } colVarDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=2L, FUN=varDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } rowSdDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=1L, FUN=sdDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } colSdDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=2L, FUN=sdDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } rowMadDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=1L, FUN=madDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } colMadDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=2L, FUN=madDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } rowIQRDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=1L, FUN=iqrDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } colIQRDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) { apply(x, MARGIN=2L, FUN=iqrDiff, na.rm=na.rm, diff=diff, trim=trim, ...) } ############################################################################ # HISTORY: # 2015-01-16 # o Added iqrDiff() and (col|row)IQRDiffs(). # 2014-14-19 # o Added (col|row)(Var|Sd|Mad)Diffs() for completeness. # 2014-11-10 # o Turned *Diff() into a function. # 2014-05-24 # o Turned *Diff() into an S3 method (was S4). # 2014-04-26 # o Added argument 'trim' to madDiff(), sdDiff() and varDiff(). # 2013-11-23 # o MEMORY: Now *Diff() cleans out allocated objects sooner. # 2012-07-17 # o Added the reference to von Neumann et al. (1941). # 2009-02-02 # o Added Rdoc comments. # 2008-04-13 # o Added varDiff(), sdDiff() and madDiff(). # 2008-04-10 [on UA930 SFO-LHR] # o Created. ############################################################################ matrixStats/R/rowTabulates.R0000644000175100001440000001063512542546242015626 0ustar hornikusers###########################################################################/** # @RdocFunction rowTabulates # @alias colTabulates # # @title "Tabulates the values in a matrix by row (column)" # # \description{ # @get "title". # } # # \usage{ # @usage rowTabulates # @usage colTabulates # } # # \arguments{ # \item{x}{An @integer or @raw NxK @matrix.} # \item{values}{An @vector of J values of count. If @NULL, all (unique) # values are counted.} # \item{...}{Not used.} # } # # \value{ # Returns a NxJ (KxJ) @matrix where # N (K) is the number of row (column) @vectors tabulated and # J is the number of values counted. # } # # @examples "../incl/rowTabulates.Rex" # # @author "HB" # # @keyword utilities #*/########################################################################### rowTabulates <- function(x, values=NULL, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.raw(x)) { } else { stop("Argument 'x' is not of type integer or raw: ", class(x)[1]); } # Argument 'values': if (is.null(values)) { values <- as.vector(x); values <- unique(values); if (is.raw(values)) { values <- as.integer(values); values <- sort(values); # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values); names <- paste("0x", names, sep=""); values <- as.raw(values); } else { values <- sort(values); names <- as.character(values); } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)); names <- paste("0x", names, sep=""); } else { names <- as.character(values); } } nbrOfValues <- length(values); counts <- matrix(0L, nrow=nrow(x), ncol=nbrOfValues); colnames(counts) <- names; for (kk in seq(length=nbrOfValues)) { counts[,kk] <- rowCounts(x, value=values[kk], ...); } counts; } colTabulates <- function(x, values=NULL, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'x': if (is.integer(x)) { } else if (is.raw(x)) { } else { stop("Argument 'x' is not of type integer or raw: ", class(x)[1]); } # Argument 'values': if (is.null(values)) { values <- as.vector(x); values <- unique(values); if (is.raw(values)) { values <- as.integer(values); values <- sort(values); # WORKAROUND: Cannot use "%#x" because it gives an error OSX with # R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20 names <- sprintf("%x", values); names <- paste("0x", names, sep=""); values <- as.raw(values); } else { values <- sort(values); names <- as.character(values); } } else { if (is.raw(values)) { names <- sprintf("%x", as.integer(values)); names <- paste("0x", names, sep=""); } else { names <- as.character(values); } } transpose <- FALSE if (transpose) { ## nbrOfValues <- length(values); ## counts <- matrix(0L, nrow=nbrOfValues, ncol=ncol(x)); ## rownames(counts) <- names; ## for (kk in seq(length=nbrOfValues)) { ## counts[kk,] <- colCounts(x, value=values[kk], ...); ## } } else { nbrOfValues <- length(values); counts <- matrix(0L, nrow=ncol(x), ncol=nbrOfValues); colnames(counts) <- names; for (kk in seq(length=nbrOfValues)) { counts[,kk] <- colCounts(x, value=values[kk], ...); } } counts; } ############################################################################ # HISTORY: # 2014-12-19 [HB] # o CLEANUP: Made col- and rowTabulates() plain R functions. # 2014-11-16 # o Now colTabulates(x) no longer calls rowTabulates(t(x)). # 2014-06-02 # o Made rowTabulates() an S3 method (was S4). # o SPEEDUP: Now rowTabulates() utilizes rowCounts(). # 2009-06-20 # WORKAROUND: Cannot use "%#x" in rowTabulates() when creating the column # names of the result matrix. It gav an error OSX with R v2.9.0 devel # (2009-01-13 r47593b) current the OSX server at R-forge. # 2009-02-02 # o Fixed Rdoc comments. # 2008-07-01 # o Created. ############################################################################ matrixStats/R/weightedMad.R0000644000175100001440000001163612542546242015376 0ustar hornikusers############################################################################/** # @RdocFunction weightedMad # @alias rowWeightedMads # @alias colWeightedMads # # @title "Weighted Median Absolute Deviation (MAD)" # # \usage{ # @usage weightedMad # @usage colWeightedMads # @usage rowWeightedMads # } # # \description{ # Computes a weighted MAD of a numeric vector. # } # # \arguments{ # \item{x}{a @numeric @vector containing the values whose weighted MAD is # to be computed.} # \item{w}{a vector of weights the same length as \code{x} giving the weights # to use for each element of \code{x}. Negative weights are treated # as zero weights. Default value is equal weight to all values.} # \item{na.rm}{a logical value indicating whether @NA values in # \code{x} should be stripped before the computation proceeds, # or not. If @NA, no check at all for @NAs is done. # Default value is @NA (for efficiency).} # \item{constant}{A @numeric scale factor, cf. @see "stats::mad".} # \item{center}{Optional @numeric scalar specifying the center # location of the data. If @NULL, it is estimated from data.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # } # # \section{Missing values}{ # Missing values are dropped at the very beginning, if argument # \code{na.rm} is @TRUE, otherwise not. # } # # @examples "../incl/weightedMad.Rex" # # \seealso{ # For the non-weighted MAD, see @see "stats::mad". # Internally @see "weightedMedian" is used to # calculate the weighted median. # } # # @author "HB" # # @keyword "univar" # @keyword "robust" #*/############################################################################ weightedMad <- function(x, w, na.rm=FALSE, constant=1.4826, center=NULL, ...) { # Argument 'x': n <- length(x); # Argument 'w': if (missing(w)) { # By default use weights that are one. w <- rep(1, times=n); } else if (length(w) != n) { stop("The number of elements in arguments 'w' and 'x' does not match: ", length(w), " != ", n); } # Argument 'na.rm': naValue <- NA; storage.mode(naValue) <- storage.mode(x); # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (w > 0); if (!all(tmp)) { x <- .subset(x, tmp); w <- .subset(w, tmp); n <- length(x); } tmp <- NULL; # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x) & !is.na(w)); x <- .subset(x, keep); w <- .subset(w, keep); n <- length(x); keep <- NULL; # Not needed anymore } else if (anyMissing(x)) { return(naValue); } # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w); if (any(tmp)) { keep <- tmp; x <- .subset(x, keep); n <- length(x); w <- rep(1, times=n); keep <- NULL; # Not needed anymore } tmp <- NULL; # Not needed anymore # Are there any values left to calculate the weighted median of? # This is consistent with how stats::mad() works. if (n == 0L) { return(naValue); } else if (n == 1L) { zeroValue <- 0; storage.mode(zeroValue) <- storage.mode(x); return(zeroValue); } # Estimate the mean? if (is.null(center)) { center <- weightedMedian(x, w=w, na.rm=NA); } # Estimate the standard deviation x <- abs(x - center); sigma <- weightedMedian(x, w=w, na.rm=NA); x <- w <- NULL; # Not needed anymore # Rescale for normal distributions sigma <- constant * sigma; sigma; } # weightedMad() rowWeightedMads <- function(x, w=NULL, na.rm=FALSE, ...) { apply(x, MARGIN=1L, FUN=weightedMad, w=w, na.rm=na.rm, ...) } # rowWeightedMads() colWeightedMads <- function(x, w=NULL, na.rm=FALSE, ...) { apply(x, MARGIN=2L, FUN=weightedMad, w=w, na.rm=na.rm, ...) } # colWeightedMads() ############################################################################ # HISTORY: # 2014-11-10 # o Turned weightedMad() into a plain function. # 2013-11-23 # o MEMORY: Now weightedMad() cleans out allocated objects sooner. # 2013-09-26 # o Now utilizing anyMissing(). # 2012-03-22 # o Added an Rdoc example, which also serves as a redundancy test. # o SPEEDUP: Now weightedMad() lets weightedMedian() know that there are # now missing values remaining. # o Now weightedMad() is smarter about returning early, e.g. if missing # values are not removed, there are none or only one value left. # o Added validation of argument 'w' for weightedMad(). # o Added Rdoc comments to weightedMad(). # o Made weightedMad() into a default method. # 2009-05-13 # o Added weightedMad(). # o Created. ############################################################################ matrixStats/R/rowCounts.R0000644000175100001440000001666312542546242015164 0ustar hornikusers###########################################################################/** # @RdocFunction rowCounts # @alias colCounts # @alias count # @alias allValue # @alias anyValue # @alias rowAnys # @alias colAnys # @alias rowAlls # @alias colAlls # # @title "Counts the number of TRUE values in each row (column) of a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage count # @usage rowCounts # @usage colCounts # @usage rowAlls # @usage colAlls # @usage rowAnys # @usage colAnys # } # # \arguments{ # \item{x}{An NxK @matrix or an N*K @vector.} # \item{value}{A value to search for.} # \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Not used.} # } # # \value{ # \code{rowCounts()} (\code{colCounts()}) returns an @integer @vector # of length N (K). # The other methods returns a @logical @vector of length N (K). # } # # \details{ # @include "../incl/rowNNN_by_vector.Rd" # } # # @examples "../incl/rowCounts.Rex" # # @author "HB" # # @keyword array # @keyword logic # @keyword iteration # @keyword univar #*/########################################################################### rowCounts <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop("Argument 'x' must be a matrix or a vector: ", mode(x)[1L]) } # Argument 'dim.': dim. <- as.integer(dim.) # Argument 'value': if (length(value) != 1L) { stop("Argument 'value' has to be a single value: ", length(value)) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("rowCounts", x, dim., value, 2L, na.rm, hasNAs, PACKAGE="matrixStats") } else { if (is.vector(x)) dim(x) <- dim. if (is.na(value)) { counts <- apply(x, MARGIN=1L, FUN=function(x) sum(is.na(x))) } else { counts <- apply(x, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=na.rm)) } } as.integer(counts) } # rowCounts() colCounts <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { # Argument 'x': if (is.matrix(x)) { } else if (is.vector(x)) { } else { stop("Argument 'x' must be a matrix or a vector: ", mode(x)[1L]) } # Argument 'dim.': dim. <- as.integer(dim.) # Argument 'value': if (length(value) != 1L) { stop("Argument 'value' has to be a single value: ", length(value)) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("colCounts", x, dim., value, 2L, na.rm, hasNAs, PACKAGE="matrixStats") } else { if (is.vector(x)) dim(x) <- dim. if (is.na(value)) { counts <- apply(x, MARGIN=2L, FUN=function(x) sum(is.na(x))) } else { counts <- apply(x, MARGIN=2L, FUN=function(x) sum(x == value, na.rm=na.rm)) } } as.integer(counts) } # colCounts() count <- function(x, value=TRUE, na.rm=FALSE, ...) { # Argument 'x': if (!is.vector(x)) { stop("Argument 'x' must be a vector: ", mode(x)[1L]) } # Argument 'value': if (length(value) != 1L) { stop("Argument 'value' has to be a single value: ", length(value)) } # Coerce 'value' to matrix storage.mode(value) <- storage.mode(x) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Count # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("count", x, value, 2L, na.rm, hasNAs, PACKAGE="matrixStats") } else { if (is.na(value)) { counts <- sum(is.na(x)) } else { counts <- sum(x == value, na.rm=na.rm) } } as.integer(counts) } # count() rowAlls <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("rowCounts", x, dim., value, 0L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { rowAlls(is.na(x), na.rm=na.rm, dim.=dim., ...) } else { rowAlls(x == value, na.rm=na.rm, dim.=dim., ...) } } } colAlls <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("colCounts", x, dim., value, 0L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { colAlls(is.na(x), na.rm=na.rm, dim.=dim., ...) } else { colAlls(x == value, na.rm=na.rm, dim.=dim., ...) } } } allValue <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("count", x, value, 0L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { allValue(is.na(x), na.rm=na.rm, ...) } else { allValue(x == value, na.rm=na.rm, ...) } } } rowAnys <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("rowCounts", x, dim., value, 1L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { rowAnys(is.na(x), na.rm=na.rm, dim.=dim., ...) } else { rowAnys(x == value, na.rm=na.rm, dim.=dim., ...) } } } colAnys <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("colCounts", x, dim., value, 1L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { colAnys(is.na(x), na.rm=na.rm, dim.=dim., ...) } else { colAnys(x == value, na.rm=na.rm, dim.=dim., ...) } } } anyValue <- function(x, value=TRUE, na.rm=FALSE, ...) { if (is.numeric(x) || is.logical(x)) { na.rm <- as.logical(na.rm) hasNAs <- TRUE counts <- .Call("count", x, value, 1L, na.rm, hasNAs, PACKAGE="matrixStats") as.logical(counts) } else { if (is.na(value)) { anyValue(is.na(x), na.rm=na.rm, ...) } else { anyValue(x == value, na.rm=na.rm, ...) } } } ############################################################################ # HISTORY: # 2014-11-14 [HB] # o SPEEDUP: Now colCounts() is implemented in native code. # o CLEANUP: Now (col|row)Count(x) when x is logical utilizes the # same code as as.integer(x). # o As a part of transitioning to plain functions, rowCounts() for matrix # was turned into a default method that understands matrix inputs. # It also understand vector input, if argument 'dim' is given. # 2014-06-02 [HB] # o Made rowCounts() an S3 method (was S4). # o Added argument 'value' to col- and rowCounts(). # 2008-03-25 [HB] # o Created. ############################################################################ matrixStats/R/psortKM.R0000644000175100001440000000060712542546242014547 0ustar hornikusers.psortKM <- function(x, k=length(x), m=1L, ...) { .Call("psortKM", as.numeric(x), k=as.integer(k), m=as.integer(m), PACKAGE="matrixStats"); } # .psortKM() ############################################################################ # HISTORY: # 2012-09-10 # o Added internal .psortKM() method. # o Created. ############################################################################ matrixStats/R/rowDiffs.R0000644000175100001440000000266112542546242014735 0ustar hornikusers###########################################################################/** # @RdocFunction rowDiffs # @alias colDiffs # # @title "Calculates difference for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowDiffs # @usage colDiffs # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{lag}{An @integer specifying the lag.} # \item{differences}{An @integer specifying the order of difference.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric Nx(K-1) or (N-1)xK @matrix. # } # # @examples "../incl/rowDiffs.Rex" # # @author "HB" # # \seealso{ # See also @see "diff2". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowDiffs <- function(x, lag=1L, differences=1L, ...) { .Call("rowDiffs", x, dim(x), as.integer(lag), as.integer(differences), TRUE, PACKAGE="matrixStats") } colDiffs <- function(x, lag=1L, differences=1L, ...) { .Call("rowDiffs", x, dim(x), as.integer(lag), as.integer(differences), FALSE, PACKAGE="matrixStats") } ############################################################################ # HISTORY: # 2014-11-15 [HB] # o SPEEDUP: Now colDiffs(x) no longer uses rowDiffs(t(x)). # 2008-03-26 [HB] # o Created. ############################################################################ matrixStats/R/logSumExp.R0000644000175100001440000000652212542546242015075 0ustar hornikusers###########################################################################/** # @RdocFunction logSumExp # # @title "Accurately computes the logarithm of the sum of exponentials" # # \description{ # @get "title", that is, \eqn{log(sum(exp(lx)))}. # If \eqn{lx = log(x)}, then this is equivalently to calculating # \eqn{log(sum(x))}. # # This function, which avoid numerical underflow, is often used when # computing the logarithm of the sum of small numbers (\eqn{|x| << 1}) # such as probabilities. # } # # @synopsis # # \arguments{ # \item{lx}{A @numeric @vector. # Typically \code{lx} are \eqn{log(x)} values.} # \item{na.rm}{If @TRUE, any missing values are ignored, otherwise not.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # } # # \details{ # This is function is more accurate than \code{log(sum(exp(lx)))} # when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. # The implementation of this function is based on the observation that # \deqn{ # log(a + b) # = [ la = log(a), lb = log(b) ] # = log( exp(la) + exp(lb) ) # = la + log ( 1 + exp(lb - la) ) # } # Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is # less likely that the computation of \eqn{1 + exp(lb - la)} will # not underflow/overflow numerically. Because of this, the overall # result from this function should be more accurate. # Analogously to this, the implementation of this function finds the # maximum value of \code{lx} and subtracts it from the remaining values # in \code{lx}. # } # # \section{Benchmarking}{ # This method is optimized for correctness, that avoiding underflowing. # It is implemented in native code that is optimized for speed and memory. # } # # @examples "../incl/logSumExp.Rex" # # @author "HB" # # \seealso{ # To compute this function on rows or columns of a matrix, # see @see "rowLogSumExps". # # For adding \emph{two} double values in native code, R provides # the C function \code{logspace_add()} [1]. # For properties of the log-sum-exponential function, see [2]. # } # # \references{ # [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr # [2] Laurent El Ghaoui, # \emph{Hyper-Textbook: Optimization Models and Applications}, # University of California at Berkeley, August 2012. # (Chapter 'Log-Sum-Exp (LSE) Function and Properties', # \url{http://inst.eecs.berkeley.edu/~ee127a/book/login/def_lse_fcn.html}) # \cr # [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. # \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr # } # #*/########################################################################### logSumExp <- function(lx, na.rm=FALSE, ...) { hasNA <- TRUE; .Call("logSumExp", as.numeric(lx), as.logical(na.rm), as.logical(hasNA), PACKAGE="matrixStats"); } # logSumExp() ############################################################################## # HISTORY: # 2013-04-30 [HB] # o Added native implementation. # o Renamed to logSumExp(), because that seems to be the naming convention # elsewhere, e.g. Python. # 2013-04-29 [HB] # o Added sumInLogspace(). # o Created. ############################################################################## matrixStats/R/binMeans.R0000644000175100001440000001247412542546242014711 0ustar hornikusers############################################################################/** # @RdocFunction binMeans # # @title "Fast mean calculations in non-overlapping bins" # # @synopsis # # \description{ # Computes the sample means in non-overlapping bins # } # # \arguments{ # \item{y}{A @numeric @vector of K values to calculate means on.} # \item{x}{A @numeric @vector of K positions for to be binned.} # \item{bx}{A @numeric @vector of B+1 ordered positions specifying # the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ..., # \code{[bx[B],bx[B+1])}.} # \item{na.rm}{If @TRUE, missing values in \code{y} are dropped # before calculating the mean, otherwise not.} # \item{count}{If @TRUE, the number of data points in each bins is # returned as attribute \code{count}, which is an @integer @vector # of length B.} # \item{right}{If @TRUE, the bins are right-closed (left open), # otherwise left-closed (right open).} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric @vector of length B. # } # # \details{ # \code{binMeans(x, bx, right=TRUE)} gives equivalent results as # \code{rev(binMeans(-x, bx=sort(-bx), right=FALSE))}, but is faster. # } # # \section{Missing and non-finite values}{ # Data points where either of \code{y} and \code{x} is missing are # dropped (and therefore are also not counted). # Non-finite values in \code{y} are not allowed and gives an error. # Missing values in \code{bx} are not allowed and gives an error. # } # # \section{Empty bins}{ # Empty bins will get value @NaN. # } # # @examples "../incl/binMeans.Rex" # # \seealso{ # @see "binCounts". # @see "stats::aggregate" and @see "base::mean". # } # # \references{ # [1] R-devel thread \emph{Fastest non-overlapping binning mean function # out there?} on Oct 3, 2012\cr # } # # \author{ # Henrik Bengtsson with initial code contributions by Martin Morgan [1]. # } # # @keyword "univar" #*/############################################################################ binMeans <- function(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'y': if (!is.numeric(y)) { stop("Argument 'y' is not numeric: ", mode(y)); } if (any(is.infinite(y))) { stop("Argument 'y' must not contain Inf values."); } n <- length(y); # Argument 'x': if (!is.numeric(x)) { stop("Argument 'x' is not numeric: ", mode(x)); } if (length(x) != n) { stop("Argument 'y' and 'x' are of different lengths: ", length(y), " != ", length(x)); } # Argument 'bx': if (!is.numeric(bx)) { stop("Argument 'bx' is not numeric: ", mode(bx)); } if (any(is.infinite(bx))) { stop("Argument 'bx' must not contain Inf values."); } if (is.unsorted(bx)) { stop("Argument 'bx' is not ordered."); } # Argument 'na.rm': if (!is.logical(na.rm)) { stop("Argument 'na.rm' is not logical: ", mode(na.rm)); } # Argument 'count': if (!is.logical(count)) { stop("Argument 'count' is not logical: ", mode(count)); } # Argument 'right': right <- as.logical(right); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Preprocessing of (x,y) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values in 'x' keep <- which(!is.na(x)); if (length(keep) < n) { x <- x[keep]; y <- y[keep]; n <- length(y); } keep <- NULL; # Not needed anymore # Drop missing values in 'y'? if (na.rm) { keep <- which(!is.na(y)); if (length(keep) < n) { x <- x[keep]; y <- y[keep]; } keep <- NULL; # Not needed anymore } # Order (x,y) by increasing x. # If 'x' is already sorted, the overhead of (re)sorting is # relatively small. x <- sort.int(x, method="quick", index.return=TRUE); y <- y[x$ix]; x <- x$x; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bin # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- as.numeric(y); x <- as.numeric(x); bx <- as.numeric(bx); count <- as.logical(count); .Call("binMeans", y, x, bx, count, right, PACKAGE="matrixStats"); } # binMeans() ############################################################################ # HISTORY: # 2014-12-29 [HB] # o SPEEDUP: Now binCounts() and binMeans() uses is.unsorted() instead # of o <- order(); any(diff(o) != 1L). # 2014-12-17 [HB] # o CLEANUP: Made binCounts() and binMeans() plain R functions. # 2013-11-23 [HB] # o MEMORY: binMeans() cleans out more temporary variables as soon as # possible such that the garbage collector can remove them sooner. # 2013-05-10 [HB] # o SPEEDUP: Now binMeans() and binCounts() use Hoare's Quicksort # method for sorting 'x'. # 2012-10-04 [HB in Anahola] # o Added argument 'na.rm' to binMeans(). # o Updated Rdocs. # 2012-10-03 [HB] # o Added binMeans() based on native code adopted from code by # Martin Morgan, Fred Hutchinson Cancer Research Center, Seattle. # o Created. ############################################################################ matrixStats/R/999.package.R0000644000175100001440000000126312542546242015073 0ustar hornikusers#########################################################################/** # @RdocPackage matrixStats # # \description{ # @eval "packageDescription('matrixStats')$Description" # } # # \section{Installation}{ # To install this package, please do: # \preformatted{ # install.packages("matrixStats") # } # } # # \section{Vignettes}{ # For an overview of the package, see the '\href{../doc/index.html}{vignettes}'; # \enumerate{ # \item Summary of functions. # } # } # # \section{How to cite this package}{ # @eval "x <- citation('matrixStats'); format(x, 'textVersion')" # } # # @author "*" #*/######################################################################### matrixStats/R/weightedVar.R0000644000175100001440000001127212542546242015421 0ustar hornikusers############################################################################/** # @RdocFunction weightedVar # @alias weightedSd # @alias colWeightedVars # @alias rowWeightedVars # @alias colWeightedSds # @alias rowWeightedSds # # @title "Weighted variance and weighted standard deviation" # # \usage{ # @usage weightedVar # @usage colWeightedVars # @usage rowWeightedVars # # @usage weightedSd # @usage colWeightedSds # @usage rowWeightedSds # } # # # \description{ # Computes a weighted variance / standard deviation of a numeric # vector or across rows or columns of a matrix. # } # # \arguments{ # \item{x}{a @numeric @vector containing the values whose # weighted variance is to be computed.} # \item{w}{a vector of weights the same length as \code{x} giving the weights # to use for each element of \code{x}. Negative weights are treated # as zero weights. Default value is equal weight to all values.} # \item{na.rm}{a logical value indicating whether @NA values in # \code{x} should be stripped before the computation proceeds, # or not. If @NA, no check at all for @NAs is done. # Default value is @NA (for efficiency).} # \item{center}{Optional @numeric scalar specifying the center # location of the data. If @NULL, it is estimated from data.} # \item{...}{Not used.} # } # # \value{ # Returns a @numeric scalar. # } # # \section{Missing values}{ # Missing values are dropped at the very beginning, if argument # \code{na.rm} is @TRUE, otherwise not. # } # # \seealso{ # For the non-weighted variance, see @see "stats::var". # } # # @author "HB" # # @keyword "univar" # @keyword "robust" #*/############################################################################ weightedVar <- function(x, w, na.rm=FALSE, center=NULL, ...) { # Argument 'x': n <- length(x); # Argument 'w': if (missing(w)) { # By default use weights that are one. w <- rep(1, times=n); } else if (length(w) != n) { stop("The number of elements in arguments 'w' and 'x' does not match: ", length(w), " != ", n); } # Argument 'na.rm': naValue <- NA; storage.mode(naValue) <- storage.mode(x); # Remove values with zero (and negative) weight. This will: # 1) take care of the case when all weights are zero, # 2) it will most likely speed up the sorting. tmp <- (w > 0); if (!all(tmp)) { x <- .subset(x, tmp); w <- .subset(w, tmp); n <- length(x); } tmp <- NULL; # Not needed anymore # Drop missing values? if (na.rm) { keep <- which(!is.na(x) & !is.na(w)); x <- .subset(x, keep); w <- .subset(w, keep); n <- length(x); keep <- NULL; # Not needed anymore } else if (anyMissing(x)) { return(naValue); } # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. tmp <- is.infinite(w); if (any(tmp)) { keep <- tmp; x <- .subset(x, keep); n <- length(x); w <- rep(1, times=n); keep <- NULL; # Not needed anymore } tmp <- NULL; # Not needed anymore # Are there any values left to calculate the weighted median of? # This is consistent with how stats::mad() works. if (n == 0L) { return(naValue); } else if (n == 1L) { zeroValue <- 0; storage.mode(zeroValue) <- storage.mode(x); return(zeroValue); } # Standardize weights to sum to one w <- w / sum(w); # Estimate the mean? if (is.null(center)) { center <- sum(w*x); } # Estimate the variance x <- x - center; # Residuals x <- x^2; # Squared residuals sigma2 <- sum(w*x) * (n / (n-1L)) x <- w <- NULL; # Not needed anymore sigma2; } # weightedVar() weightedSd <- function(...) { sqrt(weightedVar(...)) } # weightedSd() rowWeightedVars <- function(x, w=NULL, na.rm=FALSE, ...) { apply(x, MARGIN=1L, FUN=weightedVar, w=w, na.rm=na.rm, ...) } # rowWeightedVars() colWeightedVars <- function(x, w=NULL, na.rm=FALSE, ...) { apply(x, MARGIN=2L, FUN=weightedVar, w=w, na.rm=na.rm, ...) } # colWeightedVars() rowWeightedSds <- function(x, w=NULL, na.rm=FALSE, ...) { sqrt(rowWeightedVars(x=x, w=w, na.rm=na.rm, ...)) } # rowWeightedSds() colWeightedSds <- function(x, w=NULL, na.rm=FALSE, ...) { sqrt(colWeightedVars(x=x, w=w, na.rm=na.rm, ...)) } # colWeightedSds() ############################################################################ # HISTORY: # 2014-11-10 # o Turned weightedSd() and weightedVar() into plain function. # 2014-03-26 # o Created from weightedMad.R. ############################################################################ matrixStats/R/rowSds.R0000644000175100001440000000363412542546242014434 0ustar hornikusers###########################################################################/** # @RdocFunction rowSds # @alias rowSds # @alias colSds # @alias rowMads # @alias colMads # \alias{rowSds,matrix-method} # \alias{colSds,matrix-method} # # @title "Standard deviation estimates for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowSds # @usage colSds # @usage rowMads # @usage colMads # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{center}{A optional @numeric @vector of length N (K) with centers. # By default, they are calculated using @see "rowMedians".} # \item{constant}{A scale factor. See @see "stats::mad" for details.} # \item{na.rm}{If @TRUE, missing values are removed first, otherwise not.} # \item{dim.}{An @integer @vector of length two specifying the # dimension of \code{x}, also when not a @matrix.} # \item{...}{Additional arguments passed to @see "rowVars" and # @see "rowMedians", respectively.} # \item{centers}{(deprectated) use \code{center} instead.} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # @author "HB" # # \seealso{ # @see "stats::sd", @see "stats::mad" and \code{\link[stats:cor]{var}}. # @see "rowIQRs". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowSds <- function(x, ...) { x <- rowVars(x, ...); sqrt(x); } colSds <- function(x, ...) { x <- colVars(x, ...); sqrt(x); } ############################################################################ # HISTORY: # 2012-03-19 [HC] # o Changed description of centers argument to rowMads and colMads # 2008-03-26 [HB] # o Created from genefilter::rowVars() by Wolfgang Huber. ############################################################################ matrixStats/R/zzz.R0000644000175100001440000000047012542546242014003 0ustar hornikusers.onUnload <- function (libpath) { library.dynam.unload("matrixStats", libpath) } .onAttach <- function(libname, pkgname) { ## covr: skip=3 pd <- utils::packageDescription(pkgname); pkgStartupMessage(pkgname, " v", pd$Version, " (", pd$Date, ") successfully loaded. See ?", pkgname, " for help."); } matrixStats/R/anyMissing.R0000644000175100001440000000506012542546242015267 0ustar hornikusers###########################################################################/** # @RdocFunction anyMissing # \alias{colAnyMissings} # \alias{rowAnyMissings} # # @title "Checks if there are any missing values in an object or not" # # \description{ # @get "title". # } # # \usage{ # @usage anyMissing # @usage colAnyMissings # @usage rowAnyMissings # } # # \arguments{ # \item{x}{A @vector, a @list, a @matrix, a @data.frame, or @NULL.} # \item{...}{Not used.} # } # # \value{ # Returns @TRUE if a missing value was detected, otherwise @FALSE. # } # # \details{ # The implementation of this method is optimized for both speed and memory. # The method will return @TRUE as soon as a missing value is detected. # } # # \examples{ # x <- rnorm(n=1000) # x[seq(300,length(x),by=100)] <- NA # stopifnot(anyMissing(x) == any(is.na(x))) # } # # @author "HB" # # \seealso{ # Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, # which provides the same functionality as this function. # } # # @keyword iteration # @keyword logic #*/########################################################################### anyMissing <- function(x, ...) { ## All list or a data.frame? if (is.list(x)) { for (kk in seq(along=x)) { if (.Call("anyMissing", x[[kk]], PACKAGE="matrixStats")) return(TRUE) } return(FALSE) } else { ## All other data types .Call("anyMissing", x, PACKAGE="matrixStats") } } colAnyMissings <- function(x, ...) { colAnys(x, value=NA, ...) } rowAnyMissings <- function(x, ...) { rowAnys(x, value=NA, ...) } ############################################################################ # HISTORY: # 2015-02-10 # o CLEANUP: anyMissing() is no longer an S4 generic, cf. base::anyNA(). # 2015-01-20 # o CLEANUP: In the effort of migrating anyMissing() into a plain R # function, specific anyMissing() implementations for data.frame:s and # and list:s were dropped and is now handled by anyMissing() for "ANY". # 2014-12-08 # o Added (col|row)AnyMissings(). # 2013-09-26 # o Added help reference to base::anyNA(). # 2013-01-13 # o Added anyMissing() for raw, which always returns FALSE. # 2008-03-25 # o Added anyMissing() for matrices, data.frames, lists and NULL. # o Added anyMissing() for numeric, logical, complex and character. # o Made anyMissing() into an S4 method. # 2007-08-14 # o Created. See also R-devel thread "hasNA()/anyNA()?" on 2007-08-13. ############################################################################ matrixStats/R/rowIQRs.R0000644000175100001440000000520612542546242014516 0ustar hornikusers###########################################################################/** # @RdocFunction rowIQRs # @alias colIQRs # @alias iqr # # @title "Estimates of the interquartile range for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowIQRs # @usage colIQRs # @usage iqr # } # # \arguments{ # \item{x}{A @numeric NxK @matrix.} # \item{na.rm}{If @TRUE, missing values are dropped first, otherwise not.} # \item{...}{Additional arguments passed to @see "rowQuantiles" # (\code{colQuantiles()}).} # } # # \value{ # Returns a @numeric @vector of length N (K). # } # # \section{Missing values}{ # Contrary to @see "stats::IQR", which gives an error if there are missing # values and \code{na.rm=FALSE}, \code{iqr()} and its corresponding row and # column-specific functions return @NA_real_. # } # # @examples "../incl/rowIQRs.Rex" # # @author "HB" # # \seealso{ # See @see "stats::IQR". # See @see "rowSds". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowIQRs <- function(x, na.rm=FALSE, ...) { Q <- rowQuantiles(x, probs=c(0.25, 0.75), na.rm=na.rm, drop=FALSE, ...) ans <- Q[,2L,drop=TRUE] - Q[,1L,drop=TRUE] # Remove attributes attributes(ans) <- NULL ans } colIQRs <- function(x, na.rm=FALSE, ...) { Q <- colQuantiles(x, probs=c(0.25, 0.75), na.rm=na.rm, drop=FALSE, ...) ans <- Q[,2L,drop=TRUE] - Q[,1L,drop=TRUE] # Remove attributes attributes(ans) <- NULL ans } iqr <- function(x, na.rm=FALSE, ...) { if(na.rm) { x <- x[!is.na(x)] } else if (anyMissing(x)) { return(NA_real_) } # At this point, there should be no missing values # Nothing to do? n <- length(x) if (n == 0L) { return(NA_real_) } else if (n == 1L) { return(0) } q <- quantile(x, probs=c(0.25, 0.75), names=FALSE, na.rm=FALSE, ...) q[2L] - q[1L] } ############################################################################ # HISTORY: # 2015-01-16 # o Now iqr(..., na.rm=FALSE) returns NA_real_ if there are missing values. # 2015-01-11 # o Now iqr() no longer returns a named value. # 2014-12-19 # o Added iqr(). # 2011-11-25 # o Added help and example to rowIQRs() and colIQRs(). # o BUG FIX: rowIQRs() and colIQRs() would return the 25% and the 75% # quantiles, not the difference between them. Thanks Pierre Neuvial # at CNRS, Evry, France for the report. # 2008-03-26 [HB] # o Created. ############################################################################ matrixStats/R/rowQuantiles.R0000644000175100001440000001515312542546242015647 0ustar hornikusers###########################################################################/** # @RdocFunction rowQuantiles # @alias colQuantiles # # @title "Estimates quantiles for each row (column) in a matrix" # # \description{ # @get "title". # } # # \usage{ # @usage rowQuantiles # @usage colQuantiles # } # # \arguments{ # \item{x}{A @numeric NxK @matrix with N >= 0.} # \item{probs}{A @numeric @vector of J probabilities in [0,1].} # \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.} # \item{type}{An @integer specify the type of estimator. # See @see "stats::quantile" for more details.} # \item{...}{Additional arguments passed to @see "stats::quantile".} # \item{drop}{If TRUE, singleton dimensions in the result are dropped, # otherwise not.} # } # # \value{ # Returns a @numeric NxJ (KxJ) @matrix, where # N (K) is the number of rows (columns) for which the J quantiles are # calculated. # } # # @examples "../incl/rowQuantiles.Rex" # # @author "HB" # # \seealso{ # @see "stats::quantile". # } # # @keyword array # @keyword iteration # @keyword robust # @keyword univar #*/########################################################################### rowQuantiles <- function(x, probs=seq(from=0, to=1, by=0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) { # Argument 'probs': if (anyMissing(probs)) { stop("Argument 'probs' contains missing values") } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop("Argument 'probs' is out of range [0-eps,1+eps]") } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) if (nrow > 0L && ncol > 0L) { naRows <- rowAnyMissings(x) hasNA <- any(naRows) if (!hasNA) na.rm <- FALSE if (!hasNA && type == 7L) { n <- ncol idxs <- 1 + (n-1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort(unique(c(idxs_lo, idxs_hi))) xp <- apply(x, MARGIN=1L, FUN=sort, partial=partial) if (is.null(dim(xp))) dim(xp) <- c(1L, length(xp)) q <- apply(xp, MARGIN=2L, FUN=.subset, idxs_lo) if (is.null(dim(q))) dim(q) <- c(1L, length(q)) # Adjust idxs_adj <- which(idxs > idxs_lo) if (length(idxs_adj) > 0L) { qL <- q[idxs_adj,,drop=FALSE] idxs_hi <- idxs_hi[idxs_adj] qH <- apply(xp, MARGIN=2L, FUN=.subset, idxs_hi) w <- (idxs - idxs_lo)[idxs_adj] q[idxs_adj,] <- (1-w)*qL + w*qH # Not needed anymore xp <- qL <- qH <- NULL } # Backward compatibility q <- t(q) } else { # Allocate result naValue <- NA_real_ storage.mode(naValue) <- storage.mode(x) q <- matrix(naValue, nrow=nrow, ncol=length(probs)) # For each row... rows <- seq_len(nrow) # Rows with NAs should return all NAs (so skip those) if (hasNA && !na.rm) rows <- rows[!naRows] for (kk in rows) { xkk <- x[kk,] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk,] <- quantile(xkk, probs=probs, na.rm=FALSE, type=type, ...) } } # if (type ...) } else { naValue <- NA_real_ storage.mode(naValue) <- storage.mode(x) q <- matrix(naValue, nrow=nrow, ncol=length(probs)) } # Add names digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100*probs) # Drop singleton dimensions? if (drop) { q <- drop(q) } q } # rowQuantiles() colQuantiles <- function(x, probs=seq(from=0, to=1, by=0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) { # Argument 'probs': if (anyMissing(probs)) { stop("Argument 'probs' contains missing values") } eps <- 100 * .Machine$double.eps if (any((probs < -eps | probs > 1 + eps))) { stop("Argument 'probs' is out of range [0-eps,1+eps]") } # Argument 'x': nrow <- nrow(x) ncol <- ncol(x) if (nrow > 0L && ncol > 0L) { naCols <- colAnyMissings(x) hasNA <- any(naCols) if (!hasNA) na.rm <- FALSE if (!hasNA && type == 7L) { n <- nrow idxs <- 1 + (n-1) * probs idxs_lo <- floor(idxs) idxs_hi <- ceiling(idxs) partial <- sort(unique(c(idxs_lo, idxs_hi))) xp <- apply(x, MARGIN=2L, FUN=sort, partial=partial) if (is.null(dim(xp))) dim(xp) <- c(1L, length(xp)) q <- apply(xp, MARGIN=2L, FUN=.subset, idxs_lo) if (is.null(dim(q))) dim(q) <- c(1L, length(q)) # Adjust idxs_adj <- which(idxs > idxs_lo) if (length(idxs_adj) > 0L) { qL <- q[idxs_adj,,drop=FALSE] idxs_hi <- idxs_hi[idxs_adj] qH <- apply(xp, MARGIN=2L, FUN=.subset, idxs_hi) w <- (idxs - idxs_lo)[idxs_adj] q[idxs_adj,] <- (1-w)*qL + w*qH # Not needed anymore xp <- qL <- qH <- NULL } # Backward compatibility q <- t(q) } else { # Allocate result naValue <- NA_real_ storage.mode(naValue) <- storage.mode(x) q <- matrix(naValue, nrow=ncol, ncol=length(probs)) # For each column... cols <- seq_len(ncol) # Columns with NAs should return all NAs (so skip those) if (hasNA && !na.rm) cols <- cols[!naCols] for (kk in cols) { xkk <- x[,kk] if (na.rm) xkk <- xkk[!is.na(xkk)] q[kk,] <- quantile(xkk, probs=probs, na.rm=FALSE, type=type, ...) } } # if (type ...) } else { naValue <- NA_real_ storage.mode(naValue) <- storage.mode(x) q <- matrix(naValue, nrow=ncol, ncol=length(probs)) } # Add names digits <- max(2L, getOption("digits")) colnames(q) <- sprintf("%.*g%%", digits, 100*probs) # Drop singleton dimensions? if (drop) { q <- drop(q) } q } ############################################################################ # HISTORY: # 2015-01-26 # o CONSISTENCY: Now rowQuantiles(x, na.rm=TRUE) returns all NAs for rows # with missing values. Analogously for colQuantiles(). Previously, an # error was thrown saying missing values are not allowed. # 2014-11-18 [HB] # o SPEEDUP: Made (col|row)Quantiles(x) faster for 'x' without missing # values (and default type=7L quantiles). # 2014-11-16 [HB] # o SPEEDUP: colQuantiles(x) is no longer using colQuantiles(t(x)). # 2013-07-29 [HB] # o DOCUMENTATION: The dimension of the return value was swapped # in help("rowQuantiles"). Noticed by PL. # 2011-11-29 [HB] # o Added an Rdoc example. # 2010-10-06 [HB] # o Now the result of {row|col}Quantiles() contains column names. # 2008-03-26 [HB] # o Created. ############################################################################ matrixStats/vignettes/0000755000175100001440000000000012542546311014626 5ustar hornikusersmatrixStats/vignettes/matrixStats-methods.md.rsp0000644000175100001440000002060612542546242021746 0ustar hornikusers<%@meta language="R-vignette" content="-------------------------------- DIRECTIVES FOR R: %\VignetteIndexEntry{matrixStats: Summary of functions} %\VignetteAuthor{Henrik Bengtsson} %\VignetteKeyword{matrix} %\VignetteKeyword{vector} %\VignetteKeyword{apply} %\VignetteKeyword{rows} %\VignetteKeyword{columns} %\VignetteKeyword{memory} %\VignetteKeyword{speed} %\VignetteEngine{R.rsp::rsp} %\VignetteTangle{FALSE} --------------------------------------------------------------------"%> <% pkgName <- "matrixStats" library(pkgName, character.only=TRUE) ns <- getNamespace(pkgName) env <- as.environment(sprintf("package:%s", pkgName)) R.utils::use("R.utils") kable <- function(df, ...) { fcns <- as.character(df$Functions) fcns <- strsplit(fcns, split=",") fcns <- sapply(fcns, FUN=function(names) { names <- trim(names) ok <- sapply(names, FUN=exists, envir=ns, mode="function") names[ok] <- sprintf("%s()", names[ok]) names[!ok] <- sprintf("~~%s()~~", names[!ok]) names <- paste(names, collapse=", ") }) df$Functions <- fcns df$Example <- sprintf("`%s`", df$Example) print(knitr::kable(df, ..., format="markdown")) } # Find all functions all <- ls(envir=env) keep <- sapply(all, FUN=function(name) { is.function(get(name, envir=env)) }) all <- all[keep] keep <- !grepl("[.]([^.]*)$", all) all <- all[keep] # Hidden functions skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet") skip <- c(skip, "allocArray", "allocMatrix", "allocVector") all <- setdiff(all, skip) # Column and row functions crfcns <- grep("^(col|row)", all, value=TRUE) # Vector functions vfcns <- setdiff(all, crfcns) %> # <%@meta name="title"%> <% pkg <- R.oo::Package(pkgName) %> <%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%> <% fcns <- crfcns base <- gsub("^(col|row)", "", fcns) groups <- tapply(fcns, base, FUN=list) stopifnot(all(sapply(groups, FUN=length) == 2L)) groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L) %> <%--- ## Functions that apply to column and rows of matrices ``` <% print(fcns) %> ``` ---%> <% fcns <- vfcns %> <%--- ## Functions that apply to vectors ``` <% print(fcns) %> ``` ---%> ## Location and scale estimators <% tbl <- NULL row <- data.frame( "Estimator" = "Weighted sample mean", "Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans", "Example" = "weightedMean(x, w); rowWeightedMeans(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median", "Functions" = "median, colMedians, rowMedians", "Example" = "median(x); rowMedians(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median", "Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians", "Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance", "Functions" = "var, colVars, rowVars", "Example" = "var(x); rowVars(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample variance", "Functions" = "weightedVar, colWeightedVars, rowWeightedVars", "Example" = "weightedVar(x, w), rowWeightedVars(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample variance by n-order differences", "Functions" = "varDiff, colVarDiffs, rowVarDiffs", "Example" = "varDiff(x); rowVarDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation", "Functions" = "sd, colSds, rowSds", "Example" = "sd(x); rowSds(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted sample deviation", "Functions" = "weightedSd, colWeightedSds, rowWeightedSds", "Example" = "weightedSd(x, w), rowWeightedSds(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample standard deviation by n-order differences", "Functions" = "sdDiff, colSdDiffs, rowSdDiffs", "Example" = "sdDiff(x); rowSdDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD)", "Functions" = "mad, colMads, rowMads", "Example" = "mad(x); rowMads(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Weighted median absolute deviation (MAD)", "Functions" = "weightedMad, colWeightedMads, rowWeightedMads", "Example" = "weightedMad(x, w), rowWeightedMads(x, w)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Median absolute deviation (MAD) by n-order differences", "Functions" = "madDiff, colMadDiffs, rowMadDiffs", "Example" = "madDiff(x); rowMadDiffs()" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Quantile", "Functions" = "quantile, colQuantiles, rowQuantiles", "Example" = "quantile(x, probs); rowQuantiles(x, probs)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR)", "Functions" = "iqr, colIQRs, rowIQRs", "Example" = "iqr(x); rowIQRs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Interquartile range (IQR) by n-order differences", "Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs", "Example" = "iqrDiff(x); rowIQRDiffs(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Range", "Functions" = "range, colRanges, rowRanges", "Example" = "range(x); rowRanges(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Minimum", "Functions" = "min, colMins, rowMins", "Example" = "min(x); rowMins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Maximum", "Functions" = "max, colMaxs, rowMaxs", "Example" = "max(x); rowMaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Testing for and counting values <% tbl <- NULL row <- data.frame( "Operator" = "Are there any missing values?", "Functions" = "anyMissing, colAnyMissings, rowAnyMissings", "Example" = "anyMissing(x); rowAnyMissings(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does TRUE exists?", "Functions" = "any, colAnys, rowAnys", "Example" = "any(x); rowAnys(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Are all values TRUE?", "Functions" = "all, colAlls, rowAlls", "Example" = "all(x); rowAlls(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Does value exists?", "Functions" = "anyValue, colAnys, rowAnys", "Example" = "anyValue(x, value); rowAnys(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Do all element have a given value?", "Functions" = "allValue, colAlls, rowAlls", "Example" = "allValue(x, value); rowAlls(x, value)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Number of occurrences of a value?", "Functions" = "count, colCounts, rowCounts", "Example" = "count(x, value); rowCounts(x, value)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Cumulative functions <% tbl <- NULL row <- data.frame( "Operator" = "Cumulative sum", "Functions" = "cumsum, colCumsums, rowCumsums", "Example" = "cumsum(x); rowCumsums(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative product", "Functions" = "cumprod, colCumprods, rowCumprods", "Example" = "cumprod(x); rowCumprods(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative minimum", "Functions" = "cummin, colCummins, rowCummins", "Example" = "cummin(x); rowCummins(x)" ) tbl <- rbind(tbl, row) row <- data.frame( "Operator" = "Cumulative maximum", "Functions" = "cummax, colCummaxs, rowCummaxs", "Example" = "cummax(x); rowCummaxs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Binning <% tbl <- NULL row <- data.frame( "Estimator" = "Counts in disjoint bins", "Functions" = "binCounts", "Example" = "binCounts(x, bx)" ) tbl <- rbind(tbl, row) row <- data.frame( "Estimator" = "Sample means (and counts) in disjoint bins", "Functions" = "binMeans", "Example" = "binMeans(y, x, bx)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ## Miscellaneous <% tbl <- NULL row <- data.frame( "Operation" = "Lagged differences", "Functions" = c("diff2, colDiffs, rowDiffs"), "Example" = "diff2(x), rowDiffs(x)" ) tbl <- rbind(tbl, row) %> <% kable(tbl) %> ------------------------------------------------------------- <%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](http://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>). matrixStats/MD50000644000175100001440000003252412542554541013140 0ustar hornikusersdab7f205c5bcaeaad15e117d08c73599 *DESCRIPTION 42f6507f74264f16d54851706a146a8e *NAMESPACE 1049916234ad2c20de78d7ecdfdc99f8 *NEWS e11dfd66d9a104fbc5598c52bd2f516d *R/999.package.R bc8d37bc4988831234eea6d7d6066e4a *R/allocMatrix.R 73feb0716c01b707b332fb56b0ea14c9 *R/anyMissing.R 93d3ad6fb517ebf2f4011fd0f21903d9 *R/benchmark.R fa58bfb1e01279b6f9c8442463bbee0a *R/binCounts.R 9815c4e21f5374bc7568fc983f46dc3f *R/binMeans.R 00f9f170fcc64e4d4019b8df23b961e6 *R/diff2.R 3ae809e6c90777039d408aba05934f7e *R/indexByRow.R 351c0d25c7870c255224b76477f9414b *R/logSumExp.R 907f12741c8541f5eda282e69881cae9 *R/meanOver.R 51602457a9df5aecf0ef4755ec0cf546 *R/pkgStartupMessage.R a4582e300fa9ef7a75493edfa0ef528b *R/product.R 934740512f4b37d67740ae31ab288251 *R/psortKM.R 8801c5bb7366daede4acd34d34fe384a *R/rowAvgsPerColSet.R 9a483696d5b2b81b0be2a646b2a448f5 *R/rowCollapse.R ef31b87fb36b51431cc5e8bf8f6fe41f *R/rowCounts.R 738a09811f93fecc48c6d6a430795820 *R/rowCumsums.R 30ee83aa0f04a8437a20578153b5e0db *R/rowDiffs.R 054299075b22b9c24b9c5eb7f5e62271 *R/rowIQRs.R a0bad869fa6811da08df8d4e2aef98f0 *R/rowLogSumExps.R 75cf6345a14478c4bae92347fd9bc558 *R/rowMads.R 2cc6857bb0199e06abecb738e956eb40 *R/rowMedians.S4.R 22942d09b53a6e830eb6aa87252fa6bb *R/rowOrderStats.R 5df083adc25dd0cacfee4813328edd4f *R/rowProds.R e4bdeeee5c7aaf98cf841553245bc058 *R/rowQuantiles.R 2c9e5f27450a8814a21a1d92465fbad8 *R/rowRanges.R f270ced4c3ea9fa78f3464b6838dda26 *R/rowRanks.R 640689af849a8249df850fc4ed105dce *R/rowSds.R 5423ef502727587e71135b2efa8392d9 *R/rowTabulates.R bcebec7173597479ddcdd6534d5a4310 *R/rowVars.R 15998fc70fa30c96861e93a96393b2b4 *R/rowWeightedMeans.R c174648188253fb8b6a955e973b5d44b *R/rowWeightedMedians.R 8bb515dea9f9fd9a09756c926be9f023 *R/signTabulate.R 5ded10f28438fc575f28aadb3329fb40 *R/sumOver.R b1b14cc21e3acdd5bc4cf12359ced39d *R/varDiff.R f3e211bee40bb36de2cb0bebfa957232 *R/weightedMad.R 8d62aa96333f44a71f88988c0bdd0a84 *R/weightedMean.R c2a4666b83e496e70ae705c24c4a1a2c *R/weightedMedian.R 97954e9842b4395ae9b3f870770849e7 *R/weightedVar.R 4b547323019375a9fc89ff14675675cc *R/x_OP_y.R c07b43bd895eab9749e812b1870baf95 *R/zzz.R dfc2bd98cb36498deed1e78cd414d3d2 *build/vignette.rds ca0c103b1be9f97f60c0623aea65f5c2 *inst/benchmarking/R/random-matrices.R 4a38dc3c38475a36786249204cf20588 *inst/benchmarking/R/random-vectors.R 7256378a1eb54d9d41e0d075a11b3612 *inst/benchmarking/allocMatrix.md.rsp e5a5e47c87dad6280a47d91e1cbdf46a *inst/benchmarking/allocVector.md.rsp c4e95805c1acbd37aab9d7ec7de023d7 *inst/benchmarking/anyMissing.md.rsp 330dac464260cce1864cacd0ecf73f26 *inst/benchmarking/binCounts.md.rsp 7b71826081ac00e187f63ab9821a8d8e *inst/benchmarking/binMeans.md.rsp 2955e6a91517fd43ad799e52824176ba *inst/benchmarking/colRowAlls.md.rsp 861057a45857a4626c6179dba6cadd0c *inst/benchmarking/colRowAnyMissings.md.rsp 98a3d92a9b8217cb3d4eb73c3ae91eed *inst/benchmarking/colRowAnys.md.rsp 4c5523efdcf9c92b073a5ade831c331a *inst/benchmarking/colRowCounts.md.rsp 82c16b2f37c797888940e4bd02d866ce *inst/benchmarking/colRowCummins.md.rsp aa66215e97c52a6d18f89775e3b27fca *inst/benchmarking/colRowCumprods.md.rsp 590182276674b77503d28a6be8d1ccfa *inst/benchmarking/colRowCumsums.md.rsp 3c876337a72df11a91441bea1761f115 *inst/benchmarking/colRowDiffs.md.rsp a6523b452344ef2b78073d50fe9a534c *inst/benchmarking/colRowLogSumExps.md.rsp f3baebca9ee8ad627605269e996dffb4 *inst/benchmarking/colRowMads.md.rsp eccd2ee0c78f2eb286aa4d110a0558a1 *inst/benchmarking/colRowMeans.md.rsp 68e0230c9aa586c216cf1a83094c3acc *inst/benchmarking/colRowMedians.md.rsp f870694f8d86741fb3194661879db142 *inst/benchmarking/colRowMins.md.rsp dd1f69270726961bc78cb2e723495610 *inst/benchmarking/colRowOrderStats.md.rsp f0647a93c27525f62e8b5250001b2a37 *inst/benchmarking/colRowProds.md.rsp 45789c523622419efbc47059b6574273 *inst/benchmarking/colRowQuantiles.md.rsp 90e3f4c1d92a47457ca70bbc52c1c145 *inst/benchmarking/colRowRanges.md.rsp 07a64514251f5f55a62951a871e23d21 *inst/benchmarking/colRowRanks.md.rsp 409e4c37addbf7a2c15cb0b276692ca7 *inst/benchmarking/colRowSums.md.rsp ac5da10e80f1ff0ca8722176b7b41cf9 *inst/benchmarking/colRowTabulates.md.rsp 72cacc64cf3e106dadfe865650c0c57f *inst/benchmarking/colRowVars.md.rsp 7a8c58c7090624f9536603917180509f *inst/benchmarking/colRowWeightedMeans.md.rsp beb6d3d46a9903d3cdc37bba972030f4 *inst/benchmarking/colRowWeightedMedians.md.rsp 27de27a4248555e4fad67ee53e8d622e *inst/benchmarking/count.md.rsp ab9f5049c780d39eca80e084f60cf68e *inst/benchmarking/includes/appendix.md.rsp 75a4dbe1cebc11442ce3f8626cb2f786 *inst/benchmarking/includes/footer.md.rsp 80fec8731611547e148434c4a8af80a7 *inst/benchmarking/includes/header.md.rsp d4fe14ce0a8fe23b829bd5d40ecb8638 *inst/benchmarking/includes/references.md.rsp 0ede7b741c87e3b9552562b25aa84950 *inst/benchmarking/includes/results.md.rsp 1127a9839354eb9de5fd6641e0d2949e *inst/benchmarking/includes/setup.md.rsp 8744fca6100198e786187172e3e04beb *inst/benchmarking/index.md.rsp 672790929425f88eb1235e579c7ae487 *inst/benchmarking/indexByRow.md.rsp 713004c069039309a947587cae09b9c4 *inst/benchmarking/logSumExp.md.rsp 03268c8274ecc261af5a583484679b94 *inst/benchmarking/madDiff.md.rsp 20b9686d1ccade9286f73e8a9a80e96c *inst/benchmarking/meanOver.md.rsp 87bd2dbcf60d75ca69eaf37f52c4b436 *inst/benchmarking/product.md.rsp 5e0977113a979580db793bce8c0bdbda *inst/benchmarking/sumOver.md.rsp 4438b091dd392c8a0c305cbf48e1b354 *inst/benchmarking/t_tx_OP_y.md.rsp 2e5cf31ad74af1915311a48f5c83732a *inst/benchmarking/varDiff.md.rsp 3e75a042f402c6d60fbdb4c236bdb0a9 *inst/benchmarking/weightedMean.md.rsp 8e98dd36afed6de19c3f9cee6fd6516f *inst/benchmarking/weightedMedian.md.rsp 7b063158ba0bca2a2550bd7c4f9524c4 *inst/benchmarking/x_OP_y.md.rsp 4c60cf123ab3a6f4bbc1fa104dd340e9 *inst/doc/matrixStats-methods.html 582b682419b6ba0c71915913bfb93be4 *inst/doc/matrixStats-methods.md.rsp 3be5b91c497cfc857af456c5458bb6e0 *man/allocMatrix.Rd 6802f901274f465d85960be20c594bc6 *man/anyMissing.Rd b8b516f170c13a1eae9720526c6572f9 *man/binCounts.Rd d0baceff9a154bd804fd4f649ee49f57 *man/binMeans.Rd 9630ce05eefdd0ca82adcdf85dd71a52 *man/diff2.Rd 553286071b44b0c10001ec59d437642a *man/indexByRow.Rd 45c48dae20f6411127fffac4b2fa0cc6 *man/logSumExp.Rd 6be5a9ac1eb991947dc901e4608e1909 *man/matrixStats-package.Rd aba0b7e73733a5832840a144413e05c8 *man/meanOver.Rd 3c28f9576492c952ec24547672ff423e *man/rowAvgsPerColSet.Rd 151d60d30326172538f26016c92d570d *man/rowCollapse.Rd 20420a3b0dacfc58a8ec0164609e3b73 *man/rowCounts.Rd 67e0da9580f75d0b0a5ffdb1945b355e *man/rowCumsums.Rd 9cf589f37acbbf753c2a07f727ad75bb *man/rowDiffs.Rd 1d670261d4c847d534b97fb20a2fa391 *man/rowIQRs.Rd ce1971ace17567d261da000b417b6d61 *man/rowLogSumExps.Rd b32e2de3b302f0d8b035a035fd28672b *man/rowMedians.Rd 68c7d2b6c4648f46430e47aa074d5a8e *man/rowOrderStats.Rd bb938ccc32375f5972dd63aba0aa2672 *man/rowProds.Rd 55c7420af5c986a765ce762906495131 *man/rowQuantiles.Rd 230807a1ab0e186ce9d6233f630a4c58 *man/rowRanges.Rd ef6dad5770bd7c5dbe50a2d04c53ced6 *man/rowRanks.Rd ec9c07afc5da5adba37f0485cd0a5ef5 *man/rowSds.Rd 6ea08c053265b8ce0adc815d92229f5a *man/rowTabulates.Rd 2ae459ef917ef4564100e842ecc46a07 *man/rowVars.Rd aaa62497523fd11029ec56cc5b449b3f *man/rowWeightedMeans.Rd 13d24b46725782d3a536480d12b0c569 *man/rowWeightedMedians.Rd e64eb1340b963b776807cfcd45cb49f6 *man/signTabulate.Rd f78cf336fa52437301f854bc1372e427 *man/sumOver.Rd 51f34f8007755bc3c4376a56c1cb4701 *man/varDiff.Rd 11c6c137c9e9473d02acbe9a99c26140 *man/weightedMad.Rd 927e23972d04b71d7acff9edc2e6feed *man/weightedMean.Rd 57e5484018f9adf4b19a659c709469d8 *man/weightedMedian.Rd 1726d21237faa5093379ea6edc9ea1cd *man/weightedVar.Rd 01c6a83158307172f1d51d45e69bf8b6 *man/xUNDERSCOREOPUNDERSCOREy.Rd 1c6ff48974c75f5640d09f0919304d35 *src/allocMatrix2.c dd0a5a51a4beb5f240611b379355974e *src/anyMissing.c 7eb8fcd98f39e47623a2b85583906632 *src/binCounts-BINBY-template.h 4ced9b328349ef007414044632c398ea *src/binCounts.c 553e87b0967e23606116ef44054836fd *src/binMeans-BINBY-template.h b73e99b94350876ba274b7964499ec35 *src/binMeans.c 82a634bcb3722cb35a1460185bdb2dc5 *src/colCounts.c e5f276f3c58aa0fc2427e4776305655a *src/colCounts_TYPE-template.h c148b21db92c74008311c15b4720e9b7 *src/colOrderStats.c d547a6be4752200e2f0f42eb3fb64f89 *src/colOrderStats_TYPE-template.h 02ba21b3917368d337adf7add87625a7 *src/colRanges.c a867a5496ab04e8ab5d4a49262dbd73e *src/colRanges_TYPE-template.h 2c871e6b2476286f48719a205f3a9c5e *src/diff2.c b1a80b6033ed39718422109a0bab98b1 *src/diff2_TYPE-template.h 122818a15b1026f4617413b21a43f1a1 *src/indexByRow.c dff19ccbacd1467c4762d78bf1e28943 *src/logSumExp.c c84dba16e698ca588e987679937c4d19 *src/logSumExp_internal.c 10e83fcf5abc6f470b78866955cd3eb5 *src/logSumExp_internal.h cea76871ca495cdaf172a0324091fdcd *src/meanOver.c d20995d2f2b6106d5c89857791e8fa23 *src/meanOver_TYPE-template.h 8b7240246168563c8ff34c7a716d2908 *src/productExpSumLog.c ebe837c6edd137ddd45593f75c12b7d9 *src/productExpSumLog_TYPE-template.h b0e357e87f4826e34602a1e739b6c1b0 *src/psortKM.c 427e0e0b915a2131bbaf7b0ea8e9fa23 *src/rowCounts.c 0e2d0d426fbbe10ba3afdf4ae5c1bfcf *src/rowCounts_TYPE-template.h b4d9d1c221e27a87af7f681354a855b2 *src/rowCumMinMaxs.c 9251465451b640135bb3052c8f701e50 *src/rowCumMinMaxs_TYPE-template.h 8a68febb8b652a3b815f81d317d9efaf *src/rowCumprods.c 3657ab012153b21b7b2735e8085c8ea6 *src/rowCumprods_TYPE-template.h f55ef3d1110b2f3308ea57f6fbd022d7 *src/rowCumsums.c 23aa90044e6e700da513e01ba8d917d0 *src/rowCumsums_TYPE-template.h 8dbd2448b2bf9332aa6d365b7f14602a *src/rowDiffs.c 26c82aefba665e025ae5491716c7c7db *src/rowDiffs_TYPE-template.h e5ef7b7ad5e06479598f237b73b78367 *src/rowLogSumExp.c 04849d41cd620515daa2cc7ac2790255 *src/rowMads.c 48bb539cf54ec1d4b37c11703e7c1b24 *src/rowMads_TYPE-template.h 692e21c160d2a6f3aebe14b5180a0ede *src/rowMedians.c e8caf7ee5ba1afe8fc678dfb749ca6ca *src/rowMedians_TYPE-template.h fbe2f52fa8d1f55bffb3f0bc858662b8 *src/rowOrderStats.c bc888497ca6e4192e05d2c4b54dbfbeb *src/rowOrderStats_TYPE-template.h 09ab341736ee3ae5a5440214c46086d9 *src/rowRanges.c 253e54b4039e4d22d57cb616f8a27cf5 *src/rowRanges_TYPE-template.h 559d563d7933cc7812e330b05b8fae8a *src/rowRanksWithTies.c 73037c62b5d39bbc61e50eb4c0ef5d8e *src/rowRanksWithTies_TYPE_TIES-template.h f03ebb783ef8e534cdc582dda37f9d84 *src/rowVars.c c1d9ee48aa2ed322993b5034e67e9336 *src/rowVars_TYPE-template.h 1d49333c1c268d62b0fd0a2f8082f172 *src/signTabulate.c a52a4563500b0342c567eef1610f07ce *src/signTabulate_TYPE-template.h f7bdb2b04143fc5653e1ea354c6bb730 *src/sumOver.c bfebc523926a7c74df61a83c93e338b8 *src/sumOver_TYPE-template.h 99698bd63ccb7c17cb4e5dc7b07c9e97 *src/templates-types.h 7d48cfb1024e60465d7c6964206e310e *src/templates-types_undef.h ec49eaadeaacde7c6c624caf640960d9 *src/types.h b122d69ccf94104d5b3011d985b78b13 *src/utils.h ea3621211c22893848083535e4eda33e *src/weightedMean.c 4f25f36f4da2341e8242d9523fbedd2a *src/weightedMean_TYPE-template.h af4c80e388864cf4790700f347b1a197 *src/weightedMedian.c b0290ced0f75f7ab1d605a7d5d031522 *src/weightedMedian_TYPE-template.h 3d4e2f4571c5ab285a3173eaeafc6cec *src/x_OP_y.c 47a57b15b5e985c1a233491fbf8b9b81 *src/x_OP_y_TYPE-template.h 6b09aca9e9b79f8fcb85cdca51b305ad *tests/allocArray.R 0844bf090717347c46eeae7e7761fd31 *tests/allocMatrix.R 21b5cbdf48874928843e56f60ee8c449 *tests/allocVector.R f3f5505626d59ecc6c8d498bf6885de0 *tests/anyMissing.R 33ccf3764c08913bc1fa6ed4fa00ada6 *tests/benchmark.R fd636ec43f81ad7f1ecef7bb12062450 *tests/binCounts.R 0ed9bb218de4455a7ba63036d18cdfb1 *tests/binMeans,binCounts.R 53679ebbeee7d4ef5e7217ff8076ddce *tests/count.R 851e382ed2c520ccfe239a1edb720200 *tests/diff2.R 323eff3b3c96e679600ed54cbc281bb2 *tests/indexByRow.R d229e4e189c7de0db2fd280c6bd4fde6 *tests/logSumExp.R 361afd9da7276beab8ed7999d92c9e32 *tests/meanOver.R 0890c6dab2f723c5194917924d817066 *tests/product.R e996793a93f55357ddea79aa865c45f3 *tests/psortKM.R 1e61292da20a213bbccc7461421b3c48 *tests/rowAllAnys.R 6b645494332ef6d9e128bfa98cc73960 *tests/rowAvgsPerColSet.R 1408f031afac3258638f8e91cbac75bd *tests/rowCollapse.R 993e34a7064fe8bb019bef8288336231 *tests/rowCounts.R ddaf3ca847d84668b552e4f2b8911cd3 *tests/rowCumMinMaxs.R 8fa680f875c13e4e6739ac9a626a6e30 *tests/rowCumprods.R 984184fbadd7344bdd5ccbe93a2dfaa0 *tests/rowCumsums.R e20a6674d9c33035c2bed8b7eb4193a2 *tests/rowDiffs.R 9dc45d060ce75b9ae14eec0fadf1dc36 *tests/rowIQRs.R 89451022b4573db0619d7c3a98a7afdd *tests/rowLogSumExps.R 3ce723b5a981c34b8c4f892a5db9bfd5 *tests/rowMads.R 51155625a0ac69cb17e3e42f2ff7fab4 *tests/rowMedians.R 08dfbd77077dad6ceb84e76547464999 *tests/rowOrderStats.R 6ac776e7386e3798ec63371259e2815d *tests/rowProds.R 6fa79a760b002afb3801d810d6a919a8 *tests/rowQuantiles.R 267dc45a8a893e1461425f1975702d37 *tests/rowRanges.R 6d7f9ee3fdf75cb8533f2170e101c2a3 *tests/rowRanks.R 09eef93a3460acac4ce33fccca26fdef *tests/rowSds.R b4fa113751df716127c5e3edda1e6153 *tests/rowTabulates.R 8694849a466b91f95fe7540c24e6d78c *tests/rowVarDiffs.R 410338392cd50c0c953263be0063fa38 *tests/rowVars.R 0cbe7b6379097b2ea7aa1c14d980339a *tests/rowWeightedMeans.R 65d1b8c60a95d11c90a2ace6679017ec *tests/rowWeightedMedians.R 6bac1702606b78c8cd82697494e74bb1 *tests/rowWeightedVars.R 824536a7e210775091eddc2fc7f5aac1 *tests/signTabulate.R 3d8e7ebbc2249cb0249856b4a6ab8f25 *tests/sumOver.R 8e4cf994f2ecd7f21347931fd03c4392 *tests/varDiff_etal.R f681d2b636aadef71934fb37eaa3fb2c *tests/weightedMean.R 7e51437418f75869a9d7b8759f9b6b6c *tests/weightedMedian.R 93e729b911e5844a2d8d715ecf1f4367 *tests/weightedVar_etal.R 8a54c1e37d221d2ada391942bef3a760 *tests/x_OP_y.R 5087a4cbb2884533520210c7bb1fe20d *tests/zzz.package-unload.R 582b682419b6ba0c71915913bfb93be4 *vignettes/matrixStats-methods.md.rsp matrixStats/build/0000755000175100001440000000000012542546263013723 5ustar hornikusersmatrixStats/build/vignette.rds0000644000175100001440000000041512542546263016262 0ustar hornikusers‹mPËNÃ0tMR/ˆSøò=W½pA”W+Þ¨‘ü’½¡äÆ—S6$®ZKkïÚ³³ãùX1ÆR–¥ K3J³5mÅ=EÂr¶¤óQqtí×9úgx0ÂWJTÎÛùtÜ”ûN)îúÒ4eÓé[£}Ôððõ•ŒpƒÊõ_=ÞÓÊÓ‹÷™æ bòù,h1\ÿDýó RŒóCõ 58¹µ²ŸŠÜ™£¼µ‘ÒþÌʸœy FyÉ)R´xþh\t…)¤]+!½·x.²×ínJ“·ø‹7üWö,I},ºÌü¦íD+ö±–ÜÇ>®G^5ŽúÝ¿)„4matrixStats/DESCRIPTION0000644000175100001440000000273612542554541014340 0ustar hornikusersPackage: matrixStats Version: 0.14.2 Depends: R (>= 2.9.0) Imports: methods Suggests: base64enc, ggplot2, knitr, microbenchmark, R.devices, R.rsp VignetteBuilder: R.rsp Date: 2015-06-23 Title: Methods that Apply to Rows and Columns of Matrices (and to Vectors) Authors@R: c( person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"), email="henrikb@braju.com"), person("Hector", "Corrada Bravo", role="ctb"), person("Robert", "Gentleman", role="ctb"), person("Ola", "Hossjer", role="ctb"), person("Harris", "Jaffee", role="ctb"), person("Dongcan", "Jiang", role="ctb"), person("Peter", "Langfelder", role="ctb")) Author: Henrik Bengtsson [aut, cre, cph], Hector Corrada Bravo [ctb], Robert Gentleman [ctb], Ola Hossjer [ctb], Harris Jaffee [ctb], Dongcan Jiang [ctb], Peter Langfelder [ctb] Maintainer: Henrik Bengtsson Description: Methods operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). There are also some vector-based methods, e.g. binMeans(), madDiff() and weightedMedians(). All methods have been optimized for speed and memory usage. License: Artistic-2.0 LazyLoad: TRUE NeedsCompilation: yes ByteCompile: TRUE biocViews: Infrastructure, Statistics URL: https://github.com/HenrikBengtsson/matrixStats BugReports: https://github.com/HenrikBengtsson/matrixStats/issues Packaged: 2015-06-24 15:29:13 UTC; hb Repository: CRAN Date/Publication: 2015-06-24 18:22:57 matrixStats/man/0000755000175100001440000000000012542546241013373 5ustar hornikusersmatrixStats/man/rowOrderStats.Rd0000644000175100001440000000353712542546241016514 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowOrderStats.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowOrderStats} \alias{rowOrderStats} \alias{colOrderStats} \title{Gets an order statistic for each row (column) in a matrix} \description{ Gets an order statistic for each row (column) in a matrix. } \usage{ rowOrderStats(x, which, dim.=dim(x), ...) colOrderStats(x, which, dim.=dim(x), ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{which}{An \code{\link[base]{integer}} index in [1,K] ([1,N]) indicating which order statistic to be returned.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \details{ The implementation of \code{rowOrderStats()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. } \section{Missing values}{ This method does \emph{not} handle missing values, that is, the result corresponds to having \code{na.rm=FALSE} (if such an argument would be available). } \author{ The native implementation of \code{rowOrderStats()} was adopted by Henrik Bengtsson from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \seealso{ See \code{rowMeans()} in \code{\link[base]{colSums}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowWeightedMedians.Rd0000644000175100001440000000447312542546241017463 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowWeightedMedians.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowWeightedMedians} \alias{rowWeightedMedians} \alias{colWeightedMedians} \title{Calculates the weighted medians for each row (column) in a matrix} \description{ Calculates the weighted medians for each row (column) in a matrix. } \usage{ rowWeightedMedians(x, w=NULL, na.rm=FALSE, ...) colWeightedMedians(x, w=NULL, na.rm=FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded from the calculation, otherwise not.} \item{...}{Additional arguments passed to \code{\link{weightedMedian}}().} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{\link{rowMedians}}()/\code{colMedians()} is used. } \examples{ x <- matrix(rnorm(20), nrow=5, ncol=4) print(x) # Non-weighted row averages xM0 <- rowMedians(x) xM <- rowWeightedMedians(x) stopifnot(all.equal(xM, xM0)) # Weighted row averages (uniform weights) w <- rep(2.5, ncol(x)) xM <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted row averages (excluding some columns) w <- c(1,1,0,1) xM0 <- rowMedians(x[,(w == 1),drop=FALSE]); xM <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted row averages (excluding some columns) w <- c(0,1,0,0) xM0 <- rowMedians(x[,(w == 1),drop=FALSE]); xM <- rowWeightedMedians(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted averages by rows and columns w <- 1:4 xM1 <- rowWeightedMedians(x, w=w) xM2 <- colWeightedMedians(t(x), w=w) stopifnot(all.equal(xM2, xM1)) } \author{Henrik Bengtsson} \seealso{ See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted medians. Internally, \code{\link{weightedMedian}}() is used. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/weightedMedian.Rd0000644000175100001440000001233212542546241016601 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % weightedMedian.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{weightedMedian} \alias{weightedMedian} \encoding{latin1} \title{Weighted Median Value} \usage{ weightedMedian(x, w=rep(1, times = length(x)), na.rm=FALSE, interpolate=is.null(ties), ties=NULL, ...) } \description{ Computes a weighted median of a numeric vector. } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted median is to be computed.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done. Default value is \code{\link[base]{NA}} (for efficiency).} \item{interpolate}{If \code{\link[base:logical]{TRUE}}, linear interpolation is used to get a consistent estimate of the weighted median.} \item{ties}{If \code{interpolate == FALSE}, a character string specifying how to solve ties between two \code{x}'s that are satisfying the weighted median criteria. Note that at most two values can satisfy the criteria. When \code{ties} is \code{"min"}, the smaller value of the two is returned and when it is \code{"max"}, the larger value is returned. If \code{ties} is \code{"mean"}, the mean of the two values is returned. Finally, if \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a weighted average of the two are returned, where the weights are weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]}, respectively.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \details{ For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S}, the \emph{weighted median} is defined as the element \code{x[k]} for which the total weight of all elements \code{x[i] < x[k]} is less or equal to \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]} is less or equal to \code{S/2} (c.f. [1]). If \code{w} is missing then all elements of \code{x} are given the same positive weight. If all weights are zero, \code{\link[base]{NA}}_real_ is returned. If one or more weights are \code{Inf}, it is the same as these weights have the same weight and the others has zero. This makes things easier for cases where the weights are result of a division with zero. The weighted median solves the following optimization problem: \deqn{\alpha^* = \arg_\alpha \min \sum_{k=1}{K} w_k |x_k-\alpha|} where \eqn{x=(x_1,x_2,\ldots,x_K)} are scalars and \eqn{w=(w_1,w_2,\ldots,w_K)} are the corresponding "weights" for each individual \eqn{x} value. } \examples{ x <- 1:10 n <- length(x) m1 <- median(x) # 5.5 m2 <- weightedMedian(x) # 5.5 stopifnot(identical(m1, m2)) w <- rep(1, n) m1 <- weightedMedian(x, w) # 5.5 (default) m2 <- weightedMedian(x, ties="weighted") # 5.5 (default) m3 <- weightedMedian(x, ties="min") # 5 m4 <- weightedMedian(x, ties="max") # 6 stopifnot(identical(m1,m2)) # Pull the median towards zero w[1] <- 5 m1 <- weightedMedian(x, w) # 3.5 y <- c(rep(0,w[1]), x[-1]) # Only possible for integer weights m2 <- median(y) # 3.5 stopifnot(identical(m1,m2)) # Put even more weight on the zero w[1] <- 8.5 weightedMedian(x, w) # 2 # All weight on the first value w[1] <- Inf weightedMedian(x, w) # 1 # All weight on the last value w[1] <- 1 w[n] <- Inf weightedMedian(x, w) # 10 # All weights set to zero w <- rep(0, n) weightedMedian(x, w) # NA # Simple benchmarking bench <- function(N=1e5, K=10) { x <- rnorm(N) gc() t <- c() t[1] <- system.time(for (k in 1:K) median(x))[3] t[2] <- system.time(for (k in 1:K) weightedMedian(x))[3] t <- t / t[1] names(t) <- c("median", "weightedMedian") t } print(bench(N= 5, K=100)) print(bench(N= 50, K=100)) print(bench(N= 200, K=100)) print(bench(N= 1000, K=100)) print(bench(N= 10e3, K= 20)) print(bench(N=100e3, K= 20)) } \seealso{ \code{\link[stats]{median}}, \code{\link[base]{mean}}() and \code{\link{weightedMean}}(). } \references{ [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms, The MIT Press, Massachusetts Institute of Technology, 1989. } \author{ Henrik Bengtsson and Ola Hossjer, Centre for Mathematical Sciences, Lund University. Thanks to Roger Koenker, Econometrics, University of Illinois, for the initial ideas. } \keyword{univar} \keyword{robust} matrixStats/man/rowCollapse.Rd0000644000175100001440000000312612542546241016156 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowCollapse.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowCollapse} \alias{rowCollapse} \alias{colCollapse} \title{Extracts one cell per row (column) from a matrix} \description{ Extracts one cell per row (column) from a matrix. The implementation is optimized for memory and speed. } \usage{ rowCollapse(x, idxs, dim.=dim(x), ...) colCollapse(x, idxs, dim.=dim(x), ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}}.} \item{idxs}{An index \code{\link[base]{vector}} of (maximum) length N (K) specifying the columns (rows) to be extracted.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{vector}} of length N (K). } \examples{ x <- matrix(1:27, ncol=3) y <- rowCollapse(x, 1) stopifnot(identical(y, x[,1])) y <- rowCollapse(x, 2) stopifnot(identical(y, x[,2])) y <- rowCollapse(x, c(1,1,1,1,1,3,3,3,3)) stopifnot(identical(y, c(x[1:5,1], x[6:9,3]))) y <- rowCollapse(x, 1:3) print(y) yT <- c(x[1,1],x[2,2],x[3,3],x[4,1],x[5,2],x[6,3],x[7,1],x[8,2],x[9,3]) stopifnot(identical(y, yT)) } \author{Henrik Bengtsson} \seealso{ \emph{Matrix indexing} to index elements in matrices and arrays, cf. \code{\link[base]{[}}(). } \keyword{utilities} matrixStats/man/xUNDERSCOREOPUNDERSCOREy.Rd0000644000175100001440000000411212542546241017543 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % x_OP_y.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{xUNDERSCOREOPUNDERSCOREy} \alias{xUNDERSCOREOPUNDERSCOREy} \alias{x_OP_y} \alias{t_tx_OP_y} \title{Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'} \usage{ x_OP_y(x, y, OP, commute=FALSE, na.rm=FALSE) t_tx_OP_y(x, y, OP, commute=FALSE, na.rm=FALSE) } \description{ Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be +, -, *, and /. For + and *, na.rm=TRUE will drop missing values first. } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{y}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length L.} \item{OP}{A \code{\link[base]{character}} specifying which operator to use.} \item{commute}{If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP t(x))') is calculated, otherwise 'x OP y' ('t(t(x) OP y)').} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are ignored, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. } \section{Missing values}{ If \code{na.rm=TRUE}, then missing values are "dropped" before applying the operator to each pair of values. For instance, if \code{x[1,1]} is a missing value, then the result of \code{x[1,1] + y[1]} equals \code{y[1]}. If also \code{y[1]} is a missing value, then the result is a missing value. This only applies to additions and multiplications. For subtractions and divisions, argument \code{na.rm} is ignored. } \examples{ x <- matrix(c(1,2,3,NA,5,6), nrow=3, ncol=2) # Add 'y' to each column y <- 1:2 z0 <- x + y z1 <- x_OP_y(x, y, OP="+") print(z1) stopifnot(all.equal(z1, z0)) # Add 'y' to each row y <- 1:3 z0 <- t(t(x) + y) z1 <- t_tx_OP_y(x, y, OP="+") print(z1) stopifnot(all.equal(z1, z0)) } \author{Henrik Bengtsson} \keyword{internal} matrixStats/man/binCounts.Rd0000644000175100001440000000375512542546241015640 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % binCounts.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{binCounts} \alias{binCounts} \title{Fast element counting in non-overlapping bins} \usage{ binCounts(x, bx, right=FALSE, ...) } \description{ Counts the number of elements in non-overlapping bins } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned and counted.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B+1 ordered positions specifying the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ..., \code{[bx[B],bx[B+1])}.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B with non-negative integers. } \details{ \code{binCounts(x, bx, right=TRUE)} gives equivalent results as \code{rev(binCounts(-x, bx=rev(-bx), right=FALSE))}, but is faster and more memory efficient. } \section{Missing and non-finite values}{ Missing values in \code{x} are ignored/dropped. Missing values in \code{bx} are not allowed and gives an error. } \seealso{ An alternative for counting occurrences within bins is \code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks=bx, plot=FALSE)$counts}. That approach is ~30-60\% slower than \code{binCounts(..., right=TRUE)}. To count occurrences of indices \code{x} (positive \code{\link[base]{integer}}s) in \code{[1,B]}, use \code{tabulate(x, nbins=B)}, where \code{x} does \emph{not} have to be sorted first. For details, see \code{\link[base]{tabulate}}(). To average values within bins, see \code{\link{binMeans}}(). } \author{Henrik Bengtsson} \keyword{univar} matrixStats/man/rowProds.Rd0000644000175100001440000000373112542546241015505 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowProds.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowProds} \alias{rowProds} \alias{rowProds} \alias{colProds} \alias{product} \title{Calculates the product for each row (column) in a matrix} \description{ Calculates the product for each row (column) in a matrix. } \usage{ rowProds(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) colProds(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) product(x, na.rm=FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are ignored, otherwise not.} \item{method}{A \code{\link[base]{character}} string specifying how each product is calculated.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \details{ If \code{method="expSumLog"}, then then \code{\link{product}}() function is used, which calculates the produce via the logarithmic transform (treating negative values specially). This improves the precision and lowers the risk for numeric overflow. If \code{method="direct"}, the direct product is calculated via the \code{\link[base]{prod}}() function. } \section{Missing values}{ Note, if \code{method="expSumLog"}, \code{na.rm=FALSE}, and \code{x} contains missing values (\code{\link[base]{NA}} or \code{\link[base:is.finite]{NaN}}), then the calculated value is also missing value. Note that it depends on platform whether \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned when an \code{\link[base:is.finite]{NaN}} exists, cf. \code{\link[base]{is.nan}}(). } \author{Henrik Bengtsson} \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/binMeans.Rd0000644000175100001440000000521112542546241015415 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % binMeans.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{binMeans} \alias{binMeans} \title{Fast mean calculations in non-overlapping bins} \usage{ binMeans(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE, ...) } \description{ Computes the sample means in non-overlapping bins } \arguments{ \item{y}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K values to calculate means on.} \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned.} \item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B+1 ordered positions specifying the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ..., \code{[bx[B],bx[B+1])}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values in \code{y} are dropped before calculating the mean, otherwise not.} \item{count}{If \code{\link[base:logical]{TRUE}}, the number of data points in each bins is returned as attribute \code{count}, which is an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B.} \item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open), otherwise left-closed (right open).} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length B. } \details{ \code{binMeans(x, bx, right=TRUE)} gives equivalent results as \code{rev(binMeans(-x, bx=sort(-bx), right=FALSE))}, but is faster. } \section{Missing and non-finite values}{ Data points where either of \code{y} and \code{x} is missing are dropped (and therefore are also not counted). Non-finite values in \code{y} are not allowed and gives an error. Missing values in \code{bx} are not allowed and gives an error. } \section{Empty bins}{ Empty bins will get value \code{\link[base:is.finite]{NaN}}. } \examples{ x <- 1:200 mu <- double(length(x)) mu[1:50] <- 5 mu[101:150] <- -5 y <- mu + rnorm(length(x)) # Binning bx <- c(0,50,100,150,200)+0.5 yS <- binMeans(y, x=x, bx=bx) plot(x,y) for (kk in seq(along=yS)) { lines(bx[c(kk,kk+1)], yS[c(kk,kk)], col="blue", lwd=2) } } \seealso{ \code{\link{binCounts}}(). \code{\link[stats]{aggregate}} and \code{\link[base]{mean}}(). } \references{ [1] R-devel thread \emph{Fastest non-overlapping binning mean function out there?} on Oct 3, 2012\cr } \author{ Henrik Bengtsson with initial code contributions by Martin Morgan [1]. } \keyword{univar} matrixStats/man/anyMissing.Rd0000644000175100001440000000263712542546241016013 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % anyMissing.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{anyMissing} \alias{anyMissing} \alias{colAnyMissings} \alias{rowAnyMissings} \title{Checks if there are any missing values in an object or not} \description{ Checks if there are any missing values in an object or not. } \usage{ anyMissing(x, ...) colAnyMissings(x, ...) rowAnyMissings(x, ...) } \arguments{ \item{x}{A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or \code{\link[base]{NULL}}.} \item{...}{Not used.} } \value{ Returns \code{\link[base:logical]{TRUE}} if a missing value was detected, otherwise \code{\link[base:logical]{FALSE}}. } \details{ The implementation of this method is optimized for both speed and memory. The method will return \code{\link[base:logical]{TRUE}} as soon as a missing value is detected. } \examples{ x <- rnorm(n=1000) x[seq(300,length(x),by=100)] <- NA stopifnot(anyMissing(x) == any(is.na(x))) } \author{Henrik Bengtsson} \seealso{ Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base}, which provides the same functionality as this function. } \keyword{iteration} \keyword{logic} matrixStats/man/logSumExp.Rd0000644000175100001440000000656012542546241015614 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % logSumExp.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{logSumExp} \alias{logSumExp} \title{Accurately computes the logarithm of the sum of exponentials} \description{ Accurately computes the logarithm of the sum of exponentials, that is, \eqn{log(sum(exp(lx)))}. If \eqn{lx = log(x)}, then this is equivalently to calculating \eqn{log(sum(x))}. This function, which avoid numerical underflow, is often used when computing the logarithm of the sum of small numbers (\eqn{|x| << 1}) such as probabilities. } \usage{ logSumExp(lx, na.rm=FALSE, ...) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}}. Typically \code{lx} are \eqn{log(x)} values.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \details{ This is function is more accurate than \code{log(sum(exp(lx)))} when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}. The implementation of this function is based on the observation that \deqn{ log(a + b) = [ la = log(a), lb = log(b) ] = log( exp(la) + exp(lb) ) = la + log ( 1 + exp(lb - la) ) } Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is less likely that the computation of \eqn{1 + exp(lb - la)} will not underflow/overflow numerically. Because of this, the overall result from this function should be more accurate. Analogously to this, the implementation of this function finds the maximum value of \code{lx} and subtracts it from the remaining values in \code{lx}. } \section{Benchmarking}{ This method is optimized for correctness, that avoiding underflowing. It is implemented in native code that is optimized for speed and memory. } \examples{ ## EXAMPLE #1 lx <- c(1000.01, 1000.02) y0 <- log(sum(exp(lx))) print(y0) ## Inf y1 <- logSumExp(lx) print(y1) ## 1000.708 ## EXAMPLE #2 lx <- c(-1000.01, -1000.02) y0 <- log(sum(exp(lx))) print(y0) ## -Inf y1 <- logSumExp(lx) print(y1) ## -999.3218 ## EXAMPLE #3 ## R-help thread 'Beyond double-precision?' on May 9, 2009. set.seed(1) x <- runif(50) ## The logarithm of the harmonic mean y0 <- log(1/mean(1/x)) print(y0) ## -1.600885 lx <- log(x) y1 <- log(length(x)) - logSumExp(-lx) print(y1) ## [1] -1.600885 # Sanity check stopifnot(all.equal(y1, y0)) } \author{Henrik Bengtsson} \seealso{ To compute this function on rows or columns of a matrix, see \code{\link{rowLogSumExps}}(). For adding \emph{two} double values in native code, R provides the C function \code{logspace_add()} [1]. For properties of the log-sum-exponential function, see [2]. } \references{ [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr [2] Laurent El Ghaoui, \emph{Hyper-Textbook: Optimization Models and Applications}, University of California at Berkeley, August 2012. (Chapter 'Log-Sum-Exp (LSE) Function and Properties', \url{http://inst.eecs.berkeley.edu/~ee127a/book/login/def_lse_fcn.html}) \cr [3] R-help thread \emph{logsumexp function in R}, 2011-02-17. \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr } matrixStats/man/varDiff.Rd0000644000175100001440000000651612542546241015253 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % varDiff.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{varDiff} \alias{varDiff} \alias{sdDiff} \alias{madDiff} \alias{iqrDiff} \alias{colVarDiffs} \alias{rowVarDiffs} \alias{colSdDiffs} \alias{rowSdDiffs} \alias{colMadDiffs} \alias{rowMadDiffs} \alias{colIQRDiffs} \alias{rowIQRDiffs} \title{Estimation of scale based on sequential-order differences} \description{ Estimation of scale based on sequential-order differences, corresponding to the scale estimates provided by \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. } \usage{ varDiff(x, na.rm=FALSE, diff=1L, trim=0, ...) colVarDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) rowVarDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) sdDiff(x, na.rm=FALSE, diff=1L, trim=0, ...) colSdDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) rowSdDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) madDiff(x, na.rm=FALSE, diff=1L, trim=0, constant=1.4826, ...) colMadDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) rowMadDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) iqrDiff(x, na.rm=FALSE, diff=1L, trim=0, ...) colIQRDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) rowIQRDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded, otherwise not.} \item{diff}{The positional distance of elements for which the difference should be calculated.} \item{trim}{A \code{\link[base]{double}} in [0,1/2] specifying the fraction of observations to be trimmed from each end of (sorted) \code{x} before estimation.} \item{constant}{A scale factor adjusting for asymptotically normal consistency.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length 1, length N, or length K. } \author{Henrik Bengtsson} \seealso{ For the corresponding non-differentiated estimates, see \code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats]{IQR}}. Internally, \code{\link{diff2}}() is used which is a faster version of \code{\link[base]{diff}}(). } \details{ Note that n-order difference MAD estimates, just like the ordinary MAD estimate by \code{\link[stats]{mad}}, apply a correction factor such that the estimates are consistent with the standard deviation under Gaussian distributions. The interquartile range (IQR) estimates does \emph{not} apply such a correction factor. If asymptotically normal consistency is wanted, the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))}, which is half of that used for MAD estimates, which is \code{1 / qnorm(3/4)}. This correction factor needs to be applied manually, i.e. there is no \code{constant} argument for the IQR functions. } \references{ [1] J. von Neumann et al., \emph{The mean square successive difference}. Annals of Mathematical Statistics, 1941, 12, 153-162.\cr } \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/weightedVar.Rd0000644000175100001440000000416612542546241016142 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % weightedVar.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{weightedVar} \alias{weightedVar} \alias{weightedSd} \alias{colWeightedVars} \alias{rowWeightedVars} \alias{colWeightedSds} \alias{rowWeightedSds} \title{Weighted variance and weighted standard deviation} \usage{ weightedVar(x, w, na.rm=FALSE, center=NULL, ...) colWeightedVars(x, w=NULL, na.rm=FALSE, ...) rowWeightedVars(x, w=NULL, na.rm=FALSE, ...) weightedSd(...) colWeightedSds(x, w=NULL, na.rm=FALSE, ...) rowWeightedSds(x, w=NULL, na.rm=FALSE, ...) } \description{ Computes a weighted variance / standard deviation of a numeric vector or across rows or columns of a matrix. } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted variance is to be computed.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done. Default value is \code{\link[base]{NA}} (for efficiency).} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \section{Missing values}{ Missing values are dropped at the very beginning, if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. } \seealso{ For the non-weighted variance, see \code{\link[stats]{var}}. } \author{Henrik Bengtsson} \keyword{univar} \keyword{robust} matrixStats/man/rowDiffs.Rd0000644000175100001440000000227112542546241015447 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowDiffs.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowDiffs} \alias{rowDiffs} \alias{colDiffs} \title{Calculates difference for each row (column) in a matrix} \description{ Calculates difference for each row (column) in a matrix. } \usage{ rowDiffs(x, lag=1L, differences=1L, ...) colDiffs(x, lag=1L, differences=1L, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK \code{\link[base]{matrix}}. } \examples{ x <- matrix(1:27, ncol=3) d1 <- rowDiffs(x) print(d1) d2 <- t(colDiffs(t(x))) stopifnot(all.equal(d2, d1)) } \author{Henrik Bengtsson} \seealso{ See also \code{\link{diff2}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/diff2.Rd0000644000175100001440000000173012542546241014655 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % diff2.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{diff2} \alias{diff2} \title{Fast lagged differences} \usage{ diff2(x, lag=1L, differences=1L, ...) } \description{ Computes the lagged and iterated differences. } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.} \item{lag}{An \code{\link[base]{integer}} specifying the lag.} \item{differences}{An \code{\link[base]{integer}} specifying the order of difference.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N - \code{differences}. } \examples{ diff2(1:10) } \seealso{ \code{\link[base]{diff}}(). } \author{Henrik Bengtsson} \keyword{univar} \keyword{internal} matrixStats/man/rowMedians.Rd0000644000175100001440000000372112542546241015775 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowMedians.S4.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowMedians} \alias{rowMedians} \alias{colMedians} \alias{rowMedians,matrix-method} \alias{colMedians,matrix-method} \title{Calculates the median for each row (column) in a matrix} \description{ Calculates the median for each row (column) in a matrix. } \usage{ rowMedians(x, na.rm=FALSE, dim.=dim(x), ...) colMedians(x, na.rm=FALSE, dim.=dim(x), ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \details{ The implementation of \code{rowMedians()} and \code{colMedians()} is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a special implementation for \code{\link[base]{integer}} matrices. That is, if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, then \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))}) would require three times the memory of \code{rowMedians(x)} (\code{colMedians(x)}), but all this is avoided. } \author{Henrik Bengtsson, Harris Jaffee} \seealso{ See \code{\link{rowMedians}}() and \code{colMedians()} for weighted medians. For mean estimates, see \code{rowMeans()} in \code{\link[base]{colSums}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/meanOver.Rd0000644000175100001440000000476512542546241015452 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % meanOver.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{meanOver} \alias{meanOver} \title{Fast averaging over subset of vector elements} \usage{ meanOver(x, idxs=NULL, na.rm=FALSE, refine=TRUE, ...) } \description{ Computes the sample mean of all or a subset of values. } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.} \item{idxs}{A \code{\link[base]{numeric}} index \code{\link[base]{vector}} in [1,N] of elements to mean over. If \code{\link[base]{NULL}}, all elements are considered.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are skipped, otherwise not.} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \details{ \code{meanOver(x, idxs)} gives equivalent results as \code{mean(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. If \code{x} is \code{\link[base]{numeric}} and \code{refine=TRUE}, then a two-pass scan is used to calculate the average. The first scan calculates the total sum and divides by the number of (non-missing) values. In the second scan, this average is refined by adding the residuals towards the first average. The \code{\link[base]{mean}}() uses this approach. \code{meanOver(..., refine=FALSE)} is almost twice as fast as \code{meanOver(..., refine=TRUE)}. } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from=1, to=n, by=2) s1 <- mean(x[idxs]) # 25 s2 <- meanOver(x, idxs=idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from=n, to=1, by=-2) s1 <- mean(x[idxs]) # 25 s2 <- meanOver(x, idxs=idxs) # 25 stopifnot(identical(s1, s2)) s1 <- mean(x) # 55 s2 <- meanOver(x) # 55 stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{mean}}(). To efficiently sum over a subset, see \code{\link{sumOver}}(). } \author{Henrik Bengtsson} \keyword{univar} \keyword{internal} matrixStats/man/weightedMean.Rd0000644000175100001440000000612012542546241016262 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % weightedMean.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{weightedMean} \alias{weightedMean} \encoding{latin1} \title{Weighted Arithmetic Mean} \usage{ weightedMean(x, w, na.rm=FALSE, refine=FALSE, ...) } \description{ Computes the weighted sample mean of a numeric vector. } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted mean is to be computed.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done. Default value is \code{\link[base]{NA}} (for efficiency).} \item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is used to calculate the average with greater numerical precision, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. If \code{x} is of zero length, then \code{NaN} is returned, which is consistent with \code{\link[base]{mean}}(). } \examples{ x <- 1:10 n <- length(x) w <- rep(1, times=n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) # Pull the mean towards zero w[1] <- 5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) # Put even more weight on the zero w[1] <- 8.5 m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) # All weight on the first value w[1] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) # All weight on the last value w[1] <- 1 w[n] <- Inf m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) # All weights set to zero w <- rep(0, n) m0 <- weighted.mean(x, w) m1 <- weightedMean(x, w) stopifnot(identical(m1,m0)) } \section{Missing values}{ This function handles missing values consistently \code{\link[stats]{weighted.mean}}. More precisely, if \code{na.rm=FALSE}, then any missing values in either \code{x} or \code{w} will give result \code{NA_real_}. If \code{na.rm=TRUE}, then all \code{(x,w)} data points for which \code{x} is missing are skipped. Note that if both \code{x} and \code{w} are missing for a data points, then it is also skipped (by the same rule). However, if only \code{w} is missing, then the final results will always be \code{NA_real_} regardless of \code{na.rm}. } \seealso{ \code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}. } \author{Henrik Bengtsson} \keyword{univar} \keyword{robust} matrixStats/man/rowQuantiles.Rd0000644000175100001440000000374512542546241016370 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowQuantiles.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowQuantiles} \alias{rowQuantiles} \alias{colQuantiles} \title{Estimates quantiles for each row (column) in a matrix} \description{ Estimates quantiles for each row (column) in a matrix. } \usage{ rowQuantiles(x, probs=seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) colQuantiles(x, probs=seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} with N >= 0.} \item{probs}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J probabilities in [0,1].} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{type}{An \code{\link[base]{integer}} specify the type of estimator. See \code{\link[stats]{quantile}} for more details.} \item{...}{Additional arguments passed to \code{\link[stats]{quantile}}.} \item{drop}{If TRUE, singleton dimensions in the result are dropped, otherwise not.} } \value{ Returns a \code{\link[base]{numeric}} NxJ (KxJ) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the J quantiles are calculated. } \examples{ set.seed(1) x <- matrix(rnorm(50*40), nrow=50, ncol=40) str(x) probs <- c(0.25,0.5,0.75) # Row quantiles q <- rowQuantiles(x, probs=probs) print(q) q0 <- apply(x, MARGIN=1, FUN=quantile, probs=probs) stopifnot(all.equal(q0, t(q))) # Column IQRs q <- colQuantiles(x, probs=probs) print(q) q0 <- apply(x, MARGIN=2, FUN=quantile, probs=probs) stopifnot(all.equal(q0, t(q))) } \author{Henrik Bengtsson} \seealso{ \code{\link[stats]{quantile}}. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/allocMatrix.Rd0000644000175100001440000000244012542546241016141 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % allocMatrix.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{allocMatrix} \alias{allocMatrix} \alias{allocVector} \alias{allocArray} \title{Allocates an empty vector, matrix or array} \usage{ allocVector(length, value=0, ...) allocMatrix(nrow, ncol, value=0, ...) allocArray(dim, value=0, ...) } \description{ Allocates an empty vector, matrix or array faster than the corresponding function in R. } \arguments{ \item{length, nrow, ncol, dim}{\code{\link[base]{numeric}}s specifying the dimension of the created \code{\link[base]{vector}}, \code{\link[base]{matrix}} or \code{\link[base]{array}}.} \item{value}{A \code{\link[base]{numeric}} scalar that all elements will have as value.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}} respectively of the same data type as \code{value}. } \author{Henrik Bengtsson} \seealso{ See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}}. } \keyword{programming} \keyword{internal} matrixStats/man/rowTabulates.Rd0000644000175100001440000000245712542546241016346 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowTabulates.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowTabulates} \alias{rowTabulates} \alias{colTabulates} \title{Tabulates the values in a matrix by row (column)} \description{ Tabulates the values in a matrix by row (column). } \usage{ rowTabulates(x, values=NULL, ...) colTabulates(x, values=NULL, ...) } \arguments{ \item{x}{An \code{\link[base]{integer}} or \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}.} \item{values}{An \code{\link[base]{vector}} of J values of count. If \code{\link[base]{NULL}}, all (unique) values are counted.} \item{...}{Not used.} } \value{ Returns a NxJ (KxJ) \code{\link[base]{matrix}} where N (K) is the number of row (column) \code{\link[base]{vector}}s tabulated and J is the number of values counted. } \examples{ x <- matrix(1:5, nrow=10, ncol=5) print(x) print(rowTabulates(x)) print(colTabulates(x)) # Count only certain values print(rowTabulates(x, values=1:3)) y <- as.raw(x) dim(y) <- dim(x) print(y) print(rowTabulates(y)) print(colTabulates(y)) } \author{Henrik Bengtsson} \keyword{utilities} matrixStats/man/rowVars.Rd0000644000175100001440000000453712542546241015336 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowVars.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowVars} \alias{rowVars} \alias{rowVars} \alias{colVars} \alias{rowVars,matrix-method} \alias{colVars,matrix-method} \title{Variance estimates for each row (column) in a matrix} \description{ Variance estimates for each row (column) in a matrix. } \usage{ rowVars(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) colVars(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{center}{(optional) The center, defaults to the row means.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Additional arguments passed to \code{rowMeans()} and \code{rowSums()}.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \examples{ set.seed(1) x <- matrix(rnorm(20), nrow=5, ncol=4) print(x) # Row averages print(rowMeans(x)) print(rowMedians(x)) # Column averages print(colMeans(x)) print(colMedians(x)) # Row variabilities print(rowVars(x)) print(rowSds(x)) print(rowMads(x)) print(rowIQRs(x)) # Column variabilities print(rowVars(x)) print(colSds(x)) print(colMads(x)) print(colIQRs(x)) # Row ranges print(rowRanges(x)) print(cbind(rowMins(x), rowMaxs(x))) print(cbind(rowOrderStats(x, 1), rowOrderStats(x, ncol(x)))) # Column ranges print(colRanges(x)) print(cbind(colMins(x), colMaxs(x))) print(cbind(colOrderStats(x, 1), colOrderStats(x, nrow(x)))) x <- matrix(rnorm(2400), nrow=50, ncol=40) # Row standard deviations d <- rowDiffs(x) s1 <- rowSds(d)/sqrt(2) s2 <- rowSds(x) print(summary(s1-s2)) # Column standard deviations d <- colDiffs(x) s1 <- colSds(d)/sqrt(2) s2 <- colSds(x) print(summary(s1-s2)) } \author{Henrik Bengtsson} \seealso{ See \code{rowMeans()} and \code{rowSums()} in \code{\link[base]{colSums}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowWeightedMeans.Rd0000644000175100001440000000434612542546241017145 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowWeightedMeans.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowWeightedMeans} \alias{rowWeightedMeans} \alias{colWeightedMeans} \title{Calculates the weighted means for each row (column) in a matrix} \description{ Calculates the weighted means for each row (column) in a matrix. } \usage{ rowWeightedMeans(x, w=NULL, na.rm=FALSE, ...) colWeightedMeans(x, w=NULL, na.rm=FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded from the calculation, otherwise not.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \details{ The implementations of these methods are optimized for both speed and memory. If no weights are given, the corresponding \code{rowMeans()}/\code{colMeans()} is used. } \examples{ x <- matrix(rnorm(20), nrow=5, ncol=4) print(x) # Non-weighted row averages xM0 <- rowMeans(x) xM <- rowWeightedMeans(x) stopifnot(all.equal(xM, xM0)) # Weighted row averages (uniform weights) w <- rep(2.5, ncol(x)) xM <- rowWeightedMeans(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted row averages (excluding some columns) w <- c(1,1,0,1) xM0 <- rowMeans(x[,(w == 1),drop=FALSE]); xM <- rowWeightedMeans(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted row averages (excluding some columns) w <- c(0,1,0,0) xM0 <- rowMeans(x[,(w == 1),drop=FALSE]); xM <- rowWeightedMeans(x, w=w) stopifnot(all.equal(xM, xM0)) # Weighted averages by rows and columns w <- 1:4 xM1 <- rowWeightedMeans(x, w=w) xM2 <- colWeightedMeans(t(x), w=w) stopifnot(all.equal(xM2, xM1)) } \author{Henrik Bengtsson} \seealso{ See \code{rowMeans()} and \code{colMeans()} in \code{\link[base]{colSums}}() for non-weighted means. See also \code{\link[stats]{weighted.mean}}. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/signTabulate.Rd0000644000175100001440000000176512542546241016315 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % signTabulate.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{signTabulate} \alias{signTabulate} \alias{signTabulate} \title{Calculates the number of negative, zero, positive and missing values} \usage{ signTabulate(x, ...) } \description{ Calculates the number of negative, zero, positive and missing values in a \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, the number of negative and positive infinite values are also counted. } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} \code{\link[base]{vector}}. } \seealso{ \code{\link[base]{sign}}(). } \author{Henrik Bengtsson} \keyword{internal} matrixStats/man/rowSds.Rd0000644000175100001440000000362112542546241015145 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowSds.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowSds} \alias{rowSds} \alias{rowSds} \alias{colSds} \alias{rowMads} \alias{colMads} \alias{rowSds,matrix-method} \alias{colSds,matrix-method} \title{Standard deviation estimates for each row (column) in a matrix} \description{ Standard deviation estimates for each row (column) in a matrix. } \usage{ rowSds(x, ...) colSds(x, ...) rowMads(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) colMads(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{center}{A optional \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K) with centers. By default, they are calculated using \code{\link{rowMedians}}().} \item{constant}{A scale factor. See \code{\link[stats]{mad}} for details.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are removed first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Additional arguments passed to \code{\link{rowVars}}() and \code{\link{rowMedians}}(), respectively.} \item{centers}{(deprectated) use \code{center} instead.} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \author{Henrik Bengtsson} \seealso{ \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats:cor]{var}}. \code{\link{rowIQRs}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowAvgsPerColSet.Rd0000644000175100001440000000747512542546241017110 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowAvgsPerColSet.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowAvgsPerColSet} \alias{rowAvgsPerColSet} \alias{colAvgsPerRowSet} \title{Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)} \description{ Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows). Each subset is averaged independently of the others. } \usage{ rowAvgsPerColSet(X, W=NULL, S, FUN=rowMeans, ..., tFUN=FALSE) } \arguments{ \item{X}{A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}.} \item{W}{An optional \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}} of weights.} \item{S}{An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} specifying the J subsets. Each column holds K column (row) indices for the corresponding subset.} \item{FUN}{The row-by-row (column-by-column) \code{\link[base]{function}} used to average over each subset of \code{X}. This function must accept a \code{\link[base]{numeric}} NxK (KxM) \code{\link[base]{matrix}} and the \code{\link[base]{logical}} argument \code{na.rm} (which is automatically set), and return a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M).} \item{...}{Additional arguments passed to then \code{FUN} \code{\link[base]{function}}.} \item{tFUN}{If \code{\link[base:logical]{TRUE}}, the NxK (KxM) \code{\link[base]{matrix}} passed to \code{FUN()} is transposed first.} } \value{ Returns a \code{\link[base]{numeric}} JxN (MxJ) \code{\link[base]{matrix}}, where row names equal \code{rownames(X)} (\code{colnames(S)}) and column names \code{colnames(S)} (\code{colnames(X)}). } \details{ If argument \code{S} is a single column vector with indices \code{1:N}, then \code{rowAvgsPerColSet(X, S=S, FUN=rowMeans)} gives the same result as \code{rowMeans(X)}. Analogously, for \code{rowAvgsPerColSet()}. } \examples{ X <- matrix(rnorm(20*6), nrow=20, ncol=6) rownames(X) <- LETTERS[1:nrow(X)] colnames(X) <- letters[1:ncol(X)] print(X) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply rowMeans() for 3 sets of 2 columns # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 3 S <- matrix(1:ncol(X), ncol=nbrOfSets) colnames(S) <- sprintf("s\%d", 1:nbrOfSets) print(S) Z <- rowAvgsPerColSet(X, S=S) print(Z) # Validation Z0 <- cbind(s1=rowMeans(X[,1:2]), s2=rowMeans(X[,3:4]), s3=rowMeans(X[,5:6])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # Apply colMeans() for 5 sets of 4 rows # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 5 S <- matrix(1:nrow(X), ncol=nbrOfSets) colnames(S) <- sprintf("s\%d", 1:nbrOfSets) print(S) Z <- colAvgsPerRowSet(X, S=S) print(Z) # Validation Z0 <- rbind(s1=colMeans(X[1:4,]), s2=colMeans(X[5:8,]), s3=colMeans(X[9:12,]), s4=colMeans(X[13:16,]), s5=colMeans(X[17:20,])) stopifnot(identical(drop(Z), Z0)) # - - - - - - - - - - - - - - - - - - - - - - - - - - # When there is only one "complete" set # - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSets <- 1 S <- matrix(1:ncol(X), ncol=nbrOfSets) colnames(S) <- sprintf("s\%d", 1:nbrOfSets) print(S) Z <- rowAvgsPerColSet(X, S=S, FUN=rowMeans) print(Z) Z0 <- rowMeans(X) stopifnot(identical(drop(Z), Z0)) nbrOfSets <- 1 S <- matrix(1:nrow(X), ncol=nbrOfSets) colnames(S) <- sprintf("s\%d", 1:nbrOfSets) print(S) Z <- colAvgsPerRowSet(X, S=S, FUN=colMeans) print(Z) Z0 <- colMeans(X) stopifnot(identical(drop(Z), Z0)) } \author{Henrik Bengtsson} \keyword{internal} \keyword{utilities} matrixStats/man/rowCumsums.Rd0000644000175100001440000000354412542546241016054 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowCumsums.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowCumsums} \alias{rowCumsums} \alias{rowCumsums} \alias{colCumsums} \alias{rowCumprods} \alias{colCumprods} \alias{rowCummins} \alias{colCummins} \alias{rowCummaxs} \alias{colCummaxs} \title{Cumulative sums, products, minima and maxima for each row (column) in a matrix} \description{ Cumulative sums, products, minima and maxima for each row (column) in a matrix. } \usage{ rowCumsums(x, dim.=dim(x), ...) colCumsums(x, dim.=dim(x), ...) rowCumprods(x, dim.=dim(x), ...) colCumprods(x, dim.=dim(x), ...) rowCummins(x, dim.=dim(x), ...) colCummins(x, dim.=dim(x), ...) rowCummaxs(x, dim.=dim(x), ...) colCummaxs(x, dim.=dim(x), ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} of the same mode as \code{x}. } \examples{ x <- matrix(1:12, nrow=4, ncol=3) print(x) yr <- rowCumsums(x) print(yr) yc <- colCumsums(x) print(yc) yr <- rowCumprods(x) print(yr) yc <- colCumprods(x) print(yc) yr <- rowCummaxs(x) print(yr) yc <- colCummaxs(x) print(yc) yr <- rowCummins(x) print(yr) yc <- colCummins(x) print(yc) } \author{Henrik Bengtsson} \seealso{ See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(), \code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}(). } \keyword{array} \keyword{iteration} \keyword{univar} matrixStats/man/rowRanges.Rd0000644000175100001440000000336112542546241015634 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowRanges.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowRanges} \alias{rowRanges} \alias{colRanges} \alias{rowMins} \alias{rowMaxs} \alias{colMins} \alias{colMaxs} \title{Gets the range of values in each row (column) of a matrix} \description{ Gets the range of values in each row (column) of a matrix. } \usage{ rowRanges(x, na.rm=FALSE, dim.=dim(x), ...) colRanges(x, na.rm=FALSE, dim.=dim(x), ...) rowMins(x, na.rm=FALSE, dim.=dim(x), ...) colMins(x, na.rm=FALSE, dim.=dim(x), ...) rowMaxs(x, na.rm=FALSE, dim.=dim(x), ...) colMaxs(x, na.rm=FALSE, dim.=dim(x), ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ \code{rowRanges()} (\code{colRanges()}) returns a \code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) for which the ranges are calculated. \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \author{Henrik Bengtsson} \seealso{ \code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/indexByRow.Rd0000644000175100001440000000225712542546241015762 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % indexByRow.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{indexByRow} \alias{indexByRow} \title{Translates matrix indices by rows into indices by columns} \description{ Translates matrix indices by rows into indices by columns. } \usage{ indexByRow(dim, idxs=NULL, ...) } \arguments{ \item{dim}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two specifying the length of the "template" matrix.} \item{idxs}{A \code{\link[base]{vector}} of indices. If \code{\link[base]{NULL}}, all indices are returned.} \item{...}{Not use.} } \value{ Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of indices. } \examples{ dim <- c(5, 4) X <- matrix(NA_integer_, nrow=dim[1], ncol=dim[2]) Y <- t(X) idxs <- seq(along=X) # Assign by columns X[idxs] <- idxs print(X) # Assign by rows Y[indexByRow(dim(Y), idxs)] <- idxs print(Y) stopifnot(X == t(Y)) } \author{Henrik Bengtsson} \keyword{iteration} \keyword{logic} matrixStats/man/rowCounts.Rd0000644000175100001440000000500012542546241015660 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowCounts.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowCounts} \alias{rowCounts} \alias{colCounts} \alias{count} \alias{allValue} \alias{anyValue} \alias{rowAnys} \alias{colAnys} \alias{rowAlls} \alias{colAlls} \title{Counts the number of TRUE values in each row (column) of a matrix} \description{ Counts the number of TRUE values in each row (column) of a matrix. } \usage{ count(x, value=TRUE, na.rm=FALSE, ...) rowCounts(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) colCounts(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) rowAlls(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) colAlls(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) rowAnys(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) colAnys(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) } \arguments{ \item{x}{An NxK \code{\link[base]{matrix}} or an N*K \code{\link[base]{vector}}.} \item{value}{A value to search for.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ \code{rowCounts()} (\code{colCounts()}) returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length N (K). The other methods returns a \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K). } \details{ These functions takes either a @matrix or a @vector as input. If a @vector, then argument \code{dim} must be specified and fulfill \code{prod(dim) == length(x)}. The result will be identical to the results obtained when passing \code{matrix(x, nrow=dim[1L], ncol=dim[2L])}, but avoids having to temporarily create/allocate a @matrix, if only such is needed only for these calculations. } \examples{ x <- matrix(FALSE, nrow=10, ncol=5) x[3:7,c(2,4)] <- TRUE x[2:4,] <- TRUE x[,1] <- TRUE x[5,] <- FALSE x[,5] <- FALSE print(x) print(rowCounts(x)) # 1 4 4 4 0 3 3 1 1 1 print(colCounts(x)) # 9 5 3 5 0 print(rowAnys(x)) print(which(rowAnys(x))) # 1 2 3 4 6 7 8 9 10 print(colAnys(x)) print(which(colAnys(x))) # 1 2 3 4 } \author{Henrik Bengtsson} \keyword{array} \keyword{logic} \keyword{iteration} \keyword{univar} matrixStats/man/matrixStats-package.Rd0000644000175100001440000000250212542546241017575 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % 999.package.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{matrixStats-package} \alias{matrixStats-package} \alias{matrixStats} \docType{package} \title{Package matrixStats} \description{ Methods operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). There are also some vector-based methods, e.g. binMeans(), madDiff() and weightedMedians(). All methods have been optimized for speed and memory usage. } \section{Installation}{ To install this package, please do: \preformatted{ install.packages("matrixStats") } } \section{Vignettes}{ For an overview of the package, see the '\href{../doc/index.html}{vignettes}'; \enumerate{ \item Summary of functions. } } \section{How to cite this package}{ Henrik Bengtsson (2015). matrixStats: Methods that Apply to Rows and Columns of Matrices (and to Vectors). R package version 0.14.2. https://github.com/HenrikBengtsson/matrixStats } \author{Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder} \keyword{package} matrixStats/man/sumOver.Rd0000644000175100001440000000574012542546241015330 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % sumOver.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{sumOver} \alias{sumOver} \title{Fast sum over subset of vector elements} \usage{ sumOver(x, idxs=NULL, na.rm=FALSE, mode=typeof(x), ...) } \description{ Computes the sum of all or a subset of values. } \arguments{ \item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.} \item{idxs}{A \code{\link[base]{numeric}} index \code{\link[base]{vector}} in [1,N] of elements to sum over. If \code{\link[base]{NULL}}, all elements are considered.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are skipped, otherwise not.} \item{mode}{A \code{\link[base]{character}} string specifying the data type of the return value. Default is to use the same mode as argument \code{x}.} \item{...}{Not used.} } \value{ Returns a scalar of the data type specified by argument \code{mode}. If \code{mode="integer"}, then integer overflow occurs if the \emph{sum} is outside the range of defined integer values. } \details{ \code{sumOver(x, idxs)} gives equivalent results as \code{sum(x[idxs])}, but is faster and more memory efficient since it avoids the actual subsetting which requires copying of elements and garbage collection thereof. Furthermore, \code{sumOver(x, mode="double")} is equivalent to \code{sum(as.numeric(x))}, but is much more memory efficient when \code{x} is an \code{\link[base]{integer}} vector. } \examples{ x <- 1:10 n <- length(x) idxs <- seq(from=1, to=n, by=2) s1 <- sum(x[idxs]) # 25 s2 <- sumOver(x, idxs=idxs) # 25 stopifnot(identical(s1, s2)) idxs <- seq(from=n, to=1, by=-2) s1 <- sum(x[idxs]) # 25 s2 <- sumOver(x, idxs=idxs) # 25 stopifnot(identical(s1, s2)) s1 <- sum(x) # 55 s2 <- sumOver(x) # 55 stopifnot(identical(s1, s2)) # Total gives integer overflow x <- c(.Machine$integer.max, 1L, -.Machine$integer.max) s1 <- sum(x[1:2]) # NA_integer_ s2 <- sumOver(x[1:2]) # NA_integer_ stopifnot(identical(s1, s2)) # Total gives integer overflow (coerce to numeric) s1 <- sum(as.numeric(x[1:2])) # 2147483648 s2 <- sumOver(as.numeric(x[1:2])) # 2147483648 s3 <- sumOver(x[1:2], mode="double") # 2147483648 stopifnot(identical(s1, s2)) stopifnot(identical(s1, s3)) # Cumulative sum would give integer overflow but not the total s1 <- sum(x) # 1L s2 <- sumOver(x) # 1L stopifnot(identical(s1, s2)) } \seealso{ \code{\link[base]{sum}}(). To efficiently average over a subset, see \code{\link{meanOver}}(). } \author{Henrik Bengtsson} \keyword{univar} \keyword{internal} matrixStats/man/weightedMad.Rd0000644000175100001440000000502612542546241016107 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % weightedMad.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{weightedMad} \alias{weightedMad} \alias{rowWeightedMads} \alias{colWeightedMads} \title{Weighted Median Absolute Deviation (MAD)} \usage{ weightedMad(x, w, na.rm=FALSE, constant=1.4826, center=NULL, ...) colWeightedMads(x, w=NULL, na.rm=FALSE, ...) rowWeightedMads(x, w=NULL, na.rm=FALSE, ...) } \description{ Computes a weighted MAD of a numeric vector. } \arguments{ \item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted MAD is to be computed.} \item{w}{a vector of weights the same length as \code{x} giving the weights to use for each element of \code{x}. Negative weights are treated as zero weights. Default value is equal weight to all values.} \item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in \code{x} should be stripped before the computation proceeds, or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done. Default value is \code{\link[base]{NA}} (for efficiency).} \item{constant}{A \code{\link[base]{numeric}} scale factor, cf. \code{\link[stats]{mad}}.} \item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center location of the data. If \code{\link[base]{NULL}}, it is estimated from data.} \item{...}{Not used.} } \value{ Returns a \code{\link[base]{numeric}} scalar. } \section{Missing values}{ Missing values are dropped at the very beginning, if argument \code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not. } \examples{ x <- 1:10 n <- length(x) m1 <- mad(x) m2 <- weightedMad(x) stopifnot(identical(m1, m2)) w <- rep(1, times=n) m1 <- weightedMad(x, w) stopifnot(identical(m1,m2)) # All weight on the first value w[1] <- Inf m <- weightedMad(x, w) stopifnot(m == 0) # All weight on the first two values w[1:2] <- Inf m1 <- mad(x[1:2]) m2 <- weightedMad(x, w) stopifnot(identical(m1,m2)) # All weights set to zero w <- rep(0, times=n) m <- weightedMad(x, w) stopifnot(is.na(m)) } \seealso{ For the non-weighted MAD, see \code{\link[stats]{mad}}. Internally \code{\link{weightedMedian}}() is used to calculate the weighted median. } \author{Henrik Bengtsson} \keyword{univar} \keyword{robust} matrixStats/man/rowRanks.Rd0000644000175100001440000000714012542546241015472 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowRanks.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowRanks} \alias{rowRanks} \alias{colRanks} \title{Gets the rank of each row (column) of a matrix} \description{ Gets the rank of each row (column) of a matrix. } \usage{ rowRanks(x, ties.method=c("max", "average", "min"), dim.=dim(x), ...) colRanks(x, ties.method=c("max", "average", "min"), dim.=dim(x), preserveShape=FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} or \code{\link[base]{integer}} NxK \code{\link[base]{matrix}}.} \item{ties.method}{A \code{\link[base]{character}} string specifying how ties are treated. For details, see below.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{preserveShape}{A \code{\link[base]{logical}} specifying whether the \code{\link[base]{matrix}} returned should preserve the input shape of \code{x}, or not.} \item{...}{Not used.} } \value{ An \code{\link[base]{integer}} \code{\link[base]{matrix}} is returned. The \code{rowRanks()} function always returns an NxK \code{\link[base]{matrix}}, where N (K) is the number of rows (columns) whose ranks are calculated. The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}}, if \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}. %% The mode of the returned matrix is \code{\link[base]{integer}}, except for %% \code{ties.method == "average"} when it is \code{\link[base]{double}}. } \details{ The row ranks of \code{x} are collected as \emph{rows} of the result matrix. The column ranks of \code{x} are collected as \emph{rows} if \code{preserveShape = FALSE}, otherwise as \emph{columns}. The implementation is optimized for both speed and memory. To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there is a unique implementation for \code{\link[base]{integer}} matrices. It is more memory efficient to do \code{colRanks(x, preserveShape=TRUE)} than \code{t(colRanks(x, preserveShape=FALSE))}. Any \code{\link[base]{names}} of \code{x} are ignored and absent in the result. } \section{Missing and non- values}{ These are ranked as \code{NA}, as with \code{na.last="keep"} in the \code{\link[base]{rank}}() function. } \section{Ties}{ When some values are equal ("ties"), argument \code{ties.method} specifies what their ranks should be. If \code{ties.method} is \code{"max"}, ties are ranked as the maximum value. If \code{ties.method} is \code{"average"}, ties are ranked by their average. If \code{ties.method} is \code{"max"} (\code{"min"}), ties are ranked as the maximum (minimum) value. If \code{ties.method} is \code{"average"}, ties are ranked by their average. For further details, see \code{\link[base]{rank}}(). } \author{ Hector Corrada Bravo and Harris Jaffee. Peter Langfelder for adding 'ties.method' support. Henrik Bengtsson adapted the original native implementation of \code{rowRanks()} from Robert Gentleman's \code{rowQ()} in the \pkg{Biobase} package. } \seealso{ \code{\link[base]{rank}}(). For developers, see also Section 'Utility functions' in 'Writing R Extensions manual', particularly the native functions \code{R_qsort_I()} and \code{R_qsort_int_I()}. } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowIQRs.Rd0000644000175100001440000000325312542546241015233 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowIQRs.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowIQRs} \alias{rowIQRs} \alias{colIQRs} \alias{iqr} \title{Estimates of the interquartile range for each row (column) in a matrix} \description{ Estimates of the interquartile range for each row (column) in a matrix. } \usage{ rowIQRs(x, na.rm=FALSE, ...) colIQRs(x, na.rm=FALSE, ...) iqr(x, na.rm=FALSE, ...) } \arguments{ \item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are dropped first, otherwise not.} \item{...}{Additional arguments passed to \code{\link{rowQuantiles}}() (\code{colQuantiles()}).} } \value{ Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \section{Missing values}{ Contrary to \code{\link[stats]{IQR}}, which gives an error if there are missing values and \code{na.rm=FALSE}, \code{iqr()} and its corresponding row and column-specific functions return \code{\link[base]{NA}}_real_. } \examples{ set.seed(1) x <- matrix(rnorm(50*40), nrow=50, ncol=40) str(x) # Row IQRs q <- rowIQRs(x) print(q) q0 <- apply(x, MARGIN=1, FUN=IQR) stopifnot(all.equal(q0, q)) # Column IQRs q <- colIQRs(x) print(q) q0 <- apply(x, MARGIN=2, FUN=IQR) stopifnot(all.equal(q0, q)) } \author{Henrik Bengtsson} \seealso{ See \code{\link[stats]{IQR}}. See \code{\link{rowSds}}(). } \keyword{array} \keyword{iteration} \keyword{robust} \keyword{univar} matrixStats/man/rowLogSumExps.Rd0000644000175100001440000000306012542546241016457 0ustar hornikusers%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Do not modify this file since it was automatically generated from: % % rowLogSumExps.R % % by the Rdoc compiler part of the R.oo package. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \name{rowLogSumExps} \alias{rowLogSumExps} \alias{colLogSumExps} \alias{rowLogSumExps,matrix-method} \alias{colLogSumExps,matrix-method} \title{Accurately computes the logarithm of the sum of exponentials across rows or columns} \description{ Accurately computes the logarithm of the sum of exponentials across rows or columns. } \usage{ rowLogSumExps(lx, na.rm=FALSE, dim.=dim(lx), ...) colLogSumExps(lx, na.rm=FALSE, dim.=dim(lx), ...) } \arguments{ \item{lx}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}. Typically \code{lx} are \eqn{log(x)} values.} \item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.} \item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the dimension of \code{x}, also when not a \code{\link[base]{matrix}}.} \item{...}{Not used.} } \value{ A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K). } \section{Benchmarking}{ These methods are implemented in native code and have been optimized for speed and memory. } \author{ Native implementation by Henrik Bengtsson. Original R code by Nakayama ??? (Japan). } \seealso{ To calculate the same on vectors, \code{\link{logSumExp}}(). } \keyword{array}