matrixStats/ 0000755 0001751 0000144 00000000000 12542554541 012622 5 ustar hornik users matrixStats/inst/ 0000755 0001751 0000144 00000000000 12542546311 013573 5 ustar hornik users matrixStats/inst/benchmarking/ 0000755 0001751 0000144 00000000000 12542546241 016225 5 ustar hornik users matrixStats/inst/benchmarking/colRowAlls.md.rsp 0000644 0001751 0000144 00000002562 12542546241 021440 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colAlls"%>
<%@string rowname="rowAlls"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-18"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + all()
* colSums() == n or rowSums() == n
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="logical")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colAlls = colAlls(X),
"apply+all" = apply(X, MARGIN=2L, FUN=all),
"colSums==n" = (colSums(X) == nrow(X)),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowAlls = rowAlls(X),
"apply+all" = apply(X, MARGIN=1L, FUN=all),
"rowSums==n" = (rowSums(X) == ncol(X)),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-18
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/binMeans.md.rsp 0000644 0001751 0000144 00000003721 12542546241 021111 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="binMeans"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-04"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* binMeans_R()
which is defined as
```r
<%=withCapture({
binMeans_R <- function(y, x, bx, na.rm=FALSE, count=TRUE, right=FALSE) {
B <- length(bx)-1L
res <- double(B)
counts <- integer(B)
# For each bin...
for (kk in seq(length=B)) {
if (right) {
idxs <- which(bx[kk] < x & x <= bx[kk+1L])
} else {
idxs <- which(bx[kk] <= x & x < bx[kk+1L])
}
yKK <- y[idxs]
muKK <- mean(yKK)
res[kk] <- muKK
counts[kk] <- length(idxs)
} # for (kk ...)
if (count) attr(res, "count") <- counts
res
} # binMeans_R()
})%>
```
## Results
### Non-sorted simulated data
```r
<%=withCapture({
nx <- 10e3 # Number of data points
set.seed(0xBEEF)
x <- runif(nx, min=0, max=1)
y <- runif(nx, min=0, max=1)
# Uniformely distributed bins
nb <- 1e3 # Number of bins
bx <- seq(from=0, to=1, length.out=nb+1L)
bx <- c(-1, bx, 2)
})%>
```
<% benchmark <- function() { %>
<% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %>
<% message(dataLabel) %>
```r
<%=withCapture({
gc()
stats <- microbenchmark(
binMeans = binMeans(x=x, y=y, bx=bx, count=TRUE),
binMeans_R = binMeans_R(x=x, y=y, bx=bx, count=TRUE),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=dataLabel) %>
<% } # benchmark() %>
<% benchmark() %>
### Sorted simulated data
```r
<%=withCapture({
x <- sort(x)
})%>
```
<% benchmark() %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-02
o Restructured.
2014-05-25
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/x_OP_y.md.rsp 0000644 0001751 0000144 00000004013 12542546241 020545 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="x_OP_y"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-26"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* x_OP_y_R()
as below
```r
<%=withCapture({
x_OP_y_R <- function(x, y, OP, na.rm=FALSE) {
if (na.rm) {
xnok <- is.na(x)
ynok <- is.na(y)
anok <- xnok & ynok
unit <- switch(OP,
"+" = 0,
"-" = NA_real_,
"*" = 1,
"/" = NA_real_,
stop("Unknown 'OP' operator: ", OP)
)
x[xnok] <- unit
y[ynok] <- unit
}
ans <- switch(OP,
"+" = x + y,
"-" = x - y,
"*" = x * y,
"/" = x / y,
stop("Unknown 'OP' operator: ", OP)
)
if (na.rm) {
ans[anok] <- NA_real_
}
ans
} # x_OP_y_R()
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
y <- x[,1L]
})%>
```
<% for (OP in c("+", "-", "*", "/")) { %>
<%
OPTag <- c("+"="add", "-"="sub", "*"="mul", "/"="div")[OP]
gc()
%>
```r
<%=withCapture({
OP
stats <- microbenchmark(
"x_OP_y" = x_OP_y(x, y, OP=OP, na.rm=FALSE),
"x_OP_y_R" = x_OP_y_R(x, y, OP=OP, na.rm=FALSE),
unit = "ms"
)
gc()
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %>
<% } # for (OP ...) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-26
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/varDiff.md.rsp 0000644 0001751 0000144 00000002434 12542546241 020736 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="varDiff"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-10"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* N/A
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
data <- data[1:4]
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
stats <- microbenchmark(
"varDiff" = varDiff(x),
"var" = var(x),
"diff" = diff(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-10
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/meanOver.md.rsp 0000644 0001751 0000144 00000004375 12542546241 021137 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="meanOver"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-02"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* mean() + [()
* mean.default() + [() - avoids method dispatching
as below
```r
<%=withCapture({
meanOver_R_v1 <- function(x, na.rm=FALSE, idxs) {
mean(x[idxs], na.rm=na.rm)
}
})%>
```
and
```r
<%=withCapture({
meanOver_R_v2 <- function(x, na.rm=FALSE, idxs) {
mean.default(x[idxs], na.rm=na.rm)
}
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
##data <- data[1:3]
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
"meanOver" = meanOver(x, refine=TRUE),
"meanOver_no_refine" = meanOver(x, refine=FALSE),
"mean" = mean(x),
"mean.default" = mean.default(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(dataLabel, "all")) %>
<% for (subset in c(0.2, 0.4, 0.8)) { %>
#### A <%=sprintf("%g", 100*subset)%>% subset
```r
<%=withCapture({
x <- data[[.dataLabel.]]
subset
idxs <- sort(sample(length(x), size=subset*length(x), replace=FALSE))
gc()
stats <- microbenchmark(
"meanOver" = meanOver(x, idxs=idxs, refine=TRUE),
"meanOver_no_refine" = meanOver(x, idxs=idxs, refine=FALSE),
"mean+[()" = meanOver_R_v1(x, idxs=idxs),
"mean.default+[()" = meanOver_R_v2(x, idxs=idxs),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %>
<% } # for (subset in ...) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-02
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowMedians.md.rsp 0000644 0001751 0000144 00000002725 12542546241 022126 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colMedians"%>
<%@string rowname="rowMedians"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + median()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colMedians = colMedians(X, na.rm=FALSE),
"apply+median" = apply(X, MARGIN=2L, FUN=median, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowMedians = rowMedians(X, na.rm=FALSE),
"apply+median" = apply(X, MARGIN=1L, FUN=median, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowCummins.md.rsp 0000644 0001751 0000144 00000002641 12542546241 022156 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colCummins"%>
<%@string rowname="rowCummins"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-26"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + cummin()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colCummins = colCummins(X),
"apply+cummin" = apply(X, MARGIN=2L, FUN=cummin),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowCummins = rowCummins(X),
"apply+cummin" = apply(X, MARGIN=1L, FUN=cummin),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-26
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowCumsums.md.rsp 0000644 0001751 0000144 00000002641 12542546241 022177 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colCumsums"%>
<%@string rowname="rowCumsums"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-26"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + cumsum()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colCumsums = colCumsums(X),
"apply+cumsum" = apply(X, MARGIN=2L, FUN=cumsum),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowCumsums = rowCumsums(X),
"apply+cumsum" = apply(X, MARGIN=1L, FUN=cumsum),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-26
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowAnyMissings.md.rsp 0000644 0001751 0000144 00000003477 12542546241 023017 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colAnyMissings"%>
<%@string rowname="rowAnyMissings"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* colAnyMissings() and rowAnyMissings()
* apply() + anyMissing()
* colSums() + is.na() and rowSums() + is.na()
where
```r
<%=withCapture({
colAnyMissings <- function(x, ...) colAnys(x, value=NA)
})%>
```
and
```r
<%=withCapture({
rowAnyMissings <- function(x, ...) rowAnys(x, value=NA)
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colAnyMissings = colAnyMissings(X),
"apply+anyMissing" = apply(X, MARGIN=2L, FUN=anyMissing),
colSums = is.na(colSums(X, na.rm=FALSE)),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowAnyMissings = rowAnyMissings(X),
"apply+anyMissing" = apply(X, MARGIN=1L, FUN=anyMissing),
rowSums = is.na(rowSums(X, na.rm=FALSE)),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowMins.md.rsp 0000644 0001751 0000144 00000003551 12542546241 021452 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colMins"%>
<%@string rowname="rowMins"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + min()
* lapply() + pmin()
* lapply() + pmin.int()
See also [StackOverflow:colMins?].
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colMins = colMins(X, na.rm=FALSE),
"apply+min" = apply(X, MARGIN=2L, FUN=min, na.rm=FALSE),
"lapply+pmin" = do.call(pmin, lapply(seq_len(nrow(X)), function(i) X[i,])),
"lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(nrow(X)), function(i) X[i,])),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowMins = rowMins(X, na.rm=FALSE),
"apply+min" = apply(X, MARGIN=1L, FUN=min, na.rm=FALSE),
"lapply+pmin" = do.call(pmin, lapply(seq_len(ncol(X)), function(i) X[,i])),
"lapply+pmin.int" = do.call(pmin.int, lapply(seq_len(ncol(X)), function(i) X[,i])),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/includes/ 0000755 0001751 0000144 00000000000 12542546241 020033 5 ustar hornik users matrixStats/inst/benchmarking/includes/footer.md.rsp 0000644 0001751 0000144 00000001363 12542546241 022461 0 ustar hornik users <%---------------------------------------------------------------
Page footer
---------------------------------------------------------------%>
<%@string appendix="${appendix}" default="false"%>
<%@ifeq appendix="true"%>
<%@include file="appendix.md.rsp"%>
<%@endif%>
<%@include file="${references}"%>
---------------------------------------
Copyright <%@meta name="author"%>. Last updated on <%=format(Sys.time(), format="%Y-%m-%d %H:%M:%S (%z UTC)")%>. Powered by [RSP].
<%---------------------------------------------------------------
Dynamically insert an HTML favicon
---------------------------------------------------------------%>
<%=toFavicon({
plot(1, col="blue", bg="yellow", pch=21, cex=4, lwd=4, axes=FALSE)
}, force=FALSE)%>
matrixStats/inst/benchmarking/includes/header.md.rsp 0000644 0001751 0000144 00000000112 12542546241 022402 0 ustar hornik users [matrixStats]: Benchmark report
---------------------------------------
matrixStats/inst/benchmarking/includes/results.md.rsp 0000644 0001751 0000144 00000011634 12542546241 022666 0 ustar hornik users <%--------------------------------------------------------------
BENCHMARK RESULTS
--------------------------------------------------------------%>
<%--------------------------------------------------------------
Local functions
--------------------------------------------------------------%>
<% toImage <- function(stats, name=levels(stats$expr)[1L], tags=NULL, ylim="auto", col=NULL, alpha=NULL, ...) { %>
, aspectRatio=2/3, {
if (identical(ylim, "auto")) {
y <- stats$time/1e6
ymax <- max(y, na.rm=TRUE)
y75 <- quantile(y, probs=0.75, na.rm=TRUE)
yupper <- min(c(1.5*y75, ymax), na.rm=TRUE)
ylim <- c(0, yupper)
}
if (!is.null(ylim)) {
stats$outlier <- (stats$time > ylim[2]*1e6)
stats$time[stats$outlier] <- ylim[2]*1e6
}
gg <- ggplot(data=stats, aes(x=seq_along(time)/length(levels(expr)), y=time/1e6))
gg <- gg + geom_point(aes(colour=expr, shape=outlier))
gg <- gg + scale_shape_manual(values=c(16,4), guide="none")
if (!is.null(col)) gg <- gg + scale_colour_manual(values=col)
if (!is.null(alpha)) gg <- gg + scale_alpha_manual(values=alpha)
gg <- gg + xlab("iteration") + ylab("time (ms)")
if (!is.null(ylim)) gg <- gg + ylim(ylim)
print(gg)
})%>)
<% } # toImage() %>
<%
toTable <- function(stats, tags=NULL, order="median", ...) {
kable({
s <- summary(stats)
s$neval <- NULL
s$cld <- NULL
s <- s[order(s[[order]]),]
s
})
kable({
s <- summary(stats, unit="relative")
s$neval <- NULL
s$cld <- NULL
s <- s[order(s[[order]]),]
s
})
}
%>
<%--------------------------------------------------------------
Benchmark results for vector functions
--------------------------------------------------------------%>
<% benchmarkResults <- function(stats, tags=NULL, ...) { %>
_Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._
<% toTable(stats, tags=tags) %>
_Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. Outliers are displayed as crosses. Times are in milliseconds._
<% toImage(stats, tags=tags) %>
<% } # benchmarkResults() %>
<%--------------------------------------------------------------
Benchmark results for col- and row-specific functions
--------------------------------------------------------------%>
<% crBenchmarkResults <- function(colStats, rowStats=NULL, tags=NULL, ...) { %>
_Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data. The top panel shows times in milliseconds and the bottom panel shows relative times._
<% toTable(colStats, tags=tags) %>
_Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._
<% if (!is.null(rowStats)) { toTable(rowStats, tags=tags) } %>
_Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(colStats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data <% if (!is.null(rowStats)) { %> as well as <%=hpaste(sprintf("%s()", levels(rowStats$expr)), lastCollapse=" and ")%> on the same data transposed<% } # if (!is.null(rowStats)) %>. Outliers are displayed as crosses. Times are in milliseconds._
<%
y <- c(colStats$time, rowStats$time)/1e6
ymax <- max(y, na.rm=TRUE)
y75 <- quantile(y, probs=0.75, na.rm=TRUE)
yupper <- min(c(1.5*y75, ymax), na.rm=TRUE)
ylim <- c(0, yupper)
%>
<% toImage(colStats, tags=tags, ylim=ylim) %>
<% if (!is.null(rowStats)) toImage(rowStats, tags=tags, ylim=ylim) %>
<% if (!is.null(rowStats)) { %>
<%
# Compare performance or the column- and the row-specific methods
# for the "main" function.
stats <- list(colStats, rowStats)
stats <- lapply(stats, FUN=function(x) {
level <- levels(x$expr)[1]
x <- subset(x, expr %in% level)
x$expr <- factor(as.character(x$expr))
x
})
stats <- Reduce(rbind, stats)
odd <- seq(from=1L, to=nrow(stats), by=2L)
top <- 1:(nrow(stats)/2)
stats0 <- stats
stats[ odd,] <- stats0[ top,]
stats[-odd,] <- stats0[-top,]
%>
_Table: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). The top panel shows times in milliseconds and the bottom panel shows relative times._
<% toTable(stats, tags=tags) %>
_Figure: Benchmarking of <%=hpaste(sprintf("%s()", levels(stats$expr)), lastCollapse=" and ")%> on <%=paste(tags, collapse="+")%> data (original and transposed). Outliers are displayed as crosses. Times are in milliseconds._
<% toImage(stats, name=paste(levels(stats$expr), collapse="_vs_"), tags=tags, col=c("#000000", "#999999")) %>
<% } # if (!is.null(rowStats)) %>
<% } # crBenchmarkResults() %>
matrixStats/inst/benchmarking/includes/setup.md.rsp 0000644 0001751 0000144 00000002364 12542546241 022325 0 ustar hornik users <%@string header="includes/header.md.rsp"%>
<%@string footer="includes/footer.md.rsp"%>
<%@string references="../includes/references.md.rsp"%>
<%@string appendix="${appendix}" default="true"%>
<%@string colname=""%>
<%@string rowname=""%>
<%@string fcnname=""%>
<%@string fcntags=""%>
<%@meta author="Henrik Bengtsson"%>
<%--------------------------------------------------------------
RSP specific
--------------------------------------------------------------%>
<%
R.utils::use("R.utils, R.devices (>= 2.12.0), knitr, ggplot2")
devOptions("png", width=390)
options("withCapture/newline"=FALSE)
options(deparse.cutoff=100)
kable <- function(...) {
t <- knitr::kable(..., format="markdown")
print(t)
}
%>
<%--------------------------------------------------------------
Report/package specific
--------------------------------------------------------------%>
<%
use("matrixStats")
use("microbenchmark")
%>
<%@include file="results.md.rsp"%>
<%--------------------------------------------------------------
Macros
--------------------------------------------------------------%>
<%--------------------------------------------------------------
Timing
--------------------------------------------------------------%>
<% rspStartTime <- Sys.time() %>
matrixStats/inst/benchmarking/includes/references.md.rsp 0000644 0001751 0000144 00000001212 12542546241 023275 0 ustar hornik users <%---------------------------------------------------------------
REFERENCES
---------------------------------------------------------------%>
[RSP]: http://cran.r-project.org/package=R.rsp
[matrixStats]: http://cran.r-project.org/package=matrixStats
[StackOverflow:colMins?]: http://stackoverflow.com/questions/13676878 "Stack Overflow: fastest way to get Min from every column in a matrix?"
[StackOverflow:colSds?]: http://stackoverflow.com/questions/17549762 "Stack Overflow: Is there such 'colsd' in R?"
[StackOverflow:rowProds?]: http://stackoverflow.com/questions/20198801/ "Stack Overflow: Row product of matrix and column sum of matrix"
matrixStats/inst/benchmarking/includes/appendix.md.rsp 0000644 0001751 0000144 00000000675 12542546241 023000 0 ustar hornik users ## Appendix
### Session information
```r
<% print(sessionInfo()) %>
```
Total processing time was <%=rspDuration <- round(Sys.time()-rspStartTime, digits=2)%> <%=attr(rspDuration, "units")%>.
### Reproducibility
To reproduce this report, do:
```r
<%@ifeq fcnname=""%><%@string fcnname="${colname}"%><%@endif%>
html <- matrixStats:::benchmark('<%@string name="fcnname"%>'<%@ifneq fcntags=""%>, tags='<%@string name="fcntags"%>'<%@endif%>)
```
matrixStats/inst/benchmarking/colRowOrderStats.md.rsp 0000644 0001751 0000144 00000003416 12542546241 022636 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colOrderStats"%>
<%@string rowname="rowOrderStats"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
<%
use("Biobase", how="load")
rowQ <- Biobase::rowQ
%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + quantile(..., type=3L)
* Biobase::rowQ()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
probs <- 0.3
which <- round(probs*nrow(X))
colStats <- microbenchmark(
colOrderStats = colOrderStats(X, which=which, na.rm=FALSE),
"apply+quantile" = apply(X, MARGIN=2L, FUN=quantile, probs=probs, na.rm=FALSE, type=3L),
"rowQ(t(X))" = rowQ(t(X), which=which),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowOrderStats = rowOrderStats(X, which=which, na.rm=FALSE),
"apply+quantile" = apply(X, MARGIN=1L, FUN=quantile, probs=probs, na.rm=FALSE, type=3L),
rowQ = rowQ(X, which=which),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowCounts.md.rsp 0000644 0001751 0000144 00000003350 12542546241 022014 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colCounts"%>
<%@string rowname="rowCounts"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* colSums() and rowSums()
* apply() + sum()
<% for (mode in c("logical", "integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
#### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
value <- 42
})%>
```
<% gc() %>
```r
<%=withCapture({
colStats <- microbenchmark(
colCounts = colCounts(X, value=value, na.rm=FALSE),
colSums = colSums(X == value, na.rm=FALSE),
"apply+sum" = apply(X, MARGIN=2L, FUN=function(x) sum(x == value, na.rm=FALSE)),
unit = "ms"
)
})%>
```
```r
<%=withCapture({
X <- t(X)
})%>
```
<% gc() %>
```r
<%=withCapture({
rowStats <- microbenchmark(
rowCounts = rowCounts(X, value=value, na.rm=FALSE),
rowSums = rowSums(X == value, na.rm=FALSE),
"apply+sum" = apply(X, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=FALSE)),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowMads.md.rsp 0000644 0001751 0000144 00000004135 12542546241 021427 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colMads"%>
<%@string rowname="rowMads"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-18"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + mad()
* colMads2() and rowMads2()
where `rowMads2()` and `colMads2()` are:
```r
<%=withCapture({
rowMads2 <- function(x, const=1.4826, na.rm=FALSE) {
mu <- rowMedians(x, na.rm=na.rm)
x <- abs(x - mu)
mad <- rowMedians(x, na.rm=FALSE)
const * mad
}
colMads2 <- function(x, const=1.4826, na.rm=FALSE) {
mu <- colMedians(x, na.rm=na.rm)
x <- abs(x - mu)
mad <- colMedians(x, na.rm=FALSE)
const * mad
}
})%>
```
<%
rowMads_R <- function(x, na.rm=FALSE) {
apply(x, MARGIN=1L, FUN=mad, na.rm=na.rm)
}
colMads_R <- function(x, na.rm=FALSE) {
apply(x, MARGIN=2L, FUN=mad, na.rm=na.rm)
}
%>
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colMads = colMads(X, na.rm=FALSE),
colMads2 = colMads2(X, na.rm=FALSE),
"apply+mad" = apply(X, MARGIN=2L, FUN=mad, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowMads = rowMads(X, na.rm=FALSE),
rowMads2 = rowMads2(X, na.rm=FALSE),
"apply+mad" = apply(X, MARGIN=1L, FUN=mad, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-17
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/index.md.rsp 0000644 0001751 0000144 00000003627 12542546241 020471 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@meta title="Benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-05"%>
<%@include file="${header}"%>
<%
use("matrixStats")
use("R.utils (>= 1.34.0)")
# Simple logging function
lenter <- function(...) {
s <- mprintf(...)
s <- gsub("[.][.][.](\n)*", "...done\\1", s)
lexit <<- function() mprintf(s)
}
%>
# <%@meta name="title"%>
List of benchmark report for some of the functions available in the [matrixStats] package.
<%
path <- cmdArg(path=system.file("benchmarking", package="matrixStats"))
path <- getAbsolutePath(path)
message("Processing benchmark report directory: ", path)
pattern <- "[.]md[.]rsp$"
filenames <- list.files(path=path, pattern=pattern)
filenames <- setdiff(filenames, "index.md.rsp")
names <- gsub(pattern, "", filenames)
# col- and rowAnyMissing() does not really exist
names <- setdiff(names, c("colAnyMissing", "rowAnyMissing"))
message("Number of reports found: ", length(names))
mprintf("Report #%d: %s\n", seq_along(names), names)
%>
<% for (ii in seq_along(names)) { %>
<%
name <- names[ii]
if (regexpr("^colRow", name) != -1L) {
label <- gsub("^colRow", "", name)
label <- sprintf("col%s() and row%s()", label, label)
} else {
label <- sprintf("%s()", name)
}
%>
* [<%=label%>](<%={
lenter("%d of %d. Benchmarking %s...\n", ii, length(names), label)
html <- sprintf("%s.html", name)
if (!file_test("-f", html)) {
html <- matrixStats:::benchmark(name, path=path, workdir=".", envir=new.env())
html <- getRelativePath(html)
gc()
}
lexit()
html
}%>)
<% } # for (ii ...) %>
## Appendix
To reproduce this page and all of its reports, do:
```r
path <- system.file("benchmarking", package="matrixStats")
R.rsp::rfile("index.md.rsp", path=path)
```
_Note: Each of the above reports takes up to several minutes to complete._
<%@string appendix="false"%>
<%@include file="${footer}"%>
<%@string appendix="true"%>
matrixStats/inst/benchmarking/sumOver.md.rsp 0000644 0001751 0000144 00000003513 12542546241 021014 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="sumOver"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-02"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* sum() + [()
as below
```r
<%=withCapture({
sumOver_R <- function(x, na.rm=FALSE, idxs) {
sum(x[idxs], na.rm=na.rm)
}
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
##data <- data[1:3]
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
"sumOver" = sumOver(x),
"sum" = sum(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(dataLabel, "all")) %>
<% for (subset in c(0.2, 0.4, 0.8)) { %>
#### A <%=sprintf("%g", 100*subset)%>% subset
```r
<%=withCapture({
x <- data[[.dataLabel.]]
subset
idxs <- sort(sample(length(x), size=subset*length(x), replace=FALSE))
gc()
stats <- microbenchmark(
"sumOver" = sumOver(x, idxs=idxs),
"sum+[()" = sumOver_R(x, idxs=idxs),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, subset)) %>
<% } # for (subset in ...) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-02
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/product.md.rsp 0000644 0001751 0000144 00000003217 12542546241 021035 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="product"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-04"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* product_R()
* prod()
where
```r
<%=withCapture({
product_R <- function(x, na.rm=FALSE, ...) {
# Nothing todo?
if (length(x) == 0L) return(0);
# Any missing values?
if (na.rm) {
x <- x[!is.na(x)];
}
# Any zeros?
if (is.integer(x) && any(x == 0)) return(0);
# Calculate product via logarithmic sum
sign <- if (sum(x < 0) %% 2 == 0) +1 else -1;
x <- abs(x);
x <- log(x);
x <- sum(x, na.rm=FALSE);
x <- exp(x);
y <- sign*x;
y;
} # product_R()
})%>
```
## Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode="double")
data <- data[1:4]
})%>
```
## Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
message(dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
product = product(x, na.rm=FALSE),
product_R = product_R(x, na.rm=FALSE),
prod = prod(x, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=dataLabel) %>
<% } # for (ii ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-02
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/indexByRow.md.rsp 0000644 0001751 0000144 00000005516 12542546241 021453 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="indexByRow"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-09"%>
<%@include file="${header}"%>
<%
lfun <- local({
locals <- list()
function(txt=NULL) {
if (is.null(txt)) return(locals)
local <- list(txt)
locals <<- c(locals, local)
}
})
%>
# <%@meta name="title"%>
This report benchmark the performance of `<%=fcnname%>()` against alternative methods:
* `indexByRow_R1()` based in `matrix(..., byrow=TRUE)`
* `indexByRow_R2()` is a modified version of `indexByRow_R1()`
where `indexByRow_R1()` and `indexByRow_R2()` are defined as in the Appendix.
<%
lfun(withCapture({
indexByRow_R1 <- function(dim, idxs=NULL, ...) {
n <- prod(dim)
x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE)
if (!is.null(idxs))
x <- x[idxs]
as.vector(x)
} # indexByRow_R1()
}))
lfun(withCapture({
indexByRow_R2 <- function(dim, idxs=NULL, ...) {
n <- prod(dim)
if (is.null(idxs)) {
x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE)
as.vector(x)
} else {
idxs <- idxs - 1
cols <- idxs %/% dim[2L]
rows <- idxs %% dim[2L]
cols + dim[1L]*rows + 1L
}
} # indexByRow_R2()
}))
%>
## Data
<% lfun(withCapture({
<%@include file="R/random-matrices.R"%>
})) %>
```r
<%=withCapture({
data <- rmatrices(mode="index")
})%>
```
where `rmatrices()` is defined in the Appendix.
<%
# data <- data[1:2]
%>
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
dim <- dim(X)
idxsList <- list(
'all-by-NULL' = NULL,
all = seq_len(prod(dim)),
odd = seq(from=1, to=prod(dim), by=2L)
)
str(idxsList)
})%>
```
<% for (ii in seq_along(idxsList)) { %>
#### Index set '<%=names(idxsList)[ii]%>'
<%
idxs <- idxsList[[ii]]
idxsTag <- names(idxsList)[ii]
# Validate correctness
res <- list(
indexByRow = indexByRow(dim, idxs=idxs),
indexByRow_R1 = indexByRow_R1(dim, idxs=idxs),
indexByRow_R2 = indexByRow_R2(dim, idxs=idxs)
)
lapply(res, FUN=function(x) stopifnot(all.equal(x, res[[1]])))
gc()
%>
```r
<%=withCapture({
stats <- microbenchmark(
indexByRow = indexByRow(dim, idxs=idxs),
indexByRow_R1 = indexByRow_R1(dim, idxs=idxs),
indexByRow_R2 = indexByRow_R2(dim, idxs=idxs),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(dataLabel, idxsTag)) %>
<% } # for (ii ...) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
### Local functions
```r
<%=lfun()[[1]]%>
```
```r
<%=lfun()[[2]]%>
```
```r
<%=lfun()[[3]]%>
```
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/R/ 0000755 0001751 0000144 00000000000 12542546241 016426 5 ustar hornik users matrixStats/inst/benchmarking/R/random-matrices.R 0000644 0001751 0000144 00000002262 12542546241 021640 0 ustar hornik users rmatrix <- function(nrow, ncol, mode=c("logical", "double", "integer", "index"), range=c(-100,+100), naProb=0) {
mode <- match.arg(mode)
n <- nrow*ncol
if (mode == "logical") {
X <- sample(c(FALSE, TRUE), size=n, replace=TRUE)
} else if (mode == "index") {
X <- seq_len(n)
mode <- "integer";
} else {
X <- runif(n, min=range[1], max=range[2])
}
storage.mode(X) <- mode
if (naProb > 0) X[sample(n, size=naProb*n)] <- NA
dim(X) <- c(nrow, ncol)
X
} # rmatrix()
rmatrices <- function(scale=10, seed=1, ...) {
set.seed(seed)
data <- list()
data[[1]] <- rmatrix(nrow=scale* 1, ncol=scale* 1, ...)
data[[2]] <- rmatrix(nrow=scale* 10, ncol=scale* 10, ...)
data[[3]] <- rmatrix(nrow=scale*100, ncol=scale* 1, ...)
data[[4]] <- t(data[[3]])
data[[5]] <- rmatrix(nrow=scale* 10, ncol=scale*100, ...)
data[[6]] <- t(data[[5]])
names(data) <- sapply(data, FUN=function(x) paste(dim(x), collapse="x"))
data
} # rmatrices()
############################################################################
# HISTORY:
# 2014-11-09
# o Added 'index' mode.
# 2014-06-02
# o Created.
############################################################################
matrixStats/inst/benchmarking/R/random-vectors.R 0000644 0001751 0000144 00000001630 12542546241 021514 0 ustar hornik users rvector <- function(n, mode=c("logical", "double", "integer"), range=c(-100,+100), naProb=0) {
mode <- match.arg(mode)
if (mode == "logical") {
X <- sample(c(FALSE, TRUE), size=n, replace=TRUE)
} else {
x <- runif(n, min=range[1], max=range[2])
}
storage.mode(x) <- mode
if (naProb > 0) x[sample(n, size=naProb*n)] <- NA
x
} # rvector()
rvectors <- function(scale=10, seed=1, ...) {
set.seed(seed)
data <- list()
data[[1]] <- rvector(n=scale*1e2, ...)
data[[2]] <- rvector(n=scale*1e3, ...)
data[[3]] <- rvector(n=scale*1e4, ...)
data[[4]] <- rvector(n=scale*1e5, ...)
data[[5]] <- rvector(n=scale*1e6, ...)
names(data) <- sprintf("n=%d", sapply(data, FUN=length))
data
} # rvectors()
############################################################################
# HISTORY:
# 2014-06-04
# o Created.
############################################################################
matrixStats/inst/benchmarking/colRowDiffs.md.rsp 0000644 0001751 0000144 00000003105 12542546241 021572 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colDiffs"%>
<%@string rowname="rowDiffs"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-12-30"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + diff()
* apply() + diff2()
* diff()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colDiffs = colDiffs(X),
"apply+diff" = apply(X, MARGIN=2L, FUN=diff),
"apply+diff2" = apply(X, MARGIN=2L, FUN=diff2),
diff = diff(X),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowDiffs = rowDiffs(X),
"apply+diff" = apply(X, MARGIN=1L, FUN=diff),
"apply+diff2" = apply(X, MARGIN=1L, FUN=diff2),
"diff + t" = diff(t(X)),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-17
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/anyMissing.md.rsp 0000644 0001751 0000144 00000002544 12542546241 021500 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="anyMissing"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-01"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* anyNA()
* any() + is.na()
as below
```r
<%=withCapture({
any_is.na <- function(x) {
any(is.na(x))
}
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
"anyMissing" = anyMissing(x),
"anyNA" = anyNA(x),
"any_is.na" = any_is.na(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-01
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowCumprods.md.rsp 0000644 0001751 0000144 00000002673 12542546241 022344 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colCumprods"%>
<%@string rowname="rowCumprods"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-26"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + cumprod()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode, range=c(-1,1))
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colCumprods = colCumprods(X),
"apply+cumprod" = apply(X, MARGIN=2L, FUN=cumprod),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowCumprods = rowCumprods(X),
"apply+cumprod" = apply(X, MARGIN=1L, FUN=cumprod),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-26
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowTabulates.md.rsp 0000644 0001751 0000144 00000002336 12542546241 022470 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colTabulates"%>
<%@string rowname="rowTabulates"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* ???
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="integer", range=c(-10,10))
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colTabulates = colTabulates(X, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowTabulates = rowTabulates(X, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowRanges.md.rsp 0000644 0001751 0000144 00000002727 12542546241 021767 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colRanges"%>
<%@string rowname="rowRanges"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + range()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colRanges = colRanges(X, na.rm=FALSE),
"apply+range" = apply(X, MARGIN=2L, FUN=range, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowRanges = rowRanges(X, na.rm=FALSE),
"apply+range" = apply(X, MARGIN=1L, FUN=range, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/allocVector.md.rsp 0000644 0001751 0000144 00000004154 12542546241 021633 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="allocVector"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* vector() + assignment
* rep()
* matrix() + as.vector()
where
```r
<%=withCapture({
allocVector_R1 <- function(length, value=NA) {
x <- vector(mode=typeof(value), length=length)
if (!is.finite(value) || value != 0) x[] <- value
x
} # allocVector_R1()
allocVector_R2 <- function(length, value=NA) {
x <- matrix(data=value, nrow=length, ncol=1L)
as.vector(x)
} # allocVector_R2()
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
values <- list(zero=0, one=1, "NA"=NA_real_)
if (mode != "double")
values <- lapply(values, FUN=function(x) { storage.mode(x) <- mode; x })
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> matrix
<% for (value in values) { %>
<%
valueLabel <- as.character(value)
mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel)
%>
```r
<%=withCapture({
n <- length(data[[.dataLabel.]])
str(value)
})%>
```
<% gc() %>
```r
<%=withCapture({
stats <- microbenchmark(
"allocVector" = allocVector(length=n, value=value),
"rep" = rep(value, times=n),
"allocVector_R1" = allocVector_R1(length=n, value=value),
"allocVector_R2" = allocVector_R2(length=n, value=value),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %>
<% } # for (value in values) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-01
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowMeans.md.rsp 0000644 0001751 0000144 00000003136 12542546241 021606 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colMeans"%>
<%@string rowname="rowMeans"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* .colMeans() and .rowMeans()
* apply() + mean()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colMeans = colMeans(X, na.rm=FALSE),
.colMeans = .colMeans(X, m=nrow(X), n=ncol(X), na.rm=FALSE),
"apply+mean" = apply(X, MARGIN=2L, FUN=mean, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowMeans = rowMeans(X, na.rm=FALSE),
.rowMeans = .rowMeans(X, m=nrow(X), n=ncol(X), na.rm=FALSE),
"apply+mean" = apply(X, MARGIN=1L, FUN=mean, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/t_tx_OP_y.md.rsp 0000644 0001751 0000144 00000004065 12542546241 021263 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="t_tx_OP_y"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-26"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* t_tx_OP_y_R()
as below
```r
<%=withCapture({
t_tx_OP_y_R <- function(x, y, OP, na.rm=FALSE) {
x <- t(x)
if (na.rm) {
xnok <- is.na(x)
ynok <- is.na(y)
anok <- xnok & ynok
unit <- switch(OP,
"+" = 0,
"-" = NA_real_,
"*" = 1,
"/" = NA_real_,
stop("Unknown 'OP' operator: ", OP)
)
x[xnok] <- unit
y[ynok] <- unit
}
ans <- switch(OP,
"+" = x + y,
"-" = x - y,
"*" = x * y,
"/" = x / y,
stop("Unknown 'OP' operator: ", OP)
)
if (na.rm) {
ans[anok] <- NA_real_
}
t(ans)
} # t_tx_OP_y_R()
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
y <- x[,1L]
})%>
```
<% for (OP in c("+", "-", "*", "/")) { %>
<%
OPTag <- c("+"="add", "-"="sub", "*"="mul", "/"="div")[OP]
gc()
%>
```r
<%=withCapture({
OP
stats <- microbenchmark(
"t_tx_OP_y" = t_tx_OP_y(x, y, OP=OP, na.rm=FALSE),
"t_tx_OP_y_R" = t_tx_OP_y_R(x, y, OP=OP, na.rm=FALSE),
unit = "ms"
)
gc()
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, OPTag)) %>
<% } # for (OP ...) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-26
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/madDiff.md.rsp 0000644 0001751 0000144 00000002434 12542546241 020707 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="madDiff"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-10"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* N/A
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
data <- data[1:4]
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
#### All elements
```r
<%=withCapture({
x <- data[[.dataLabel.]]
stats <- microbenchmark(
"madDiff" = madDiff(x),
"mad" = mad(x),
"diff" = diff(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-10
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowQuantiles.md.rsp 0000644 0001751 0000144 00000002676 12542546241 022520 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colQuantiles"%>
<%@string rowname="rowQuantiles"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + quantile()
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="double")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
probs <- seq(from=0, to=1, by=0.25)
colStats <- microbenchmark(
colQuantiles = colQuantiles(X, probs=probs, na.rm=FALSE),
"apply+quantile" = apply(X, MARGIN=2L, FUN=quantile, probs=probs, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowQuantiles = rowQuantiles(X, probs=probs, na.rm=FALSE),
"apply+quantile" = apply(X, MARGIN=1L, FUN=quantile, probs=probs, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/weightedMean.md.rsp 0000644 0001751 0000144 00000003054 12542546241 021755 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="weightedMean"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-12-09"%>
<%@include file="${header}"%>
<%
library("stats")
weighted.mean.default <- stats:::weighted.mean.default
%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* stats::weighted.mean()
* stats:::weighted.mean.default()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
data <- data[1:4]
})%>
```
## Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
w <- runif(length(x))
gc()
stats <- microbenchmark(
"weightedMean" = weightedMean(x, w=w, na.rm=FALSE),
"stats::weighted.mean" = weighted.mean(x, w=w, na.rm=FALSE),
"stats:::weighted.mean.default" = weighted.mean.default(x, w=w, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-12-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowWeightedMeans.md.rsp 0000644 0001751 0000144 00000002674 12542546241 023275 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colWeightedMeans"%>
<%@string rowname="rowWeightedMeans"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + weighted.mean()
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="double")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
w <- runif(nrow(X))
gc()
colStats <- microbenchmark(
colWeightedMeans = colWeightedMeans(X, w=w, na.rm=FALSE),
"apply+weigthed.mean" = apply(X, MARGIN=2L, FUN=weighted.mean, w=w, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowWeightedMeans = rowWeightedMeans(X, w=w, na.rm=FALSE),
"apply+weigthed.mean" = apply(X, MARGIN=1L, FUN=weighted.mean, w=w, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/count.md.rsp 0000644 0001751 0000144 00000002403 12542546241 020501 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="count"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-12-08"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* sum(x == value)
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode=mode)
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
mprintf("%s: %s\n", mode, dataLabel)
x <- data[[dataLabel]]
value <- 42
gc()
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
"count" = count(x, value),
"sum(x == value)" = sum(x == value),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-01
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/weightedMedian.md.rsp 0000644 0001751 0000144 00000004315 12542546241 022273 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="weightedMedian"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-03"%>
<%@include file="${header}"%>
<%---
'ergm' could be hard to install, because it imports
'Rglpk', which requires GLPK library on the system.
---%>
<%@string test_ergm="FALSE"%>
<%
use("limma", how="load")
limma_weighted.median <- limma::weighted.median
use("cwhmisc", how="load")
cwhmisc_w.median <- cwhmisc::w.median
use("laeken", how="load")
laeken_weightedMedian <- laeken::weightedMedian
<%@ifeq test_ergm="TRUE"%>
use("ergm", how="load")
ergm_wtd.median <- ergm::wtd.median
<%@endif%>
weightedMedian <- matrixStats::weightedMedian
%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* apply() + limma::weighted.median()
* apply() + cwhmisc::w.median()
* apply() + laeken::weightedMedian()
<%@ifeq test_ergm="TRUE"%>
* apply() + ergm::wtd.median() --%>
<%@endif%>
## Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode="double")
data <- data[1:3]
})%>
```
## Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
message(dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
w <- runif(length(x))
gc()
stats <- microbenchmark(
"weightedMedian" = weightedMedian(x, w=w, ties="mean", na.rm=FALSE),
"limma::weighted.median" = limma_weighted.median(x, w=w, na.rm=FALSE),
"cwhmisc::w.median" = cwhmisc_w.median(x, w=w),
"laeken::weightedMedian" = laeken_weightedMedian(x, w=w),
<%@ifeq test_ergm="TRUE"%>
"ergm::wtd.median" = ergm_wtd.median(x, w=w),
<%@endif%>
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=dataLabel) %>
<% } # for (ii ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-12-17
o Made 'ergm::wtd.median' optional.
2014-06-03
o Created using benchmark snippet in incl/weightedMedian.Rex.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/binCounts.md.rsp 0000644 0001751 0000144 00000003637 12542546241 021327 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="binCounts"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-05-25"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* hist()
as below
```r
<%=withCapture({
hist <- graphics::hist
binCounts_hist <- function(x, bx, right=FALSE, ...) {
hist(x, breaks=bx, right=right, include.lowest=TRUE, plot=FALSE)$counts
}
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Non-sorted simulated data
```r
<%=withCapture({
set.seed(0xBEEF)
nx <- 100e3 # Number of data points
xmax <- 0.01*nx
x <- runif(nx, min=0, max=xmax)
storage.mode(x) <- mode
str(x)
# Uniformely distributed bins
nb <- 10e3 # Number of bins
bx <- seq(from=0, to=xmax, length.out=nb+1L)
bx <- c(-1, bx, xmax+1)
})%>
```
### Results
<% benchmark <- function() { %>
<% dataLabel <- if (is.unsorted(x)) "unsorted" else "sorted" %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
```r
<%=withCapture({
gc()
stats <- microbenchmark(
binCounts = binCounts(x, bx=bx),
hist = binCounts_hist(x, bx=bx),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel)) %>
<%
# Sanity checks
n0 <- binCounts_hist(x, bx=bx)
n1 <- binCounts(x, bx=bx)
stopifnot(identical(n1, n0))
n1r <- rev(binCounts(-x, bx=rev(-bx), right=TRUE))
stopifnot(identical(n1r, n1))
%>
<% } # benchmark() %>
<% benchmark() %>
### Sorted simulated data
```r
<%=withCapture({
x <- sort(x)
})%>
```
<% benchmark() %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-02
o Restructured.
2014-05-25
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowLogSumExps.md.rsp 0000644 0001751 0000144 00000003321 12542546241 022605 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colLogSumExps"%>
<%@string rowname="rowLogSumExps"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + matrixStats::logSumExp()
* apply() + logSumExp0()
where
```r
<%=withCapture({
logSumExp0 <- function(lx, ...) {
iMax <- which.max(lx)
log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax]
} # logSumExp0()
})%>
```
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="double")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colLogSumExps = colLogSumExps(X, na.rm=FALSE),
"apply+logSumExp" = apply(X, MARGIN=2L, FUN=logSumExp, na.rm=FALSE),
"apply+logSumExp0" = apply(X, MARGIN=2L, FUN=logSumExp0, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowLogSumExps = rowLogSumExps(X, na.rm=FALSE),
"apply+logSumExp" = apply(X, MARGIN=1L, FUN=logSumExp, na.rm=FALSE),
"apply+logSumExp0" = apply(X, MARGIN=1L, FUN=logSumExp0, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/logSumExp.md.rsp 0000644 0001751 0000144 00000002472 12542546241 021302 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="logSumExp"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-01"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* logSumExp_R()
where
```r
<%=withCapture({
logSumExp_R <- function(lx, ...) {
iMax <- which.max(lx)
log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax]
} # logSumExp_R()
})%>
```
## Data
```r
<%=withCapture({
<%@include file="R/random-vectors.R"%>
data <- rvectors(mode="double")
data <- data[1:4]
})%>
```
## Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
message(dataLabel)
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> vector
```r
<%=withCapture({
x <- data[[.dataLabel.]]
gc()
stats <- microbenchmark(
"logSumExp" = logSumExp(x),
"logSumExp_R" = logSumExp_R(x),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=dataLabel) %>
<% } # for (ii ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-01
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowRanks.md.rsp 0000644 0001751 0000144 00000002756 12542546241 021630 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colRanks"%>
<%@string rowname="rowRanks"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + rank()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colRanks = colRanks(X, na.rm=FALSE),
"apply+rank" = apply(X, MARGIN=2L, FUN=rank, na.last="keep", ties.method="max"),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowRanks = rowRanks(X, na.rm=FALSE),
"apply+rank" = apply(X, MARGIN=1L, FUN=rank, na.last="keep", ties.method="max"),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowWeightedMedians.md.rsp 0000644 0001751 0000144 00000002713 12542546241 023604 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colWeightedMedians"%>
<%@string rowname="rowWeightedMedians"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + weightedMedian()
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="double")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
w <- runif(nrow(X))
gc()
colStats <- microbenchmark(
colWeightedMedians = colWeightedMedians(X, w=w, na.rm=FALSE),
"apply+weigthedMedian" = apply(X, MARGIN=2L, FUN=weightedMedian, w=w, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowWeightedMedians = rowWeightedMedians(X, w=w, na.rm=FALSE),
"apply+weigthedMedian" = apply(X, MARGIN=1L, FUN=weightedMedian, w=w, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowVars.md.rsp 0000644 0001751 0000144 00000005364 12542546241 021463 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colVars"%>
<%@string rowname="rowVars"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2015-01-06"%>
<%@include file="${header}"%>
<%@string test_genefilter="TRUE"%>
<%@ifeq test_genefilter="TRUE"%>
<%
use("genefilter", how="load")
genefilter_rowVars <- genefilter::rowVars
genefilter_colVars <- function(x, ...) genefilter_rowVars(t(x), ...)
%>
<%@endif%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + var()
* colVarColMeans() and rowVarColMeans()
<%@ifeq test_genefilter="TRUE"%>
* genefilter::rowVars(t(.)) and genefilter::rowVars()
<%@endif%>
where
```r
<%=withCapture({
colVarColMeans <- function(x, na.rm=TRUE) {
if (na.rm) {
n <- colSums(!is.na(x))
} else {
n <- nrow(x)
}
var <- colMeans(x*x, na.rm=na.rm) - (colMeans(x, na.rm=na.rm))^2
var * n/(n-1)
}
})%>
```
and
```r
<%=withCapture({
rowVarRowMeans <- function(x, na.rm=TRUE) {
if (na.rm) {
n <- rowSums(!is.na(x))
} else {
n <- ncol(x)
}
mu <- rowMeans(x, na.rm=na.rm)
var <- rowMeans(x*x, na.rm=na.rm) - mu^2
var * (n/(n-1))
}
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colVars = colVars(X, na.rm=FALSE),
colVarColMeans = colVarColMeans(X, na.rm=FALSE),
"apply+var" = apply(X, MARGIN=2L, FUN=var, na.rm=FALSE),
<%@ifeq test_genefilter="TRUE"%>
"genefilter::rowVars(t(.))" = genefilter_colVars(X, na.rm=FALSE),
<%@endif%>
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowVars = rowVars(X, na.rm=FALSE),
rowVarRowMeans = rowVarRowMeans(X, na.rm=FALSE),
"apply+var" = apply(X, MARGIN=1L, FUN=var, na.rm=FALSE),
<%@ifeq test_genefilter="TRUE"%>
"genefilter::rowVars" = genefilter_rowVars(X, na.rm=FALSE),
<%@endif%>
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2015-01-06
o Now benchmarking 'genefilter' functions too.
2014-11-23
o Now benchmarking rowVars() instead of rowSds() since the latter uses
the former.
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowSums.md.rsp 0000644 0001751 0000144 00000003116 12542546241 021470 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colSums"%>
<%@string rowname="rowSums"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-06-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* .colSums() and .rowSums()
* apply() + sum()
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
})%>
```
### Results
<% for (dataLabel in names(data)) { %>
<% mprintf("%s: %s\n", mode, dataLabel) %>
#### <%=dataLabel%> <%=mode%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colSums = colSums(X, na.rm=FALSE),
.colSums = .colSums(X, m=nrow(X), n=ncol(X), na.rm=FALSE),
"apply+sum" = apply(X, MARGIN=2L, FUN=sum, na.rm=FALSE),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowSums = rowSums(X, na.rm=FALSE),
.rowSums = .rowSums(X, m=nrow(X), n=ncol(X), na.rm=FALSE),
"apply+sum" = apply(X, MARGIN=1L, FUN=sum, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=c(mode, dataLabel)) %>
<% } # for (dataLabel ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowAnys.md.rsp 0000644 0001751 0000144 00000002554 12542546241 021460 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colAnys"%>
<%@string rowname="rowAnys"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-18"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* apply() + any()
* colSums() > 0 or rowSums() > 0
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="logical")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
gc()
colStats <- microbenchmark(
colAnys = colAnys(X),
"apply+any" = apply(X, MARGIN=2L, FUN=any),
"colSums > 0" = (colSums(X) > 0L),
unit = "ms"
)
X <- t(X)
gc()
rowStats <- microbenchmark(
rowAnys = rowAnys(X),
"apply+any" = apply(X, MARGIN=1L, FUN=any),
"rowSums > 0" = (rowSums(X) > 0L),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-18
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/allocMatrix.md.rsp 0000644 0001751 0000144 00000004026 12542546241 021633 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string fcnname="allocMatrix"%>
<% fcnname <- "<%@string name="fcnname"%>" %>
<%@meta title="${fcnname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-09"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=fcnname%>() against alternative methods.
## Alternative methods
* matrix()
* matrix() special trick for NA
where
```r
<%=withCapture({
allocMatrix_R <- function(nrow, ncol, value=NA) {
if (is.na(value) && !is.nan(value)) {
matrix(data=value[c()], nrow=nrow, ncol=ncol)
} else {
matrix(data=value, nrow=nrow, ncol=ncol)
}
} # allocMatrix_R()
})%>
```
<% for (mode in c("integer", "double")) { %>
## Data type "<%=mode%>"
### Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode=mode)
values <- list(zero=0, one=1, "NA"=NA_real_)
if (mode != "double")
values <- lapply(values, FUN=function(x) { storage.mode(x) <- mode; x })
})%>
```
### Results
<% for (ii in seq_along(data)) { %>
<%
dataLabel <- names(data)[ii]
x <- data[[dataLabel]]
gc()
%>
### <%=dataLabel%> matrix
<% for (value in values) { %>
<%
valueLabel <- as.character(value)
mprintf("%s: %s, value=%s\n", mode, dataLabel, valueLabel)
%>
```r
<%=withCapture({
dim <- dim(data[[.dataLabel.]])
nrow <- dim[1L]
ncol <- dim[2L]
str(value)
})%>
```
<% gc() %>
```r
<%=withCapture({
stats <- microbenchmark(
"allocMatrix" = allocMatrix(nrow=nrow, ncol=ncol, value=value),
"matrix" = matrix(data=value, nrow=nrow, ncol=ncol),
"allocMatrix_R" = allocMatrix_R(nrow=nrow, ncol=ncol, value=value),
unit = "ms"
)
})%>
```
<% benchmarkResults(stats, tags=c(mode, dataLabel, valueLabel)) %>
<% } # for (value in values) %>
<% } # for (ii ...) %>
<% } # for (mode ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-11-01
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/benchmarking/colRowProds.md.rsp 0000644 0001751 0000144 00000003507 12542546241 021634 0 ustar hornik users <%@include file="includes/setup.md.rsp"%>
<%@string colname="colProds"%>
<%@string rowname="rowProds"%>
<%@meta title="${colname}() and ${rowname}() benchmarks"%>
<%@meta author="Henrik Bengtsson"%>
<%@meta date="2014-11-15"%>
<%@include file="${header}"%>
# <%@meta name="title"%>
This report benchmark the performance of <%=colname%>() and <%=rowname%>() against alternative methods.
## Alternative methods
* <%=colname%>()/<%=rowname%>() using method="expSumLog"
* apply() + prod()
* apply() + product()
## Data
```r
<%=withCapture({
<%@include file="R/random-matrices.R"%>
data <- rmatrices(mode="double")
})%>
```
## Results
<% for (dataLabel in names(data)) { %>
<% message(dataLabel) %>
### <%=dataLabel%> matrix
```r
<%=withCapture({
X <- data[[.dataLabel.]]
})%>
<% gc() %>
<%=withCapture({
colStats <- microbenchmark(
"colProds w/ direct" = colProds(X, method="direct", na.rm=FALSE),
"colProds w/ expSumLog" = colProds(X, method="expSumLog", na.rm=FALSE),
"apply+prod" = apply(X, MARGIN=2L, FUN=prod, na.rm=FALSE),
"apply+product" = apply(X, MARGIN=2L, FUN=product, na.rm=FALSE),
unit = "ms"
)
})%>
<%=withCapture({
X <- t(X)
})%>
<% gc() %>
<%=withCapture({
rowStats <- microbenchmark(
"rowProds w/ direct" = rowProds(X, method="direct", na.rm=FALSE),
"rowProds w/ expSumLog" = rowProds(X, method="expSumLog", na.rm=FALSE),
"apply+prod" = apply(X, MARGIN=1L, FUN=prod, na.rm=FALSE),
"apply+product" = apply(X, MARGIN=1L, FUN=product, na.rm=FALSE),
unit = "ms"
)
})%>
```
<% crBenchmarkResults(colStats, rowStats, tags=dataLabel) %>
<% } # for (dataLabel ...) %>
<%@include file="${footer}"%>
<%---------------------------------------------------------------------------
HISTORY:
2014-06-09
o Created.
---------------------------------------------------------------------------%>
matrixStats/inst/doc/ 0000755 0001751 0000144 00000000000 12542546263 014346 5 ustar hornik users matrixStats/inst/doc/matrixStats-methods.md.rsp 0000644 0001751 0000144 00000020606 12542546263 021463 0 ustar hornik users <%@meta language="R-vignette" content="--------------------------------
DIRECTIVES FOR R:
%\VignetteIndexEntry{matrixStats: Summary of functions}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{matrix}
%\VignetteKeyword{vector}
%\VignetteKeyword{apply}
%\VignetteKeyword{rows}
%\VignetteKeyword{columns}
%\VignetteKeyword{memory}
%\VignetteKeyword{speed}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
--------------------------------------------------------------------"%>
<%
pkgName <- "matrixStats"
library(pkgName, character.only=TRUE)
ns <- getNamespace(pkgName)
env <- as.environment(sprintf("package:%s", pkgName))
R.utils::use("R.utils")
kable <- function(df, ...) {
fcns <- as.character(df$Functions)
fcns <- strsplit(fcns, split=",")
fcns <- sapply(fcns, FUN=function(names) {
names <- trim(names)
ok <- sapply(names, FUN=exists, envir=ns, mode="function")
names[ok] <- sprintf("%s()", names[ok])
names[!ok] <- sprintf("~~%s()~~", names[!ok])
names <- paste(names, collapse=", ")
})
df$Functions <- fcns
df$Example <- sprintf("`%s`", df$Example)
print(knitr::kable(df, ..., format="markdown"))
}
# Find all functions
all <- ls(envir=env)
keep <- sapply(all, FUN=function(name) {
is.function(get(name, envir=env))
})
all <- all[keep]
keep <- !grepl("[.]([^.]*)$", all)
all <- all[keep]
# Hidden functions
skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet")
skip <- c(skip, "allocArray", "allocMatrix", "allocVector")
all <- setdiff(all, skip)
# Column and row functions
crfcns <- grep("^(col|row)", all, value=TRUE)
# Vector functions
vfcns <- setdiff(all, crfcns)
%>
# <%@meta name="title"%>
<% pkg <- R.oo::Package(pkgName) %>
<%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%>
<%
fcns <- crfcns
base <- gsub("^(col|row)", "", fcns)
groups <- tapply(fcns, base, FUN=list)
stopifnot(all(sapply(groups, FUN=length) == 2L))
groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L)
%>
<%---
## Functions that apply to column and rows of matrices
```
<% print(fcns) %>
```
---%>
<%
fcns <- vfcns
%>
<%---
## Functions that apply to vectors
```
<% print(fcns) %>
```
---%>
## Location and scale estimators
<%
tbl <- NULL
row <- data.frame(
"Estimator" = "Weighted sample mean",
"Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans",
"Example" = "weightedMean(x, w); rowWeightedMeans(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median",
"Functions" = "median, colMedians, rowMedians",
"Example" = "median(x); rowMedians(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted median",
"Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians",
"Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample variance",
"Functions" = "var, colVars, rowVars",
"Example" = "var(x); rowVars(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted sample variance",
"Functions" = "weightedVar, colWeightedVars, rowWeightedVars",
"Example" = "weightedVar(x, w), rowWeightedVars(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample variance by n-order differences",
"Functions" = "varDiff, colVarDiffs, rowVarDiffs",
"Example" = "varDiff(x); rowVarDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample standard deviation",
"Functions" = "sd, colSds, rowSds",
"Example" = "sd(x); rowSds(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted sample deviation",
"Functions" = "weightedSd, colWeightedSds, rowWeightedSds",
"Example" = "weightedSd(x, w), rowWeightedSds(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample standard deviation by n-order differences",
"Functions" = "sdDiff, colSdDiffs, rowSdDiffs",
"Example" = "sdDiff(x); rowSdDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median absolute deviation (MAD)",
"Functions" = "mad, colMads, rowMads",
"Example" = "mad(x); rowMads(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted median absolute deviation (MAD)",
"Functions" = "weightedMad, colWeightedMads, rowWeightedMads",
"Example" = "weightedMad(x, w), rowWeightedMads(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median absolute deviation (MAD) by n-order differences",
"Functions" = "madDiff, colMadDiffs, rowMadDiffs",
"Example" = "madDiff(x); rowMadDiffs()"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Quantile",
"Functions" = "quantile, colQuantiles, rowQuantiles",
"Example" = "quantile(x, probs); rowQuantiles(x, probs)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Interquartile range (IQR)",
"Functions" = "iqr, colIQRs, rowIQRs",
"Example" = "iqr(x); rowIQRs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Interquartile range (IQR) by n-order differences",
"Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs",
"Example" = "iqrDiff(x); rowIQRDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Range",
"Functions" = "range, colRanges, rowRanges",
"Example" = "range(x); rowRanges(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Minimum",
"Functions" = "min, colMins, rowMins",
"Example" = "min(x); rowMins(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Maximum",
"Functions" = "max, colMaxs, rowMaxs",
"Example" = "max(x); rowMaxs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Testing for and counting values
<%
tbl <- NULL
row <- data.frame(
"Operator" = "Are there any missing values?",
"Functions" = "anyMissing, colAnyMissings, rowAnyMissings",
"Example" = "anyMissing(x); rowAnyMissings(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Does TRUE exists?",
"Functions" = "any, colAnys, rowAnys",
"Example" = "any(x); rowAnys(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Are all values TRUE?",
"Functions" = "all, colAlls, rowAlls",
"Example" = "all(x); rowAlls(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Does value exists?",
"Functions" = "anyValue, colAnys, rowAnys",
"Example" = "anyValue(x, value); rowAnys(x, value)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Do all element have a given value?",
"Functions" = "allValue, colAlls, rowAlls",
"Example" = "allValue(x, value); rowAlls(x, value)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Number of occurrences of a value?",
"Functions" = "count, colCounts, rowCounts",
"Example" = "count(x, value); rowCounts(x, value)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Cumulative functions
<%
tbl <- NULL
row <- data.frame(
"Operator" = "Cumulative sum",
"Functions" = "cumsum, colCumsums, rowCumsums",
"Example" = "cumsum(x); rowCumsums(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative product",
"Functions" = "cumprod, colCumprods, rowCumprods",
"Example" = "cumprod(x); rowCumprods(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative minimum",
"Functions" = "cummin, colCummins, rowCummins",
"Example" = "cummin(x); rowCummins(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative maximum",
"Functions" = "cummax, colCummaxs, rowCummaxs",
"Example" = "cummax(x); rowCummaxs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Binning
<%
tbl <- NULL
row <- data.frame(
"Estimator" = "Counts in disjoint bins",
"Functions" = "binCounts",
"Example" = "binCounts(x, bx)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample means (and counts) in disjoint bins",
"Functions" = "binMeans",
"Example" = "binMeans(y, x, bx)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Miscellaneous
<%
tbl <- NULL
row <- data.frame(
"Operation" = "Lagged differences",
"Functions" = c("diff2, colDiffs, rowDiffs"),
"Example" = "diff2(x), rowDiffs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
-------------------------------------------------------------
<%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](http://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>).
matrixStats/inst/doc/matrixStats-methods.html 0000644 0001751 0000144 00000022573 12542546263 021231 0 ustar hornik users
matrixStats: Summary of functions
matrixStats: Summary of functions
Henrik Bengtsson on June 23, 2015
Location and scale estimators
| Estimator |
Functions |
Example |
| Weighted sample mean |
weightedMean(), colWeightedMeans(), rowWeightedMeans() |
weightedMean(x, w); rowWeightedMeans(x, w) |
| Median |
median(), colMedians(), rowMedians() |
median(x); rowMedians(x) |
| Weighted median |
weightedMedian(), colWeightedMedians(), rowWeightedMedians() |
weightedMedian(x, w); rowWeightedMedians(x, w) |
| Sample variance |
var(), colVars(), rowVars() |
var(x); rowVars(x) |
| Weighted sample variance |
weightedVar(), colWeightedVars(), rowWeightedVars() |
weightedVar(x, w), rowWeightedVars(x, w) |
| Sample variance by n-order differences |
varDiff(), colVarDiffs(), rowVarDiffs() |
varDiff(x); rowVarDiffs(x) |
| Sample standard deviation |
sd(), colSds(), rowSds() |
sd(x); rowSds(x) |
| Weighted sample deviation |
weightedSd(), colWeightedSds(), rowWeightedSds() |
weightedSd(x, w), rowWeightedSds(x, w) |
| Sample standard deviation by n-order differences |
sdDiff(), colSdDiffs(), rowSdDiffs() |
sdDiff(x); rowSdDiffs(x) |
| Median absolute deviation (MAD) |
mad(), colMads(), rowMads() |
mad(x); rowMads(x) |
| Weighted median absolute deviation (MAD) |
weightedMad(), colWeightedMads(), rowWeightedMads() |
weightedMad(x, w), rowWeightedMads(x, w) |
| Median absolute deviation (MAD) by n-order differences |
madDiff(), colMadDiffs(), rowMadDiffs() |
madDiff(x); rowMadDiffs() |
| Quantile |
quantile(), colQuantiles(), rowQuantiles() |
quantile(x, probs); rowQuantiles(x, probs) |
| Interquartile range (IQR) |
iqr(), colIQRs(), rowIQRs() |
iqr(x); rowIQRs(x) |
| Interquartile range (IQR) by n-order differences |
iqrDiff(), colIQRDiffs(), rowIQRDiffs() |
iqrDiff(x); rowIQRDiffs(x) |
| Range |
range(), colRanges(), rowRanges() |
range(x); rowRanges(x) |
| Minimum |
min(), colMins(), rowMins() |
min(x); rowMins(x) |
| Maximum |
max(), colMaxs(), rowMaxs() |
max(x); rowMaxs(x) |
Testing for and counting values
| Operator |
Functions |
Example |
| Are there any missing values? |
anyMissing(), colAnyMissings(), rowAnyMissings() |
anyMissing(x); rowAnyMissings(x) |
| Does TRUE exists? |
any(), colAnys(), rowAnys() |
any(x); rowAnys(x) |
| Are all values TRUE? |
all(), colAlls(), rowAlls() |
all(x); rowAlls(x) |
| Does value exists? |
anyValue(), colAnys(), rowAnys() |
anyValue(x, value); rowAnys(x, value) |
| Do all element have a given value? |
allValue(), colAlls(), rowAlls() |
allValue(x, value); rowAlls(x, value) |
| Number of occurrences of a value? |
count(), colCounts(), rowCounts() |
count(x, value); rowCounts(x, value) |
Cumulative functions
| Operator |
Functions |
Example |
| Cumulative sum |
cumsum(), colCumsums(), rowCumsums() |
cumsum(x); rowCumsums(x) |
| Cumulative product |
cumprod(), colCumprods(), rowCumprods() |
cumprod(x); rowCumprods(x) |
| Cumulative minimum |
cummin(), colCummins(), rowCummins() |
cummin(x); rowCummins(x) |
| Cumulative maximum |
cummax(), colCummaxs(), rowCummaxs() |
cummax(x); rowCummaxs(x) |
Binning
| Estimator |
Functions |
Example |
| Counts in disjoint bins |
binCounts() |
binCounts(x, bx) |
| Sample means (and counts) in disjoint bins |
binMeans() |
binMeans(y, x, bx) |
Miscellaneous
| Operation |
Functions |
Example |
| Lagged differences |
diff2(), colDiffs(), rowDiffs() |
diff2(x), rowDiffs(x) |
matrixStats v0.14.2. Release: CRAN, Development: GitHub.
matrixStats/tests/ 0000755 0001751 0000144 00000000000 12542546242 013763 5 ustar hornik users matrixStats/tests/signTabulate.R 0000644 0001751 0000144 00000002003 12542546242 016523 0 ustar hornik users library("matrixStats")
signTabulate0 <- function(x, ...) {
nneg <- sum(x < 0, na.rm=TRUE)
nzero <- sum(x == 0, na.rm=TRUE)
npos <- sum(x > 0, na.rm=TRUE)
nna <- sum(is.na(x))
nneginf <- sum(is.infinite(x) & x < 0, na.rm=TRUE)
nposinf <- sum(is.infinite(x) & x > 0, na.rm=TRUE)
res <- c(nneg, nzero, npos, nna, nneginf, nposinf)
res <- as.double(res)
names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf")
if (is.integer(x)) res <- res[1:4]
res
} # signTabulate0()
# Simulate data
set.seed(0xBEEF)
n <- 1e3
x <- runif(n)
x[sample(n, size=0.1*n)] <- 0
x[sample(n, size=0.1*n)] <- NA_real_
x[sample(n, size=0.1*n)] <- -Inf
x[sample(n, size=0.1*n)] <- +Inf
# Doubles
message("Doubles:")
counts0 <- signTabulate0(x)
print(counts0)
counts1 <- signTabulate(x)
print(counts1)
stopifnot(identical(counts1, counts0))
# Integers
message("Integers:")
x <- suppressWarnings(as.integer(x))
counts0 <- signTabulate0(x)
print(counts0)
counts1 <- signTabulate(x)
print(counts1)
stopifnot(identical(counts1, counts0))
matrixStats/tests/diff2.R 0000644 0001751 0000144 00000001050 12542546242 015074 0 ustar hornik users library("matrixStats")
set.seed(0x42)
for (mode in c("integer", "double")) {
x <- rnorm(10, sd=5)
storage.mode(x) <- mode
str(x)
for (hasNA in c(FALSE, TRUE)) {
if (hasNA) {
x[sample(1:10, size=3)] <- NA
}
for (l in 1:3) {
for (d in 1:4) {
cat(sprintf("%s: NAs=%s, lag=%d, differences=%d\n", mode, hasNA, l, d))
y0 <- diff(x, lag=l, differences=d)
str(y0)
y1 <- diff2(x, lag=l, differences=d)
str(y1)
stopifnot(identical(y1, y0))
}
}
} # for (hasNA ...)
}
matrixStats/tests/rowRanks.R 0000644 0001751 0000144 00000002632 12542546242 015717 0 ustar hornik users library("matrixStats")
set.seed(1)
cat("Consistency checks:\n")
for (kk in 1:4) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
dim <- sample(40:80, size=2L)
n <- prod(dim)
x <- rnorm(n, sd=10)
dim(x) <- dim
# Add NAs?
if ((kk %% 4) %in% c(3,0)) {
cat("Adding NAs\n")
nna <- sample(n, size=1L)
x[sample(length(x), size=nna)] <- NA_real_
}
# Integer or double?
if ((kk %% 4) %in% c(2,0)) {
cat("Coercing to integers\n")
storage.mode(x) <- "integer"
}
str(x)
for (ties in c("max", "min", "average")) {
cat(sprintf("ties.method=%s\n", ties))
# rowRanks():
y1 <- matrixStats::rowRanks(x, ties.method=ties)
y2 <- t(apply(x, MARGIN=1L, FUN=rank, na.last="keep", ties.method=ties))
stopifnot(identical(y1,y2))
y3 <- matrixStats::colRanks(t(x), ties.method=ties)
stopifnot(identical(y1,y3))
# colRanks():
y1 <- matrixStats::colRanks(x, ties.method=ties)
y2 <- t(apply(x, MARGIN=2L, FUN=rank, na.last="keep", ties.method=ties))
stopifnot(identical(y1,y2))
y3 <- matrixStats::rowRanks(t(x), ties.method=ties)
stopifnot(identical(y1,y3))
}
} # for (kk ...)
## Exception handling
x <- matrix(1:12, nrow=3L)
y <- try(rowRanks(x, ties.method="unknown"), silent=TRUE)
stopifnot(inherits(y, "try-error"))
y <- try(colRanks(x, ties.method="unknown"), silent=TRUE)
stopifnot(inherits(y, "try-error"))
matrixStats/tests/rowCollapse.R 0000644 0001751 0000144 00000001245 12542546242 016402 0 ustar hornik users library("matrixStats")
x <- matrix(1:27, ncol=3)
idxs <- 1L
y <- rowCollapse(x, idxs)
stopifnot(identical(y, x[,idxs]))
y2 <- colCollapse(t(x), idxs)
stopifnot(identical(y2, y))
idxs <- 2L
y <- rowCollapse(x, idxs)
stopifnot(identical(y, x[,idxs]))
y2 <- colCollapse(t(x), idxs)
stopifnot(identical(y2, y))
idxs <- c(1,1,1,1,1,3,3,3,3)
y <- rowCollapse(x, idxs)
stopifnot(identical(y, c(x[1:5,1], x[6:9,3])))
y2 <- colCollapse(t(x), idxs)
stopifnot(identical(y2, y))
idxs <- 1:3
y <- rowCollapse(x, idxs)
print(y)
yT <- c(x[1,1],x[2,2],x[3,3],x[4,1],x[5,2],x[6,3],x[7,1],x[8,2],x[9,3])
stopifnot(identical(y, yT))
y2 <- colCollapse(t(x), idxs)
stopifnot(identical(y2, y))
matrixStats/tests/weightedMean.R 0000644 0001751 0000144 00000004254 12542546242 016514 0 ustar hornik users library("matrixStats")
for (mode in c("integer", "double")) {
cat("mode: ", mode, "", sep="")
n <- 2L
x <- runif(n, min=-5, max=5)
storage.mode(x) <- mode
str(x)
cat("All weights are 1\n")
w <- rep(1, times=n)
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
cat("First weight is 5\n")
# Pull the mean towards zero
w[1] <- 5
str(w)
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
cat("All weights are 0\n")
# All weights set to zero
w <- rep(0, times=n)
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
cat("First weight is 8.5\n")
# Put even more weight on the zero
w[1] <- 8.5
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
cat("First weight is Inf\n")
# All weight on the first value
w[1] <- Inf
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
cat("Last weight is Inf\n")
# All weight on the last value
w[1] <- 1
w[n] <- Inf
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
str(list(m0=m0, m1=m1))
stopifnot(identical(m1,m0))
} # for (mode ...)
message("*** Testing for missing values")
# NA tests
xs <- list(A=c(1, 2, 3), B=c(NA, 2, 3), C=c(NA, 2, 3))
ws <- list(A=c(1, 1, 1), B=c(NA, 1, 1), C=c(1, NA, 1))
## NOTE: The (B,B) case with na.rm=TRUE is interesting because
## even if NAs in 'w' are not dropped by na.rm=TRUE, this one
## is because 'x' is dropped and therefore that first element
## is skipped in the computation. It basically does
## keep <- !is.na(x); x <- x[keep]; w <- w[keep]
## without looking at 'w'.
for (x in xs) {
for (mode in c("integer", "double")) {
storage.mode(x) <- mode
for (w in ws) {
for (na.rm in c(FALSE, TRUE)) {
cat(sprintf("mode: %s, na.rm=%s\n", mode, na.rm))
str(list(x=x, w=w))
m0 <- weighted.mean(x, w, na.rm=na.rm)
m1 <- weightedMean(x, w, na.rm=na.rm)
str(list(m0=m0, m1=m1))
stopifnot(all.equal(m1,m0))
}
}
}
}
matrixStats/tests/rowProds.R 0000644 0001751 0000144 00000004252 12542546242 015730 0 ustar hornik users library("matrixStats")
rowProds_R <- function(x, na.rm=FALSE) {
apply(x, MARGIN=1L, FUN=prod, na.rm=na.rm)
}
colProds_R <- function(x, na.rm=FALSE) {
apply(x, MARGIN=2L, FUN=prod, na.rm=na.rm)
}
all.equal.na <- function(target, current, ...) {
# Computations involving NaN may return NaN or NA, cf. ?is.nan
current[is.nan(current)] <- NA_real_
target[is.nan(target)] <- NA_real_
all.equal(target, current, ...)
}
for (mode in c("integer", "double")) {
# Missing values
x <- matrix(c(1,NA,NaN,1, 1,0,1,0), nrow=4, ncol=2)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
y0 <- rowProds_R(x, na.rm=TRUE)
print(y0)
y1 <- rowProds(x, na.rm=TRUE)
print(y1)
y2 <- colProds(t(x), na.rm=TRUE)
print(y2)
stopifnot(all.equal(y1, y0))
stopifnot(all.equal(y1, x[,2]))
stopifnot(all.equal(y2, y1))
# Missing values
y0 <- rowProds_R(x, na.rm=FALSE)
print(y0)
y1 <- rowProds(x, na.rm=FALSE)
print(y1)
y2 <- colProds(t(x), na.rm=FALSE)
print(y2)
stopifnot(all.equal.na(y1, y0))
stopifnot(all.equal(y2, y1))
y3 <- x[,1]*x[,2]
print(y3)
stopifnot(all.equal.na(y1, y3))
# "Empty" rows
y0 <- rowProds_R(x[integer(0),,drop=FALSE], na.rm=FALSE)
print(y0)
y1 <- rowProds(x[integer(0),,drop=FALSE], na.rm=FALSE)
print(y1)
y2 <- colProds(t(x[integer(0),,drop=FALSE]), na.rm=FALSE)
print(y2)
stopifnot(all.equal.na(y1, y0))
stopifnot(all.equal(y2, y1))
stopifnot(length(y1) == 0L)
# Using product()
y1 <- rowProds(x, method="expSumLog", na.rm=FALSE)
print(y1)
y2 <- colProds(t(x), method="expSumLog", na.rm=FALSE)
print(y2)
stopifnot(all.equal(y2, y1))
} # for (mode ...)
# Bug report 2012-06-25
x <- matrix(c(1,1,1,1, 1,0,1,0), nrow=4, ncol=2)
y0 <- rowProds_R(x)
print(y0)
y1 <- rowProds(x)
print(y1)
y2 <- colProds(t(x))
print(y2)
stopifnot(all.equal.na(y1, y0))
stopifnot(all.equal.na(y1, x[,1]*x[,2]))
stopifnot(all.equal.na(y2, y1))
# Bug report 2014-03-25 ("all rows contains a zero")
x <- matrix(c(0,1,1,0), nrow=2, ncol=2)
y0 <- rowProds_R(x)
print(y0)
y1 <- rowProds(x)
print(y1)
y2 <- colProds(t(x))
print(y2)
stopifnot(all.equal.na(y1, y0))
stopifnot(all.equal.na(y1, c(0,0)))
stopifnot(all.equal.na(y2, y1))
matrixStats/tests/x_OP_y.R 0000644 0001751 0000144 00000007130 12542546242 015304 0 ustar hornik users library("matrixStats")
x_OP_y_R <- function(x, y, OP, na.rm=FALSE) {
if (na.rm) {
xnok <- is.na(x)
ynok <- is.na(y)
anok <- xnok & ynok
unit <- switch(OP,
"+" = 0,
"-" = NA_real_,
"*" = 1,
"/" = NA_real_,
stop("Unknown 'OP' operator: ", OP)
)
x[xnok] <- unit
y[ynok] <- unit
}
ans <- switch(OP,
"+" = x + y,
"-" = x - y,
"*" = x * y,
"/" = x / y,
stop("Unknown 'OP' operator: ", OP)
)
if (na.rm) {
ans[anok] <- NA_real_
}
ans
} # x_OP_y_R()
t_tx_OP_y_R <- function(x, y, OP, na.rm=FALSE) {
t(x_OP_y_R(x=t(x), y=y, OP=OP, na.rm=na.rm))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# No missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(1:16, nrow=4, ncol=4)
y <- 1:nrow(x)
storage.mode(y) <- storage.mode(x)
for (OP in c("+", "-", "*", "/")) {
for (na.rm in c(FALSE, TRUE)) {
cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm))
a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm)
a1 <- x_OP_y(x,y, OP, na.rm=na.rm)
str(a1)
stopifnot(all.equal(a1, a0))
b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm)
b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm)
str(b1)
stopifnot(all.equal(b1, b0))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Missing values in x, y, or both.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (which in c("x", "y", "both")) {
x <- matrix(1:16, nrow=4, ncol=4)
y <- 1:nrow(x)
storage.mode(y) <- storage.mode(x)
if (which == "x") {
x[3:6] <- NA_real_
} else if (which == "y") {
y[c(1,3)] <- NA_real_
} else if (which == "both") {
x[3:6] <- NA_real_
y[c(1,3)] <- NA_real_
}
for (OP in c("+", "-", "*", "/")) {
for (na.rm in c(FALSE, TRUE)) {
cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm))
a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm)
a1 <- x_OP_y(x,y, OP, na.rm=na.rm)
str(a1)
stopifnot(all.equal(a1, a0))
b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm)
b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm)
str(b1)
stopifnot(all.equal(b1, b0))
}
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Length differences
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(1:8, nrow=2, ncol=4)
y <- 1:ncol(x)
storage.mode(y) <- storage.mode(x)
for (OP in c("+", "-", "*", "/")) {
for (na.rm in c(FALSE, TRUE)) {
cat(sprintf("OP='%s', na.rm=%s\n", OP, na.rm))
a0 <- x_OP_y_R(x,y, OP, na.rm=na.rm)
a1 <- x_OP_y(x,y, OP, na.rm=na.rm)
str(a1)
stopifnot(all.equal(a1, a0))
b0 <- t_tx_OP_y_R(x,y, OP, na.rm=na.rm)
b1 <- t_tx_OP_y(x,y, OP, na.rm=na.rm)
str(b1)
stopifnot(all.equal(b1, b0))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
xs <- list(
A=matrix(1:2, nrow=2, ncol=2),
B=matrix(NA_integer_, nrow=2, ncol=2)
)
ys <- list(
A=1L,
B=NA_integer_
)
for (x in xs) {
for (y in ys) {
for (mode in c("integer", "double")) {
storage.mode(x) <- mode
storage.mode(y) <- mode
str(list(x=x, y=y))
for (OP in c("+", "-", "*", "/")) {
for (na.rm in c(FALSE, TRUE)) {
cat(sprintf("mode='%s', OP='%s', na.rm=%s\n", mode, OP, na.rm))
suppressWarnings({
z0 <- x_OP_y_R(x, y, OP, na.rm=na.rm)
z <- x_OP_y(x, y, OP, na.rm=na.rm)
})
str(z)
stopifnot(all.equal(z, z0))
}
}
} # for (mode ...)
} # for (y ...)
} # for (x ...)
matrixStats/tests/rowRanges.R 0000644 0001751 0000144 00000011340 12542546242 016054 0 ustar hornik users library("matrixStats")
rowMins_R <- function(x, ...) {
suppressWarnings({
apply(x, MARGIN=1L, FUN=min, ...)
})
} # rowMins_R()
rowMaxs_R <- function(x, ...) {
suppressWarnings({
apply(x, MARGIN=1L, FUN=max, ...)
})
} # rowMaxs_R()
rowRanges_R <- function(x, ...) {
suppressWarnings({
t(apply(x, MARGIN=1L, FUN=range, ...))
})
} # rowRanges_R()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100+0.1, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
storage.mode(x) <- mode
str(x)
# Row/column extremes
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
# Ranges
cat("range:\n")
r0 <- rowRanges_R(x, na.rm=na.rm)
r1 <- rowRanges(x, na.rm=na.rm)
r2 <- colRanges(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
# Min
cat("min:\n")
m0 <- rowMins_R(x, na.rm=na.rm)
m1 <- rowMins(x, na.rm=na.rm)
m2 <- colMins(t(x), na.rm=na.rm)
stopifnot(all.equal(m1, m2))
stopifnot(all.equal(m1, m0))
# Max
cat("max:\n")
m0 <- rowMaxs_R(x, na.rm=na.rm)
m1 <- rowMaxs(x, na.rm=na.rm)
m2 <- colMaxs(t(x), na.rm=na.rm)
stopifnot(all.equal(m1, m2))
stopifnot(all.equal(m1, m0))
}
} # for (addNA ...)
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
x <- matrix(NA_real_, nrow=20, ncol=5)
storage.mode(x) <- mode
str(x)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowRanges_R(x, na.rm=na.rm)
r1 <- rowRanges(x, na.rm=na.rm)
r2 <- colRanges(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
}
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Nx0 matrix
x <- matrix(double(0L), nrow=5L, ncol=0L)
r0 <- rowRanges_R(x)
#r1 <- rowRanges(x)
#rT <- matrix(c(Inf,-Inf), nrow=nrow(x), ncol=2L, byrow=TRUE)
#stopifnot(all.equal(r1,rT))
# 0xN matrix
x <- t(x)
#r1 <- colRanges(x)
#stopifnot(all.equal(r1,rT))
# Nx1 matrix
x <- matrix(1:5, nrow=5L, ncol=1L)
r1 <- rowRanges(x)
rT <- matrix(1:5, nrow=nrow(x), ncol=2L, byrow=FALSE)
stopifnot(all.equal(r1,rT))
# 1xN matrix
x <- t(x)
r1 <- colRanges(x)
stopifnot(all.equal(r1,rT))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Additional tests with NA_integer_, NA_real, NaN, -Inf, +Inf
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(1:12, nrow=4, ncol=3)
naList <- list(
"integer" = matrix(1:12, nrow=4, ncol=3),
"integer w/ NA" = matrix(NA_integer_, nrow=4, ncol=3),
"real" = matrix(as.double(1:12), nrow=4, ncol=3),
"real w/ NA" = matrix(NA_real_, nrow=4, ncol=3)
)
na <- naList[["real"]]
na[2,2] <- NA
naList[["real + NA cell"]] <- na
na <- naList[["real"]]
na[2,] <- NA
naList[["real + NA row"]] <- na
na <- naList[["real"]]
na[2,] <- NaN
naList[["real + NaN row"]] <- na
na <- naList[["real"]]
na[2,2] <- Inf
naList[["real + Inf cell"]] <- na
na <- naList[["real"]]
na[2,] <- Inf
naList[["real + Inf row"]] <- na
na <- naList[["real"]]
na[2,2] <- NaN
naList[["real + NaN cell"]] <- na
na <- naList[["real w/ NA"]]
na[2,2] <- NaN
naList[["real w/ NA + NaN cell"]] <- na
na <- naList[["real w/ NA"]]
na[2,] <- NaN
naList[["real w/ NA + NaN row"]] <- na
for (na.rm in c(FALSE, TRUE)) {
for (name in names(naList)) {
na <- naList[[name]]
cat(sprintf("%s (%s) w/ na.rm=%s:\n", name, typeof(na), na.rm))
print(na)
cat(" min:\n")
y0 <- rowMins_R(na, na.rm=na.rm)
str(y0)
y1 <- rowMins(na, na.rm=na.rm)
str(y1)
stopifnot(all.equal(y1, y0))
y1c <- colMins(t(na), na.rm=na.rm)
str(y1c)
stopifnot(all.equal(y1c, y1))
cat(" max:\n")
y0 <- rowMaxs_R(na, na.rm=na.rm)
str(y0)
y1 <- rowMaxs(na, na.rm=na.rm)
str(y1)
stopifnot(all.equal(y1, y0))
y1c <- colMaxs(t(na), na.rm=na.rm)
str(y1c)
stopifnot(all.equal(y1c, y1))
cat(" range:\n")
y0 <- rowRanges_R(na, na.rm=na.rm)
str(y0)
y1 <- rowRanges(na, na.rm=na.rm)
str(y1)
stopifnot(all.equal(y1, y0))
y1c <- colRanges(t(na), na.rm=na.rm)
str(y1c)
stopifnot(all.equal(y1c, y1))
} # for (name ...)
} # for (na.rm ...)
matrixStats/tests/allocArray.R 0000644 0001751 0000144 00000001033 12542546242 016174 0 ustar hornik users library("matrixStats")
allocArray_R <- function(nrow, ncol, value=NA) {
array(data=value, dim=dim)
} # allocArray_R()
values <- list(
-1L, 0L, +1L, NA_integer_, .Machine$integer.max,
-1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps,
FALSE, TRUE, NA
)
dim <- c(5L, 10L, 4L)
for (value in values) {
X0 <- allocArray_R(dim, value=value)
X <- allocArray(dim, value=value)
str(list(dim=dim, value=value, X=X, X0=X0))
stopifnot(identical(X,X0))
}
matrixStats/tests/count.R 0000644 0001751 0000144 00000004747 12542546242 015252 0 ustar hornik users library("matrixStats")
count_R <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.na(value)) {
counts <- sum(is.na(x))
} else {
counts <- sum(x == value, na.rm=na.rm)
}
as.integer(counts)
} # count_R()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: integer and numeric
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- runif(20*5, min=-3, max=3)
x[sample.int(length(x), size=7)] <- 0
storage.mode(x) <- mode
for (na.rm in c(FALSE, TRUE)) {
# Count zeros
n0 <- count_R(x, value=0, na.rm=na.rm)
n1 <- count(x, value=0, na.rm=na.rm)
stopifnot(identical(n1, n0))
all <- allValue(x, value=0, na.rm=na.rm)
any <- anyValue(x, value=0, na.rm=na.rm)
# Count NAs
n0 <- count_R(x, value=NA, na.rm=na.rm)
n1 <- count(x, value=NA, na.rm=na.rm)
stopifnot(identical(n1, n0))
all <- allValue(x, value=NA, na.rm=na.rm)
any <- anyValue(x, value=NA, na.rm=na.rm)
if (mode == "integer") {
ux <- unique(as.vector(x))
n0 <- n1 <- integer(length(x))
for (value in ux) {
n0 <- n0 + count_R(x, value=value, na.rm=na.rm)
n1 <- n1 + count(x, value=value, na.rm=na.rm)
stopifnot(identical(n1, n0))
}
stopifnot(all(n0 == ncol(x)))
} # if (mode == "integer")
} # for (na.rm ...)
} # for (mode ...)
# All NAs
naList <- list(NA_integer_, NA_real_, NaN)
for (naValue in naList) {
x <- rep(naValue, times=100L)
for (na.rm in c(FALSE, TRUE)) {
n0 <- count_R(x, na.rm=na.rm)
n1 <- count(x, na.rm=na.rm)
stopifnot(identical(n1, n0))
# Count NAs
n0 <- count_R(x, value=NA, na.rm=na.rm)
n1 <- count(x, value=NA, na.rm=na.rm)
stopifnot(identical(n1, n0))
any <- anyValue(x, value=NA, na.rm=na.rm)
all <- allValue(x, value=NA, na.rm=na.rm)
stopifnot(any)
stopifnot(all)
}
} # for (naValue ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: logical
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- logical(length=100L)
x[13:17] <- TRUE
# Row/column counts
for (na.rm in c(FALSE, TRUE)) {
n0 <- count_R(x, na.rm=na.rm)
n1 <- count(x, na.rm=na.rm)
stopifnot(identical(n1, n0))
nT <- count(x, value=TRUE, na.rm=na.rm)
nF <- count(x, value=FALSE, na.rm=na.rm)
stopifnot(nT + nF == ncol(x))
# Count NAs
n0 <- count_R(x, value=NA, na.rm=na.rm)
n1 <- count(x, value=NA, na.rm=na.rm)
stopifnot(identical(n1, n0))
}
matrixStats/tests/sumOver.R 0000644 0001751 0000144 00000010610 12542546242 015544 0 ustar hornik users library("matrixStats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(1)
sumOver_R <- function(x, na.rm=FALSE, idxs=NULL) {
if (is.null(idxs)) {
sum(x, na.rm=na.rm)
} else {
sum(x[idxs], na.rm=na.rm)
}
} # sumOver_R()
cat("Consistency checks:\n")
K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 4 else 20
for (kk in seq_len(K)) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
n <- sample(1e3, size=1L)
x <- rnorm(n, sd=100)
# Add NAs?
if ((kk %% 4) %in% c(3,0)) {
cat("Adding NAs\n")
nna <- sample(n, size=1L)
naValues <- c(NA_real_, NaN)
x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE)
}
# Integer or double?
if ((kk %% 4) %in% c(2,0)) {
cat("Coercing to integers\n")
storage.mode(x) <- "integer"
}
na.rm <- sample(c(TRUE,FALSE), size=1L)
# Sum over all
y0 <- sumOver_R(x, na.rm=na.rm)
y1 <- sumOver(x, na.rm=na.rm)
stopifnot(all.equal(y1,y0))
# Sum over subset
nidxs <- sample(n, size=1L)
idxs <- sample(n, size=nidxs)
y0 <- sumOver_R(x, na.rm=na.rm, idxs=idxs)
y1 <- sumOver(x, na.rm=na.rm, idxs=idxs)
stopifnot(all.equal(y1,y0))
} # for (kk ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (n in 0:2) {
for (na.rm in c(FALSE, TRUE)) {
x <- rep(NA_real_, times=n)
y0 <- sum(x, na.rm=na.rm)
y <- sumOver(x, na.rm=na.rm)
stopifnot(all.equal(y, y0))
x <- rep(NA_integer_, times=n)
y0 <- sum(x, na.rm=na.rm)
y <- sumOver(x, na.rm=na.rm)
stopifnot(all.equal(y, y0))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Summing of zero elements
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- integer(0)
s1 <- sum(x)
s2 <- sumOver(x)
stopifnot(identical(s1, s2))
x <- 1:10
idxs <- integer(0)
s1 <- sum(x[idxs])
s2 <- sumOver(x, idxs=idxs)
stopifnot(identical(s1, s2))
x <- rep(NA_integer_, times=10L)
s1 <- sum(x, na.rm=TRUE)
s2 <- sumOver(x, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- rep(NA_integer_, times=10L)
idxs <- 1:5
s1 <- sum(x[idxs], na.rm=TRUE)
s2 <- sumOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- double(0)
s1 <- sum(x)
s2 <- sumOver(x)
stopifnot(identical(s1, s2))
x <- as.double(1:10)
idxs <- integer(0)
s1 <- sum(x[idxs])
s2 <- sumOver(x, idxs=idxs)
stopifnot(identical(s1, s2))
x <- rep(NA_real_, times=10L)
s1 <- sum(x, na.rm=TRUE)
s2 <- sumOver(x, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- rep(NA_real_, times=10L)
idxs <- 1:5
s1 <- sum(x[idxs], na.rm=TRUE)
s2 <- sumOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Summing of large integers
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- c(.Machine$integer.max, 1L, -.Machine$integer.max)
# Total gives integer overflow
s1 <- sum(x[1:2]) # NA_integer_
s2 <- sumOver(x[1:2]) # NA_integer_
stopifnot(identical(s1, s2))
# Total gives integer overflow (coerce to numeric)
s1 <- sum(as.numeric(x[1:2])) # 2147483648
s2 <- sumOver(as.numeric(x[1:2])) # 2147483648
s3 <- sumOver(x[1:2], mode="double") # 2147483648
stopifnot(identical(s1, s2))
stopifnot(identical(s1, s3))
# Cumulative sum would give integer overflow but not the total
s1 <- sum(x) # 1L
s2 <- sumOver(x) # 1L
stopifnot(identical(s1, s2))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Summing of large doubles
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Double overflow
x <- rep(.Machine$double.xmax, times=2L)
y0 <- sum(x)
print(y0)
y <- sumOver(x)
print(y)
stopifnot(is.infinite(y) && y > 0)
stopifnot(identical(y, y0))
x <- rep(-.Machine$double.xmax, times=2L)
y0 <- sum(x)
print(y0)
y <- sumOver(x)
print(y)
stopifnot(is.infinite(y) && y < 0)
stopifnot(identical(y, y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'idxs'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:10
idxsList <- list(
integer=1:5,
double=as.double(1:5),
logical=(x <= 5)
)
for (idxs in idxsList) {
cat("idxs:\n")
str(idxs)
s1 <- sum(x[idxs], na.rm=TRUE)
s2 <- sumOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
}
matrixStats/tests/rowWeightedMeans.R 0000644 0001751 0000144 00000004056 12542546242 017367 0 ustar hornik users library("matrixStats")
set.seed(1)
x <- matrix(rnorm(20), nrow=5, ncol=4)
print(x)
# Non-weighted row averages
xM0 <- rowMeans(x)
xM1 <- rowWeightedMeans(x)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x))
stopifnot(all.equal(xM2, xM0))
# Weighted row averages (uniform weights)
w <- rep(2.5, ncol(x))
xM1 <- rowWeightedMeans(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row averages (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowMeans(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMeans(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row averages (excluding some columns)
w <- c(0,1,0,0)
xM0 <- rowMeans(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMeans(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row averages (all zero weights)
w <- c(0,0,0,0)
xM0 <- rowMeans(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMeans(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted averages by rows and columns
w <- 1:4
xM1 <- rowWeightedMeans(x, w=w)
print(xM1)
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
x[sample(length(x), size=0.3*length(x))] <- NA
print(x)
# Non-weighted row averages with missing values
xM0 <- rowMeans(x, na.rm=TRUE)
xM1 <- rowWeightedMeans(x, na.rm=TRUE)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), na.rm=TRUE)
stopifnot(all.equal(xM2, xM0))
# Weighted row averages with missing values
xM0 <- apply(x, MARGIN=1, FUN=weighted.mean, w=w, na.rm=TRUE)
print(xM0)
xM1 <- rowWeightedMeans(x, w=w, na.rm=TRUE)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMeans(t(x), w=w, na.rm=TRUE)
stopifnot(all.equal(xM2, xM0))
# Weighted averages by rows and columns
w <- 1:4
xM1 <- rowWeightedMeans(x, w=w, na.rm=TRUE)
xM2 <- colWeightedMeans(t(x), w=w, na.rm=TRUE)
stopifnot(all.equal(xM2, xM1))
matrixStats/tests/rowCumMinMaxs.R 0000644 0001751 0000144 00000007117 12542546242 016665 0 ustar hornik users library("matrixStats")
rowCummins_R <- function(x) {
suppressWarnings({
y <- t(apply(x, MARGIN=1L, FUN=cummin))
})
}
colCummins_R <- function(x) {
suppressWarnings({
y <- apply(x, MARGIN=2L, FUN=cummin)
})
}
rowCummaxs_R <- function(x) {
suppressWarnings({
y <- t(apply(x, MARGIN=1L, FUN=cummax))
})
}
colCummaxs_R <- function(x) {
suppressWarnings({
y <- apply(x, MARGIN=2L, FUN=cummax)
})
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
r0 <- rowCummins_R(x)
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
r0 <- rowCummaxs_R(x)
r1 <- rowCummaxs(x)
r2 <- t(colCummaxs(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (addNA ...)
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCummins_R(x)
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
r0 <- rowCummaxs_R(x)
r1 <- rowCummaxs(x)
r2 <- t(colCummaxs(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(0, nrow=1, ncol=1)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCummins_R(x)
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
r0 <- rowCummaxs_R(x)
r1 <- rowCummaxs(x)
r2 <- t(colCummaxs(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
value <- 0
storage.mode(value) <- mode
# A 0x0 matrix
x <- matrix(value, nrow=0L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A 0xK matrix
x <- matrix(value, nrow=0L, ncol=5L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A Nx0 matrix
x <- matrix(value, nrow=5L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCummins(x)
r2 <- t(colCummins(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
matrixStats/tests/meanOver.R 0000644 0001751 0000144 00000005302 12542546242 015662 0 ustar hornik users library("matrixStats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(1)
meanOver_R <- function(x, na.rm=FALSE, idxs=NULL) {
if (is.null(idxs)) {
mean(x, na.rm=na.rm)
} else {
mean(x[idxs], na.rm=na.rm)
}
} # meanOver_R()
cat("Consistency checks:\n")
K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 4 else 20
for (kk in seq_len(K)) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
n <- sample(1e3, size=1L)
x <- rnorm(n, sd=100)
# Add NAs?
if ((kk %% 4) %in% c(3,0)) {
cat("Adding NAs\n")
nna <- sample(n, size=1L)
naValues <- c(NA_real_, NaN)
x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE)
}
# Integer or double?
if ((kk %% 4) %in% c(2,0)) {
cat("Coercing to integers\n")
storage.mode(x) <- "integer"
}
na.rm <- sample(c(TRUE,FALSE), size=1L)
# Sum over all
y0 <- meanOver_R(x, na.rm=na.rm)
y1 <- meanOver(x, na.rm=na.rm)
stopifnot(all.equal(y1,y0))
# Sum over subset
nidxs <- sample(n, size=1L)
idxs <- sample(n, size=nidxs)
y0 <- meanOver_R(x, na.rm=na.rm, idxs=idxs)
y1 <- meanOver(x, na.rm=na.rm, idxs=idxs)
stopifnot(all.equal(y1,y0))
} # for (kk ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Averaging over zero elements
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- integer(0)
s1 <- mean(x)
s2 <- meanOver(x)
stopifnot(identical(s1, s2))
x <- 1:10
idxs <- integer(0)
s1 <- mean(x[idxs])
s2 <- meanOver(x, idxs=idxs)
stopifnot(identical(s1, s2))
x <- rep(NA_integer_, times=10L)
s1 <- mean(x, na.rm=TRUE)
s2 <- meanOver(x, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- rep(NA_integer_, times=10L)
idxs <- 1:5
s1 <- mean(x[idxs], na.rm=TRUE)
s2 <- meanOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- double(0)
s1 <- mean(x)
s2 <- meanOver(x)
stopifnot(identical(s1, s2))
x <- as.double(1:10)
idxs <- integer(0)
s1 <- mean(x[idxs])
s2 <- meanOver(x, idxs=idxs)
stopifnot(identical(s1, s2))
x <- rep(NA_real_, times=10L)
s1 <- mean(x, na.rm=TRUE)
s2 <- meanOver(x, na.rm=TRUE)
stopifnot(identical(s1, s2))
x <- rep(NA_real_, times=10L)
idxs <- 1:5
s1 <- mean(x[idxs], na.rm=TRUE)
s2 <- meanOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'idxs'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:10
idxsList <- list(
integer=1:5,
double=as.double(1:5),
logical=(x <= 5)
)
for (idxs in idxsList) {
cat("idxs:\n")
str(idxs)
s1 <- mean(x[idxs], na.rm=TRUE)
s2 <- meanOver(x, idxs=idxs, na.rm=TRUE)
stopifnot(identical(s1, s2))
}
matrixStats/tests/indexByRow.R 0000644 0001751 0000144 00000003002 12542546242 016173 0 ustar hornik users library("matrixStats")
indexByRow_R1 <- function(dim, idxs=NULL, ...) {
n <- prod(dim)
x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE)
if (!is.null(idxs))
x <- x[idxs]
as.vector(x)
} # indexByRow_R1()
indexByRow_R2 <- function(dim, idxs=NULL, ...) {
n <- prod(dim)
if (is.null(idxs)) {
x <- matrix(seq_len(n), nrow=dim[2L], ncol=dim[1L], byrow=TRUE)
as.vector(x)
} else {
idxs <- idxs - 1
cols <- idxs %/% dim[2L]
rows <- idxs %% dim[2L]
cols + dim[1L]*rows + 1L
}
} # indexByRow_R2()
dim <- c(5L, 4L)
X <- matrix(NA_integer_, nrow=dim[1L], ncol=dim[2L])
Y <- t(X)
idxsByCols <- seq(along=X)
# Assign by columns
X[idxsByCols] <- idxsByCols
print(X)
# Truth
Y0 <- t(X)
idxsByRows <- as.vector(Y0)
# Assert
idxs <- indexByRow(dim)
stopifnot(all.equal(idxs, idxsByRows))
Y <- X
Y[idxsByRows] <- idxs
print(Y)
stopifnot(all(as.vector(Y) == as.vector(X)))
idxs_R1 <- indexByRow_R1(dim)
stopifnot(all.equal(idxs_R1, idxsByRows))
idxs_R2 <- indexByRow_R2(dim)
stopifnot(all.equal(idxs_R2, idxsByRows))
# Assert
idxsByCols <- seq(from=1, to=length(X), by=3L)
idxsByRows <- as.vector(t(X)[idxsByCols])
idxs <- indexByRow(dim, idxs=idxsByCols)
stopifnot(all(idxs == idxsByRows))
idxs_R1 <- indexByRow_R1(dim, idxs=idxsByCols)
stopifnot(all(idxs_R1 == idxsByRows))
idxs_R2 <- indexByRow_R2(dim, idxs=idxsByCols)
stopifnot(all(idxs_R2 == idxsByRows))
## DEPRECATED: Backward compatibility
idxs0 <- indexByRow(dim)
idxs1 <- indexByRow(X)
stopifnot(identical(idxs1, idxs0))
matrixStats/tests/binCounts.R 0000644 0001751 0000144 00000004774 12542546242 016066 0 ustar hornik users library("matrixStats")
library("stats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
binCounts_hist <- function(x, bx, right=FALSE, ...) {
n0 <- graphics::hist(x, breaks=bx, right=right, include.lowest=TRUE, plot=FALSE)$counts
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Non-sorted and sorted positions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nx <- 1e5 # Number of data points
nb <- 2e3 # Number of bins
# Uniformely distributed bins
bx <- seq(from=0, to=1, length.out=nb+1L)
bx <- c(-1, bx, 2)
# Sample data points
set.seed(0xBEEF)
x <- runif(nx, min=0, max=1)
for (kk in 1:2) {
n0 <- binCounts_hist(x, bx=bx)
n1 <- binCounts(x, bx=bx)
# Sanity check
stopifnot(identical(n1, n0))
# Check reversed
n1r <- rev(binCounts(-x, bx=rev(-bx), right=TRUE))
stopifnot(identical(n1r, n1))
# Retry with a sorted vector
x <- sort(x)
} # for (kk in 1:2)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:200
x[100] <- NA_integer_
nx <- length(x)
# Bins
bx <- c(0.5,50.5,100.5,150.5,200.5)
yS0 <- binCounts_hist(x, bx=bx)
yS <- binCounts(x, bx=bx)
# Sanity check
stopifnot(all.equal(yS, yS0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Border cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:10
bx <- min(x) - c(10,1)
yS <- binCounts(x, bx=bx)
stopifnot(all.equal(yS, 0L))
bx <- range(x)
yS <- binCounts(x, bx=bx)
stopifnot(all.equal(yS, length(x)-1L))
bx <- max(x) + c(1,10)
yS <- binCounts(x, bx=bx)
stopifnot(all.equal(yS, 0L))
# Every second empty
x <- 1:10
bx <- rep(x, each=2L)
yS <- binCounts(x, bx=bx)
stopifnot(all.equal(yS, rep(c(0L,1L), length.out=length(bx)-1L)))
## NOTE: binCounts_hist() does not give the same last bin count
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Exception handling
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Zero bin bounderies (invalid bin definition)
bx <- double(0L)
res <- try(yS <- binCounts(1:10, bx=bx), silent=TRUE)
stopifnot(inherits(res, "try-error"))
# One bin boundery (invalid bin definition)
bx <- double(1L)
res <- try(yS <- binCounts(1:10, bx=bx), silent=TRUE)
stopifnot(inherits(res, "try-error"))
matrixStats/tests/varDiff_etal.R 0000644 0001751 0000144 00000005160 12542546242 016476 0 ustar hornik users library("matrixStats")
set.seed(1)
x <- rnorm(1e4)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Variance estimators
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sigma2A <- var(x)
cat(sprintf("var(x)=%g\n", sigma2A))
sigma2B <- varDiff(x)
cat(sprintf("varDiff(x)=%g\n", sigma2B))
d <- abs(sigma2B - sigma2A)
cat(sprintf("Absolute difference=%g\n", d))
stopifnot(d < 0.02)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Standard deviation estimators
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
sigmaA <- sd(x)
cat(sprintf("sd(x)=%g\n", sigmaA))
sigmaB <- sdDiff(x)
cat(sprintf("sdDiff(x)=%g\n", sigmaB))
d <- abs(sigmaB - sigmaA)
cat(sprintf("Absolute difference=%g\n", d))
stopifnot(d < 0.01)
# Sanity checks
stopifnot(abs(sigma2A - sigmaA^2) < 1e-9)
stopifnot(abs(sigma2B - sigmaB^2) < 1e-9)
sigmaA2 <- mad(x)
cat(sprintf("mad(x)=%g\n", sigmaA2))
sigmaB2 <- madDiff(x)
cat(sprintf("madDiff(x)=%g\n", sigmaB2))
d <- abs(sigmaB2 - sigmaA2)
cat(sprintf("Absolute difference=%g\n", d))
stopifnot(d < 0.05)
sigmaA3 <- IQR(x)
cat(sprintf("IQR(x)=%g\n", sigmaA3))
sigmaB3 <- iqrDiff(x)
cat(sprintf("iqrDiff(x)=%g\n", sigmaB3))
d <- abs(sigmaB3 - sigmaA3)
cat(sprintf("Absolute difference=%g\n", d))
stopifnot(d < 0.05)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Trimmed estimators
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
y <- x
outliers <- sample(length(x), size=0.1*length(x))
y[outliers] <- 100*y[outliers]
sigmaAo <- sd(y[-outliers])
cat(sprintf("sd(y)=%g\n", sigmaAo))
sigmaBo <- sdDiff(y[-outliers])
cat(sprintf("sdDiff(y)=%g\n", sigmaBo))
d <- abs(sigmaB - sigmaA)
cat(sprintf("Absolute difference=%g\n", d))
stopifnot(d < 0.01)
sigmaBot <- sdDiff(y, trim=0.05)
cat(sprintf("sdDiff(y, trim=0.05)=%g\n", sigmaBot))
d <- abs(sigmaBot - sigmaA)
cat(sprintf("Absolute difference=%g\n", d))
#stopifnot(d < 1e-3)
sigmaCot <- madDiff(y, trim=0.05)
cat(sprintf("madDiff(y, trim=0.05)=%g\n", sigmaCot))
sigmaDot <- iqrDiff(y, trim=0.05)
cat(sprintf("iqrDiff(y, trim=0.05)=%g\n", sigmaDot))
FUNs <- list(
varDiff=varDiff,
sdDiff=sdDiff,
madDiff=madDiff,
iqrDiff=iqrDiff
)
for (fcn in names(FUNs)) {
cat(sprintf("%s()...\n", fcn))
FUN <- FUNs[[fcn]]
for (mode in c("integer", "double")) {
cat("mode: ", mode, "", sep="")
for (n in 0:3) {
x <- runif(n, min=-5, max=5)
storage.mode(x) <- mode
str(x)
y <- FUN(x)
yt <- FUN(x, trim=0.1)
str(list("non-trimmed"=y, trimmed=yt))
} # for (mode ...)
}
cat(sprintf("%s()...DONE\n", fcn))
} # for (fcn ...)
matrixStats/tests/rowOrderStats.R 0000644 0001751 0000144 00000002623 12542546242 016733 0 ustar hornik users library("matrixStats")
library("stats")
rowOrderStats_R <- function(x, probs) {
apply(x, MARGIN=1L, FUN=quantile, probs=probs, type=3L)
} # rowOrderStats_R()
set.seed(1)
K <- if (Sys.getenv("_R_CHECK_FULL_") == "") 5 else 3
# Simulate data in a matrix of any shape
nrow <- 300
ncol <- 100
x <- rnorm(nrow*ncol)
dim(x) <- c(nrow, ncol)
probs <- 0.3
which <- round(probs*ncol)
y0 <- rowOrderStats_R(x, probs=probs)
y1 <- rowOrderStats(x, which=which)
stopifnot(all.equal(y1,y0))
y2 <- colOrderStats(t(x), which=which)
stopifnot(all.equal(y2,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("Consistency checks without NAs:\n")
for (kk in seq_len(K)) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
nrow <- sample(100, size=1)
ncol <- sample(100, size=1)
x <- rnorm(nrow*ncol)
dim(x) <- c(nrow, ncol)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
probs <- runif(1)
which <- round(probs*ncol)
y0 <- rowOrderStats_R(x, probs=probs)
y1 <- rowOrderStats(x, which=which)
stopifnot(all.equal(y1,y0))
y2 <- colOrderStats(t(x), which=which)
stopifnot(all.equal(y2,y0))
} # for (kk in ...)
} # for (mode ...)
matrixStats/tests/rowVarDiffs.R 0000644 0001751 0000144 00000003707 12542546242 016351 0 ustar hornik users library("matrixStats")
FUNs <- list(
rowVarDiffs=list(rowVarDiffs, colVarDiffs),
rowSdDiffs=list(rowSdDiffs, colSdDiffs),
rowMadDiffs=list(rowMadDiffs, colMadDiffs),
rowIQRDiffs=list(rowIQRDiffs, colIQRDiffs)
)
for (fcn in names(FUNs)) {
cat(sprintf("%s()...\n", fcn))
rFUN <- FUNs[[fcn]][[1L]]
cFUN <- FUNs[[fcn]][[2L]]
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100+0.1, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r1 <- rFUN(x, na.rm=na.rm)
r2 <- cFUN(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
}
} # for (addNA ...)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r1 <- rFUN(x, na.rm=na.rm)
r2 <- cFUN(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=1, ncol=1)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r1 <- rFUN(x, na.rm=na.rm)
r2 <- cFUN(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
}
cat(sprintf("%s()...DONE\n", fcn))
} # for (fcn ...)
matrixStats/tests/rowCumsums.R 0000644 0001751 0000144 00000005524 12542546242 016300 0 ustar hornik users library("matrixStats")
rowCumsums_R <- function(x) {
suppressWarnings({
t(apply(x, MARGIN=1L, FUN=cumsum))
})
}
colCumsums_R <- function(x) {
suppressWarnings({
apply(x, MARGIN=2L, FUN=cumsum)
})
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
r0 <- rowCumsums_R(x)
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (addNA ...)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCumsums_R(x)
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(0, nrow=1, ncol=1)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCumsums_R(x)
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
value <- 0
storage.mode(value) <- mode
# A 0x0 matrix
x <- matrix(value, nrow=0L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A 0xK matrix
x <- matrix(value, nrow=0L, ncol=5L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A Nx0 matrix
x <- matrix(value, nrow=5L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumsums(x)
r2 <- t(colCumsums(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
matrixStats/tests/benchmark.R 0000644 0001751 0000144 00000000304 12542546242 016035 0 ustar hornik users if (Sys.getenv("_R_CHECK_FULL_") != "" && Sys.getenv("_R_CHECK_USE_VALGRIND_") == "") {
if (require("R.rsp")) {
html <- matrixStats:::benchmark('binCounts')
print(html)
}
} # _R_CHECK_FULL_
matrixStats/tests/zzz.package-unload.R 0000644 0001751 0000144 00000002277 12542546242 017625 0 ustar hornik users ## These tests need to be last of all tests, otherwise
## covr::package_coverage() gives an error.
cat("1. Loading package\n")
requireNamespace("matrixStats")
stopifnot("matrixStats" %in% loadedNamespaces())
cat("2. Unloading package\n")
unloadNamespace("matrixStats")
stopifnot(!"matrixStats" %in% loadedNamespaces())
if (FALSE) {
## 'covr' gives "Error in library("matrixStats") :
## there is no package called 'matrixStats'" here, cf.
## https://travis-ci.org/HenrikBengtsson/matrixStats/builds/48015577
cat("3. Attaching package\n")
library("matrixStats")
stopifnot("package:matrixStats" %in% search())
cat("4. Detaching package\n")
detach("package:matrixStats")
stopifnot(!"package:matrixStats" %in% search())
stopifnot("matrixStats" %in% loadedNamespaces())
cat("5. Unloading package\n")
unloadNamespace("matrixStats")
stopifnot(!"matrixStats" %in% loadedNamespaces())
cat("6. Attaching package (again)\n")
library("matrixStats")
stopifnot("package:matrixStats" %in% search())
cat("7. Detaching package (again)\n")
detach("package:matrixStats")
stopifnot(!"package:matrixStats" %in% search())
stopifnot("matrixStats" %in% loadedNamespaces())
}
cat("7. DONE\n")
matrixStats/tests/rowAvgsPerColSet.R 0000644 0001751 0000144 00000004037 12542546242 017323 0 ustar hornik users library("matrixStats")
X <- matrix(rnorm(20*6), nrow=20, ncol=6)
rownames(X) <- LETTERS[1:nrow(X)]
colnames(X) <- letters[1:ncol(X)]
print(X)
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply rowMeans() for 3 sets of 2 columns
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 3
S <- matrix(1:ncol(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s%d", 1:nbrOfSets)
print(S)
Z <- rowAvgsPerColSet(X, S=S)
print(Z)
# Validation
Z0 <- cbind(s1=rowMeans(X[,1:2]), s2=rowMeans(X[,3:4]),
s3=rowMeans(X[,5:6]))
stopifnot(identical(drop(Z), Z0))
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply colMeans() for 5 sets of 4 rows
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 5
S <- matrix(1:nrow(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s%d", 1:nbrOfSets)
print(S)
Z <- colAvgsPerRowSet(X, S=S)
print(Z)
# Validation
Z0 <- rbind(s1=colMeans(X[1:4,]), s2=colMeans(X[5:8,]),
s3=colMeans(X[9:12,]), s4=colMeans(X[13:16,]),
s5=colMeans(X[17:20,]))
stopifnot(identical(drop(Z), Z0))
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# When there is only one "complete" set
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 1
S <- matrix(1:ncol(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s%d", 1:nbrOfSets)
print(S)
Z <- rowAvgsPerColSet(X, S=S, FUN=rowMeans)
print(Z)
Z0 <- rowMeans(X)
stopifnot(identical(drop(Z), Z0))
nbrOfSets <- 1
S <- matrix(1:nrow(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s%d", 1:nbrOfSets)
print(S)
Z <- colAvgsPerRowSet(X, S=S, FUN=colMeans)
print(Z)
Z0 <- colMeans(X)
stopifnot(identical(drop(Z), Z0))
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Use weights
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 3
S <- matrix(1:ncol(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s%d", 1:nbrOfSets)
print(S)
W <- matrix(runif(length(X)), nrow=nrow(X), ncol=ncol(X))
Z1 <- rowAvgsPerColSet(X, W=W, S=S, FUN=rowWeightedMeans)
print(Z1)
Z2 <- colAvgsPerRowSet(X, W=W, S=S, FUN=colWeightedMeans)
print(Z2)
matrixStats/tests/rowWeightedVars.R 0000644 0001751 0000144 00000005130 12542546242 017231 0 ustar hornik users library("matrixStats")
set.seed(1)
x <- matrix(rnorm(20), nrow=5L, ncol=4L)
print(x)
# Non-weighted row variances
xM0 <- rowVars(x)
w <- rep(1, times=ncol(x))
xM1 <- rowWeightedVars(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row variances (uniform weights)
w <- rep(2.5, ncol(x))
xM1 <- rowWeightedVars(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row variances (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowVars(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedVars(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row variances (excluding some columns)
w <- c(0,1,0,0)
xM0 <- rowVars(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedVars(x, w=w)
#stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
# Weighted row variances (all zero weights)
w <- c(0,0,0,0)
xM0 <- rowVars(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedVars(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted variances by rows and columns
w <- 1:4
xM1 <- rowWeightedVars(x, w=w)
print(xM1)
xM2 <- colWeightedVars(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
x[sample(length(x), size=0.3*length(x))] <- NA
print(x)
# Non-weighted row variances with missing values
xM0 <- rowVars(x, na.rm=TRUE)
xM1 <- rowWeightedVars(x, w=rep(1, times=ncol(x)), na.rm=TRUE)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedVars(t(x), w=rep(1, times=ncol(x)), na.rm=TRUE)
stopifnot(all.equal(xM2, xM0))
# Weighted row variances with missing values
xM1 <- rowWeightedVars(x, w=w, na.rm=TRUE)
print(xM1)
xM2 <- colWeightedVars(t(x), w=w, na.rm=TRUE)
stopifnot(all.equal(xM2, xM1))
# Weighted variances by rows and columns
w <- 1:4
xM1 <- rowWeightedVars(x, w=w, na.rm=TRUE)
xM2 <- colWeightedVars(t(x), w=w, na.rm=TRUE)
stopifnot(all.equal(xM2, xM1))
# Weighted row standard deviation (excluding some columns)
w <- c(1,1,0,1)
## FIXME: rowVars()/rowSds() needs na.rm=FALSE (wrong default)
xM0 <- rowSds(x[,(w == 1),drop=FALSE], na.rm=FALSE)
xM1 <- rowWeightedSds(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedSds(t(x), w=w, na.rm=FALSE)
stopifnot(all.equal(xM2, xM0))
# Weighted row MADs (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowMads(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMads(x, w=w)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMads(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
matrixStats/tests/allocMatrix.R 0000644 0001751 0000144 00000001111 12542546242 016357 0 ustar hornik users library("matrixStats")
allocMatrix_R <- function(nrow, ncol, value=NA) {
matrix(data=value, nrow=nrow, ncol=ncol)
} # allocMatrix_R()
values <- list(
-1L, 0L, +1L, NA_integer_, .Machine$integer.max,
-1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps,
FALSE, TRUE, NA
)
nrow <- 5L
ncol <- 10L
for (value in values) {
X0 <- allocMatrix_R(nrow, ncol, value=value)
X <- allocMatrix(nrow, ncol, value=value)
str(list(nrow=nrow, ncol=ncol, value=value, X=X, X0=X0))
stopifnot(identical(X,X0))
}
matrixStats/tests/rowWeightedMedians.R 0000644 0001751 0000144 00000003655 12542546242 017710 0 ustar hornik users library("matrixStats")
set.seed(1)
x <- matrix(rnorm(20), nrow=5, ncol=4)
print(x)
# Non-weighted row medians
xM0 <- rowMedians(x)
xM1 <- rowWeightedMedians(x)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x))
stopifnot(all.equal(xM2, xM0))
# Weighted row medians (uniform weights)
w <- rep(2.5, ncol(x))
xM1 <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row medians (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowMedians(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row medians (excluding some columns)
w <- c(0,1,0,0)
xM0 <- rowMedians(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted row medians (all zero weights)
w <- c(0,0,0,0)
xM0 <- rowMedians(x[,(w == 1),drop=FALSE])
xM1 <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted medians by rows and columns
w <- 1:4
xM1 <- rowWeightedMedians(x, w=w)
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
# Weighted row medians with missing values
xM0 <- apply(x, MARGIN=1, FUN=weightedMedian, w=w, na.rm=TRUE)
print(xM0)
xM1 <- rowWeightedMedians(x, w=w, na.rm=TRUE)
print(xM1)
stopifnot(all.equal(xM1, xM0))
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM0))
# Weighted medians by rows and columns
w <- 1:4
xM1 <- rowWeightedMedians(x, w=w, na.rm=TRUE)
xM2 <- colWeightedMedians(t(x), w=w, na.rm=TRUE)
stopifnot(all.equal(xM2, xM1))
# Inf weight
x <- matrix(1:2, nrow=1, ncol=2)
w <- c(7, Inf)
xM1 <- rowWeightedMedians(x, w=w)
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(identical(2, xM1))
stopifnot(identical(2, xM2))
matrixStats/tests/rowMedians.R 0000644 0001751 0000144 00000013624 12542546242 016224 0 ustar hornik users library("matrixStats")
rowMedians_R <- function(x, na.rm=FALSE, ...) {
apply(x, MARGIN=1L, FUN=median, na.rm=na.rm)
}
colMedians_R <- function(x, na.rm=FALSE, ...) {
apply(x, MARGIN=2L, FUN=median, na.rm=na.rm)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Non-ties
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Non-ties:\n")
for (mode in c("integer", "double")) {
x <- matrix(1:9+0.1, nrow=3, ncol=3)
storage.mode(x) <- mode
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Ties
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Ties:\n")
for (mode in c("integer", "double")) {
x <- matrix(1:16+0.1, nrow=4, ncol=4)
storage.mode(x) <- mode
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Single-element matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Single-element matrix:\n")
for (mode in c("integer", "double")) {
x <- matrix(1, nrow=1, ncol=1)
storage.mode(x) <- mode
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Empty matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Empty matrix:\n")
for (mode in c("integer", "double")) {
x <- matrix(integer(0), nrow=0, ncol=0)
storage.mode(x) <- mode
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: All NAs:\n")
for (mode in c("integer", "double")) {
x <- matrix(NA_integer_, nrow=3, ncol=3)
storage.mode(x) <- mode
y0 <- rowMedians_R(x, na.rm=TRUE)
y1 <- rowMedians(x, na.rm=TRUE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=TRUE)
y1 <- colMedians(x, na.rm=TRUE)
stopifnot(all.equal(y1,y0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: All NaNs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: All NaNs:\n")
x <- matrix(NA_real_, nrow=3, ncol=3)
y0 <- rowMedians_R(x, na.rm=TRUE)
y1 <- rowMedians(x, na.rm=TRUE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=TRUE)
y1 <- colMedians(x, na.rm=TRUE)
stopifnot(all.equal(y1,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: All Infs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: All Infs:\n")
x <- matrix(Inf, nrow=3, ncol=3)
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: All -Infs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: All -Infs:\n")
x <- matrix(-Inf, nrow=3, ncol=3)
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Infs and -Infs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Infs and -Infs:\n")
x <- matrix(c(-Inf,+Inf), nrow=4, ncol=4)
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special case: Integer overflow with ties
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("Special case: Integer overflow with ties:\n")
x <- matrix(.Machine$integer.max, nrow=4, ncol=4)
y0 <- rowMedians_R(x, na.rm=FALSE)
y1 <- rowMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
y0 <- colMedians_R(x, na.rm=FALSE)
y1 <- colMedians(x, na.rm=FALSE)
stopifnot(all.equal(y1,y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(1)
cat("Consistency checks:\n")
K <- if (Sys.getenv("_R_CHECK_FULL_") == "" || Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4 else 10
for (kk in seq_len(K)) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
dim <- sample(50:200, size=2)
n <- prod(dim)
x <- rnorm(n, sd=100)
dim(x) <- dim
# Add NAs?
if ((kk %% 4) %in% c(3,0)) {
cat("Adding NAs\n")
nna <- sample(n, size=1)
naValues <- c(NA_real_, NaN)
x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE)
}
# Integer or double?
if ((kk %% 4) %in% c(2,0)) {
cat("Coercing to integers\n")
storage.mode(x) <- "integer"
}
na.rm <- sample(c(TRUE,FALSE), size=1)
# rowMedians():
y0 <- rowMedians_R(x, na.rm=na.rm)
y1 <- rowMedians(x, na.rm=na.rm)
stopifnot(all.equal(y1,y0))
y2 <- colMedians(t(x), na.rm=na.rm)
stopifnot(all.equal(y2,y0))
# colMedians():
y0 <- colMedians_R(x, na.rm=na.rm)
y1 <- colMedians(x, na.rm=na.rm)
stopifnot(all.equal(y1,y0))
y2 <- rowMedians(t(x), na.rm=na.rm)
stopifnot(all.equal(y2,y0))
} # for (kk ...)
matrixStats/tests/rowLogSumExps.R 0000644 0001751 0000144 00000007077 12542546242 016717 0 ustar hornik users # Test inspired by the harmonic mean example in R-help
# thread '[R] Beyond double-precision?' on May 9, 2009.
library("matrixStats")
library("stats")
logSumExp0 <- function(lx) {
iMax <- which.max(lx)
log1p(sum(exp(lx[-iMax] - lx[iMax]))) + lx[iMax]
} # logSumExp0()
n <- 1e3
set.seed(1)
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
X <- matrix(runif(n, min=1.0, max=3.0), nrow=50L)
storage.mode(X) <- mode
str(X)
# The logarithm of the harmonic mean by rows
yH <- log(1/rowMeans(1/X))
str(yH)
nlX <- -log(X)
y0 <- log(ncol(X)) - apply(nlX, MARGIN=1L, FUN=logSumExp0)
stopifnot(all.equal(y0,yH))
y1 <- log(ncol(X)) - apply(nlX, MARGIN=1L, FUN=logSumExp)
stopifnot(all.equal(y1,y0))
y2 <- log(ncol(X)) - rowLogSumExps(nlX)
stopifnot(all.equal(y2,y0))
y3 <- log(ncol(X)) - colLogSumExps(t(nlX))
stopifnot(all.equal(y3,y0))
# The logarithm of the harmonic mean by columns
yH <- log(1/colMeans(1/X))
str(yH)
y0 <- log(nrow(X)) - apply(nlX, MARGIN=2L, FUN=logSumExp0)
stopifnot(all.equal(y0,yH))
y1 <- log(nrow(X)) - apply(nlX, MARGIN=2L, FUN=logSumExp)
stopifnot(all.equal(y1,y0))
y2 <- log(nrow(X)) - colLogSumExps(nlX)
stopifnot(all.equal(y2,y0))
y3 <- log(nrow(X)) - rowLogSumExps(t(nlX))
stopifnot(all.equal(y3,y0))
# Testing names
rownames(nlX) <- seq_len(nrow(X))
colnames(nlX) <- seq_len(ncol(X))
y2 <- rowLogSumExps(nlX)
stopifnot(identical(names(y2), rownames(nlX)))
y3 <- colLogSumExps(t(nlX))
stopifnot(identical(names(y3), rownames(nlX)))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Zero-size matrices
lX <- matrix(numeric(0L), nrow=0L, ncol=0L)
y <- rowLogSumExps(lX)
print(y)
stopifnot(length(y) == nrow(lX))
y <- colLogSumExps(lX)
print(y)
stopifnot(length(y) == ncol(lX))
## Zero-height matrices
lX <- matrix(numeric(0L), nrow=0L, ncol=10L)
y <- rowLogSumExps(lX)
print(y)
stopifnot(length(y) == nrow(lX))
y <- colLogSumExps(lX)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(all(y == -Inf))
## Zero-width matrices
lX <- matrix(numeric(0L), nrow=10L, ncol=0L)
y <- colLogSumExps(lX)
print(y)
stopifnot(length(y) == ncol(lX))
y <- rowLogSumExps(lX)
print(y)
stopifnot(length(y) == nrow(lX))
stopifnot(all(y == -Inf))
## Matrices with one element
lX <- matrix(1.0, nrow=1L, ncol=1L)
y <- rowLogSumExps(lX)
print(y)
stopifnot(length(y) == nrow(lX))
stopifnot(all(y == lX))
y <- colLogSumExps(lX)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(all(y == lX))
## All missing values
lX <- matrix(NA_real_, nrow=1L, ncol=1L)
y <- rowLogSumExps(lX, na.rm=TRUE)
print(y)
stopifnot(length(y) == nrow(lX))
stopifnot(identical(y, -Inf))
lX <- matrix(NA_real_, nrow=1L, ncol=1L)
y <- colLogSumExps(lX, na.rm=TRUE)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(identical(y, -Inf))
lX <- matrix(NA_real_, nrow=2L, ncol=2L)
y <- rowLogSumExps(lX, na.rm=TRUE)
print(y)
stopifnot(length(y) == nrow(lX))
stopifnot(all(y == -Inf))
y <- rowLogSumExps(lX, na.rm=FALSE)
print(y)
stopifnot(length(y) == nrow(lX))
stopifnot(all(is.na(y) & !is.nan(y)))
lX <- matrix(NA_real_, nrow=2L, ncol=2L)
y <- colLogSumExps(lX, na.rm=TRUE)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(all(y == -Inf))
y <- colLogSumExps(lX, na.rm=FALSE)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(all(is.na(y) & !is.nan(y)))
## +Inf values
lX <- matrix(c(1, 2, +Inf), nrow=3L, ncol=2L)
y <- colLogSumExps(lX, na.rm=TRUE)
print(y)
stopifnot(length(y) == ncol(lX))
stopifnot(all(y == +Inf))
matrixStats/tests/rowVars.R 0000644 0001751 0000144 00000005360 12542546242 015555 0 ustar hornik users library("matrixStats")
rowVars_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=1L, FUN=var, na.rm=na.rm)
})
}
colVars_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=2L, FUN=var, na.rm=na.rm)
})
}
rowVars_center <- function(x, na.rm=FALSE) {
center <- rowMeans(x, na.rm=na.rm)
rowVars(x, center=center, na.rm=na.rm)
}
colVars_center <- function(x, na.rm=FALSE) {
center <- colMeans(x, na.rm=na.rm)
colVars(x, center=center, na.rm=na.rm)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100+0.1, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowVars_R(x, na.rm=na.rm)
r1 <- rowVars(x, na.rm=na.rm)
r1b <- rowVars_center(x, na.rm=na.rm)
r2 <- colVars(t(x), na.rm=na.rm)
r2b <- colVars_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
} # for (addNA ...)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowVars_R(x, na.rm=na.rm)
r1 <- rowVars(x, na.rm=na.rm)
r1b <- rowVars_center(x, na.rm=na.rm)
r2 <- colVars(t(x), na.rm=na.rm)
r2b <- colVars_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=1, ncol=1)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowVars_R(x, na.rm=na.rm)
r1 <- rowVars(x, na.rm=na.rm)
r1b <- rowVars_center(x, na.rm=na.rm)
r2 <- colVars(t(x), na.rm=na.rm)
r2b <- colVars_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
matrixStats/tests/product.R 0000644 0001751 0000144 00000002244 12542546242 015570 0 ustar hornik users library("matrixStats")
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
# Empty vector
x <- 0
storage.mode(x) <- mode
y <- prod(x, na.rm=TRUE)
print(y)
z <- product(x, na.rm=TRUE)
print(z)
stopifnot(all.equal(z, y))
# Test negative values
x <- c(1,-4,2)
storage.mode(x) <- mode
y <- prod(x, na.rm=TRUE)
print(y)
z <- product(x, na.rm=TRUE)
print(z)
stopifnot(all.equal(z, y))
# Test missing values
x <- c(1,NA,NaN,2)
storage.mode(x) <- mode
y <- prod(x, na.rm=TRUE)
print(y)
z <- product(x, na.rm=TRUE)
print(z)
stopifnot(all.equal(z, y))
x <- c(1,NA,NaN,2)
storage.mode(x) <- mode
y <- prod(x, na.rm=FALSE)
print(y)
z <- product(x, na.rm=FALSE)
print(z)
stopifnot(all(is.na(z), is.na(y)))
x <- c(1,NaN,2)
storage.mode(x) <- mode
y <- prod(x, na.rm=FALSE)
print(y)
stopifnot(is.na(y))
z <- product(x, na.rm=FALSE)
print(z)
stopifnot(is.na(z))
# NAs following 0s should return NA
x <- c(0L, NA_integer_)
storage.mode(x) <- mode
y <- prod(x, na.rm=FALSE)
print(y)
stopifnot(is.na(y))
z <- product(x, na.rm=FALSE)
print(z)
stopifnot(is.na(z))
} # for (mode ...)
matrixStats/tests/binMeans,binCounts.R 0000644 0001751 0000144 00000006723 12542546242 017613 0 ustar hornik users library("matrixStats")
library("stats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Naive R implementation of binMeans()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
binMeans0 <- function(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE) {
B <- length(bx)-1L
res <- double(B)
counts <- rep(NaN, times=B)
if (na.rm) {
keep <- !is.na(x) & !is.na(y)
x <- x[keep]
y <- y[keep]
}
# For each bin...
for (kk in seq(length=B)) {
if (right) {
idxs <- which(bx[kk] < x & x <= bx[kk+1L])
} else {
idxs <- which(bx[kk] <= x & x < bx[kk+1L])
}
yKK <- y[idxs]
muKK <- mean(yKK)
res[kk] <- muKK
counts[kk] <- length(idxs)
} # for (kk ...)
if (count) attr(res, "count") <- counts
res
} # binMeans0()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Case #1
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:200
nx <- length(x)
y <- double(nx)
y[1:50] <- 5
y[101:150] <- -5
y <- y + rnorm(nx)
# Bins
bx <- c(0.5,50.5,100.5,150.5,200.5)
yS0 <- binMeans0(y, x=x, bx=bx)
yS <- binMeans(y, x=x, bx=bx)
nS <- binCounts(x, bx=bx)
# Sanity check
stopifnot(all.equal(yS, yS0))
stopifnot(all.equal(attr(yS, "count"), nS))
yS0r <- rev(binMeans0(y, x=-x, bx=rev(-bx), count=FALSE, right=TRUE))
ySr <- rev(binMeans(y, x=-x, bx=rev(-bx), count=FALSE, right=TRUE))
# Sanity check
stopifnot(all.equal(yS0r, yS0, check.attributes=FALSE))
stopifnot(all.equal(ySr, yS0r))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Case #2
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nx <- 1e4
x <- runif(nx)
y <- runif(nx)
nb <- 20
bx <- do.call(seq, c(as.list(range(x)), length.out=nb))
bx1 <- c(bx[-1], bx[nb] + 1)
yS0 <- binMeans0(y, x=x, bx=bx1)
yS <- binMeans(y, x=x, bx=bx1)
nS <- binCounts(x, bx=bx1)
ySr <- rev(binMeans(y, x=-x, bx=rev(-bx1), right=TRUE))
# Sanity check
stopifnot(all.equal(yS, yS0))
stopifnot(all.equal(attr(yS, "count"), nS))
stopifnot(all.equal(ySr, yS, check.attributes=FALSE))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Empty bins
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- c(6:8, 16:19)
nx <- length(x)
y <- runif(nx)
bx <- c(0,5,10,15,20,25)
yS0 <- binMeans0(y, x=x, bx=bx)
yS <- binMeans(y, x=x, bx=bx)
nS <- binCounts(x, bx=bx)
stopifnot(all.equal(attr(yS, "count"), nS))
stopifnot(all.equal(yS, yS0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- 1:200
x[100] <- NA_integer_
nx <- length(x)
y <- double(nx)
y[1:50] <- 5
y[101:150] <- -5
y[123:125] <- NA_real_
y <- y + rnorm(nx)
# Bins
bx <- c(0.5,50.5,100.5,150.5,200.5)
yS0 <- binMeans0(y, x=x, bx=bx)
yS <- binMeans(y, x=x, bx=bx)
# Sanity check
stopifnot(all.equal(yS, yS0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Exception handling
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Zero bin bounderies (invalid bin definition)
bx <- double(0L)
res <- try(yS <- binMeans(x=1:10, y=1:10, bx=bx), silent=TRUE)
stopifnot(inherits(res, "try-error"))
# One bin boundery (invalid bin definition)
bx <- double(1L)
res <- try(yS <- binMeans(x=1:10, y=1:10, bx=bx), silent=TRUE)
stopifnot(inherits(res, "try-error"))
matrixStats/tests/allocVector.R 0000644 0001751 0000144 00000001122 12542546242 016357 0 ustar hornik users library("matrixStats")
allocVector_R <- function(length, value=NA) {
x <- vector(mode=typeof(value), length=length)
if (!is.finite(value) || value != 0) x[] <- value
x
} # allocVector_R()
values <- list(
-1L, 0L, +1L, NA_integer_, .Machine$integer.max,
-1, 0, +1, NA_real_, NaN, -Inf, +Inf, .Machine$double.xmin, .Machine$double.xmax, .Machine$double.eps, .Machine$double.neg.eps,
FALSE, TRUE, NA
)
n <- 1e3
for (value in values) {
x0 <- allocVector_R(n, value=value)
x <- allocVector(n, value=value)
str(list(n=n, value=value, x=x, x0=x0))
stopifnot(identical(x,x0))
}
matrixStats/tests/weightedMedian.R 0000644 0001751 0000144 00000004147 12542546242 017032 0 ustar hornik users library("matrixStats")
x <- 1:5
y <- weightedMedian(x)
y <- weightedMedian(x, w=c(NA,Inf,NA,Inf,NA), na.rm=TRUE)
print(y)
y <- weightedMedian(x, w=c(NA,Inf,NA,Inf,NA), na.rm=FALSE)
print(y)
stopifnot(is.na(y))
x <- 1:10
n <- length(x)
y1 <- median(x) # 5.5
y2 <- weightedMedian(x) # 5.5
stopifnot(all.equal(y1, y2))
w <- rep(1, n)
y1 <- weightedMedian(x, w) # 5.5 (default)
y2a <- weightedMedian(x, ties="weighted") # 5.5 (default)
y2b <- weightedMedian(x, ties="min") # 5
y2c <- weightedMedian(x, ties="max") # 6
stopifnot(all.equal(y2a,y1))
y3 <- weightedMedian(x, w) # 5.5 (default)
# Pull the median towards zero
w[1] <- 5
y1 <- weightedMedian(x, w) # 3.5
y <- c(rep(0,w[1]), x[-1]) # Only possible for integer weights
y2 <- median(y) # 3.5
stopifnot(all.equal(y1,y2))
# Put even more weight on the zero
w[1] <- 8.5
y <- weightedMedian(x, w) # 2
# All weight on the first value
w[1] <- Inf
y <- weightedMedian(x, w) # 1
# All weight on the last value
w[1] <- 1
w[n] <- Inf
y <- weightedMedian(x, w) # 10
# All weights set to zero
w <- rep(0, n)
y <- weightedMedian(x, w) # NA
x <- 1:4
w <- rep(1, times=4)
for (mode in c("integer", "double")) {
storage.mode(x) <- mode
for (ties in c("weighted", "mean", "min", "max")) {
cat(sprintf("ties=%s\n", ties))
y <- weightedMedian(x, w, ties=ties)
}
}
set.seed(0x42)
y <- weightedMedian(x=double(0L))
print(y)
stopifnot(length(y) == 1L)
stopifnot(is.na(y))
y <- weightedMedian(x=x[1])
print(y)
stopifnot(length(y) == 1L)
stopifnot(all.equal(y, x[1]))
n <- 1e3
x <- runif(n)
w <- runif(n, min=0, max=1)
for (mode in c("integer", "double")) {
storage.mode(x) <- mode
for (ties in c("weighted", "mean", "min", "max")) {
y <- weightedMedian(x, w, ties=ties)
cat(sprintf("mode=%s, ties=%s, result=%g\n", mode, ties, y))
}
}
# A large vector
n <- 1e5
x <- runif(n)
w <- runif(n, min=0, max=1)
y <- weightedMedian(x, w)
y <- weightedMedian(x, w, ties="min")
matrixStats/tests/rowMads.R 0000644 0001751 0000144 00000012406 12542546242 015525 0 ustar hornik users library("matrixStats")
rowMads_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=1L, FUN=mad, na.rm=na.rm)
})
}
colMads_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=2L, FUN=mad, na.rm=na.rm)
})
}
rowMads_center <- function(x, na.rm=FALSE) {
center <- rowMedians(x, na.rm=na.rm)
rowMads(x, center=center, na.rm=na.rm)
}
colMads_center <- function(x, na.rm=FALSE) {
center <- colMedians(x, na.rm=na.rm)
colMads(x, center=center, na.rm=na.rm)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 3x3 matrix (no ties)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(c(1,2,3,2,3,4,3,4,5)+0.1, nrow=3, ncol=3)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
cat("rowMads():\n")
r0 <- rowMads_R(x, na.rm=TRUE)
r1 <- rowMads(x, na.rm=TRUE)
r1b <- rowMads_center(x, na.rm=TRUE)
r2 <- colMads(t(x), na.rm=TRUE)
r2b <- colMads_center(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
cat("colMads():\n")
r0 <- colMads_R(x, na.rm=TRUE)
r1 <- colMads(x, na.rm=TRUE)
r1b <- colMads_center(x, na.rm=TRUE)
r2 <- rowMads(t(x), na.rm=TRUE)
r2b <- rowMads_center(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Ties: a 4x4 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(c(1:4,2:5,3:6,4:7)+0.1, nrow=4, ncol=4)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
cat("rowMads():\n")
r0 <- rowMads_R(x, na.rm=TRUE)
r1 <- rowMads(x, na.rm=TRUE)
r2 <- colMads(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
cat("colMads():\n")
r0 <- colMads_R(x, na.rm=TRUE)
r1 <- colMads(x, na.rm=TRUE)
r2 <- rowMads(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# No ties: a 3x3 matrix with an NA value
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(c(1,2,3,2,3,4,3,4,5)+0.1, nrow=3, ncol=3)
x[2,2] <- NA_real_
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
cat("rowMads():\n")
r0 <- rowMads_R(x, na.rm=TRUE)
r1 <- rowMads(x, na.rm=TRUE)
r2 <- colMads(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
cat("colMads():\n")
r0 <- colMads_R(x, na.rm=TRUE)
r1 <- colMads(x, na.rm=TRUE)
r2 <- rowMads(t(x), na.rm=TRUE)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
# Row/column ranges
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
cat("rowMads():\n")
r0 <- rowMads_R(x, na.rm=na.rm)
r1 <- rowMads(x, na.rm=na.rm)
r2 <- colMads(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1, r2))
cat("colMads():\n")
r0 <- colMads_R(x, na.rm=na.rm)
r1 <- colMads(x, na.rm=na.rm)
r2 <- rowMads(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1, r2))
}
} # for (addNA ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(NA_real_, nrow=20, ncol=5)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowMads_R(x, na.rm=na.rm)
if (na.rm) r0[is.na(r0)] <- NaN
r1 <- rowMads(x, na.rm=na.rm)
r2 <- colMads(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1, r2))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=1, ncol=1)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowMads_R(x, na.rm=na.rm)
r1 <- rowMads(x, na.rm=na.rm)
r2 <- colMads(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 0x0 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(double(0), nrow=0, ncol=0)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowMads_R(x, na.rm=na.rm)
r1 <- rowMads(x, na.rm=na.rm)
r2 <- colMads(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
matrixStats/tests/rowCumprods.R 0000644 0001751 0000144 00000006653 12542546242 016444 0 ustar hornik users library("matrixStats")
rowCumprods_R <- function(x) {
suppressWarnings({
t(apply(x, MARGIN=1L, FUN=cumprod))
})
}
colCumprods_R <- function(x) {
suppressWarnings({
apply(x, MARGIN=2L, FUN=cumprod)
})
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
r0 <- rowCumprods_R(x)
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (addNA ...)
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCumprods_R(x)
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(0, nrow=1, ncol=1)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
r0 <- rowCumprods_R(x)
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# BUG FIX TEST: Assert zeros don't trump NAs in integer matrices
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=3, ncol=2)
x[1,2] <- 0
x[2,2] <- 1
x[3,1] <- 0
storage.mode(x) <- mode
cat("mode: ", mode, "\n", sep="")
str(x)
r0 <- rowCumprods_R(x)
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
value <- 0
storage.mode(value) <- mode
# A 0x0 matrix
x <- matrix(value, nrow=0L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A 0xK matrix
x <- matrix(value, nrow=0L, ncol=5L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
# A Nx0 matrix
x <- matrix(value, nrow=5L, ncol=0L)
str(x)
r0 <- matrix(value, nrow=nrow(x), ncol=ncol(x))
r1 <- rowCumprods(x)
r2 <- t(colCumprods(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
matrixStats/tests/rowTabulates.R 0000644 0001751 0000144 00000001551 12542546242 016564 0 ustar hornik users library("matrixStats")
N <- 6L
K <- 5L
J <- 5L
data <- matrix(1:J, nrow=N, ncol=K)
modes <- c("integer", "raw")
for (mode in modes) {
cat(sprintf("Mode: %s...\n", mode))
x <- data
storage.mode(x) <- mode
print(x)
y <- rowTabulates(x)
print(y)
stopifnot(identical(dim(y), c(N,J)))
y <- colTabulates(x)
print(y)
stopifnot(identical(dim(y), c(K,J)))
# Count only certain values
y <- rowTabulates(x, values=1:3)
print(y)
stopifnot(identical(dim(y), c(N,3L)))
y <- colTabulates(x, values=1:3)
print(y)
stopifnot(identical(dim(y), c(K,3L)))
# Raw
y <- rowTabulates(x, values=as.raw(1:3))
print(y)
stopifnot(identical(dim(y), c(N,3L)))
y2 <- colTabulates(t(x), values=as.raw(1:3))
print(y2)
stopifnot(identical(dim(y2), c(N,3L)))
stopifnot(identical(y2, y))
cat(sprintf("Mode: %s...done\n", mode))
} # for (mode ...)
matrixStats/tests/rowCounts.R 0000644 0001751 0000144 00000010027 12542546242 016111 0 ustar hornik users library("matrixStats")
rowCounts_R <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.na(value)) {
counts <- apply(x, MARGIN=1L, FUN=function(x) sum(is.na(x)))
} else {
counts <- apply(x, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=na.rm))
}
as.integer(counts)
} # rowCounts_R()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: integer and numeric
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(runif(20*5, min=-3, max=3), nrow=20, ncol=5)
x[sample.int(length(x), size=7)] <- 0
storage.mode(x) <- mode
for (na.rm in c(FALSE, TRUE)) {
# Count zeros
r0 <- rowCounts_R(x, value=0, na.rm=na.rm)
r1 <- rowCounts(x, value=0, na.rm=na.rm)
r2 <- colCounts(t(x), value=0, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
# Count NAs
r0 <- rowCounts_R(x, value=NA, na.rm=na.rm)
r1 <- rowCounts(x, value=NA, na.rm=na.rm)
r2 <- colCounts(t(x), value=NA, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
if (mode == "integer") {
ux <- unique(as.vector(x))
r0 <- r1 <- r2 <- integer(nrow(x))
for (value in ux) {
r0 <- r0 + rowCounts_R(x, value=value, na.rm=na.rm)
r1 <- r1 + rowCounts(x, value=value, na.rm=na.rm)
r2 <- r2 + colCounts(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
}
stopifnot(all(r0 == ncol(x)))
} # if (mode == "integer")
} # for (na.rm ...)
} # for (mode ...)
# All NAs
naList <- list(NA_integer_, NA_real_, NaN)
for (naValue in naList) {
x <- matrix(naValue, nrow=20, ncol=5)
for (na.rm in c(FALSE, TRUE)) {
r0 <- rowCounts_R(x, na.rm=na.rm)
r1 <- rowCounts(x, na.rm=na.rm)
r2 <- colCounts(t(x), na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
# Count NAs
r0 <- rowCounts_R(x, value=NA, na.rm=na.rm)
r1 <- rowCounts(x, value=NA, na.rm=na.rm)
r2 <- colCounts(t(x), value=NA, na.rm=na.rm)
stopifnot(all(r0 == ncol(x)))
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
}
} # for (naValue ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: logical
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(FALSE, nrow=20, ncol=5)
x[13:17,c(2,4)] <- TRUE
x[2:4,] <- TRUE
x[,1] <- TRUE
x[5,] <- FALSE
x[,5] <- FALSE
# Row/column counts
for (na.rm in c(FALSE, TRUE)) {
r0 <- rowCounts_R(x, na.rm=na.rm)
r1 <- rowCounts(x, na.rm=na.rm)
r2 <- colCounts(t(x), na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
rT <- rowCounts(x, value=TRUE, na.rm=na.rm)
rF <- rowCounts(x, value=FALSE, na.rm=na.rm)
stopifnot(rT + rF == ncol(x))
cT <- colCounts(x, value=TRUE, na.rm=na.rm)
cF <- colCounts(x, value=FALSE, na.rm=na.rm)
stopifnot(cT + cF == nrow(x))
# Count NAs
r0 <- rowCounts_R(x, value=NA, na.rm=na.rm)
r1 <- rowCounts(x, value=NA, na.rm=na.rm)
r2 <- colCounts(t(x), value=NA, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: character (not sure if this should be supported)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(rep(letters, length.out=20*5), nrow=20, ncol=5)
x[2:3,3:4] <- NA_character_
# Row/column counts
for (na.rm in c(FALSE, TRUE)) {
for (value in c("g", NA_character_)) {
r0 <- rowCounts_R(x, value=value, na.rm=na.rm)
r1 <- rowCounts(x, value=value, na.rm=na.rm)
r2 <- colCounts(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r0))
c <- count(x[1,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[1]))
c <- count(x[2,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[2]))
}
}
# NA row
x <- matrix(0, nrow=2, ncol=2)
x[1,] <- NA_integer_
r0 <- rowCounts(x, value=0)
r1 <- rowCounts_R(x, value=0)
stopifnot(identical(r0,r1))
matrixStats/tests/psortKM.R 0000644 0001751 0000144 00000002424 12542546242 015507 0 ustar hornik users library("matrixStats")
library("utils")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Local functions
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
psortKM_R <- function(x, k, m) {
x <- sort(x)
x[(k-m+1):k]
} # psortKM_R()
psortKM_R2 <- function(x, k, m) {
partial <- (k-m+1):k
x <- sort.int(x, partial=partial)
x[partial]
} # psortKM_R2()
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(1)
cat("Consistency checks:\n")
x <- 1:500
x[298:300] <- 300
y <- sample(x)
cat("x:\n")
str(x)
cat("sample(x):\n")
str(y)
for (k in c(1, 2, 300, 301, length(x))) {
for (m in 1:min(5,k)) {
px0 <- psortKM_R(x, k=k, m=m)
px0b <- psortKM_R2(x, k=k, m=m)
stopifnot(identical(px0b, px0))
px1 <- matrixStats:::.psortKM(x, k=k, m=m)
cat(sprintf(".psortKM(x, k=%d, m=%d):\n", k, m))
print(px1)
stopifnot(identical(px1, px0))
py0 <- psortKM_R(y, k=k, m=m)
py0b <- psortKM_R2(y, k=k, m=m)
stopifnot(identical(py0b, py0))
py1 <- matrixStats:::.psortKM(y, k=k, m=m)
cat(sprintf(".psortKM(y, k=%d, m=%d):\n", k, m))
print(py1)
stopifnot(identical(py1, py0))
stopifnot(identical(py1, px1))
} # for (m ...)
} # for (k ...)
matrixStats/tests/rowDiffs.R 0000644 0001751 0000144 00000004144 12542546242 015674 0 ustar hornik users library("matrixStats")
rowDiffs_R <- function(x, lag=1L, differences=1L, ...) {
ncol2 <- ncol(x) - lag*differences
if (ncol2 <= 0) {
return(matrix(x[integer(0L)], nrow=nrow(x), ncol=0L))
}
suppressWarnings({
y <- apply(x, MARGIN=1L, FUN=diff, lag=lag, differences=differences)
})
y <- t(y)
dim(y) <- c(nrow(x), ncol2)
y
}
set.seed(0x42)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(sample(20*8)+0.1, nrow=20, ncol=8)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
storage.mode(x) <- mode
str(x)
for (lag in 1:4) {
for (differences in 1:3) {
cat(sprintf("mode: %s, lag=%d, differences=%d\n", mode, lag, differences))
# Row/column ranges
r0 <- rowDiffs_R(x, lag=lag, differences=differences)
r1 <- rowDiffs(x, lag=lag, differences=differences)
r2 <- t(colDiffs(t(x), lag=lag, differences=differences))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1, r2))
}
}
} # for (addNA ...)
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
x <- matrix(NA_real_, nrow=20, ncol=5)
storage.mode(x) <- mode
str(x)
r0 <- rowDiffs_R(x)
r1 <- rowDiffs(x)
r2 <- t(colDiffs(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=1, ncol=1)
r0 <- rowDiffs_R(x)
r1 <- rowDiffs(x)
r2 <- t(colDiffs(t(x)))
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
matrixStats/tests/logSumExp.R 0000644 0001751 0000144 00000005450 12542546242 016035 0 ustar hornik users library("matrixStats")
library("stats")
logSumExp_R <- function(lx, na.rm=FALSE) {
log(sum(exp(lx), na.rm=na.rm))
}
## R-help thread \emph{'[R] Beyond double-precision?'} on May 9, 2009.
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
set.seed(1)
x <- runif(20, min=1.0, max=3.0)
storage.mode(x) <- mode
str(x)
## The logarithm of the harmonic mean
y0 <- log(1/mean(1/x))
print(y0) ## -1.600885
lx <- log(x)
y1 <- log(length(x)) - logSumExp(-lx)
print(y1) ## [1] -1.600885
# Sanity check
stopifnot(all.equal(y1, y0))
y2 <- log(length(x)) - logSumExp_R(-lx)
# Sanity check
stopifnot(all.equal(y2, y0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Missing values
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## NA values
lx <- c(1,2,3)
lx[2] <- NA_real_
y0 <- logSumExp_R(lx, na.rm=FALSE)
y <- logSumExp(lx, na.rm=FALSE)
print(y)
stopifnot(identical(y, NA_real_))
stopifnot(all.equal(y, y0))
y0 <- logSumExp_R(lx, na.rm=TRUE)
y <- logSumExp(lx, na.rm=TRUE)
print(y)
stopifnot(all.equal(y, y0))
## NaN values
lx <- c(1,2,3)
lx[2] <- NaN
y0 <- logSumExp_R(lx, na.rm=FALSE)
y <- logSumExp(lx, na.rm=FALSE)
print(y)
stopifnot(identical(y, NA_real_))
stopifnot(all.equal(y, y0))
y0 <- logSumExp_R(lx, na.rm=TRUE)
y <- logSumExp(lx, na.rm=TRUE)
print(y)
stopifnot(all.equal(y, y0))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Zero-length vectors
lx <- numeric(0L)
y0 <- logSumExp_R(lx)
y <- logSumExp(lx)
print(y)
stopifnot(identical(y, -Inf))
stopifnot(all.equal(y, y0))
## Vectors of length one
lx <- 1.0
y0 <- logSumExp_R(lx)
y <- logSumExp(lx)
print(y)
stopifnot(identical(y, lx))
stopifnot(all.equal(y, y0))
lx <- NA_real_
y0 <- logSumExp_R(lx, na.rm=TRUE)
y <- logSumExp(lx, na.rm=TRUE)
print(y)
stopifnot(identical(y, -Inf))
stopifnot(all.equal(y, y0))
## All missing values
lx <- c(NA_real_, NA_real_)
y0 <- logSumExp_R(lx, na.rm=TRUE)
y <- logSumExp(lx, na.rm=TRUE)
print(y)
stopifnot(identical(y, -Inf))
stopifnot(all.equal(y, y0))
lx <- c(NA_real_, NA_real_)
y0 <- logSumExp_R(lx, na.rm=FALSE)
y <- logSumExp(lx, na.rm=FALSE)
print(y)
stopifnot(identical(y, NA_real_))
stopifnot(all.equal(y, y0))
## +Inf values
lx <- c(1, 2, +Inf)
y0 <- logSumExp_R(lx)
y <- logSumExp(lx)
print(y)
stopifnot(identical(y, +Inf))
stopifnot(all.equal(y, y0))
## First element is a missing value, cf. PR #33
lx <- c(NA_real_, 1)
y0 <- logSumExp_R(lx)
print(y0)
y <- logSumExp(lx, na.rm=FALSE)
print(y)
stopifnot(identical(y, NA_real_))
stopifnot(all.equal(y, y0))
y0 <- logSumExp_R(lx, na.rm=TRUE)
print(y0)
y <- logSumExp(lx, na.rm=TRUE)
print(y)
stopifnot(identical(y, 1))
stopifnot(all.equal(y, y0))
matrixStats/tests/rowSds.R 0000644 0001751 0000144 00000005331 12542546242 015371 0 ustar hornik users library("matrixStats")
rowSds_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=1L, FUN=sd, na.rm=na.rm)
})
}
colSds_R <- function(x, na.rm=FALSE) {
suppressWarnings({
apply(x, MARGIN=2L, FUN=sd, na.rm=na.rm)
})
}
rowSds_center <- function(x, na.rm=FALSE) {
center <- rowMeans(x, na.rm=na.rm)
rowSds(x, center=center, na.rm=na.rm)
}
colSds_center <- function(x, na.rm=FALSE) {
center <- colMeans(x, na.rm=na.rm)
colSds(x, center=center, na.rm=na.rm)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# With and without some NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
for (addNA in c(FALSE, TRUE)) {
cat("addNA=", addNA, "\n", sep="")
x <- matrix(1:100+0.1, nrow=20, ncol=5)
if (addNA) {
x[13:17,c(2,4)] <- NA_real_
}
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
# Row/column ranges
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowSds_R(x, na.rm=na.rm)
r1 <- rowSds(x, na.rm=na.rm)
r1b <- rowSds_center(x, na.rm=na.rm)
r2 <- colSds(t(x), na.rm=na.rm)
r2b <- colSds_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
} # for (addNA ...)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# All NAs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
x <- matrix(NA_real_, nrow=20, ncol=5)
cat("mode: ", mode, "\n", sep="")
storage.mode(x) <- mode
str(x)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowSds_R(x, na.rm=na.rm)
r1 <- rowSds(x, na.rm=na.rm)
r1b <- rowSds_center(x, na.rm=na.rm)
r2 <- colSds(t(x), na.rm=na.rm)
r2b <- colSds_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# A 1x1 matrix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=1, ncol=1)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm=", na.rm, "\n", sep="")
r0 <- rowSds_R(x, na.rm=na.rm)
r1 <- rowSds(x, na.rm=na.rm)
r1b <- rowSds_center(x, na.rm=na.rm)
r2 <- colSds(t(x), na.rm=na.rm)
r2b <- colSds_center(t(x), na.rm=na.rm)
stopifnot(all.equal(r1, r2))
stopifnot(all.equal(r1, r0))
stopifnot(all.equal(r2, r0))
stopifnot(all.equal(r1b, r1))
stopifnot(all.equal(r2b, r2))
}
matrixStats/tests/anyMissing.R 0000644 0001751 0000144 00000005051 12542546242 016230 0 ustar hornik users library("matrixStats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Setup
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
data <- data.frame(
logical=c(TRUE, FALSE, TRUE, FALSE),
integer=1:4,
double=seq(from=1.0, to=4.0, by=1.0),
complex=seq(from=1.0, to=4.0, by=1.0) + 1.0i,
character=I(letters[1:4])
)
modes <- names(data)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Special cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("NULL...\n")
stopifnot(identical(anyMissing(NULL), FALSE))
cat("NULL...done\n")
cat("raw...\n")
stopifnot(identical(anyMissing(as.raw(0:3)), FALSE))
cat("raw...done\n")
cat("list(NULL)...\n")
stopifnot(identical(anyMissing(list(NULL)), FALSE))
cat("list(NULL)...done\n")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Scalars, vectors, and matrices of various modes
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in modes) {
cat(sprintf("Mode: %s...\n", mode))
values <- data[[mode]]
# Scalars
cat(" scalar\n")
x <- values[1L]
print(x)
stopifnot(identical(anyMissing(x), FALSE))
is.na(x) <- TRUE
print(x)
stopifnot(identical(anyMissing(x), TRUE))
# Vectors
cat(" vector\n")
x <- values
print(x)
stopifnot(identical(anyMissing(x), FALSE))
is.na(x)[2L] <- TRUE
print(x)
stopifnot(identical(anyMissing(x), TRUE))
# Matrices
cat(" matrix\n")
x <- matrix(c(values, values), ncol=2L)
print(x)
stopifnot(identical(anyMissing(x), FALSE))
is.na(x)[2L] <- TRUE
print(x)
stopifnot(identical(anyMissing(x), TRUE))
cat(sprintf("Mode: %s...done\n", mode))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data frames
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("data.frame...\n")
x <- data
stopifnot(identical(anyMissing(x), FALSE))
for (mode in modes) {
x <- data
is.na(x[[mode]])[2L] <- TRUE
print(x)
stopifnot(identical(anyMissing(x), TRUE))
} # for (mode ...)
cat("data.frame...done\n")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Lists
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
cat("list...\n")
x <- as.list(data)
stopifnot(identical(anyMissing(x), FALSE))
for (mode in modes) {
x <- as.list(data)
is.na(x[[mode]])[2L] <- TRUE
print(x)
stopifnot(identical(anyMissing(x), TRUE))
} # for (mode ...)
cat("list...done\n")
matrixStats/tests/weightedVar_etal.R 0000644 0001751 0000144 00000003425 12542546242 017370 0 ustar hornik users library("matrixStats")
FUNs <- list(
weightedVar=weightedVar,
weightedSd=weightedSd,
weightedMad=weightedMad
)
for (fcn in names(FUNs)) {
cat(sprintf("%s()...\n", fcn))
FUN <- FUNs[[fcn]]
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
n <- 15L
x <- runif(n, min=-5, max=5)
storage.mode(x) <- mode
str(x)
for (addNA in c(FALSE, TRUE)) {
cat("addNA: ", addNA, "\n", sep="")
if (addNA) {
x[c(5,7)] <- NA
}
str(x)
for (na.rm in c(FALSE, TRUE)) {
cat("na.rm: ", na.rm, "\n", sep="")
cat("Weights are specified (all are 1)\n")
w <- rep(1, times=n)
m1 <- FUN(x, na.rm=na.rm)
str(list(m1=m1))
cat("All weights are 1\n")
w <- rep(1, times=n)
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
cat("First weight is 5\n")
# Pull the mean towards zero
w[1] <- 5
str(w)
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
cat("All weights are 0\n")
# All weights set to zero
w <- rep(0, times=n)
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
cat("First weight is 8.5\n")
# Put even more weight on the zero
w[1] <- 8.5
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
cat("First weight is Inf\n")
# All weight on the first value
w[1] <- Inf
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
cat("Last weight is Inf\n")
# All weight on the last value
w[1] <- 1
w[n] <- Inf
m1 <- FUN(x, w, na.rm=na.rm)
str(list(m1=m1))
} # for (na.rm ...)
} # for (addNA ...)
} # for (mode ...)
cat(sprintf("%s()...DONE\n", fcn))
} # for (fcn ...)
matrixStats/tests/rowIQRs.R 0000644 0001751 0000144 00000003505 12542546242 015457 0 ustar hornik users library("matrixStats")
rowIQRs_R <- function(x, na.rm=FALSE) {
quantileNA <- function(x, ..., na.rm=FALSE) {
if (!na.rm && anyMissing(x))
return(c(NA_real_, NA_real_))
quantile(x, ..., na.rm=na.rm)
}
Q <- apply(x, MARGIN=1L, FUN=quantileNA, probs=c(0.25, 0.75), na.rm=na.rm)
Q[2L,,drop=TRUE] - Q[1L,,drop=TRUE]
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Test with multiple quantiles
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
x <- matrix(1:100+0.1, nrow=10, ncol=10)
storage.mode(x) <- mode
str(x)
for (addNA in c(FALSE, TRUE)) {
if (addNA) {
x[3:5,6:9] <- NA
}
for (na.rm in c(FALSE, TRUE)) {
probs <- c(0,0.5,1)
q0 <- rowIQRs_R(x, na.rm=na.rm)
print(q0)
q1 <- rowIQRs(x, na.rm=na.rm)
print(q1)
stopifnot(all.equal(q1, q0))
q2 <- colIQRs(t(x), na.rm=na.rm)
stopifnot(all.equal(q2, q0))
q <- iqr(x[3,], na.rm=na.rm)
print(q)
} # for (na.rm ...)
} # for (addNA ...)
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Test corner cases
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
# Empty vectors
x <- integer(0L)
storage.mode(x) <- mode
str(x)
q <- iqr(x)
print(q)
stopifnot(identical(q, NA_real_))
# Scalar
x <- 1L
storage.mode(x) <- mode
str(x)
q <- iqr(x)
str(q)
stopifnot(identical(q, 0))
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Single row matrices
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(1, nrow=1L, ncol=2L)
q <- rowIQRs(x)
stopifnot(identical(q, 0))
x <- matrix(1, nrow=2L, ncol=1L)
q <- colIQRs(x)
stopifnot(identical(q, 0))
matrixStats/tests/rowAllAnys.R 0000644 0001751 0000144 00000011164 12542546242 016204 0 ustar hornik users library("matrixStats")
rowAlls_R <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.na(value)) {
apply(is.na(x), MARGIN=1L, FUN=all, na.rm=na.rm)
} else {
apply((x == value), MARGIN=1L, FUN=all, na.rm=na.rm)
}
}
rowAnys_R <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.na(value)) {
apply(is.na(x), MARGIN=1L, FUN=any, na.rm=na.rm)
} else {
apply((x == value), MARGIN=1L, FUN=any, na.rm=na.rm)
}
}
rowAnyMissings_R <- function(x, ...) {
apply(x, MARGIN=1L, FUN=anyMissing)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: logical
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(FALSE, nrow=20, ncol=5)
x[13:17,c(2,4)] <- TRUE
x[2:4,] <- TRUE
x[,1] <- TRUE
x[5,] <- FALSE
x[,5] <- FALSE
x[3,] <- FALSE
x[4,] <- TRUE
for (kk in 1:3) {
if (kk == 2) {
x[2,2] <- NA
} else if (kk == 3) {
x[,2] <- NA
x[2,] <- NA
}
# Row/column all
for (na.rm in c(FALSE, TRUE)) {
m0 <- rowAlls_R(x, na.rm=na.rm)
m1 <- rowAlls(x, na.rm=na.rm)
m2 <- colAlls(t(x), na.rm=na.rm)
str(list("all()", m0=m0, m1=m1, m2=m2))
stopifnot(identical(m1, m0))
stopifnot(identical(m2, m0))
}
# Row/column any
for (na.rm in c(FALSE, TRUE)) {
m0 <- rowAnys_R(x, na.rm=na.rm)
m1 <- rowAnys(x, na.rm=na.rm)
m2 <- colAnys(t(x), na.rm=na.rm)
str(list("any()", m0=m0, m1=m1, m2=m2))
stopifnot(identical(m1, m0))
stopifnot(identical(m2, m0))
m0 <- rowAnyMissings_R(x)
m1 <- rowAnyMissings(x)
m2 <- colAnyMissings(t(x))
str(list("anyMissing()", m0=m0, m1=m1, m2=m2))
stopifnot(identical(m1, m0))
stopifnot(identical(m2, m0))
}
} # for (kk ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: integer
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(rep(1:28, length.out=20*5), nrow=20, ncol=5)
x[2,] <- 7L
x[3,1] <- 7L
x[2:3,3:4] <- NA_integer_
# Row/column counts
value <- 7L
for (na.rm in c(FALSE, TRUE)) {
## All
r0 <- rowAlls_R(x, value=value, na.rm=na.rm)
r1 <- rowAlls(x, value=value, na.rm=na.rm)
r2 <- colAlls(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r1))
for (rr in seq_len(nrow(x))) {
c <- allValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[rr]))
c <- allValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[rr]))
}
## Any
r0 <- rowAnys_R(x, value=value, na.rm=na.rm)
r1 <- rowAnys(x, value=value, na.rm=na.rm)
r2 <- colAnys(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r1))
for (rr in seq_len(nrow(x))) {
c <- anyValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[rr]))
c <- anyValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[rr]))
}
}
all_R <- function(x, value=TRUE, ...) {
if (is.na(value)) {
all(is.na(x), ...)
} else {
all(x == value, ...)
}
}
any_R <- function(x, value=TRUE, ...) {
if (is.na(value)) {
any(is.na(x), ...)
} else {
any(x == value, ...)
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Data type: character (not sure if this should be supported)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(rep(letters, length.out=20*5), nrow=20, ncol=5)
x[2,] <- "g"
x[2:4,3:4] <- NA_character_
# Row/column counts
for (value in c("g", NA_character_)) {
for (na.rm in c(FALSE, TRUE)) {
## All
r0 <- rowAlls_R(x, value=value, na.rm=na.rm)
r1 <- rowAlls(x, value=value, na.rm=na.rm)
r2 <- colAlls(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r1))
for (rr in seq_len(nrow(x))) {
c0 <- all_R(x[rr,], value, na.rm=na.rm)
c <- allValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,r1[rr]))
stopifnot(identical(c,c0))
}
## Any
r0 <- rowAnys_R(x, value=value, na.rm=na.rm)
r1 <- rowAnys(x, value=value, na.rm=na.rm)
r2 <- colAnys(t(x), value=value, na.rm=na.rm)
stopifnot(identical(r1, r0))
stopifnot(identical(r2, r1))
for (rr in seq_len(nrow(x))) {
c0 <- any_R(x[rr,], value, na.rm=na.rm)
c <- anyValue(x[rr,], value=value, na.rm=na.rm)
stopifnot(identical(c,c0))
stopifnot(identical(c,r1[rr]))
}
}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# NA 0 test
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(0, nrow=3, ncol=3)
x[1,] <- c(NA_real_, NA_real_, 0)
x[3,] <- c(1, 0, 1)
r0 <- rowAnys_R(x, value=0)
r1 <- rowAnys(x, value=0)
stopifnot(identical(r0, r1))
matrixStats/tests/rowQuantiles.R 0000644 0001751 0000144 00000006417 12542546242 016613 0 ustar hornik users library("matrixStats")
rowQuantiles_R <- function(x, probs, na.rm=FALSE) {
q <- apply(x, MARGIN=1L, FUN=quantile, probs=probs, na.rm=na.rm)
if (!is.null(dim(q))) q <- t(q)
q
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Test with multiple quantiles
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
x <- matrix(1:40+0.1, nrow=8, ncol=5)
storage.mode(x) <- mode
str(x)
probs <- c(0,0.5,1)
q0 <- rowQuantiles_R(x, probs=probs)
print(q0)
q1 <- rowQuantiles(x, probs=probs)
print(q1)
stopifnot(all.equal(q1, q0))
q2 <- colQuantiles(t(x), probs=probs)
stopifnot(all.equal(q2, q0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Test with a single quantile
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
for (mode in c("integer", "double")) {
cat("mode: ", mode, "\n", sep="")
x <- matrix(1:40, nrow=8, ncol=5)
storage.mode(x) <- mode
str(x)
probs <- c(0.5)
q0 <- rowQuantiles_R(x, probs=probs)
print(q0)
q1 <- rowQuantiles(x, probs=probs)
print(q1)
stopifnot(all.equal(q1, q0))
q2 <- colQuantiles(t(x), probs=probs)
stopifnot(all.equal(q2, q0))
} # for (mode ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Consistency checks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
set.seed(1)
probs <- seq(from=0, to=1, by=0.25)
cat("Consistency checks:\n")
K <- if (Sys.getenv("_R_CHECK_FULL_") == "" || Sys.getenv("_R_CHECK_USE_VALGRIND_") != "") 4 else 20
for (kk in seq_len(K)) {
cat("Random test #", kk, "\n", sep="")
# Simulate data in a matrix of any shape
dim <- sample(20:60, size=2L)
n <- prod(dim)
x <- rnorm(n, sd=100)
dim(x) <- dim
# Add NAs?
hasNA <- (kk %% 4) %in% c(3,0);
if (hasNA) {
cat("Adding NAs\n")
nna <- sample(n, size=1)
naValues <- c(NA_real_, NaN)
x[sample(length(x), size=nna)] <- sample(naValues, size=nna, replace=TRUE)
}
# Integer or double?
if ((kk %% 4) %in% c(2,0)) {
cat("Coercing to integers\n")
storage.mode(x) <- "integer"
}
str(x)
# rowQuantiles():
q0 <- rowQuantiles_R(x, probs=probs, na.rm=hasNA)
q1 <- rowQuantiles(x, probs=probs, na.rm=hasNA)
stopifnot(all.equal(q1, q0))
q2 <- colQuantiles(t(x), probs=probs, na.rm=hasNA)
stopifnot(all.equal(q2, q0))
} # for (kk ...)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Empty matrices
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(NA_real_, nrow=0L, ncol=0L)
probs <- c(0, 0.25, 0.75, 1)
q <- rowQuantiles(x, probs=probs)
stopifnot(identical(dim(q), c(nrow(x), length(probs))))
q <- colQuantiles(x, probs=probs)
stopifnot(identical(dim(q), c(ncol(x), length(probs))))
x <- matrix(NA_real_, nrow=2L, ncol=0L)
q <- rowQuantiles(x, probs=probs)
stopifnot(identical(dim(q), c(nrow(x), length(probs))))
x <- matrix(NA_real_, nrow=0L, ncol=2L)
q <- colQuantiles(x, probs=probs)
stopifnot(identical(dim(q), c(ncol(x), length(probs))))
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Single column matrices
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- matrix(1, nrow=2L, ncol=1L)
q <- rowQuantiles(x, probs=probs)
print(q)
x <- matrix(1, nrow=1L, ncol=2L)
q <- colQuantiles(x, probs=probs)
print(q)
matrixStats/src/ 0000755 0001751 0000144 00000000000 12542546311 013405 5 ustar hornik users matrixStats/src/productExpSumLog.c 0000644 0001751 0000144 00000002603 12542546311 017036 0 ustar hornik users /***************************************************************************
Public methods:
SEXP productExpSumLog(SEXP x, SEXP naRm, SEXP hasNA)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD productExpSumLog
#define X_TYPE 'i'
#include "productExpSumLog_TYPE-template.h"
#define X_TYPE 'r'
#include "productExpSumLog_TYPE-template.h"
#undef METHOD
SEXP productExpSumLog(SEXP x, SEXP naRm, SEXP hasNA) {
SEXP ans = NILSXP;
double res = NA_REAL;
int narm, hasna;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Double matrices are more common to use. */
if (isReal(x)) {
res = productExpSumLog_Real(REAL(x), xlength(x), narm, hasna);
} else if (isInteger(x)) {
res = productExpSumLog_Integer(INTEGER(x), xlength(x), narm, hasna);
}
/* Return results */
PROTECT(ans = allocVector(REALSXP, 1));
REAL(ans)[0] = res;
UNPROTECT(1);
return(ans);
} // productExpSumLog()
/***************************************************************************
HISTORY:
2014-06-04 [HB]
o Created.
**************************************************************************/
matrixStats/src/weightedMean.c 0000644 0001751 0000144 00000003172 12542546311 016155 0 ustar hornik users /***************************************************************************
Public methods:
SEXP weightedMean(SEXP x, SEXP w, SEXP naRm, SEXP refine)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#include
#define METHOD weightedMean
#define X_TYPE 'i'
#include "weightedMean_TYPE-template.h"
#define X_TYPE 'r'
#include "weightedMean_TYPE-template.h"
#undef METHOD
SEXP weightedMean(SEXP x, SEXP w, SEXP naRm, SEXP refine) {
SEXP ans;
int narm, refine2;
double avg = NA_REAL;
R_xlen_t nx, nw;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
nx = xlength(x);
/* Argument 'x': */
assertArgVector(w, (R_TYPE_REAL), "w");
nw = xlength(w);
if (nx != nw) {
error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw);
}
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'refine': */
refine2 = asLogicalNoNA(refine, "refine");
/* Double matrices are more common to use. */
if (isReal(x)) {
avg = weightedMean_Real(REAL(x), nx, REAL(w), nw, narm, refine2);
} else if (isInteger(x)) {
avg = weightedMean_Integer(INTEGER(x), nx, REAL(w), nw, narm, refine2);
}
/* Return results */
PROTECT(ans = allocVector(REALSXP, 1));
REAL(ans)[0] = avg;
UNPROTECT(1);
return(ans);
} // weightedMean()
/***************************************************************************
HISTORY:
2014-12-08 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowCumprods_TYPE-template.h 0000644 0001751 0000144 00000007251 12542546311 020561 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowCumprods_(...)
GENERATES:
void rowCumprods_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
void rowCumprods_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) {
R_xlen_t ii, jj, kk, kk_prev;
LDOUBLE value;
#if ANS_TYPE == 'i'
double R_INT_MIN_d = (double)R_INT_MIN,
R_INT_MAX_d = (double)R_INT_MAX;
/* OK, i.e. no integer overflow yet? */
int warn = 0, ok, *oks = NULL;
#endif
if (nrow == 0 || ncol == 0) return;
if (byrow) {
#if ANS_TYPE == 'i'
oks = (int *) R_alloc(nrow, sizeof(int));
#endif
for (kk=0; kk < nrow; kk++) {
ans[kk] = (ANS_C_TYPE) x[kk];
#if ANS_TYPE == 'i'
oks[kk] = !X_ISNA(x[kk]);
#endif
}
kk_prev = 0;
for (jj=1; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
#if ANS_TYPE == 'i'
if (oks[ii]) {
/* Missing value? */
if (X_ISNA(x[kk])) {
oks[ii] = 0;
ans[kk] = ANS_NA;
} else {
value = (LDOUBLE) ans[kk_prev] * (LDOUBLE) x[kk];
/* Integer overflow? */
if (value < R_INT_MIN_d || value > R_INT_MAX_d) {
oks[ii] = 0;
warn = 1;
ans[kk] = ANS_NA;
} else {
ans[kk] = (ANS_C_TYPE) value;
}
}
} else {
ans[kk] = ANS_NA;
}
#else
ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] * (LDOUBLE) x[kk]);
#endif
kk++;
kk_prev++;
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
value = 1;
#if ANS_TYPE == 'i'
ok = 1;
#endif
for (ii=0; ii < nrow; ii++) {
#if ANS_TYPE == 'i'
if (ok) {
/* Missing value? */
if (X_ISNA(x[kk])) {
ok = 0;
ans[kk] = ANS_NA;
} else {
value *= (LDOUBLE) x[kk];
/* Integer overflow? */
if (value < R_INT_MIN_d || value > R_INT_MAX_d) {
ok = 0;
warn = 1;
ans[kk] = ANS_NA;
} else {
ans[kk] = (ANS_C_TYPE) value;
}
}
} else {
ans[kk] = ANS_NA;
}
#else
value *= x[kk];
ans[kk] = (ANS_C_TYPE) value;
#endif
kk++;
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} /* if (byrow) */
#if ANS_TYPE == 'i'
/* Warn on integer overflow? */
if (warn) {
warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX);
}
#endif
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars_TYPE-template.h.
**************************************************************************/
matrixStats/src/x_OP_y.c 0000644 0001751 0000144 00000014176 12542546311 014757 0 ustar hornik users #include
#include "types.h"
#include "utils.h"
#define METHOD x_OP_y
/* Addition */
#define X_TYPE 'i'
#define Y_TYPE 'i'
#define ANS_TYPE 'i'
#define OP '+'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'i'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '+'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'i'
#define ANS_TYPE 'r'
#define OP '+'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '+'
#include "x_OP_y_TYPE-template.h"
/* Subtraction */
#define X_TYPE 'i'
#define Y_TYPE 'i'
#define ANS_TYPE 'i'
#define OP '-'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'i'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '-'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'i'
#define ANS_TYPE 'r'
#define OP '-'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '-'
#include "x_OP_y_TYPE-template.h"
/* Multiplication */
#define X_TYPE 'i'
#define Y_TYPE 'i'
#define ANS_TYPE 'i'
#define OP '*'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'i'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '*'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'i'
#define ANS_TYPE 'r'
#define OP '*'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '*'
#include "x_OP_y_TYPE-template.h"
/* Division */
#define X_TYPE 'i'
#define Y_TYPE 'i'
#define ANS_TYPE 'r'
#define OP '/'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'i'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '/'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'i'
#define ANS_TYPE 'r'
#define OP '/'
#include "x_OP_y_TYPE-template.h"
#define X_TYPE 'r'
#define Y_TYPE 'r'
#define ANS_TYPE 'r'
#define OP '/'
#include "x_OP_y_TYPE-template.h"
#undef METHOD
SEXP x_OP_y(SEXP x, SEXP y, SEXP dim, SEXP operator, SEXP commute, SEXP naRm, SEXP hasNA, SEXP byRow) {
SEXP ans = NILSXP;
int narm, hasna, byrow, commute2;
int op;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'y': */
assertArgVector(y, (R_TYPE_INT | R_TYPE_REAL), "y");
/* Argument 'byRow': */
byrow = asLogicalNoNA(byRow, "byrow");
/* Argument 'commute2': */
commute2 = asLogicalNoNA(commute, "commute");
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Argument 'operator': */
op = asInteger(operator);
if (op == 1) {
/* Addition */
if (isReal(x) || isReal(y)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
if (isReal(x) && isReal(y)) {
x_OP_y_Real_Real_Add(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isReal(x) && isInteger(y)) {
x_OP_y_Real_Integer_Add(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isInteger(x) && isReal(y)) {
x_OP_y_Integer_Real_Add(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
}
UNPROTECT(1);
} else {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
x_OP_y_Integer_Integer_Add(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans));
UNPROTECT(1);
}
} if (op == 2) {
/* Subtraction */
if (isReal(x) || isReal(y)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
if (isReal(x) && isReal(y)) {
x_OP_y_Real_Real_Sub(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isReal(x) && isInteger(y)) {
x_OP_y_Real_Integer_Sub(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isInteger(x) && isReal(y)) {
x_OP_y_Integer_Real_Sub(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
}
UNPROTECT(1);
} else {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
x_OP_y_Integer_Integer_Sub(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans));
UNPROTECT(1);
}
} else if (op == 3) {
/* Multiplication */
if (isReal(x) || isReal(y)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
if (isReal(x) && isReal(y)) {
x_OP_y_Real_Real_Mul(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isReal(x) && isInteger(y)) {
x_OP_y_Real_Integer_Mul(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isInteger(x) && isReal(y)) {
x_OP_y_Integer_Real_Mul(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
}
UNPROTECT(1);
} else {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
x_OP_y_Integer_Integer_Mul(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, INTEGER(ans), xlength(ans));
UNPROTECT(1);
}
} else if (op == 4) {
/* Division */
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
if (isReal(x) && isReal(y)) {
x_OP_y_Real_Real_Div(REAL(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isReal(x) && isInteger(y)) {
x_OP_y_Real_Integer_Div(REAL(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isInteger(x) && isReal(y)) {
x_OP_y_Integer_Real_Div(INTEGER(x), nrow, ncol, REAL(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
} else if (isInteger(x) && isInteger(y)) {
x_OP_y_Integer_Integer_Div(INTEGER(x), nrow, ncol, INTEGER(y), xlength(y), byrow, commute2, narm, hasna, REAL(ans), xlength(ans));
}
UNPROTECT(1);
}
return(ans);
} /* x_OP_y() */
matrixStats/src/rowOrderStats.c 0000644 0001751 0000144 00000004406 12542546311 016377 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowOrderStats(SEXP x, SEXP which)
Authors: Henrik Bengtsson. Adopted from rowQ() by R. Gentleman.
To do: Add support for missing values.
Copyright Henrik Bengtsson, 2007-2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowOrderStats
#define X_TYPE 'i'
#include "rowOrderStats_TYPE-template.h"
#define X_TYPE 'r'
#include "rowOrderStats_TYPE-template.h"
#undef METHOD
SEXP rowOrderStats(SEXP x, SEXP dim, SEXP which) {
SEXP ans = NILSXP;
R_xlen_t nrow, ncol, qq;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'which': */
if (length(which) != 1)
error("Argument 'which' must be a single number.");
if (!isNumeric(which))
error("Argument 'which' must be a numeric number.");
/* Subtract one here, since rPsort does zero based addressing */
qq = asInteger(which) - 1;
/* Assert that 'qq' is a valid index */
if (qq < 0 || qq >= ncol) {
error("Argument 'which' is out of range.");
}
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocVector(REALSXP, nrow));
rowOrderStats_Real(REAL(x), nrow, ncol, qq, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocVector(INTSXP, nrow));
rowOrderStats_Integer(INTEGER(x), nrow, ncol, qq, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} // rowOrderStats()
/***************************************************************************
HISTORY:
2009-02-04 [HB]
o BUG FIX: For some errors in rowOrderStats(), the stack would not become
UNPROTECTED before calling error.
2008-03-25 [HB]
o Renamed from 'rowQuantiles' to 'rowOrderStats'.
2007-08-10 [HB]
o Removed arguments for NAs since rowOrderStats() still don't support it.
2005-11-24 [HB]
o Cool, it works and compiles nicely.
o Preallocate colOffset to speed up things even more.
o Added more comments and error checking.
o Adopted from rowQ() in Biobase of Bioconductor.
**************************************************************************/
matrixStats/src/binMeans.c 0000644 0001751 0000144 00000005517 12542546311 015315 0 ustar hornik users /***************************************************************************
Public methods:
binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right)
Copyright Henrik Bengtsson, 2012-2013
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#include
#define BIN_BY 'L'
#include "binMeans-BINBY-template.h"
#define BIN_BY 'R'
#include "binMeans-BINBY-template.h"
SEXP binMeans(SEXP y, SEXP x, SEXP bx, SEXP retCount, SEXP right) {
SEXP ans = NILSXP, count = NILSXP;
R_xlen_t nx, ny, nbins;
int closedRight, retcount;
int *count_ptr = NULL;
/* Argument 'y': */
assertArgVector(y, (R_TYPE_REAL), "y");
ny = xlength(y);
/* Argument 'x': */
assertArgVector(x, (R_TYPE_REAL), "x");
nx = xlength(x);
if (nx != ny) {
error("Argument 'y' and 'x' are of different lengths: %d != %d", ny, nx);
}
/* Argument 'bx': */
assertArgVector(bx, (R_TYPE_REAL), "bx");
nbins = xlength(bx)-1;
if (nbins <= 0) {
error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx));
}
/* Argument 'right': */
closedRight = asLogicalNoNA(right, "right");
/* Argument 'retCount': */
retcount = asLogicalNoNA(retCount, "retCount");
PROTECT(ans = allocVector(REALSXP, nbins));
if (retcount) {
PROTECT(count = allocVector(INTSXP, nbins));
count_ptr = INTEGER(count);
}
if (closedRight) {
binMeans_R(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr);
} else {
binMeans_L(REAL(y), ny, REAL(x), nx, REAL(bx), nbins, REAL(ans), count_ptr);
}
if (retcount) {
setAttrib(ans, install("count"), count);
UNPROTECT(1); // 'count'
}
UNPROTECT(1); // 'ans'
return ans;
return(ans);
} // binMeans()
/***************************************************************************
HISTORY:
2015-05-30 [HB]
o Added protected against 'bx' too short.
2014-10-06 [HB]
o CLEANUP: All argument validation is now done by the high-level C API.
2014-06-02 [HB]
o CLEANUP: Removed unused variable in binMeans().
2013-10-08 [HB]
o Now binCounts() calls binCounts_().
2013-05-10 [HB]
o SPEEDUP: binMeans() no longer tests in every iteration (=for every
data point) whether the last bin has been reached or not.
2012-10-10 [HB]
o BUG FIX: binMeans() would return random/garbage means/counts for
bins that were beyond the last data point.
o BUG FIX: In some cases binMeans() could try to go past the last bin.
2012-10-03 [HB]
o Created binMeans(), which was adopted from from code proposed by
Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as
a reply to HB's R-devel thread 'Fastest non-overlapping binning mean
function out there?' on Oct 3, 2012.
**************************************************************************/
matrixStats/src/sumOver.c 0000644 0001751 0000144 00000005117 12542546311 015215 0 ustar hornik users /***************************************************************************
Public methods:
SEXP sumOver(SEXP x, SEXP idxs, SEXP naRm, SEXP mode)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include
#include "types.h"
#include "utils.h"
#define METHOD sumOver
#define X_TYPE 'i'
#include "sumOver_TYPE-template.h"
#define X_TYPE 'r'
#include "sumOver_TYPE-template.h"
#undef METHOD
SEXP sumOver(SEXP x, SEXP idxs, SEXP naRm, SEXP mode) {
SEXP ans = NILSXP;
int *idxsp;
R_xlen_t nidxs;
int narm, mode2;
double sum;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'idxs': */
if (isNull(idxs)) {
idxsp = NULL;
nidxs = 0;
} else if (isVectorAtomic(idxs)) {
idxsp = INTEGER(idxs);
nidxs = xlength(idxs);
} else {
/* To please compiler */
idxsp = NULL;
nidxs = 0;
error("Argument 'idxs' must be NULL or a vector.");
}
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'mode': */
if (!isInteger(mode)) {
error("Argument 'mode' must be a single integer.");
}
mode2 = asInteger(mode);
/* Dispatch to low-level C function */
if (isReal(x)) {
sum = sumOver_Real(REAL(x), xlength(x), idxsp, nidxs, narm, mode2);
} else if (isInteger(x)) {
sum = sumOver_Integer(INTEGER(x), xlength(x), idxsp, nidxs, narm, mode2);
} else {
error("Argument 'x' must be numeric.");
}
/* Return results */
switch (mode2) {
case 1: /* integer */
PROTECT(ans = allocVector(INTSXP, 1));
if (ISNAN(sum)) {
INTEGER(ans)[0] = NA_INTEGER;
} else if (sum > R_INT_MAX || sum < R_INT_MIN) {
Rf_warning("Integer overflow. Use sumOver(..., mode=\"numeric\") to avoid this.");
INTEGER(ans)[0] = NA_INTEGER;
} else {
INTEGER(ans)[0] = (int)sum;
}
UNPROTECT(1);
break;
case 2: /* numeric */
PROTECT(ans = allocVector(REALSXP, 1));
if (sum > DOUBLE_XMAX) {
REAL(ans)[0] = R_PosInf;
} else if (sum < -DOUBLE_XMAX) {
REAL(ans)[0] = R_NegInf;
} else {
REAL(ans)[0] = sum;
}
UNPROTECT(1);
break;
default:
/* To please compiler */
ans = NILSXP;
break;
}
return(ans);
} // sumOver()
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o Moved validation of arguments and construction of return object
to this function.
2014-11-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/allocMatrix2.c 0000644 0001751 0000144 00000010646 12542546311 016121 0 ustar hornik users #include
#include "types.h"
#include
/* Checks whether setting bytes of an int/double to all zeroes
corresponds to assigning a zero value. Note that the bit
representation of int's and double's may not be the same
on all architectures. */
int memset_zero_ok_int() {
int t = 1;
memset(&t, 0, sizeof(t));
return (t == 0);
}
int memset_zero_ok_double() {
double t = 1;
memset(&t, 0, sizeof(t));
return (t == 0);
}
/* For debugging purposes */
/*
SEXP memsetZeroable() {
SEXP ans;
PROTECT(ans = allocVector(LGLSXP, 2));
LOGICAL(ans)[1] = memset_zero_ok_int();
LOGICAL(ans)[2] = memset_zero_ok_double();
UNPROTECT(1);
return(ans);
}
*/
void fillWithValue(SEXP ans, SEXP value) {
R_xlen_t i, n;
SEXPTYPE type;
double *ans_ptr_d, value_d;
int *ans_ptr_i, value_i;
int *ans_ptr_l, value_l;
/* Argument 'ans': */
if (!isVectorAtomic(ans)) {
error("Argument 'ans' must be a vector.");
}
n = xlength(ans);
/* Argument 'value': */
if (!isVectorAtomic(value) || xlength(value) != 1) {
error("Argument 'value' must be a scalar.");
}
type = TYPEOF(value);
switch (type) {
case INTSXP:
value_i = asInteger(value);
ans_ptr_i = INTEGER(ans);
if (value_i == 0 && memset_zero_ok_int()) {
memset(ans_ptr_i, 0, n*sizeof(value_i));
} else {
for (i=0; i < n; i++) ans_ptr_i[i] = value_i;
}
break;
case REALSXP:
value_d = asReal(value);
ans_ptr_d = REAL(ans);
if (value_d == 0 && memset_zero_ok_double()) {
memset(ans_ptr_d, 0, n*sizeof(value_d));
} else {
for (i=0; i < n; i++) ans_ptr_d[i] = value_d;
}
break;
case LGLSXP:
value_l = asLogical(value);
ans_ptr_l = LOGICAL(ans);
if (value_l == 0 && memset_zero_ok_int()) {
memset(ans_ptr_l, 0, n*sizeof(value_l));
} else {
for (i=0; i < n; i++) ans_ptr_l[i] = value_l;
}
break;
default:
error("Argument 'value' must be either of type integer, numeric or logical.");
break;
}
} /* fillWithValue() */
SEXP allocVector2(SEXP length, SEXP value, SEXP set) {
SEXP ans;
SEXPTYPE type;
R_xlen_t n = 0;
/* Argument 'length': */
if (isInteger(length) && xlength(length) == 1) {
n = (R_xlen_t)asInteger(length);
} else if (isReal(length) && xlength(length) == 1) {
n = (R_xlen_t)asReal(length);
} else {
error("Argument 'length' must be a single numeric.");
}
if (n < 0) error("Argument 'length' is negative.");
/* Argument 'value': */
if (!isVectorAtomic(value) || xlength(value) != 1) {
error("Argument 'value' must be a scalar.");
}
type = TYPEOF(value);
PROTECT(ans = allocVector(type, n));
fillWithValue(ans, value);
UNPROTECT(1);
return(ans);
} /* allocVector2() */
SEXP allocMatrix2(SEXP nrow, SEXP ncol, SEXP value, SEXP set) {
SEXP ans;
SEXPTYPE type;
int nc, nr;
/* Argument 'nrow' & 'ncol': */
if (!isInteger(nrow) || xlength(nrow) != 1) {
error("Argument 'nrow' must be a single integer.");
}
if (!isInteger(ncol) || xlength(ncol) != 1) {
error("Argument 'ncol' must be a single integer.");
}
nr = asInteger(nrow);
nc = asInteger(ncol);
if (nr < 0) error("Argument 'nrow' is negative.");
if (nr < 0) error("Argument 'ncol' is negative.");
/* Argument 'value': */
if (!isVectorAtomic(value) || xlength(value) != 1) {
error("Argument 'value' must be a scalar.");
}
type = TYPEOF(value);
PROTECT(ans = allocMatrix(type, nr, nc));
fillWithValue(ans, value);
UNPROTECT(1);
return(ans);
} /* allocMatrix2() */
SEXP allocArray2(SEXP dim, SEXP value, SEXP set) {
SEXP ans;
SEXPTYPE type;
int i, d;
double nd = 1.0;
R_xlen_t n;
/* Argument 'dim': */
if (!isInteger(dim) || xlength(dim) == 0) {
error("Argument 'dim' must be an integer vector of at least length one.");
}
for (i = 0; i < xlength(dim); i++) {
d = INTEGER(dim)[i];
nd *= d;
#ifndef LONG_VECTOR_SUPPORT
if (nd > R_INT_MAX) {
error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX);
}
#endif
}
n = (int)nd;
/* Argument 'value': */
if (!isVectorAtomic(value) || xlength(value) != 1) {
error("Argument 'value' must be a scalar.");
}
type = TYPEOF(value);
PROTECT(dim = duplicate(dim));
PROTECT(ans = allocVector(type, n));
fillWithValue(ans, value);
setAttrib(ans, R_DimSymbol, dim);
UNPROTECT(2);
return(ans);
} /* allocArray2() */
matrixStats/src/rowDiffs.c 0000644 0001751 0000144 00000004115 12542546311 015335 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowDiffs(SEXP x, ...)
SEXP colDiffs(SEXP x, ...)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowDiffs
#define X_TYPE 'i'
#include "rowDiffs_TYPE-template.h"
#define X_TYPE 'r'
#include "rowDiffs_TYPE-template.h"
#undef METHOD
SEXP rowDiffs(SEXP x, SEXP dim, SEXP lag, SEXP differences, SEXP byRow) {
int byrow;
SEXP ans = NILSXP;
R_xlen_t lagg, diff;
R_xlen_t nrow, ncol;
R_xlen_t nrow_ans, ncol_ans;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'lag': */
lagg = asInteger(lag);
if (lagg < 1) {
error("Argument 'lag' must be a positive integer.");
}
/* Argument 'differences': */
diff = asInteger(differences);
if (diff < 1) {
error("Argument 'differences' must be a positive integer.");
}
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Dimension of result matrix */
if (byrow) {
nrow_ans = nrow;
ncol_ans = (R_xlen_t)((double)ncol - ((double)diff*(double)lagg));
if (ncol_ans < 0) ncol_ans = 0;
} else {
nrow_ans = (R_xlen_t)((double)nrow - ((double)diff*(double)lagg));
if (nrow_ans < 0) nrow_ans = 0;
ncol_ans = ncol;
}
if (isReal(x)) {
PROTECT(ans = allocMatrix(REALSXP, nrow_ans, ncol_ans));
rowDiffs_Real(REAL(x), nrow, ncol, byrow, lagg, diff, REAL(ans), nrow_ans, ncol_ans);
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocMatrix(INTSXP, nrow_ans, ncol_ans));
rowDiffs_Integer(INTEGER(x), nrow, ncol, byrow, lagg, diff, INTEGER(ans), nrow_ans, ncol_ans);
UNPROTECT(1);
}
return(ans);
} /* rowDiffs() */
/***************************************************************************
HISTORY:
2014-12-29 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowVars_TYPE-template.h 0000644 0001751 0000144 00000005622 12542546311 017700 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowVars_(...)
GENERATES:
void rowVars_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans)
void rowVars_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Adopted from rowQuantiles.c by R. Gentleman.
Template by Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2007-2013
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) {
R_xlen_t ii, jj, kk;
R_xlen_t *colOffset;
X_C_TYPE *values, value;
double value_d, mu_d, sigma2_d;
/* R allocate memory for the 'values'. This will be
taken care of by the R garbage collector later on. */
values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE));
/* If there are no missing values, don't try to remove them. */
if (hasna == FALSE)
narm = FALSE;
/* Pre-calculate the column offsets */
colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t));
if (byrow) {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj*nrow;
} else {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj;
}
for (ii=0; ii < nrow; ii++) {
R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ
kk = 0;
for (jj=0; jj < ncol; jj++) {
value = x[rowIdx+colOffset[jj]];
if (X_ISNAN(value)) {
if (narm == FALSE) {
kk = -1;
break;
}
} else {
values[kk] = value;
kk = kk + 1;
}
} /* for (jj ...) */
/* Note that 'values' will never contain NA/NaNs */
if (kk <= 1) {
ans[ii] = NA_REAL;
} else {
/* (a) Calculate mu = sum(x)/length(x) */
mu_d = 0;
for (jj=0; jj < kk; jj++) {
mu_d += (double)values[jj];
}
mu_d /= (double)kk;
/* (b) Calculate sigma^2 */
sigma2_d = 0;
for (jj=0; jj < kk; jj++) {
value_d = ((double)values[jj] - mu_d);
value_d *= value_d;
sigma2_d += value_d;
}
sigma2_d /= (double)(kk-1);
ans[ii] = sigma2_d;
} /* if (kk <= 1) */
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-18 [HB]
o Created from rowMads_TYPE-template.h.
**************************************************************************/
matrixStats/src/rowVars.c 0000644 0001751 0000144 00000003361 12542546311 015217 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowVars(SEXP x, SEXP naRm, SEXP hasNA)
SEXP colVars(SEXP x, SEXP naRm, SEXP hasNA)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowVars
#define X_TYPE 'i'
#include "rowVars_TYPE-template.h"
#define X_TYPE 'r'
#include "rowVars_TYPE-template.h"
#undef METHOD
SEXP rowVars(SEXP x, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) {
int narm, hasna, byrow;
SEXP ans;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Get dimensions of 'x'. */
if (byrow) {
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
} else {
nrow = INTEGER(dim)[1];
ncol = INTEGER(dim)[0];
}
/* R allocate a double vector of length 'nrow'
Note that 'nrow' means 'ncol' if byrow=FALSE. */
PROTECT(ans = allocVector(REALSXP, nrow));
/* Double matrices are more common to use. */
if (isReal(x)) {
rowVars_Real(REAL(x), nrow, ncol, narm, hasna, byrow, REAL(ans));
} else if (isInteger(x)) {
rowVars_Integer(INTEGER(x), nrow, ncol, narm, hasna, byrow, REAL(ans));
}
UNPROTECT(1);
return(ans);
} /* rowVars() */
/***************************************************************************
HISTORY:
2014-11-18 [HB]
o Created from rowMads.c.
**************************************************************************/
matrixStats/src/psortKM.c 0000644 0001751 0000144 00000005345 12542546311 015157 0 ustar hornik users /***************************************************************************
Public methods:
SEXP psortKM(SEXP x, SEXP k, SEXP nk)
Arguments:
x: numeric vector
k: integer scalar in [1,length(x)]
m: integer scalar in [1,k] and not too large if k is large.
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2012
**************************************************************************/
#include
#include
#include "types.h"
#include "utils.h"
void psortKM_C(double *x, R_xlen_t nx, R_xlen_t k, R_xlen_t m, double *ans) {
R_xlen_t ii, ll;
double *xx;
/* R allocate memory for the 'xx'. This will be
taken care of by the R garbage collector later on. */
xx = (double *) R_alloc(nx, sizeof(double));
/* Create a local copy 'xx' of 'x'. */
for (ii=0; ii < nx; ii++) {
xx[ii] = x[ii];
}
/* Permute xx[0:partial] so that xx[partial+1] is in the correct
place with smaller values to the left, ...
Example: psortKM(x, k=50, m=2) with length(x) = 1000
rPsort(xx, 1000, 50); We know x[50] and that x[1:49] <= x[50]
rPsort(xx, 50, 49); x[49] and that x[1:48] <= x[49]
rPsort(xx, 49, 48); x[48] and that x[1:47] <= x[48]
*/
ll = nx;
for (ii=0; ii < m; ii++) {
rPsort(xx, ll, k-1-ii);
ll = (k-1)-ii;
}
for (ii=0; ii < m; ii++) {
ans[ii] = xx[(k-m)+ii];
}
} /* psortKM_C() */
SEXP psortKM(SEXP x, SEXP k, SEXP m) {
SEXP ans;
R_xlen_t nx, kk, mm;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_REAL), "x");
nx = xlength(x);
if (nx == 0) {
error("Argument 'x' must not be empty.");
}
/* Argument 'k': */
if (!isInteger(k)) {
error("Argument 'k' must be an integer.");
}
if (length(k) != 1) {
error("Argument 'k' must be a single integer.");
}
kk = asInteger(k);
if (kk <= 0) {
error("Argument 'k' must be a positive integer.");
} if (kk > nx) {
error("Argument 'k' must not be greater than number of elements in 'x'.");
}
/* Argument 'm': */
if (!isInteger(m)) {
error("Argument 'm' must be an integer.");
}
if (length(m) != 1) {
error("Argument 'm' must be a single integer.");
}
mm = asInteger(m);
if (mm <= 0) {
error("Argument 'm' must be a positive integer.");
} else if (mm > kk) {
error("Argument 'm' must not be greater than argument 'k'.");
}
/* R allocate a double vector of length 'partial' */
PROTECT(ans = allocVector(REALSXP, mm));
psortKM_C(REAL(x), nx, kk, mm, REAL(ans));
UNPROTECT(1);
return(ans);
} /* psortKM() */
/***************************************************************************
HISTORY:
2012-09-10 [HB]
o Added psortKM().
o Created.
**************************************************************************/
matrixStats/src/rowMedians.c 0000644 0001751 0000144 00000005407 12542546311 015667 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowMedians(SEXP x, SEXP naRm, SEXP hasNA)
SEXP colMedians(SEXP x, SEXP naRm, SEXP hasNA)
Authors: Adopted from rowQuantiles.c by R. Gentleman.
Copyright Henrik Bengtsson, 2007
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowMedians
#define X_TYPE 'i'
#include "rowMedians_TYPE-template.h"
#define X_TYPE 'r'
#include "rowMedians_TYPE-template.h"
#undef METHOD
SEXP rowMedians(SEXP x, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) {
int narm, hasna, byrow;
SEXP ans;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Get dimensions of 'x'. */
if (byrow) {
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
} else {
nrow = INTEGER(dim)[1];
ncol = INTEGER(dim)[0];
}
/* R allocate a double vector of length 'nrow'
Note that 'nrow' means 'ncol' if byrow=FALSE. */
PROTECT(ans = allocVector(REALSXP, nrow));
/* Double matrices are more common to use. */
if (isReal(x)) {
rowMedians_Real(REAL(x), nrow, ncol, narm, hasna, byrow, REAL(ans));
} else if (isInteger(x)) {
rowMedians_Integer(INTEGER(x), nrow, ncol, narm, hasna, byrow, REAL(ans));
}
UNPROTECT(1);
return(ans);
} /* rowMedians() */
/***************************************************************************
HISTORY:
2013-01-13 [HB]
o Added argument 'byRow' to rowMedians() and dropped colMedians().
o Using internal arguments 'by_row' instead of 'by_column'.
2011-12-11 [HB]
o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s.
Note that NaN:s does not exist for integers.
2011-10-12 [HJ]
o Added colMedians().
o Now rowMediansInteger/Real() can operate also by columns, cf. argument
'by_column'.
2007-08-14 [HB]
o Added checks for user interrupts every 1000 line.
o Added argument 'hasNA' to rowMedians().
2005-12-07 [HB]
o BUG FIX: When calculating the median of an even number (non-NA) values,
the length of the second sort was one element too short, which made the
method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq).
2005-11-24 [HB]
o By implementing a special version for integers, there is no need to
coerce to double in R, which would take up twice the amount of memory.
o rowMedians() now handles NAs too.
o Adopted from rowQuantiles.c in Biobase of Bioconductor.
**************************************************************************/
matrixStats/src/rowLogSumExp.c 0000644 0001751 0000144 00000004161 12542546311 016166 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2013-2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#include "logSumExp_internal.h"
SEXP rowLogSumExps(SEXP lx, SEXP dim, SEXP naRm, SEXP hasNA, SEXP byRow) {
SEXP ans;
int narm, hasna, byrow;
R_xlen_t nrow, ncol, len, ii;
double *x, *xx, *ans_ptr;
/* Argument 'lx' and 'dim': */
assertArgMatrix(lx, dim, (R_TYPE_REAL), "lx");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* R allocate a double vector of length 'nrow'
Note that 'nrow' means 'ncol' if byrow=FALSE. */
if (byrow) { len = nrow; } else { len = ncol; }
PROTECT(ans = allocVector(REALSXP, len));
ans_ptr = REAL(ans);
/* Get the values */
x = REAL(lx);
if (byrow) {
/* R allocate memory for row-vector 'xx' of length 'ncol'.
This will be taken care of by the R garbage collector later on. */
xx = (double *) R_alloc(ncol, sizeof(double));
for (ii=0; ii < nrow; ii++) {
ans_ptr[ii] = logSumExp_double_by(x, ncol, narm, hasna, nrow, xx);
/* Move to the beginning next row */
x++;
}
} else {
for (ii=0; ii < ncol; ii++) {
ans_ptr[ii] = logSumExp_double(x, nrow, narm, hasna);
/* Move to the beginning next column */
x += nrow;
}
}
UNPROTECT(1); /* PROTECT(ans = ...) */
return(ans);
} /* rowLogSumExps() */
/***************************************************************************
HISTORY:
2013-05-02 [HB]
o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the
'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission.
2013-04-30 [HB]
o Created.
**************************************************************************/
matrixStats/src/colCounts.c 0000644 0001751 0000144 00000005744 12542546311 015534 0 ustar hornik users /***************************************************************************
Public methods:
SEXP colCounts(SEXP x, SEXP value, SEXP naRm, SEXP hasNA)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD colCounts
#define X_TYPE 'i'
#include "colCounts_TYPE-template.h"
#define X_TYPE 'r'
#include "colCounts_TYPE-template.h"
#define X_TYPE 'l'
#include "colCounts_TYPE-template.h"
#undef METHOD
SEXP colCounts(SEXP x, SEXP dim, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) {
SEXP ans;
int narm, hasna, what2;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'value': */
if (length(value) != 1)
error("Argument 'value' must be a single value.");
if (!isNumeric(value))
error("Argument 'value' must be a numeric value.");
/* Argument 'what': */
what2 = asInteger(what);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* R allocate an integer vector of length 'ncol' */
PROTECT(ans = allocVector(INTSXP, ncol));
if (isReal(x)) {
colCounts_Real(REAL(x), nrow, ncol, asReal(value), what2, narm, hasna, INTEGER(ans));
} else if (isInteger(x)) {
colCounts_Integer(INTEGER(x), nrow, ncol, asInteger(value), what2, narm, hasna, INTEGER(ans));
} else if (isLogical(x)) {
colCounts_Logical(LOGICAL(x), nrow, ncol, asLogical(value), what2, narm, hasna, INTEGER(ans));
}
UNPROTECT(1);
return(ans);
} // colCounts()
SEXP count(SEXP x, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) {
SEXP ans;
int narm, hasna, what2;
R_xlen_t nx;
/* Argument 'x' and 'dim': */
assertArgVector(x, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x");
nx = xlength(x);
/* Argument 'value': */
if (length(value) != 1)
error("Argument 'value' must be a single value.");
if (!isNumeric(value))
error("Argument 'value' must be a numeric value.");
/* Argument 'what': */
what2 = asInteger(what);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* R allocate a integer scalar */
PROTECT(ans = allocVector(INTSXP, 1));
if (isReal(x)) {
colCounts_Real(REAL(x), nx, 1, asReal(value), what2, narm, hasna, INTEGER(ans));
} else if (isInteger(x)) {
colCounts_Integer(INTEGER(x), nx, 1, asInteger(value), what2, narm, hasna, INTEGER(ans));
} else if (isLogical(x)) {
colCounts_Logical(LOGICAL(x), nx, 1, asLogical(value), what2, narm, hasna, INTEGER(ans));
}
UNPROTECT(1);
return(ans);
} // count()
/***************************************************************************
HISTORY:
2014-11-14 [HB]
o Created from rowCounts.c.
**************************************************************************/
matrixStats/src/rowMedians_TYPE-template.h 0000644 0001751 0000144 00000013262 12542546311 020344 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowMedians_(...)
GENERATES:
void rowMedians_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans)
void rowMedians_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Adopted from rowQuantiles.c by R. Gentleman.
Template by Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2007-2013
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int narm, int hasna, int byrow, double *ans) {
int isOdd;
R_xlen_t ii, jj, kk, qq;
R_xlen_t *colOffset;
X_C_TYPE *values, value;
/* R allocate memory for the 'values'. This will be
taken care of by the R garbage collector later on. */
values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE));
/* If there are no missing values, don't try to remove them. */
if (hasna == FALSE)
narm = FALSE;
/* When narm == FALSE, isOdd and qq are the same for all rows */
if (narm == FALSE) {
isOdd = (ncol % 2 == 1);
qq = (R_xlen_t)(ncol/2) - 1;
} else {
isOdd = FALSE;
qq = 0;
}
value = 0;
/* Pre-calculate the column offsets */
colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t));
// HJ begin
if (byrow) {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj*nrow;
} else {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj;
}
// HJ end
if (hasna == TRUE) {
for (ii=0; ii < nrow; ii++) {
R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ
kk = 0; /* The index of the last non-NA value detected */
for (jj=0; jj < ncol; jj++) {
value = x[rowIdx+colOffset[jj]]; //HJ
if (X_ISNAN(value)) {
if (narm == FALSE) {
kk = -1;
break;
}
} else {
values[kk] = value;
kk = kk + 1;
}
}
/* Note that 'values' will never contain NA/NaNs */
if (kk == 0) {
ans[ii] = R_NaN;
} else if (kk == -1) {
ans[ii] = R_NaReal;
} else {
/* When narm == TRUE, isOdd and qq may change with row */
if (narm == TRUE) {
isOdd = (kk % 2 == 1);
qq = (R_xlen_t)(kk/2) - 1;
}
/* Permute x[0:kk-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, kk, qq+1);
value = values[qq+1];
if (isOdd == TRUE) {
ans[ii] = (double)value;
} else {
/* Permute x[0:qq-2] so that x[qq-1] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, qq+1, qq);
ans[ii] = ((double)values[qq] + (double)value)/2;
}
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
} else {
for (ii=0; ii < nrow; ii++) {
R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ
for (jj=0; jj < ncol; jj++)
values[jj] = x[rowIdx+colOffset[jj]]; //HJ
/* Permute x[0:ncol-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, ncol, qq+1);
value = values[qq+1];
if (isOdd == TRUE) {
ans[ii] = (double)value;
} else {
/* Permute x[0:qq-2] so that x[qq-1] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, qq+1, qq);
ans[ii] = ((double)values[qq] + (double)value)/2;
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
} /* if (hasna ...) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2014-11-01 [HB]
o SPEEDUP: Now using 'ansp = REAL(ans)' once and then assigning to
'ansp' instead of to 'REAL(ans)'.
2013-04-23 [HB]
o BUG FIX: The integer template of rowMedians_() would
not handle ties properly. This was because ties were calculated as
'(double)((rowData[qq] + value)/2)' instead of
'((double)(rowData[qq] + value))/2'.
2013-01-13 [HB]
o Merged rowMedians_Integer() and rowMedians_Read() into template
rowMedians_().
2013-01-13 [HB]
o Using internal arguments 'by_row' instead of 'by_column'.
2011-12-11 [HB]
o BUG FIX: rowMediansReal(..., na.rm=TRUE) did not handle NaN:s, only NA:s.
Note that NaN:s does not exist for integers.
2011-10-12 [HJ]
o Added colMedians().
o Now rowMediansInteger/Real() can operate also by columns, cf. argument
'by_column'.
2007-08-14 [HB]
o Added checks for user interrupts every 1000 line.
o Added argument 'hasNA' to rowMedians().
2005-12-07 [HB]
o BUG FIX: When calculating the median of an even number (non-NA) values,
the length of the second sort was one element too short, which made the
method to freeze, i.e. rPsort(rowData, qq, qq) is now (...qq+1, qq).
2005-11-24 [HB]
o By implementing a special version for integers, there is no need to
coerce to double in R, which would take up twice the amount of memory.
o rowMedians() now handles NAs too.
o Adopted from rowQuantiles.c in Biobase of Bioconductor.
**************************************************************************/
matrixStats/src/rowDiffs_TYPE-template.h 0000644 0001751 0000144 00000006542 12542546311 020022 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowDiffs_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
#if X_TYPE == 'i'
static R_INLINE int diff_int(int a, int b) {
if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER);
return a-b;
}
#define X_DIFF diff_int
#define DIFF_X_MATRIX diff_matrix_int
#elif X_TYPE == 'r'
#define X_DIFF(a,b) a-b
#define DIFF_X_MATRIX diff_matrix_double
#endif
static R_INLINE void DIFF_X_MATRIX(X_C_TYPE *x, int nrow_x, int ncol_x, int byrow, int lag, X_C_TYPE *y, int nrow_y, int ncol_y) {
int ii, jj, ss, tt, uu;
if (byrow) {
uu = lag * nrow_x;
tt = 0;
ss = 0;
for (jj=0; jj < ncol_y; jj++) {
for (ii=0; ii < nrow_y; ii++) {
y[ss++] = X_DIFF(x[uu++], x[tt++]);
}
}
} else {
uu = lag;
tt = 0;
ss = 0;
for (jj=0; jj < ncol_y; jj++) {
for (ii=0; ii < nrow_y; ii++) {
/* Rprintf("y[%d] = x[%d] - x[%d] = %g - %g = %g\n", ss, uu, tt, (double)x[uu], (double)x[tt], (double)X_DIFF(x[uu], x[tt])); */
y[ss++] = X_DIFF(x[uu++], x[tt++]);
}
tt += lag;
uu += lag;
}
}
}
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nrow_ans, R_xlen_t ncol_ans) {
R_xlen_t nrow_tmp, ncol_tmp;
X_C_TYPE *tmp = NULL;
/* Nothing to do? */
if ((byrow && ncol_ans <= 0) || (!byrow && nrow_ans <= 0)) return;
/* Special case (difference == 1) */
if (differences == 1) {
DIFF_X_MATRIX(x, nrow, ncol, byrow, lag, ans, nrow_ans, ncol_ans);
} else {
/* Allocate temporary work matrix (to hold intermediate differences) */
if (byrow) {
nrow_tmp = nrow;
ncol_tmp = ncol - lag;
} else {
nrow_tmp = nrow - lag;
ncol_tmp = ncol;
}
tmp = Calloc(nrow_tmp*ncol_tmp, X_C_TYPE);
/* (a) First order of differences */
DIFF_X_MATRIX(x, nrow, ncol, byrow, lag, tmp, nrow_tmp, ncol_tmp);
if (byrow) {
ncol_tmp = ncol_tmp - lag;
} else {
nrow_tmp = nrow_tmp - lag;
}
/* (a) Intermediate orders of differences */
while (--differences > 1) {
DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, tmp, nrow_tmp, ncol_tmp);
if (byrow) {
ncol_tmp = ncol_tmp - lag;
} else {
nrow_tmp = nrow_tmp - lag;
}
}
/* (c) Last order of differences */
DIFF_X_MATRIX(tmp, nrow_tmp, ncol_tmp, byrow, lag, ans, nrow_ans, ncol_ans);
/* Deallocate temporary work matrix */
Free(tmp);
} /* if (differences ...) */
}
#undef X_DIFF
#undef DIFF_X_MATRIX
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-12-29 [HB]
o Created.
**************************************************************************/
matrixStats/src/indexByRow.c 0000644 0001751 0000144 00000004312 12542546311 015643 0 ustar hornik users /***************************************************************************
Public methods:
SEXP indexByRow(SEXP dim, SEXP idxs)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
SEXP indexByRow(SEXP dim, SEXP idxs) {
SEXP ans;
int nr, nc;
int *ans_ptr, *idxs_ptr;
R_xlen_t i, idx, n, nidxs;
int col, row;
int d;
double nd = 1.0;
/* Argument 'dim': */
if (!isInteger(dim) || xlength(dim) != 2) {
error("Argument 'dim' must be an integer vector of length two.");
}
for (i = 0; i < xlength(dim); i++) {
d = INTEGER(dim)[i];
if (d < 0) {
error("Argument 'dim' specifies a negative value: %d", d);
}
nd *= d;
#ifndef LONG_VECTOR_SUPPORT
if (nd > R_INT_MAX) {
error("Argument 'dim' specifies too many elements: %.g > %d", nd, R_INT_MAX);
}
#endif
}
n = (R_xlen_t)nd;
/* Argument 'idxs': */
if (isNull(idxs)) {
idxs_ptr = NULL;
nidxs = 0;
} else if (isVectorAtomic(idxs)) {
idxs_ptr = INTEGER(idxs);
nidxs = xlength(idxs);
} else {
/* To please compiler */
idxs_ptr = NULL;
nidxs = 0;
error("Argument 'idxs' must be NULL or a vector.");
}
nr = INTEGER(dim)[0];
nc = INTEGER(dim)[1];
if (idxs_ptr == NULL) {
PROTECT(ans = allocVector(INTSXP, n));
ans_ptr = INTEGER(ans);
row = 1;
col = 0;
for (i = 0; i < n; i++) {
ans_ptr[i] = row + col*nr;
col++;
if (col == nc) {
row++;
col = 0;
}
}
UNPROTECT(1);
} else {
PROTECT(ans = allocVector(INTSXP, nidxs));
ans_ptr = INTEGER(ans);
for (i = 0; i < nidxs; i++) {
// idxs <- idxs - 1
// cols <- idxs %/% dim[2L]
// rows <- idxs %% dim[2L]
// cols + dim[1L]*rows + 1L
idx = idxs_ptr[i] - 1;
col = idx / nc;
row = idx - nc*col;
row = idx % nc;
idx = col + nr*row + 1;
ans_ptr[i] = idx;
}
UNPROTECT(1);
}
return(ans);
} // indexByRow()
/***************************************************************************
HISTORY:
2014-11-09 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowCounts.c 0000644 0001751 0000144 00000003667 12542546311 015570 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowCounts(SEXP x, SEXP value, SEXP naRm, SEXP hasNA)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowCounts
#define X_TYPE 'i'
#include "rowCounts_TYPE-template.h"
#define X_TYPE 'r'
#include "rowCounts_TYPE-template.h"
#define X_TYPE 'l'
#include "rowCounts_TYPE-template.h"
#undef METHOD
SEXP rowCounts(SEXP x, SEXP dim, SEXP value, SEXP what, SEXP naRm, SEXP hasNA) {
SEXP ans;
int narm, hasna, what2;
R_xlen_t nrow, ncol;
/* Argument 'x' & 'dim': */
assertArgMatrix(x, dim, (R_TYPE_LGL | R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'value': */
if (length(value) != 1)
error("Argument 'value' must be a single value.");
if (!isNumeric(value))
error("Argument 'value' must be a numeric value.");
/* Argument 'what': */
what2 = asInteger(what);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* R allocate a double vector of length 'nrow' */
PROTECT(ans = allocVector(INTSXP, nrow));
/* Double matrices are more common to use. */
if (isReal(x)) {
rowCounts_Real(REAL(x), nrow, ncol, asReal(value), what2, narm, hasna, INTEGER(ans));
} else if (isInteger(x)) {
rowCounts_Integer(INTEGER(x), nrow, ncol, asInteger(value), what2, narm, hasna, INTEGER(ans));
} else if (isLogical(x)) {
rowCounts_Logical(LOGICAL(x), nrow, ncol, asLogical(value), what2, narm, hasna, INTEGER(ans));
}
UNPROTECT(1);
return(ans);
} // rowCounts()
/***************************************************************************
HISTORY:
2014-06-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/templates-types_undef.h 0000644 0001751 0000144 00000000661 12542546311 020102 0 ustar hornik users #undef CONCAT
#undef CONCAT_MACROS
#undef METHOD_NAME
#undef X_C_TYPE
#undef X_IN_C
#undef X_ISNAN
#undef X_ISNA
#undef X_ABS
#undef X_PSORT
#undef X_QSORT_I
#undef Y_C_TYPE
#undef Y_IN_C
#undef Y_ISNAN
#undef Y_ISNA
#undef Y_ABS
#undef Y_PSORT
#undef Y_QSORT_I
#undef ANS_SXP
#undef ANS_NA
#undef ANS_ISNAN
#undef ANS_ISNA
#undef ANS_C_TYPE
#undef ANS_IN_C
#undef X_TYPE
#undef Y_TYPE
#undef ANS_TYPE
#undef MARGIN
#undef OP
matrixStats/src/templates-types.h 0000644 0001751 0000144 00000007161 12542546311 016723 0 ustar hornik users #include
/*
* Sets type-specific macros
*/
#define CONCAT(x,y) x ##_## y
#define CONCAT_MACROS(x,y) CONCAT(x,y)
/*
Data type macros for argument 'x'
*/
#if X_TYPE == 'i'
#define X_C_TYPE int
#define X_IN_C INTEGER
#define X_ISNAN(x) (x == NA_INTEGER)
#define X_ISNA(x) (x == NA_INTEGER)
#define X_ABS(x) abs(x)
#define X_PSORT iPsort
#define X_QSORT_I R_qsort_int_I
#elif X_TYPE == 'r'
#define X_C_TYPE double
#define X_IN_C REAL
#define X_ISNAN(x) ISNAN(x) /* NA or NaN */
#define X_ISNA(x) ISNA(x) /* NA only */
#define X_ABS(x) fabs(x)
#define X_PSORT rPsort
#define X_QSORT_I R_qsort_I
#elif X_TYPE == 'l'
#define X_C_TYPE int
#define X_IN_C LOGICAL
#define X_ISNAN(x) (x == NA_LOGICAL)
#else
#error "INTERNAL ERROR: Failed to set C macro X_C_TYPE etc.: Unknown X_TYPE"
#endif
/*
Data type macros for argument 'y'
*/
#ifdef Y_TYPE
#if Y_TYPE == 'i'
#define Y_C_TYPE int
#define Y_IN_C INTEGER
#define Y_ISNAN(x) (x == NA_INTEGER)
#define Y_ISNA(x) (x == NA_INTEGER)
#define Y_ABS(x) abs(x)
#define Y_PSORT iPsort
#define Y_QSORT_I R_qsort_int_I
#elif Y_TYPE == 'r'
#define Y_C_TYPE double
#define Y_IN_C REAL
#define Y_ISNAN(x) ISNAN(x) /* NA or NaN */
#define Y_ISNA(x) ISNA(x) /* NA only */
#define Y_ABS(x) fabs(x)
#define Y_PSORT rPsort
#define Y_QSORT_I R_qsort_I
#elif Y_TYPE == 'l'
#define Y_C_TYPE int
#define Y_IN_C LOGICAL
#define Y_ISNAN(x) (x == NA_LOGICAL)
#else
#error "INTERNAL ERROR: Failed to set C macro Y_C_TYPE etc.: Unknown Y_TYPE"
#endif
#else
#define Y_TYPE '.'
#endif
/*
Data type macros for result ('ans')
*/
#ifndef ANS_TYPE
/* Default to same as 'x' */
#define ANS_TYPE X_TYPE
#endif
#if ANS_TYPE == 'i'
#define ANS_SXP INTSXP
#define ANS_NA NA_INTEGER
#define ANS_ISNAN(x) (x == NA_INTEGER)
#define ANS_ISNA(x) (x == NA_INTEGER)
#define ANS_C_TYPE int
#define ANS_IN_C INTEGER
#elif ANS_TYPE == 'r'
#define ANS_SXP REALSXP
#define ANS_NA NA_REAL
#define ANS_ISNAN(x) ISNAN(x) /* NA or NaN */
#define ANS_ISNA(x) ISNA(x) /* NA only */
#define ANS_C_TYPE double
#define ANS_IN_C REAL
#elif ANS_TYPE == 'l'
#define ANS_SXP LGLSXP
#define ANS_NA NA_LOGICAL
#define ANS_ISNAN(x) (x == NA_LOGICAL)
#define ANS_C_TYPE int
#define ANS_IN_C LOGICAL
#else
#error "INTERNAL ERROR: Failed to set C macro ANS_C_TYPE: Unknown ANS_TYPE"
#endif
/*
Method name based on 'x' (and 'y') types
*/
#ifndef METHOD_NAME
#if X_TYPE == 'i'
#if Y_TYPE == 'i'
#define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Integer)
#elif Y_TYPE == 'r'
#define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Real)
#elif Y_TYPE == 'l'
#define METHOD_NAME CONCAT_MACROS(METHOD, Integer_Logical)
#else
#define METHOD_NAME CONCAT_MACROS(METHOD, Integer)
#endif
#elif X_TYPE == 'r'
#if Y_TYPE == 'i'
#define METHOD_NAME CONCAT_MACROS(METHOD, Real_Integer)
#elif Y_TYPE == 'r'
#define METHOD_NAME CONCAT_MACROS(METHOD, Real_Real)
#elif Y_TYPE == 'l'
#define METHOD_NAME CONCAT_MACROS(METHOD, Real_Logical)
#else
#define METHOD_NAME CONCAT_MACROS(METHOD, Real)
#endif
#elif X_TYPE == 'l'
#if Y_TYPE == 'i'
#define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Integer)
#elif Y_TYPE == 'r'
#define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Real)
#elif Y_TYPE == 'l'
#define METHOD_NAME CONCAT_MACROS(METHOD, Logical_Logical)
#else
#define METHOD_NAME CONCAT_MACROS(METHOD, Logical)
#endif
#else
#error "INTERNAL ERROR: Failed to set C macro METHOD_NAME: Unknown X_TYPE"
#endif
#endif
matrixStats/src/rowMads_TYPE-template.h 0000644 0001751 0000144 00000016041 12542546311 017646 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowMads_(...)
GENERATES:
void rowMads_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans)
void rowMads_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Adopted from rowQuantiles.c by R. Gentleman.
Template by Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2007-2013
***********************************************************************/
#include
#include
#include "types.h"
#include /* abs() and fabs() */
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, double scale, int narm, int hasna, int byrow, double *ans) {
int isOdd;
R_xlen_t ii, jj, kk, qq;
R_xlen_t *colOffset;
X_C_TYPE *values, value, mu;
double *values_d, value_d, mu_d;
/* R allocate memory for the 'values'. This will be
taken care of by the R garbage collector later on. */
values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE));
values_d = (double *) R_alloc(ncol, sizeof(double));
/* If there are no missing values, don't try to remove them. */
if (hasna == FALSE)
narm = FALSE;
/* When narm == FALSE, isOdd and qq are the same for all rows */
if (narm == FALSE) {
isOdd = (ncol % 2 == 1);
qq = (R_xlen_t)(ncol/2) - 1;
} else {
isOdd = FALSE;
qq = 0;
}
value = 0;
/* Pre-calculate the column offsets */
colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t));
// HJ begin
if (byrow) {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj*nrow;
} else {
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj;
}
// HJ end
hasna = TRUE;
if (hasna == TRUE) {
for (ii=0; ii < nrow; ii++) {
R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ
kk = 0; /* The index of the last non-NA value detected */
for (jj=0; jj < ncol; jj++) {
value = x[rowIdx+colOffset[jj]]; //HJ
if (X_ISNAN(value)) {
if (narm == FALSE) {
kk = -1;
break;
}
} else {
values[kk] = value;
kk = kk + 1;
}
} /* for (jj ...) */
/* Note that 'values' will never contain NA/NaNs */
if (kk == 0) {
ans[ii] = NA_REAL;
} else if (kk == 1) {
ans[ii] = 0;
} else if (kk == -1) {
ans[ii] = R_NaReal;
} else {
/* When narm == TRUE, isOdd and qq may change with row */
if (narm == TRUE) {
isOdd = (kk % 2 == 1);
qq = (R_xlen_t)(kk/2) - 1;
}
/* Permute x[0:kk-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, kk, qq+1);
value = values[qq+1];
/* Calculate mu and sigma */
if (isOdd == TRUE) {
/* Since there are an odd number of values, then we
also know that 'mu' is one of the values in 'x',
which in turn mean we don't have to coerce integers
to doubles, if 'x' is an integer. Simple benchmarking
shows that it significantly faster to avoid coercion. */
mu = value;
/* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */
for (jj=0; jj < kk; jj++) {
value = (values[jj] - mu);
values[jj] = X_ABS(value);
}
/* (b) Calculate median of |x-mu| */
/* Permute x[0:kk-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, kk, qq+1);
value = values[qq+1];
ans[ii] = scale * (double)value;
} else {
/* Here we have to coerce to doubles since 'mu' is an average. */
/* Permute x[0:qq-2] so that x[qq-1] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, qq+1, qq);
#if X_TYPE == 'i'
/* If the difference between two integers is an even number, then
their means is also an integer, and then we can avoid coercion
to double also here. This should happen roughly half the
time we end up here which is worth optimizing for. Simple
benchmarking show a significant difference in speed, particular
for the column-based version. */
if ((values[qq] - value) % 2 == 0) {
/* No need to coerce */
mu = (values[qq] + value)/2;
/* (a) Subtract mu and absolute value, i.e. x <- |x-mu| */
for (jj=0; jj < kk; jj++) {
value = (values[jj] - mu);
values[jj] = X_ABS(value);
}
/* (b) Calculate median of |x-mu| */
/* Permute x[0:kk-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, kk, qq+1);
X_PSORT(values, qq+1, qq);
ans[ii] = scale * ((double)values[qq] + (double)values[qq+1])/2;
/* Done, continue to next vector */
continue;
}
#endif
mu_d = ((double)values[qq] + (double)value)/2;
/* (a) Subtract mu and square, i.e. x <- (x-mu)^2 */
for (jj=0; jj < kk; jj++) {
value_d = ((double)values[jj] - mu_d);
values_d[jj] = fabs(value_d);
}
/* (b) Calculate median */
/* Permute x[0:kk-1] so that x[qq-1] and x[qq] are in the
correct places with smaller values to the left, ... */
rPsort(values_d, kk, qq+1);
rPsort(values_d, qq+1, qq);
ans[ii] = scale * (values_d[qq] + values_d[qq+1])/2;
}
} /* if (kk == 0) */
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
} else {
for (ii=0; ii < nrow; ii++) {
R_xlen_t rowIdx = byrow ? ii : ncol*ii; //HJ
for (jj=0; jj < ncol; jj++)
values[jj] = x[rowIdx+colOffset[jj]]; //HJ
/* Permute x[0:ncol-1] so that x[qq] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, ncol, qq+1);
value = values[qq+1];
if (isOdd == TRUE) {
ans[ii] = (double)value;
} else {
/* Permute x[0:qq-2] so that x[qq-1] is in the correct
place with smaller values to the left, ... */
X_PSORT(values, qq+1, qq);
ans[ii] = ((double)values[qq] + value)/2;
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
} /* if (hasna ...) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-17 [HB]
o Created from rowMedians_TYPE-template.h.
**************************************************************************/
matrixStats/src/productExpSumLog_TYPE-template.h 0000644 0001751 0000144 00000005117 12542546311 021520 0 ustar hornik users /***********************************************************************
TEMPLATE:
LDOUBLE productExpSumLog_(X_C_TYPE *x, R_xlen_t nx, int narm, int hasna)
GENERATES:
LDOUBLE productExpSumLog_Real(double *x, R_xlen_t nx, int narm, int hasna)
LDOUBLE productExpSumLog_Integer(int *x, R_xlen_t nx, int narm, int hasna)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int narm, int hasna) {
LDOUBLE y = 0.0, t;
R_xlen_t ii;
int isneg = 0;
int hasZero = 0;
/* Calculate sum(log(abs(x))) */
for (ii = 0 ; ii < nx; ii++) {
t = x[ii];
/* Missing values? */
if (narm) {
if (X_ISNAN(t)) continue;
}
#if X_TYPE == 'i'
/* Early stopping? */
if (X_ISNAN(t)) {
y = NA_REAL;
break;
} else if (t < 0) {
isneg = !isneg;
t = -t;
} else if (t == 0) {
hasZero = 1;
}
#elif X_TYPE == 'r'
if (t < 0) {
isneg = !isneg;
t = -t;
}
#endif
t = log(t);
y += t;
/*
Rprintf("#%d: x=%g, is.nan(x)=%d, abs(x)=%g, is.nan(abs(x))=%d, log(abs(x))=%g, is.nan(log(abs(x)))=%d, sum=%g, is.nan(sum)=%d\n", ii, x[ii], R_IsNaN(x[ii]), X_ABS(x[ii]), R_IsNaN(abs(x[ii])), t, R_IsNaN(y), y, R_IsNaN(y)); */
}
if (ISNAN(y)) {
/* If there where NA and/or NaN elements, then 'y' will at this
point be NaN. The information on an NA value is lost when
calculating fabs(NA), which returns NaN. For consistency with
integers, we return NA in all cases. */
y = NA_REAL;
} else if (hasZero) {
/* no NA in 'x' and 'x' contains zero */
y = 0;
} else {
y = exp(y);
/* Update sign */
if (isneg) {
y = -y;
}
/* Overflow or underflow? */
if (y > DOUBLE_XMAX) {
y = R_PosInf;
} else if (y < -DOUBLE_XMAX) {
y = R_NegInf;
}
}
return (double)y;
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2014-06-04 [HB]
o Created.
**************************************************************************/
matrixStats/src/anyMissing.c 0000644 0001751 0000144 00000004020 12542546311 015666 0 ustar hornik users /***************************************************************************
Public methods:
anyMissing(SEXP x)
TO DO: Support list():s too.
Copyright Henrik Bengtsson, 2007
**************************************************************************/
#include
#include "types.h"
SEXP anyMissing(SEXP x) {
SEXP ans;
R_xlen_t nx, ii;
double *xdp;
int *xip, *xlp;
Rcomplex *xcp;
PROTECT(ans = allocVector(LGLSXP, 1));
LOGICAL(ans)[0] = 0;
nx = xlength(x);
/* anyMissing() on zero-length objects should always return FALSE,
just like any(double(0)). */
if (nx == 0) {
UNPROTECT(1);
return(ans);
}
switch (TYPEOF(x)) {
case REALSXP:
xdp = REAL(x);
for (ii=0; ii < nx; ii++) {
if ISNAN(xdp[ii]) {
LOGICAL(ans)[0] = 1;
break;
}
}
break;
case INTSXP:
xip = INTEGER(x);
for (ii=0; ii < nx; ii++) {
if (xip[ii] == NA_INTEGER) {
LOGICAL(ans)[0] = 1;
break;
}
}
break;
case LGLSXP:
xlp = LOGICAL(x);
for (ii=0; ii < nx; ii++) {
if (xlp[ii] == NA_LOGICAL) {
LOGICAL(ans)[0] = 1;
break;
}
}
break;
case CPLXSXP:
xcp = COMPLEX(x);
for (ii=0; ii < nx; ii++) {
if (ISNAN(xcp[ii].r) || ISNAN(xcp[ii].i)) {
LOGICAL(ans)[0] = 1;
break;
}
}
break;
case STRSXP:
for (ii=0; ii < nx; ii++) {
if (STRING_ELT(x, ii) == NA_STRING) {
LOGICAL(ans)[0] = 1;
break;
}
}
break;
case RAWSXP:
/* no such thing as a raw NA; always FALSE */
break;
default:
break;
} /* switch() */
UNPROTECT(1); /* ans */
return(ans);
} // anyMissing()
/***************************************************************************
HISTORY:
2007-08-14 [HB]
o Created using do_isna() in src/main/coerce.c as a template.
**************************************************************************/
matrixStats/src/binCounts-BINBY-template.h 0000644 0001751 0000144 00000006400 12542546311 020174 0 ustar hornik users /***************************************************************************
TEMPLATE:
void binCounts_(...)
GENERATES:
void binCounts_L(double *x, int nx, double *bx, int nbins, int *count)
void binCounts_R(double *x, int nx, double *bx, int nbins, int *count)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- BIN_BY: 'L' or 'R'
Copyright Henrik Bengtsson, 2012-2014
**************************************************************************/
#include
#if BIN_BY == 'L' /* [u,v) */
#define METHOD_NAME binCounts_L
#define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0)
#define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1)
#elif BIN_BY == 'R' /* (u,v] */
#define METHOD_NAME binCounts_R
#define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0)
#define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1)
#endif
void METHOD_NAME(double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, int *count) {
R_xlen_t ii = 0, jj = 0, iStart = 0;
int n = 0;
int warn = 0;
// Count?
if (nbins > 0) {
// Skip to the first bin
while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) {
++iStart;
}
// For each x...
for (ii = iStart; ii < nx; ++ii) {
// Skip to a new bin?
while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) {
count[jj++] = n;
// No more bins?
if (jj >= nbins) {
ii = nx; // Cause outer for-loop to exit
break;
}
n = 0;
}
/* Although unlikely, with long vectors the count for a bin
can become greater than what is possible to represent by
an integer. Detect and warn about this. */
if (n == R_INT_MAX) {
warn = 1;
// No point in keep counting for this bin
break;
}
// Count
++n;
}
// Update count of the last bin?
if (jj < nbins) {
count[jj] = n;
// Assign the remaining bins to zero counts
while (++jj < nbins) {
count[jj] = 0;
}
}
} // if (nbins > 0)
if (warn) {
warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX);
}
}
/* Undo template macros */
#undef BIN_BY
#undef IS_PART_OF_FIRST_BIN
#undef IS_PART_OF_NEXT_BIN
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-07 [HB]
o ROBUSTNESS: Added protection for integer overflow in bin counts.
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2013-10-08 [HB]
o Created template for binCounts_() to create functions that
bin either by [u,v) or (u,v].
2013-05-10 [HB]
o SPEEDUP: binCounts() no longer tests in every iteration (=for every
data point) whether the last bin has been reached or not.
2012-10-10 [HB]
o BUG FIX: binCounts() would return random/garbage counts for bins
that were beyond the last data point.
o BUG FIX: In some cases binCounts() could try to go past the last bin.
2012-10-03 [HB]
o Created.
**************************************************************************/
matrixStats/src/utils.h 0000644 0001751 0000144 00000005237 12542546311 014725 0 ustar hornik users #include
#include "types.h"
#define R_TYPE_LGL 1 /* 0b0001 */
#define R_TYPE_INT 2 /* 0b0010 */
#define R_TYPE_REAL 4 /* 0b0100 */
static R_INLINE void assertArgVector(SEXP x, int type, char *xlabel) {
/* Argument 'x': */
if (!isVectorAtomic(x)) {
error("Argument '%s' must be a matrix or a vector.", xlabel);
}
switch (TYPEOF(x)) {
case LGLSXP:
if (!(type & R_TYPE_LGL))
error("Argument '%s' cannot be logical.", xlabel);
break;
case INTSXP:
if (!(type & R_TYPE_INT))
error("Argument '%s' cannot be integer.", xlabel);
break;
case REALSXP:
if (!(type & R_TYPE_REAL))
error("Argument '%s' cannot be numeric.", xlabel);
break;
} /* switch */
} /* assertArgVector() */
static R_INLINE void assertArgDim(SEXP dim, double max, char *maxlabel) {
double nrow, ncol;
/* Argument 'dim': */
if (!isVectorAtomic(dim) || xlength(dim) != 2 || !isInteger(dim)) {
error("Argument 'dim' must be an integer vector of length two.");
}
nrow = (double)INTEGER(dim)[0];
ncol = (double)INTEGER(dim)[1];
if (nrow < 0) {
error("Argument 'dim' specifies a negative number of rows (dim[1]): %d", nrow);
} else if (ncol < 0) {
error("Argument 'dim' specifies a negative number of columns (dim[2]): %d", ncol);
} else if (nrow * ncol != max) {
error("Argument 'dim' does not match length of argument '%s': %g * %g != %g", maxlabel, nrow, ncol, max);
}
} /* assertArgDim() */
static R_INLINE void assertArgMatrix(SEXP x, SEXP dim, int type, char *xlabel) {
/* Argument 'x': */
if (isMatrix(x)) {
} else if (isVectorAtomic(x)) {
} else {
error("Argument '%s' must be a matrix or a vector.", xlabel);
}
switch (TYPEOF(x)) {
case LGLSXP:
if (!(type & R_TYPE_LGL))
error("Argument '%s' cannot be logical.", xlabel);
break;
case INTSXP:
if (!(type & R_TYPE_INT))
error("Argument '%s' cannot be integer.", xlabel);
break;
case REALSXP:
if (!(type & R_TYPE_REAL))
error("Argument '%s' cannot be numeric.", xlabel);
break;
} /* switch */
/* Argument 'dim': */
assertArgDim(dim, xlength(x), "x");
} /* assertArgMatrix() */
static R_INLINE int asLogicalNoNA(SEXP x, char *xlabel) {
int value = 0;
if (length(x) != 1)
error("Argument '%s' must be a single value.", xlabel);
if (isLogical(x)) {
value = asLogical(x);
} else if (isInteger(x)) {
value = asInteger(x);
} else {
error("Argument '%s' must be a logical.", xlabel);
}
if (value != TRUE && value != FALSE)
error("Argument '%s' must be either TRUE or FALSE.", xlabel);
return value;
} /* asLogicalNoNA() */
matrixStats/src/colOrderStats.c 0000644 0001751 0000144 00000003435 12542546311 016346 0 ustar hornik users /***************************************************************************
Public methods:
SEXP colOrderStats(SEXP x, SEXP which)
Authors: Henrik Bengtsson
To do: Add support for missing values.
Copyright Henrik Bengtsson, 2007-2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD colOrderStats
#define X_TYPE 'i'
#include "colOrderStats_TYPE-template.h"
#define X_TYPE 'r'
#include "colOrderStats_TYPE-template.h"
#undef METHOD
SEXP colOrderStats(SEXP x, SEXP dim, SEXP which) {
SEXP ans = NILSXP;
R_xlen_t nrow, ncol, qq;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'which': */
if (length(which) != 1)
error("Argument 'which' must be a single number.");
if (!isNumeric(which))
error("Argument 'which' must be a numeric number.");
/* Subtract one here, since rPsort does zero based addressing */
qq = asInteger(which) - 1;
/* Assert that 'qq' is a valid index */
if (qq < 0 || qq >= nrow) {
error("Argument 'which' is out of range.");
}
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocVector(REALSXP, ncol));
colOrderStats_Real(REAL(x), nrow, ncol, qq, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocVector(INTSXP, ncol));
colOrderStats_Integer(INTEGER(x), nrow, ncol, qq, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} // colOrderStats()
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created from rowOrderStats.c.
**************************************************************************/
matrixStats/src/colRanges.c 0000644 0001751 0000144 00000007137 12542546311 015476 0 ustar hornik users /***************************************************************************
Public methods:
SEXP colRanges(SEXP x, SEXP what)
Authors: Henrik Bengtsson.
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD colRanges
#define X_TYPE 'i'
#include "colRanges_TYPE-template.h"
#define X_TYPE 'r'
#include "colRanges_TYPE-template.h"
#undef METHOD
SEXP colRanges(SEXP x, SEXP dim, SEXP what, SEXP naRm, SEXP hasNA) {
SEXP ans = NILSXP, ans2 = NILSXP;
int *mins, *maxs;
double *mins2, *maxs2;
int *is_counted, all_counted = 0;
int what2, narm, hasna;
R_xlen_t nrow, ncol, jj;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'what': */
if (length(what) != 1)
error("Argument 'what' must be a single number.");
if (!isNumeric(what))
error("Argument 'what' must be a numeric number.");
what2 = asInteger(what);
if (what2 < 0 || what2 > 2)
error("Invalid value of 'what': %d", what2);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
is_counted = (int *) R_alloc(ncol, sizeof(int));
if (isReal(x)) {
if (what2 == 2) {
PROTECT(ans = allocMatrix(REALSXP, ncol, 2));
} else {
PROTECT(ans = allocVector(REALSXP, ncol));
}
colRanges_Real(REAL(x), nrow, ncol, what2, narm, hasna, REAL(ans), is_counted);
UNPROTECT(1);
} else if (isInteger(x)) {
if (what2 == 2) {
PROTECT(ans = allocMatrix(INTSXP, ncol, 2));
} else {
PROTECT(ans = allocVector(INTSXP, ncol));
}
colRanges_Integer(INTEGER(x), nrow, ncol, what2, narm, hasna, INTEGER(ans), is_counted);
/* Any entries with zero non-missing values? */
all_counted = 1;
for (jj=0; jj < ncol; jj++) {
if (!is_counted[jj]) {
all_counted = 0;
break;
}
}
if (!all_counted) {
/* Handle zero non-missing values */
/* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */
if (what2 == 0) {
PROTECT(ans2 = allocVector(REALSXP, ncol));
mins = INTEGER(ans);
mins2 = REAL(ans2);
for (jj=0; jj < ncol; jj++) {
if (is_counted[jj]) {
mins2[jj] = (double)mins[jj];
} else {
mins2[jj] = R_PosInf;
}
}
UNPROTECT(1); /* ans2 */
} else if (what2 == 1) {
PROTECT(ans2 = allocVector(REALSXP, ncol));
maxs = INTEGER(ans);
maxs2 = REAL(ans2);
for (jj=0; jj < ncol; jj++) {
if (is_counted[jj]) {
maxs2[jj] = (double)maxs[jj];
} else {
maxs2[jj] = R_NegInf;
}
}
UNPROTECT(1); /* ans2 */
} else if (what2 == 2) {
PROTECT(ans2 = allocMatrix(REALSXP, ncol, 2));
mins = INTEGER(ans);
maxs = &INTEGER(ans)[ncol];
mins2 = REAL(ans2);
maxs2 = &REAL(ans2)[ncol];
for (jj=0; jj < ncol; jj++) {
if (is_counted[jj]) {
mins2[jj] = (double)mins[jj];
maxs2[jj] = (double)maxs[jj];
} else {
mins2[jj] = R_PosInf;
maxs2[jj] = R_NegInf;
}
}
UNPROTECT(1); /* ans2 */
}
ans = ans2;
}
UNPROTECT(1); /* ans */
}
return(ans);
} // rowRanges()
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created.
**************************************************************************/
matrixStats/src/signTabulate_TYPE-template.h 0000644 0001751 0000144 00000003306 12542546311 020654 0 ustar hornik users /***********************************************************************
TEMPLATE:
void signTabulate_(X_C_TYPE *x, R_xlen_t nx, double *ans)
GENERATES:
void signTabulate_Real(double *x, R_xlen_t nx, double *ans)
void signTabulate_Integer(int *x, R_xlen_t nx, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *ans) {
X_C_TYPE xi;
R_xlen_t ii;
R_xlen_t nNeg = 0, nZero = 0, nPos = 0, nNA=0;
#if X_TYPE == 'r'
R_xlen_t nPosInf=0, nNegInf=0;
#endif
for (ii = 0; ii < nx; ii++) {
xi = x[ii];
if (X_ISNAN(xi)) {
nNA++;
} else if (xi > 0) {
nPos++;
#if X_TYPE == 'r'
if (xi == R_PosInf) nPosInf++;
#endif
} else if (xi < 0) {
nNeg++;
#if X_TYPE == 'r'
if (xi == R_NegInf) nNegInf++;
#endif
} else if (xi == 0) {
nZero++;
}
}
ans[0] = nNeg;
ans[1] = nZero;
ans[2] = nPos;
ans[3] = nNA;
#if X_TYPE == 'r'
ans[4] = nNegInf;
ans[5] = nPosInf;
#endif
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2014-06-04 [HB]
o Created.
**************************************************************************/
matrixStats/src/colOrderStats_TYPE-template.h 0000644 0001751 0000144 00000003703 12542546311 021023 0 ustar hornik users /***********************************************************************
TEMPLATE:
void colOrderStats_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int qq, X_C_TYPE *ans)
GENERATES:
void colOrderStats_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int qq, double *ans)
void colOrderStats_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int qq, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
- ANS_TYPE: 'i' or 'r'
Authors:
Adopted from ditto for rows.
Copyright: Henrik Bengtsson, 2007-2014
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C)
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t qq, X_C_TYPE *ans) {
R_xlen_t ii, jj;
R_xlen_t offset;
X_C_TYPE *values;
/* R allocate memory for the 'values'. This will be
taken care of by the R garbage collector later on. */
values = (X_C_TYPE *) R_alloc(nrow, sizeof(X_C_TYPE));
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++)
values[ii] = x[ii+offset];
/* Sort vector of length 'nrow' up to position 'qq'.
"...partial sorting: they permute x so that x[qq] is in the
correct place with smaller values to the left, larger ones
to the right." */
X_PSORT(values, nrow, qq);
ans[jj] = values[qq];
}
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created from rowOrderStats() ditto.
**************************************************************************/
matrixStats/src/signTabulate.c 0000644 0001751 0000144 00000002210 12542546311 016166 0 ustar hornik users /***************************************************************************
Public methods:
SEXP signTabulate(SEXP x)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD signTabulate
#define X_TYPE 'i'
#include "signTabulate_TYPE-template.h"
#define X_TYPE 'r'
#include "signTabulate_TYPE-template.h"
#undef METHOD
SEXP signTabulate(SEXP x) {
SEXP ans = NILSXP;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocVector(REALSXP, 6));
signTabulate_Real(REAL(x), xlength(x), REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocVector(REALSXP, 4));
signTabulate_Integer(INTEGER(x), xlength(x), REAL(ans));
UNPROTECT(1);
}
return(ans);
} // signTabulate()
/***************************************************************************
HISTORY:
2014-06-04 [HB]
o Created.
**************************************************************************/
matrixStats/src/binCounts.c 0000644 0001751 0000144 00000003607 12542546311 015523 0 ustar hornik users /***************************************************************************
Public methods:
binCounts(SEXP x, SEXP bx, SEXP right)
Copyright Henrik Bengtsson, 2012-2013
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#include
#define BIN_BY 'L'
#include "binCounts-BINBY-template.h"
#define BIN_BY 'R'
#include "binCounts-BINBY-template.h"
SEXP binCounts(SEXP x, SEXP bx, SEXP right) {
SEXP counts = NILSXP;
R_xlen_t nbins;
int closedRight;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_REAL), "x");
/* Argument 'bx': */
assertArgVector(bx, (R_TYPE_REAL), "bx");
nbins = xlength(bx)-1;
if (nbins <= 0) {
error("Argument 'bx' must specify at least two bin boundaries (= one bin): %d", xlength(bx));
}
/* Argument 'right': */
closedRight = asLogicalNoNA(right, "right");
PROTECT(counts = allocVector(INTSXP, nbins));
if (closedRight) {
binCounts_R(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts));
} else {
binCounts_L(REAL(x), xlength(x), REAL(bx), nbins, INTEGER(counts));
}
UNPROTECT(1);
return(counts);
} // binCounts()
/***************************************************************************
HISTORY:
2015-05-30 [HB]
o Added protected against 'bx' too short.
2014-06-03 [HB]
o Dropped unused variable 'count'.
2013-10-08 [HB]
o Now binCounts() calls binCounts_().
2013-05-10 [HB]
o SPEEDUP: binCounts() no longer tests in every iteration (=for every
data point) whether the last bin has been reached or not.
2012-10-10 [HB]
o BUG FIX: binCounts() would return random/garbage counts for bins
that were beyond the last data point.
o BUG FIX: In some cases binCounts() could try to go past the last bin.
2012-10-03 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowCumprods.c 0000644 0001751 0000144 00000002712 12542546311 016077 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowCumprods(SEXP x, SEXP naRm, SEXP hasNA)
SEXP colCumprods(SEXP x, SEXP naRm, SEXP hasNA)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowCumprods
#define X_TYPE 'i'
#include "rowCumprods_TYPE-template.h"
#define X_TYPE 'r'
#include "rowCumprods_TYPE-template.h"
#undef METHOD
SEXP rowCumprods(SEXP x, SEXP dim, SEXP byRow) {
int byrow;
SEXP ans = NILSXP;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowCumprods_Real(REAL(x), nrow, ncol, byrow, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowCumprods_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} /* rowCumprods() */
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars.c.
**************************************************************************/
matrixStats/src/rowRanges_TYPE-template.h 0000644 0001751 0000144 00000013754 12542546311 020211 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowRanges_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, X_C_TYPE *ans)
GENERATES:
void rowRanges_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int what, double *ans)
void rowRanges_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int what, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
- ANS_TYPE: 'i' or 'r'
Authors:
Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C)
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) {
R_xlen_t ii, jj;
R_xlen_t offset;
X_C_TYPE value, *mins = NULL, *maxs = NULL;
int *skip = NULL;
/* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */
/* If there are no missing values, don't try to remove them. */
if (hasna == FALSE)
narm = FALSE;
if (hasna) {
skip = (int *) R_alloc(nrow, sizeof(int));
for (ii=0; ii < nrow; ii++) {
is_counted[ii] = 0;
skip[ii] = 0;
}
/* Missing values */
if (what == 0) {
/* rowMins() */
mins = ans;
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
if (!narm && skip[ii]) continue;
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
mins[ii] = value;
is_counted[ii] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
skip[ii] = 1;
#elif X_TYPE == 'r'
if (X_ISNA(value)) skip[ii] = 1;
#endif
}
} else if (!is_counted[ii]) {
mins[ii] = value;
is_counted[ii] = 1;
} else if (value < mins[ii]) {
mins[ii] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (ii=0; ii < nrow; ii++) {
if (!is_counted[ii]) {
mins[ii] = R_PosInf;
}
}
#endif
} else if (what == 1) {
/* rowMaxs() */
maxs = ans;
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
if (!narm && skip[ii]) continue;
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
maxs[ii] = value;
is_counted[ii] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
skip[ii] = 1;
#elif X_TYPE == 'r'
if (X_ISNA(value)) skip[ii] = 1;
#endif
}
} else if (!is_counted[ii]) {
maxs[ii] = value;
is_counted[ii] = 1;
} else if (value > maxs[ii]) {
maxs[ii] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (ii=0; ii < nrow; ii++) {
if (!is_counted[ii]) {
maxs[ii] = R_NegInf;
}
}
#endif
} else if (what == 2) {
/* rowRanges() */
mins = ans;
maxs = &ans[nrow];
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
if (!narm && skip[ii]) continue;
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
mins[ii] = value;
maxs[ii] = value;
is_counted[ii] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
skip[ii] = 1;
#elif X_TYPE == 'r'
if (X_ISNA(value)) skip[ii] = 1;
#endif
}
} else if (!is_counted[ii]) {
mins[ii] = value;
maxs[ii] = value;
is_counted[ii] = 1;
} else if (value < mins[ii]) {
mins[ii] = value;
} else if (value > maxs[ii]) {
maxs[ii] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (ii=0; ii < nrow; ii++) {
if (!is_counted[ii]) {
mins[ii] = R_PosInf;
maxs[ii] = R_NegInf;
}
}
#endif
} /* if (what ...) */
} else {
/* No missing values */
if (what == 0) {
/* rowMins() */
mins = ans;
/* Initiate results */
for (ii=0; ii < nrow; ii++) {
mins[ii] = x[ii];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value < mins[ii]) mins[ii] = value;
}
}
} else if (what == 1) {
/* rowMax() */
maxs = ans;
/* Initiate results */
for (ii=0; ii < nrow; ii++) {
maxs[ii] = x[ii];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value > maxs[ii]) maxs[ii] = value;
}
}
} else if (what == 2) {
/* rowRanges()*/
mins = ans;
maxs = &ans[nrow];
/* Initiate results */
for (ii=0; ii < nrow; ii++) {
mins[ii] = x[ii];
maxs[ii] = x[ii];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value < mins[ii]) {
mins[ii] = value;
} else if (value > maxs[ii]) {
maxs[ii] = value;
}
}
}
} /* if (what ...) */
} /* if (narm) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created.
**************************************************************************/
matrixStats/src/diff2.c 0000644 0001751 0000144 00000003267 12542546311 014553 0 ustar hornik users /***************************************************************************
Public methods:
SEXP diff2(SEXP x, SEXP lag, SEXP differences)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include
#include "types.h"
#include "utils.h"
#define METHOD diff2
#define X_TYPE 'i'
#include "diff2_TYPE-template.h"
#define X_TYPE 'r'
#include "diff2_TYPE-template.h"
#undef METHOD
SEXP diff2(SEXP x, SEXP lag, SEXP differences) {
SEXP ans = NILSXP;
R_xlen_t nx, nans, lagg, diff;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
nx = xlength(x);
/* Argument 'lag': */
lagg = asInteger(lag);
if (lagg < 1) {
error("Argument 'lag' must be a positive integer.");
}
/* Argument 'differences': */
diff = asInteger(differences);
if (diff < 1) {
error("Argument 'differences' must be a positive integer.");
}
/* Length of result vector */
nans = (R_xlen_t)((double)nx - ((double)diff*(double)lagg));
if (nans < 0) nans = 0;
/* Dispatch to low-level C function */
if (isReal(x)) {
PROTECT(ans = allocVector(REALSXP, nans));
diff2_Real(REAL(x), nx, lagg, diff, REAL(ans), nans);
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocVector(INTSXP, nans));
diff2_Integer(INTEGER(x), nx, lagg, diff, INTEGER(ans), nans);
UNPROTECT(1);
} else {
error("Argument 'x' must be numeric.");
}
return ans;
} // diff2()
/***************************************************************************
HISTORY:
2014-12-29 [HB]
o Created.
**************************************************************************/
matrixStats/src/binMeans-BINBY-template.h 0000644 0001751 0000144 00000011734 12542546311 017772 0 ustar hornik users /***************************************************************************
TEMPLATE:
binMeans_(...)
GENERATES:
void binMeans_L(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count)
void binMeans_R(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- BIN_BY: 'L' or 'R'
Copyright Henrik Bengtsson, 2012-2013
**************************************************************************/
#include "types.h"
#if BIN_BY == 'L' /* [u,v) */
#define METHOD_NAME binMeans_L
#define IS_PART_OF_FIRST_BIN(x, bx0) (x < bx0)
#define IS_PART_OF_NEXT_BIN(x, bx1) (x >= bx1)
#elif BIN_BY == 'R' /* (u,v] */
#define METHOD_NAME binMeans_R
#define IS_PART_OF_FIRST_BIN(x, bx0) (x <= bx0)
#define IS_PART_OF_NEXT_BIN(x, bx1) (x > bx1)
#endif
void METHOD_NAME(double *y, R_xlen_t ny, double *x, R_xlen_t nx, double *bx, R_xlen_t nbins, double *ans, int *count) {
R_xlen_t ii = 0, jj = 0, iStart=0;
R_xlen_t n = 0;
LDOUBLE sum = 0.0;
int warn = 0;
// Count?
if (nbins > 0) {
// Skip to the first bin
while ((iStart < nx) && IS_PART_OF_FIRST_BIN(x[iStart], bx[0])) {
++iStart;
}
// For each x...
for (ii = iStart; ii < nx; ++ii) {
// Skip to a new bin?
while (IS_PART_OF_NEXT_BIN(x[ii], bx[jj+1])) {
// Update statistic of current bin?
if (count) {
/* Although unlikely, with long vectors the count for a bin
can become greater than what is possible to represent by
an integer. Detect and warn about this. */
if (n > R_INT_MAX) {
warn = 1;
count[jj] = R_INT_MAX;
} else {
count[jj] = n;
}
}
ans[jj] = n > 0 ? sum / n : R_NaN;
sum = 0.0;
n = 0;
// ...and move to next
++jj;
// No more bins?
if (jj >= nbins) {
// Make the outer for-loop to exit...
ii = nx - 1;
// ...but correct for the fact that the y[nx-1] point will
// be incorrectly added to the sum. Doing the correction
// here avoids an if (ii < nx) sum += y[ii] below.
sum -= y[ii];
break;
}
}
// Sum and count
sum += y[ii];
++n;
}
// Update statistic of the last bin?
if (jj < nbins) {
if (count) {
/* Although unlikely, with long vectors the count for a bin
can become greater than what is possible to represent by
an integer. Detect and warn about this. */
if (n > R_INT_MAX) {
warn= 1;
count[jj] = R_INT_MAX;
} else {
count[jj] = n;
}
}
ans[jj] = n > 0 ? sum / n : R_NaN;
// Assign the remaining bins to zero counts and missing mean values
while (++jj < nbins) {
ans[jj] = R_NaN;
if (count) count[jj] = 0;
}
}
} // if (nbins > 0)
if (warn) {
warning("Integer overflow. Detected one or more bins with a count that is greater than what can be represented by the integer data type. Setting count to the maximum integer possible (.Machine$integer.max = %d). The bin mean is still correct.", R_INT_MAX);
}
}
/* Undo template macros */
#undef BIN_BY
#undef IS_PART_OF_FIRST_BIN
#undef IS_PART_OF_NEXT_BIN
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-07 [HB]
o ROBUSTNESS: Added protection for integer overflow in bin counts.
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2014-10-01 [HB]
o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins.
2014-04-04 [HB]
o BUG FIX: The native code of binMeans(x, bx) would try to access
an out-of-bounds value of argument 'y' iff 'x' contained elements
that are left of all bins in 'bx'. This bug had no impact on the
results and since no assignment was done it should also not crash/
core dump R. This was discovered thanks to new memtests (ASAN and
valgrind) provided by CRAN.
2013-10-08 [HB]
o Created template for binMeans_() to create functions that
bin either by [u,v) or (u,v].
2013-05-10 [HB]
o SPEEDUP: binMeans() no longer tests in every iteration (=for every
data point) whether the last bin has been reached or not.
2012-10-10 [HB]
o BUG FIX: binMeans() would return random/garbage means/counts for
bins that were beyond the last data point.
o BUG FIX: In some cases binMeans() could try to go past the last bin.
2012-10-03 [HB]
o Created binMeans(), which was adopted from from code proposed by
Martin Morgan (Fred Hutchinson Cancer Research Center, Seattle) as
a reply to HB's R-devel thread 'Fastest non-overlapping binning mean
function out there?' on Oct 3, 2012.
**************************************************************************/
matrixStats/src/types.h 0000644 0001751 0000144 00000001203 12542546311 014716 0 ustar hornik users #include /* R_xlen_t, ... */
#ifndef R_INT_MIN
#define R_INT_MIN -INT_MAX
#endif
#ifndef R_INT_MAX
#define R_INT_MAX INT_MAX
#endif
/* As in /src/include/Defn.h */
#ifdef HAVE_LONG_DOUBLE
#define LDOUBLE long double
#else
#define LDOUBLE double
#endif
/* Backward compatibility with R (< 3.0.0)
As in /src/include/Rinternals.h */
#ifndef R_XLEN_T_MAX
typedef int R_xlen_t;
#define R_XLEN_T_MAX R_LEN_T_MAX
#ifndef xlength
#define xlength length
#endif
#endif
/* Macro to check for user interrupts every 2^20 iteration */
#define R_CHECK_USER_INTERRUPT(i) if (i % 1048576 == 0) R_CheckUserInterrupt()
matrixStats/src/weightedMedian_TYPE-template.h 0000644 0001751 0000144 00000020355 12542546311 021153 0 ustar hornik users /***********************************************************************
TEMPLATE:
double weightedMedian_(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int interpolate, int ties)
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int interpolate, int ties) {
X_C_TYPE *xtmp;
double res;
double dx, dy, Dy;
double *wtmp, *wcum, wtotal, wlow, whigh, tmp_d, tmp_d2;
R_xlen_t nxt, ii, jj, half;
int *idxs;
int equalweights = 0;
/* Quick results? */
if (nx == 0) {
return NA_REAL;
} else if (nx == 1) {
return (double)x[0];
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Weights */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
wtmp = Calloc(nx, double);
/* Check for missing, negative, and infite weights */
nxt = 0;
for (ii=0; ii < nx; ii++) {
/* Assume negative or missing weight by default or
that the signals is missing and should be dropped */
wtmp[ii] = 0;
if (ISNAN(w[ii])) {
if (!narm) {
Free(wtmp);
return NA_REAL;
}
} else if (w[ii] <= 0) {
/* Drop non-positive weights */
} else if (isinf(w[ii])) {
/* Detected a +Inf. From now on, treat all +Inf
weights equal and drop everything else */
nxt = 0;
for (jj=0; jj < nx; jj++) {
/* Assume non-infinite weight by default */
wtmp[jj] = 0;
if (isinf(w[jj])) {
if (X_ISNAN(x[ii])) {
if (!narm) {
Free(wtmp);
return NA_REAL;
}
} else {
/* Infinite weight, i.e. use data point */
wtmp[jj] = 1;
nxt++;
}
} else if (ISNAN(w[jj])) {
if (!narm) {
Free(wtmp);
return NA_REAL;
}
}
}
equalweights = 1;
break;
} else {
/* A data points with a finite positive weight */
if (X_ISNAN(x[ii])) {
if (!narm) {
Free(wtmp);
return NA_REAL;
}
} else {
/* A data point with a non-missing value */
wtmp[ii] = w[ii];
nxt++;
}
}
}
/*
printf("nx=%d, nxt=%d\n", nx, nxt);
for (ii=0; ii < nx; ii++) printf("w[%d]=%g, wtmp[%d]=%g\n", (int)ii, (double)w[ii], (int)ii, wtmp[ii]);
*/
/* Nothing to do? */
if (nxt == 0) {
Free(wtmp);
return NA_REAL;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Copy (x,w) to work with and calculate total weight */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
xtmp = Calloc(nxt, X_C_TYPE);
jj = 0;
wtotal = 0;
for (ii=0; ii < nx; ii++) {
if (wtmp[ii] > 0) {
/* printf("ii=%d, jj=%d, wtmp[%d]=%g\n", (int)ii, (int)jj, (int)ii, wtmp[ii]); */
xtmp[jj] = x[ii];
wtmp[jj] = wtmp[ii];
wtotal += wtmp[jj];
jj++;
}
}
x = xtmp;
w = wtmp;
nx = nxt;
nw = nx;
/*
for (ii=0; ii < nx; ii++) printf("x[%d]=%g, w[%d]=%g\n", (int)ii, (double)x[ii], (int)ii, w[ii]);
*/
/* Early stopping? */
if (nx == 1) {
res = (double)x[0];
Free(xtmp);
Free(wtmp);
return res;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* All weights equal? Happens if +Inf were detected. */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
if (equalweights) {
half = (nx+1) / 2;
/*
printf("half=%d\n", (int)half);
*/
X_PSORT(x, nx, half);
/*
for (ii=0; ii < nx; ii++) printf("x[%d]=%g\n", (int)ii, (double)x[ii]);
*/
/* FIXME: Add support for ties here too */
if (nx % 2 == 1) {
res = (double)x[half-1];
} else {
X_PSORT(x, half, half-1);
res = ((double)x[half-1] + (double)x[half]) / 2;
}
Free(xtmp);
Free(wtmp);
return res;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Sort x and calculated the cumulative sum of weights (normalize to */
/* one) according to the reordered vector. */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* (a) Sort x */
idxs = Calloc(nx, int);
for (ii = 0; ii < nx; ii++) idxs[ii] = ii;
X_QSORT_I(x, idxs, 1, nx);
/* (b) Normalized cumulative weights */
wcum = Calloc(nx, double);
tmp_d2 = 0;
/* Index where cumulative weight passed 1/2 */
half = nx+1; /* Default is last */
if (interpolate) {
/* Adjust */
for (ii = 0; ii < nx; ii++) {
tmp_d = w[idxs[ii]] / wtotal;
tmp_d2 += tmp_d;
wcum[ii] = tmp_d2 - (tmp_d/2);
if (wcum[ii] >= 0.5) {
half = ii;
/* Early stopping - no need to continue */
break;
}
}
} else {
for (ii = 0; ii < nx; ii++) {
tmp_d2 += w[idxs[ii]] / wtotal;
wcum[ii] = tmp_d2;
if (tmp_d2 > 0.5) {
half = ii;
/* Early stopping - no need to continue */
break;
}
}
}
Free(wtmp);
Free(idxs);
/* Two special cases where more than half of the total weight is at
a) the first, or b) the last value */
if (half == 0 || half == nx) {
res = (double)x[half];
Free(wcum);
Free(xtmp);
return res;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Alt 1: Linearly interpolated weighted median */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
if (interpolate) {
/* The width and the height of the "rectangle". */
dx = (double)(x[half] - x[half-1]);
Dy = wcum[half] - wcum[half-1];
/* printf("dx=%g, Dy=%g\n", dx, Dy); */
/* The width and the height of the triangle which upper corner touches
the level where the cumulative sum of weights *equals* half the
total weight. */
dy = 0.5 - wcum[half];
dx = (dy/Dy) * dx;
/* printf("dx=%g, dy=%g\n", dx, dy); */
/* The corresponding x value */
res = dx + x[half];
Free(wcum);
Free(xtmp);
return res;
}
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* Alt 2: Classical weighted median (tied or not) */
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
/* At this point we know that:
1) at most half the total weight is in the set x[1:half],
2) that the set x[(half+2):n] contains less than half the total weight
The question is whether x[(half+1):n] contains *more* than
half the total weight (try x=c(1,2,3), w=c(1,1,1)). If it is then
we can be sure that x[half+1] is the weighted median we are looking
for, otherwise it is any function of x[k:(half+1)]. */
wlow = wcum[half-1];
whigh = 1 - wlow;
/*
printf("half=%d, wtotal=%g, wlow=%g, whigh=%g, ties=%d\n", half, (double)wtotal, (double)wlow, (double)whigh, ties);
printf("x[half+(-1:1)]=c(%g, %g, %g)\n", x[half-1-1], x[half-1], x[half-1+1]);
*/
if (whigh > 0.5) {
/* printf("matrixStats2: Not a tie!\n"); */
/* Not a tie */
res = x[half];
} else {
/* printf("matrixStats2: A tie!\n"); */
/* A tie! */
if (ties == 1) { /* weighted */
/* printf("ties=%d, half=%d, wlow*x[half]=%g, whigh*x[half+1]=%g\n", ties, half, wlow*x[half-1], whigh*x[half]); */
res = wlow*(double)x[half-1] + whigh*(double)x[half];
} else if (ties == 2) { /* min */
res = (double)x[half-1];
} else if (ties == 4) { /* max */
res = (double)x[half];
} else if (ties == 8) { /* mean */
res = ((double)x[half-1] + (double)x[half]) / 2;
} else {
error("Unknown value of argument 'ties': %d", ties);
}
}
Free(wcum);
Free(xtmp);
return res;
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2015-01-01 [HB]
o Created.
**************************************************************************/
matrixStats/src/colRanges_TYPE-template.h 0000644 0001751 0000144 00000013331 12542546311 020146 0 ustar hornik users /***********************************************************************
TEMPLATE:
void colRanges_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, X_C_TYPE *ans)
GENERATES:
void colRanges_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int what, double *ans)
void colRanges_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int what, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
- ANS_TYPE: 'i' or 'r'
Authors:
Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C)
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int what, int narm, int hasna, X_C_TYPE *ans, int *is_counted) {
R_xlen_t ii, jj;
R_xlen_t offset;
X_C_TYPE value, *mins = NULL, *maxs = NULL;
/* Rprintf("(nrow,ncol)=(%d,%d), what=%d\n", nrow, ncol, what); */
/* If there are no missing values, don't try to remove them. */
if (hasna == FALSE)
narm = FALSE;
if (hasna) {
for (jj=0; jj < ncol; jj++) is_counted[jj] = 0;
/* Missing values */
if (what == 0) {
/* colMins() */
mins = ans;
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
mins[jj] = value;
is_counted[jj] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
break;
#elif X_TYPE == 'r'
if (X_ISNA(value)) break;
#endif
}
} else if (!is_counted[jj]) {
mins[jj] = value;
is_counted[jj] = 1;
} else if (value < mins[jj]) {
mins[jj] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (jj=0; jj < ncol; jj++) {
if (!is_counted[jj]) {
mins[jj] = R_PosInf;
}
}
#endif
} else if (what == 1) {
/* colMaxs() */
maxs = ans;
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
maxs[jj] = value;
is_counted[jj] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
break;
#elif X_TYPE == 'r'
if (X_ISNA(value)) break;
#endif
}
} else if (!is_counted[jj]) {
maxs[jj] = value;
is_counted[jj] = 1;
} else if (value > maxs[jj]) {
maxs[jj] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (jj=0; jj < ncol; jj++) {
if (!is_counted[jj]) {
maxs[jj] = R_NegInf;
}
}
#endif
} else if (what == 2) {
/* colRanges() */
mins = ans;
maxs = &ans[ncol];
for (jj=0; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (X_ISNAN(value)) {
if (!narm) {
mins[jj] = value;
maxs[jj] = value;
is_counted[jj] = 1;
/* Early stopping? */
#if X_TYPE == 'i'
break;
#elif X_TYPE == 'r'
if (X_ISNA(value)) break;
#endif
}
} else if (!is_counted[jj]) {
mins[jj] = value;
maxs[jj] = value;
is_counted[jj] = 1;
} else if (value < mins[jj]) {
mins[jj] = value;
} else if (value > maxs[jj]) {
maxs[jj] = value;
}
}
} /* for (jj ...) */
#if X_TYPE == 'r'
/* Handle zero non-missing values */
for (jj=0; jj < ncol; jj++) {
if (!is_counted[jj]) {
mins[jj] = R_PosInf;
maxs[jj] = R_NegInf;
}
}
#endif
} /* if (what ...) */
} else {
/* No missing values */
if (what == 0) {
/* colMins() */
mins = ans;
/* Initiate results */
for (jj=0; jj < ncol; jj++) {
mins[jj] = x[jj];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value < mins[jj]) mins[jj] = value;
}
}
} else if (what == 1) {
/* colMax() */
maxs = ans;
/* Initiate results */
for (jj=0; jj < ncol; jj++) {
maxs[jj] = x[jj];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value > maxs[jj]) maxs[jj] = value;
}
}
} else if (what == 2) {
/* colRanges()*/
mins = ans;
maxs = &ans[ncol];
/* Initiate results */
for (jj=0; jj < ncol; jj++) {
mins[jj] = x[jj];
maxs[jj] = x[jj];
}
for (jj=1; jj < ncol; jj++) {
offset = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
value = x[ii+offset];
if (value < mins[jj]) {
mins[jj] = value;
} else if (value > maxs[jj]) {
maxs[jj] = value;
}
}
}
} /* if (what ...) */
} /* if (narm) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowRanksWithTies.c 0000644 0001751 0000144 00000014667 12542546311 017056 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowRanksWithTies(SEXP x, SEXP tiesMethod, SEXP byRow)
Authors: Hector Corrada Bravo, Peter Langfelder and Henrik Bengtsson
TO DO: Add support for missing values.
**************************************************************************/
#include
#include "utils.h"
/* Template Ranks__ties() */
/*****************************************************************
* ties.method = "min"
*****************************************************************/
#define METHOD_NAME rowRanks_Real_tiesMin
#define MARGIN 'r'
#define X_TYPE 'r'
#define TIESMETHOD '0' /* min */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME rowRanks_Integer_tiesMin
#define MARGIN 'r'
#define X_TYPE 'i'
#define TIESMETHOD '0' /* min */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Real_tiesMin
#define MARGIN 'c'
#define X_TYPE 'r'
#define TIESMETHOD '0' /* min */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Integer_tiesMin
#define MARGIN 'c'
#define X_TYPE 'i'
#define TIESMETHOD '0' /* min */
#include "rowRanksWithTies_TYPE_TIES-template.h"
/*****************************************************************
* ties.method = "max"
*****************************************************************/
#define METHOD_NAME rowRanks_Real_tiesMax
#define MARGIN 'r'
#define X_TYPE 'r'
#define TIESMETHOD '1' /* max */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME rowRanks_Integer_tiesMax
#define MARGIN 'r'
#define X_TYPE 'i'
#define TIESMETHOD '1' /* max */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Real_tiesMax
#define MARGIN 'c'
#define X_TYPE 'r'
#define TIESMETHOD '1' /* max */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Integer_tiesMax
#define MARGIN 'c'
#define X_TYPE 'i'
#define TIESMETHOD '1' /* max */
#include "rowRanksWithTies_TYPE_TIES-template.h"
/*****************************************************************
* ties.method = "average"
*****************************************************************/
#define METHOD_NAME rowRanks_Real_tiesAverage
#define MARGIN 'r'
#define X_TYPE 'r'
#define TIESMETHOD 'a' /* average */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME rowRanks_Integer_tiesAverage
#define MARGIN 'r'
#define X_TYPE 'i'
#define TIESMETHOD 'a' /* average */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Real_tiesAverage
#define MARGIN 'c'
#define X_TYPE 'r'
#define TIESMETHOD 'a' /* average */
#include "rowRanksWithTies_TYPE_TIES-template.h"
#define METHOD_NAME colRanks_Integer_tiesAverage
#define MARGIN 'c'
#define X_TYPE 'i'
#define TIESMETHOD 'a' /* average */
#include "rowRanksWithTies_TYPE_TIES-template.h"
/* Peter Langfelder's modifications:
* byrow: 0 => rank columns, !0 => rank rows
* tiesMethod: 1: maximum, 2: average, 3:minimum
* The returned rank is a REAL matrix to accomodate average ranks
*/
SEXP rowRanksWithTies(SEXP x, SEXP dim, SEXP tiesMethod, SEXP byRow) {
int tiesmethod, byrow;
SEXP ans = NILSXP;
int nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'tiesMethod': */
tiesmethod = asInteger(tiesMethod);
if (tiesmethod < 1 || tiesmethod > 3) {
error("Argument 'tiesMethod' is out of range [1,3]: %d", tiesmethod);
}
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Double matrices are more common to use. */
if (isReal(x)) {
if (byrow) {
switch (tiesmethod) {
case 1:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowRanks_Real_tiesMax(REAL(x), nrow, ncol, 1, INTEGER(ans));
UNPROTECT(1);
break;
case 2:
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowRanks_Real_tiesAverage(REAL(x), nrow, ncol, 1, REAL(ans));
UNPROTECT(1);
break;
case 3:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowRanks_Real_tiesMin(REAL(x), nrow, ncol, 1, INTEGER(ans));
UNPROTECT(1);
break;
} /* switch */
} else {
switch (tiesmethod) {
case 1:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
colRanks_Real_tiesMax(REAL(x), nrow, ncol, 0, INTEGER(ans));
UNPROTECT(1);
break;
case 2:
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
colRanks_Real_tiesAverage(REAL(x), nrow, ncol, 0, REAL(ans));
UNPROTECT(1);
break;
case 3:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
colRanks_Real_tiesMin(REAL(x), nrow, ncol, 0, INTEGER(ans));
UNPROTECT(1);
break;
} /* switch */
}
} else if (isInteger(x)) {
if (byrow) {
switch (tiesmethod) {
case 1:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowRanks_Integer_tiesMax(INTEGER(x), nrow, ncol, 1, INTEGER(ans));
UNPROTECT(1);
break;
case 2:
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowRanks_Integer_tiesAverage(INTEGER(x), nrow, ncol, 1, REAL(ans));
UNPROTECT(1);
break;
case 3:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowRanks_Integer_tiesMin(INTEGER(x), nrow, ncol, 1, INTEGER(ans));
UNPROTECT(1);
break;
} /* switch */
} else {
switch (tiesmethod) {
case 1:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
colRanks_Integer_tiesMax(INTEGER(x), nrow, ncol, 0, INTEGER(ans));
UNPROTECT(1);
break;
case 2:
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
colRanks_Integer_tiesAverage(INTEGER(x), nrow, ncol, 0, REAL(ans));
UNPROTECT(1);
break;
case 3:
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
colRanks_Integer_tiesMin(INTEGER(x), nrow, ncol, 0, INTEGER(ans));
UNPROTECT(1);
break;
} /* switch */
}
}
return(ans);
} // rowRanksWithTies()
/***************************************************************************
HISTORY:
2013-01-13 [HB]
o Added argument 'tiesMethod' to rowRanks().
**************************************************************************/
matrixStats/src/rowRanges.c 0000644 0001751 0000144 00000007137 12542546311 015530 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowRanges(SEXP x, SEXP what)
Authors: Henrik Bengtsson.
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowRanges
#define X_TYPE 'i'
#include "rowRanges_TYPE-template.h"
#define X_TYPE 'r'
#include "rowRanges_TYPE-template.h"
#undef METHOD
SEXP rowRanges(SEXP x, SEXP dim, SEXP what, SEXP naRm, SEXP hasNA) {
SEXP ans = NILSXP, ans2 = NILSXP;
int *mins, *maxs;
double *mins2, *maxs2;
int *is_counted, all_counted = 0;
int what2, narm, hasna;
R_xlen_t nrow, ncol, ii;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'what': */
if (length(what) != 1)
error("Argument 'what' must be a single number.");
if (!isNumeric(what))
error("Argument 'what' must be a numeric number.");
what2 = asInteger(what);
if (what2 < 0 || what2 > 2)
error("Invalid value of 'what': %d", what2);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
is_counted = (int *) R_alloc(nrow, sizeof(int));
if (isReal(x)) {
if (what2 == 2) {
PROTECT(ans = allocMatrix(REALSXP, nrow, 2));
} else {
PROTECT(ans = allocVector(REALSXP, nrow));
}
rowRanges_Real(REAL(x), nrow, ncol, what2, narm, hasna, REAL(ans), is_counted);
UNPROTECT(1);
} else if (isInteger(x)) {
if (what2 == 2) {
PROTECT(ans = allocMatrix(INTSXP, nrow, 2));
} else {
PROTECT(ans = allocVector(INTSXP, nrow));
}
rowRanges_Integer(INTEGER(x), nrow, ncol, what2, narm, hasna, INTEGER(ans), is_counted);
/* Any entries with zero non-missing values? */
all_counted = 1;
for (ii=0; ii < nrow; ii++) {
if (!is_counted[ii]) {
all_counted = 0;
break;
}
}
if (!all_counted) {
/* Handle zero non-missing values */
/* Instead of return INTSXP, we must return REALSXP (to hold -Inf, and Inf) */
if (what2 == 0) {
PROTECT(ans2 = allocVector(REALSXP, nrow));
mins = INTEGER(ans);
mins2 = REAL(ans2);
for (ii=0; ii < nrow; ii++) {
if (is_counted[ii]) {
mins2[ii] = (double)mins[ii];
} else {
mins2[ii] = R_PosInf;
}
}
UNPROTECT(1); /* ans2 */
} else if (what2 == 1) {
PROTECT(ans2 = allocVector(REALSXP, nrow));
maxs = INTEGER(ans);
maxs2 = REAL(ans2);
for (ii=0; ii < nrow; ii++) {
if (is_counted[ii]) {
maxs2[ii] = (double)maxs[ii];
} else {
maxs2[ii] = R_NegInf;
}
}
UNPROTECT(1); /* ans2 */
} else if (what2 == 2) {
PROTECT(ans2 = allocMatrix(REALSXP, nrow, 2));
mins = INTEGER(ans);
maxs = &INTEGER(ans)[nrow];
mins2 = REAL(ans2);
maxs2 = &REAL(ans2)[nrow];
for (ii=0; ii < nrow; ii++) {
if (is_counted[ii]) {
mins2[ii] = (double)mins[ii];
maxs2[ii] = (double)maxs[ii];
} else {
mins2[ii] = R_PosInf;
maxs2[ii] = R_NegInf;
}
}
UNPROTECT(1); /* ans2 */
}
ans = ans2;
}
UNPROTECT(1); /* ans */
}
return(ans);
} // rowRanges()
/***************************************************************************
HISTORY:
2014-11-16 [HB]
o Created.
**************************************************************************/
matrixStats/src/meanOver.c 0000644 0001751 0000144 00000003335 12542546311 015331 0 ustar hornik users /***************************************************************************
Public methods:
SEXP meanOver(SEXP x, SEXP idxs, SEXP naRm, SEXP refine)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD meanOver
#define X_TYPE 'i'
#include "meanOver_TYPE-template.h"
#define X_TYPE 'r'
#include "meanOver_TYPE-template.h"
#undef METHOD
SEXP meanOver(SEXP x, SEXP idxs, SEXP naRm, SEXP refine) {
SEXP ans;
int *idxs_ptr;
R_xlen_t nidxs;
int narm, refine2;
double avg = NA_REAL;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'idxs': */
if (isNull(idxs)) {
idxs_ptr = NULL;
nidxs = 0;
} else if (isVectorAtomic(idxs)) {
idxs_ptr = INTEGER(idxs);
nidxs = xlength(idxs);
} else {
/* To please compiler */
idxs_ptr = NULL;
nidxs = 0;
error("Argument 'idxs' must be NULL or a vector.");
}
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'refine': */
refine2 = asLogicalNoNA(refine, "refine");
/* Double matrices are more common to use. */
if (isReal(x)) {
avg = meanOver_Real(REAL(x), xlength(x), idxs_ptr, nidxs, narm, refine2);
} else if (isInteger(x)) {
avg = meanOver_Integer(INTEGER(x), xlength(x), idxs_ptr, nidxs, narm, refine2);
}
/* Return results */
PROTECT(ans = allocVector(REALSXP, 1));
REAL(ans)[0] = avg;
UNPROTECT(1);
return(ans);
} // meanOver()
/***************************************************************************
HISTORY:
2014-11-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowCumsums.c 0000644 0001751 0000144 00000002635 12542546311 015743 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowCumsums(SEXP x, ...)
SEXP colCumsums(SEXP x, ...)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowCumsums
#define X_TYPE 'i'
#include "rowCumsums_TYPE-template.h"
#define X_TYPE 'r'
#include "rowCumsums_TYPE-template.h"
#undef METHOD
SEXP rowCumsums(SEXP x, SEXP dim, SEXP byRow) {
int byrow;
SEXP ans = NILSXP;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowCumsums_Real(REAL(x), nrow, ncol, byrow, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowCumsums_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} /* rowCumsums() */
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars.c.
**************************************************************************/
matrixStats/src/rowOrderStats_TYPE-template.h 0000644 0001751 0000144 00000005443 12542546311 021060 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowOrderStats_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int qq, X_C_TYPE *ans)
GENERATES:
void rowOrderStats_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, int qq, double *ans)
void rowOrderStats_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int qq, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
- ANS_TYPE: 'i' or 'r'
Authors:
Adopted from rowQ() by R. Gentleman.
Template by Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2007-2014
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C)
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, R_xlen_t qq, X_C_TYPE *ans) {
R_xlen_t ii, jj;
R_xlen_t *colOffset;
X_C_TYPE *values;
/* R allocate memory for the 'values'. This will be
taken care of by the R garbage collector later on. */
values = (X_C_TYPE *) R_alloc(ncol, sizeof(X_C_TYPE));
/* Pre-calculate the column offsets */
colOffset = (R_xlen_t *) R_alloc(ncol, sizeof(R_xlen_t));
for (jj=0; jj < ncol; jj++)
colOffset[jj] = (R_xlen_t)jj*nrow;
for (ii=0; ii < nrow; ii++) {
for (jj=0; jj < ncol; jj++)
values[jj] = x[ii+colOffset[jj]];
/* Sort vector of length 'ncol' up to position 'qq'.
"...partial sorting: they permute x so that x[qq] is in the
correct place with smaller values to the left, larger ones
to the right." */
X_PSORT(values, ncol, qq);
ans[ii] = values[qq];
}
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2013-01-13 [HB]
o Merged rowOrderStatsReal() and rowOrderStatsInteger() into
one rowOrderStats_() templated function.
2009-02-04 [HB]
o BUG FIX: For some errors in rowOrderStats(), the stack would not become
UNPROTECTED before calling error.
2008-03-25 [HB]
o Renamed from 'rowQuantiles' to 'rowOrderStats'.
2007-08-10 [HB]
o Removed arguments for NAs since rowOrderStats() still don't support it.
2005-11-24 [HB]
o Cool, it works and compiles nicely.
o Preallocate colOffset to speed up things even more.
o Added more comments and error checking.
o Adopted from rowQ() in Biobase of Bioconductor.
**************************************************************************/
matrixStats/src/logSumExp_internal.h 0000644 0001751 0000144 00000000600 12542546311 017371 0 ustar hornik users /***************************************************************************
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2013-2014
**************************************************************************/
double logSumExp_double(double *x, R_xlen_t nx, int narm, int hasna);
double logSumExp_double_by(double *x, R_xlen_t nx, int narm, int hasna, int by, double *xx);
matrixStats/src/rowCumsums_TYPE-template.h 0000644 0001751 0000144 00000007173 12542546311 020424 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowCumsums_(...)
GENERATES:
void rowCumsums_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
void rowCumsums_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) {
R_xlen_t ii, jj, kk, kk_prev;
LDOUBLE value;
#if ANS_TYPE == 'i'
double R_INT_MIN_d = (double)R_INT_MIN,
R_INT_MAX_d = (double)R_INT_MAX;
/* OK, i.e. no integer overflow yet? */
int warn = 0, ok, *oks = NULL;
#endif
if (nrow == 0 || ncol == 0) return;
if (byrow) {
#if ANS_TYPE == 'i'
oks = (int *) R_alloc(nrow, sizeof(int));
#endif
for (kk=0; kk < nrow; kk++) {
ans[kk] = (ANS_C_TYPE) x[kk];
#if ANS_TYPE == 'i'
oks[kk] = !X_ISNA(x[kk]);
#endif
}
kk_prev = 0;
for (jj=1; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
#if ANS_TYPE == 'i'
if (oks[ii]) {
/* Missing value? */
if (X_ISNA(x[kk])) {
oks[ii] = 0;
ans[kk] = ANS_NA;
} else {
value = (LDOUBLE) ans[kk_prev] + (LDOUBLE) x[kk];
/* Integer overflow? */
if (value < R_INT_MIN_d || value > R_INT_MAX_d) {
oks[ii] = 0;
warn = 1;
ans[kk] = ANS_NA;
} else {
ans[kk] = (ANS_C_TYPE) value;
}
}
} else {
ans[kk] = ANS_NA;
}
#else
ans[kk] = (ANS_C_TYPE) ((LDOUBLE) ans[kk_prev] + (LDOUBLE) x[kk]);
#endif
kk++;
kk_prev++;
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
value = 0;
#if ANS_TYPE == 'i'
ok = 1;
#endif
for (ii=0; ii < nrow; ii++) {
#if ANS_TYPE == 'i'
if (ok) {
/* Missing value? */
if (X_ISNA(x[kk])) {
ok = 0;
ans[kk] = ANS_NA;
} else {
value += (LDOUBLE) x[kk];
/* Integer overflow? */
if (value < R_INT_MIN_d || value > R_INT_MAX_d) {
ok = 0;
warn = 1;
ans[kk] = ANS_NA;
} else {
ans[kk] = (ANS_C_TYPE) value;
}
}
} else {
ans[kk] = ANS_NA;
}
#else
value += x[kk];
ans[kk] = (ANS_C_TYPE) value;
#endif
kk++;
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} /* if (byrow) */
#if ANS_TYPE == 'i'
/* Warn on integer overflow? */
if (warn) {
warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX);
}
#endif
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars_TYPE-template.h.
**************************************************************************/
matrixStats/src/logSumExp.c 0000644 0001751 0000144 00000002304 12542546311 015473 0 ustar hornik users /***************************************************************************
Public methods:
SEXP logSumExp(SEXP lx, SEXP naRm, SEXP hasNA)
Arguments:
lx : numeric vector
naRm : a logical scalar
hasNA: a logical scalar
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2013
**************************************************************************/
#include
#include
#include "types.h"
#include "utils.h"
#include "logSumExp_internal.h"
SEXP logSumExp(SEXP lx, SEXP naRm, SEXP hasNA) {
int narm, hasna;
/* Argument 'lx': */
assertArgVector(lx, (R_TYPE_REAL), "lx");
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
return(Rf_ScalarReal(logSumExp_double(REAL(lx), xlength(lx), narm, hasna)));
} /* logSumExp() */
/***************************************************************************
HISTORY:
2013-05-02 [HB]
o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the
'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission.
2013-04-30 [HB]
o Created.
**************************************************************************/
matrixStats/src/logSumExp_internal.c 0000644 0001751 0000144 00000010742 12542546311 017374 0 ustar hornik users #include
#include
#include "types.h"
#include "utils.h"
/*
logSumExp_double(x):
1. Scans for the maximum value of x=(x[0], x[1], ..., x[n-1])
2. Computes result from 'x'.
NOTE: The above sweeps the "contiguous" 'x' vector twice.
*/
double logSumExp_double(double *x, R_xlen_t nx, int narm, int hasna) {
R_xlen_t ii, iMax;
double xii, xMax;
LDOUBLE sum;
int hasna2 = FALSE; /* Indicates whether NAs where detected or not */
/* Quick return? */
if (nx == 0) {
return(R_NegInf);
} else if (nx == 1) {
if (narm && ISNAN(x[0])) {
return(R_NegInf);
} else {
return(x[0]);
}
}
/* Find the maximum value */
iMax = 0;
xMax = x[0];
if (ISNAN(xMax)) hasna2 = TRUE;
for (ii=1; ii < nx; ii++) {
/* Get the ii:th value */
xii = x[ii];
if (hasna && ISNAN(xii)) {
if (narm) {
hasna2 = TRUE;
continue;
} else {
return(R_NaReal);
}
}
if (xii > xMax || (narm && ISNAN(xMax))) {
iMax = ii;
xMax = xii;
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
/* Early stopping? */
if (ISNAN(xMax)) {
/* Found only missing values? */
return narm ? R_NegInf : R_NaReal;
} else if (xMax == R_PosInf) {
/* Found +Inf? */
return(R_PosInf);
}
/* Sum differences */
sum = 0.0;
for (ii=0; ii < nx; ii++) {
if (ii == iMax) {
continue;
}
/* Get the ii:th value */
xii = x[ii];
if (!hasna2 || !ISNAN(xii)) {
sum += exp(xii - xMax);
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
sum = xMax + log1p(sum);
return(sum);
} /* logSumExp_double() */
/*
logSumExp_double_by(x):
1. Scans for the maximum value of x=(x[0], x[by], ..., x[(n-1)*by])
and copies the values to xx = (xx[0], xx[1], xx[2], ..., xx[n-1]),
which *must* be preallocated.
2. Computes result from 'xx'.
NOTE: The above sweeps the "scattered" 'x' vector only once, and then
the "contigous" 'xx' vector once. This is more likely to create
cache hits.
*/
double logSumExp_double_by(double *x, R_xlen_t nx, int narm, int hasna, int by, double *xx) {
R_xlen_t ii, iMax, idx;
double xii, xMax;
LDOUBLE sum;
int hasna2 = FALSE; /* Indicates whether NAs where detected or not */
/* Quick return? */
if (nx == 0) {
return(R_NegInf);
} else if (nx == 1) {
if (narm && ISNAN(x[0])) {
return(R_NegInf);
} else {
return(x[0]);
}
}
/* To increase the chances for cache hits below, which
sweeps through the data twice, we copy data into a
temporary contigous vector while scanning for the
maximum value. */
/* Find the maximum value (and copy) */
iMax = 0;
xMax = x[0];
if (ISNAN(xMax)) hasna2 = TRUE;
xx[0] = xMax;
idx = 0;
for (ii=1; ii < nx; ii++) {
/* Get the ii:th value */
idx = idx + by;
xii = x[idx];
/* Copy */
xx[ii] = xii;
if (hasna && ISNAN(xii)) {
if (narm) {
hasna2 = TRUE;
continue;
} else {
return(R_NaReal);
}
}
if (xii > xMax || (narm && ISNAN(xMax))) {
iMax = ii;
xMax = xii;
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
/* Early stopping? */
if (ISNAN(xMax)) {
/* Found only missing values? */
return narm ? R_NegInf : R_NaReal;
} else if (xMax == R_PosInf) {
/* Found +Inf? */
return(R_PosInf);
}
/* Sum differences */
sum = 0.0;
for (ii=0; ii < nx; ii++) {
if (ii == iMax) {
continue;
}
/* Get the ii:th value */
xii = xx[ii];
if (!hasna2 || !ISNAN(xii)) {
sum += exp(xii - xMax);
}
R_CHECK_USER_INTERRUPT(ii);
} /* for (ii ...) */
sum = xMax + log1p(sum);
return(sum);
} /* logSumExp_double_by() */
/***************************************************************************
HISTORY:
2015-01-26 [HB]
o SPEEDUP: Now step 2 ("summing") only checks where NAs if NAs were
detected in step 1 ("max value"), which should be noticibly faster
since testing for NA is expensive for double values.
o SPEEDUP: Now function returns early after step 1 ("max value") if
the maximum value found is +Inf, or if all values where NAs.
o BUG FIX: Now logSumExp(, na.rm=TRUE) also returns -Inf.
2013-05-02 [HB]
o BUG FIX: Incorrectly used ISNAN() on an int variable as caught by the
'cc' compiler on Solaris. Reported by Brian Ripley upon CRAN submission.
2013-04-30 [HB]
o Created.
**************************************************************************/
matrixStats/src/colCounts_TYPE-template.h 0000644 0001751 0000144 00000011633 12542546311 020205 0 ustar hornik users /***********************************************************************
TEMPLATE:
void colCounts_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int narm, int hasna, int *ans)
GENERATES:
void colCounts_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, double value, int narm, int hasna, int *ans)
void colCounts_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans)
void colCounts_Logical(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i', 'r', or 'l'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int what, int narm, int hasna, int *ans) {
R_xlen_t ii, jj, kk;
int count;
X_C_TYPE xvalue;
if (what == 0L) { /* all */
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 1;
for (ii=0; ii < nrow; ii++) {
if (!X_ISNAN(x[kk++])) {
count = 0;
/* Found another value! Early stopping */
kk += nrow - ii - 1;
break;
}
}
ans[jj] = count;
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 1;
for (ii=0; ii < nrow; ii++) {
xvalue = x[kk++];
if (xvalue == value) {
} else if (narm && X_ISNAN(xvalue)) {
/* Skip */
} else if (!narm && X_ISNAN(xvalue)) {
/* Early stopping is not possible, because if we do
find an element that is not 'value' later, then
we know for sure that all = FALSE regardless of
missing values. In other words, at this point
the answer can be either NA or FALSE.*/
count = NA_INTEGER;
} else {
count = 0;
/* Found another value! Early stopping */
kk += nrow - ii - 1;
break;
}
} /* for (ii ...) */
ans[jj] = count;
} /* for (jj ...) */
} /* if (X_ISNAN(value)) */
} else if (what == 1L) { /* any */
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 0;
for (ii=0; ii < nrow; ii++) {
if (X_ISNAN(x[kk++])) {
count = 1;
/* Found value! Early stopping */
kk += nrow - ii - 1;
break;
}
}
ans[jj] = count;
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 0;
for (ii=0; ii < nrow; ii++) {
xvalue = x[kk++];
if (xvalue == value) {
count = 1;
/* Found value! Early stopping */
kk += nrow - ii - 1;
break;
} else if (narm && X_ISNAN(xvalue)) {
/* Skipping */
} else if (!narm && X_ISNAN(xvalue)) {
/* Early stopping is not possible, because if we do
find an element that is 'value' later, then
we know for sure that any = TRUE regardless of
missing values. In other words, at this point
the answer can be either NA or TRUE.*/
count = NA_INTEGER;
}
} /* for (ii ...) */
ans[jj] = count;
} /* for (jj ...) */
} /* if (X_ISNAN(value)) */
} else if (what == 2L) { /* count */
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 0;
for (ii=0; ii < nrow; ii++) {
if (X_ISNAN(x[kk++])) {
++count;
}
}
ans[jj] = count;
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
count = 0;
for (ii=0; ii < nrow; ii++) {
xvalue = x[kk++];
if (xvalue == value) {
++count;
} else if (!narm && X_ISNAN(xvalue)) {
count = NA_INTEGER;
/* Early stopping */
kk += nrow - ii - 1;
break;
}
} /* for (ii ...) */
ans[jj] = count;
} /* for (jj ...) */
} /* if (X_ISNAN(value)) */
} else {
error("INTERNAL ERROR: Unknown value of 'what' for colCounts: %d", what);
} /* if (what) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-14 [HB]
o Created colCounts() templates from rowCounts() templates.
**************************************************************************/
matrixStats/src/rowCumMinMaxs_TYPE-template.h 0000644 0001751 0000144 00000006134 12542546311 021005 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowCummins_(...)
GENERATES:
void rowCummins_Integer(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
void rowCummins_Real(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, double *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD: the name of the resulting function
- X_TYPE: 'i' or 'r'
Authors:
Henrik Bengtsson.
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
*/
#include "templates-types.h"
#if COMP == '<'
#define OP <
#elif COMP == '>'
#define OP >
#endif
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, int byrow, ANS_C_TYPE *ans) {
R_xlen_t ii, jj, kk, kk_prev;
ANS_C_TYPE value;
int ok;
int *oks = NULL;
if (nrow == 0 || ncol == 0) return;
if (byrow) {
oks = (int *) R_alloc(nrow, sizeof(int));
for (kk=0; kk < nrow; kk++) {
value = (ANS_C_TYPE) x[kk];
if (ANS_ISNAN(value)) {
oks[kk] = 0;
value = ANS_NA;
ans[kk] = ANS_NA;
} else {
oks[kk] = 1;
ans[kk] = value;
}
}
kk_prev = 0;
for (jj=1; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
if (oks[ii]) {
value = (ANS_C_TYPE) x[kk];
if (ANS_ISNAN(value)) {
oks[ii] = 0;
ans[kk] = ANS_NA;
} else {
if (value OP ans[kk_prev]) {
ans[kk] = value;
} else {
ans[kk] = (ANS_C_TYPE) ans[kk_prev];
}
}
} else {
ans[kk] = ANS_NA;
}
kk++;
kk_prev++;
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
value = (ANS_C_TYPE) x[kk];
if (ANS_ISNAN(value)) {
ok = 0;
value = ANS_NA;
ans[kk] = ANS_NA;
} else {
ok = 1;
ans[kk] = value;
}
kk_prev = kk;
kk++;
for (ii=1; ii < nrow; ii++) {
if (ok) {
value = (ANS_C_TYPE) x[kk];
if (ANS_ISNAN(value)) {
ok = 0;
value = ANS_NA;
ans[kk] = ANS_NA;
} else {
if (value OP ans[kk_prev]) {
ans[kk] = value;
} else {
ans[kk] = (ANS_C_TYPE) ans[kk_prev];
}
}
kk++;
kk_prev++;
} else {
ans[kk] = ANS_NA;
kk++;
}
R_CHECK_USER_INTERRUPT(kk);
} /* for (ii ...) */
} /* for (jj ...) */
} /* if (byrow) */
}
#undef OP
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars_TYPE-template.h.
**************************************************************************/
matrixStats/src/weightedMedian.c 0000644 0001751 0000144 00000003423 12542546311 016471 0 ustar hornik users /***************************************************************************
Public methods:
SEXP weightedMedian(SEXP x, SEXP w, SEXP naRm, SEXP interpolate, SEXP ties)
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#include
#define METHOD weightedMedian
#define X_TYPE 'i'
#include "weightedMedian_TYPE-template.h"
#define X_TYPE 'r'
#include "weightedMedian_TYPE-template.h"
#undef METHOD
SEXP weightedMedian(SEXP x, SEXP w, SEXP naRm, SEXP interpolate, SEXP ties) {
SEXP ans;
int narm, interpolate2, ties2;
double mu = NA_REAL;
R_xlen_t nx, nw;
/* Argument 'x': */
assertArgVector(x, (R_TYPE_INT | R_TYPE_REAL), "x");
nx = xlength(x);
/* Argument 'x': */
assertArgVector(w, (R_TYPE_REAL), "w");
nw = xlength(w);
if (nx != nw) {
error("Argument 'x' and 'w' are of different lengths: %d != %d", nx, nw);
}
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'interpolate': */
interpolate2 = asLogicalNoNA(interpolate, "interpolate");
/* Argument 'ties': */
ties2 = asInteger(ties);
/* Double matrices are more common to use. */
if (isReal(x)) {
mu = weightedMedian_Real(REAL(x), nx, REAL(w), nw, narm, interpolate2, ties2);
} else if (isInteger(x)) {
mu = weightedMedian_Integer(INTEGER(x), nx, REAL(w), nw, narm, interpolate2, ties2);
}
/* Return results */
PROTECT(ans = allocVector(REALSXP, 1));
REAL(ans)[0] = mu;
UNPROTECT(1);
return(ans);
} // weightedMedian()
/***************************************************************************
HISTORY:
2015-01-01 [HB]
o Created.
**************************************************************************/
matrixStats/src/weightedMean_TYPE-template.h 0000644 0001751 0000144 00000004205 12542546311 020632 0 ustar hornik users /***********************************************************************
TEMPLATE:
double weightedMean_(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int refine)
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, double *w, R_xlen_t nw, int narm, int refine) {
X_C_TYPE value;
double weight;
R_xlen_t i;
LDOUBLE sum = 0, wtotal = 0;
LDOUBLE avg = R_NaN;
for (i=0; i < nx; i++) {
weight = w[i];
/* Skip or early stopping? */
if (weight == 0) {
continue;
}
value = x[i];
#if X_TYPE == 'i'
if (X_ISNAN(value)) {
/* Skip or early stopping? */
if (narm) {
continue;
} else {
sum = R_NaReal;
break;
}
} else {
sum += (LDOUBLE)weight * (LDOUBLE)value;
wtotal += weight;
}
#elif X_TYPE == 'r'
if (!narm || !X_ISNAN(value)) {
sum += (LDOUBLE)weight * (LDOUBLE)value;
wtotal += weight;
}
#endif
} /* for (i ...) */
if (wtotal > DOUBLE_XMAX || wtotal < -DOUBLE_XMAX) {
avg = R_NaN;
} else if (sum > DOUBLE_XMAX) {
avg = R_PosInf;
} else if (sum < -DOUBLE_XMAX) {
avg = R_NegInf;
} else {
avg = sum / wtotal;
#if X_TYPE == 'r'
/* Extra precision by summing over residuals? */
if (refine && R_FINITE(avg)) {
sum = 0;
for (i=0; i < nx; i++) {
weight = w[i];
/* Skip? */
if (weight == 0) {
continue;
}
value = (LDOUBLE)x[i];
if (!narm || !ISNAN(value)) {
sum += (LDOUBLE)weight * (value - avg);
}
}
avg += (sum / wtotal);
}
#endif
}
return (double)avg;
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-12-08 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowCounts_TYPE-template.h 0000644 0001751 0000144 00000012533 12542546311 020237 0 ustar hornik users /***********************************************************************
TEMPLATE:
void rowCounts_(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int narm, int hasna, int *ans)
GENERATES:
void rowCounts_Real(double *x, R_xlen_t nrow, R_xlen_t ncol, double value, int narm, int hasna, int *ans)
void rowCounts_Integer(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans)
void rowCounts_Logical(int *x, R_xlen_t nrow, R_xlen_t ncol, int value, int narm, int hasna, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i', 'r', or 'l'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol, X_C_TYPE value, int what, int narm, int hasna, int *ans) {
R_xlen_t ii, jj, kk;
int count;
X_C_TYPE xvalue;
if (what == 0) { /* all */
for (ii=0; ii < nrow; ii++) ans[ii] = 1;
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
/* Skip? */
if (ans[ii]) {
xvalue = x[kk++];
if (!X_ISNAN(xvalue)) {
ans[ii] = 0;
/* Found another value! Skip from now on */
}
} else {
kk++;
}
}
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
/* Skip? */
if (ans[ii]) {
xvalue = x[kk++];
if (xvalue == value) {
} else if (narm && X_ISNAN(xvalue)) {
/* Skip */
} else if (!narm && X_ISNAN(xvalue)) {
/* Early stopping is not possible, because if we do
find an element that is not 'value' later, then
we know for sure that all = FALSE regardless of
missing values. In other words, at this point
the answer can be either NA or FALSE.*/
ans[ii] = NA_INTEGER;
} else {
/* Found another value! Skip from now on */
ans[ii] = 0;
}
} else {
kk++;
}
} /* for (ii ...) */
} /* for (jj ...) */
}
} else if (what == 1) { /* any */
for (ii=0; ii < nrow; ii++) ans[ii] = 0;
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
/* Skip? */
if (ans[ii]) {
kk++;
} else {
xvalue = x[kk++];
if (X_ISNAN(xvalue)) {
ans[ii] = 1;
/* Found value! Skip from now on */
}
}
}
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
/* Skip? */
if (ans[ii] && ans[ii] != NA_INTEGER) {
kk++;
} else {
xvalue = x[kk++];
if (xvalue == value) {
/* Found value! Skip from now on */
ans[ii] = 1;
} else if (narm && X_ISNAN(xvalue)) {
/* Skip */
} else if (!narm && X_ISNAN(xvalue)) {
/* Early stopping is not possible, because if we do
find an element that is 'value' later, then
we know for sure that any = TRUE regardless of
missing values. In other words, at this point
the answer can be either NA or TRUE.*/
ans[ii] = NA_INTEGER;
}
}
} /* for (ii ...) */
} /* for (jj ...) */
}
} else if (what == 2) { /* count */
for (ii=0; ii < nrow; ii++) ans[ii] = 0;
/* Count missing values? [sic!] */
if (X_ISNAN(value)) {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
xvalue = x[kk++];
if (X_ISNAN(xvalue)) ans[ii] = ans[ii] + 1;
}
}
} else {
kk = 0;
for (jj=0; jj < ncol; jj++) {
for (ii=0; ii < nrow; ii++) {
count = ans[ii];
/* Nothing more to do on this row? */
if (count == NA_INTEGER) {
kk++;
continue;
}
xvalue = x[kk++];
if (xvalue == value) {
ans[ii] = count + 1;
} else {
if (!narm && X_ISNAN(xvalue)) {
ans[ii] = NA_INTEGER;
continue;
}
}
} /* for (ii ...) */
} /* for (jj ...) */
}
} else {
error("INTERNAL ERROR: Unknown value of 'what' for colCounts: %d", what);
} /* if (what ...) */
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2014-11-01 [HB]
o SPEEDUP: Now using ansp = INTEGER(ans) once and then querying/assigning
'ansp[i]' instead of INTEGER(ans)[i].
2014-06-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/meanOver_TYPE-template.h 0000644 0001751 0000144 00000006641 12542546311 020013 0 ustar hornik users /***********************************************************************
TEMPLATE:
double meanOver_(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine)
GENERATES:
double meanOver_Integer(int *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine)
double meanOver_Real(double *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int refine) {
X_C_TYPE value;
R_xlen_t i, idx;
LDOUBLE sum = 0, avg = R_NaN;
#if X_TYPE == 'r'
LDOUBLE rsum = 0;
#endif
int count = 0;
/* Sum over all element? */
if (!idxs) {
for (i=0; i < nx; i++) {
value = x[i];
#if X_TYPE == 'i'
if (!X_ISNAN(value)) {
sum += (LDOUBLE)value;
++count;
} else if (!narm) {
sum = R_NaReal;
break;
}
#elif X_TYPE == 'r'
if (!narm || !ISNAN(value)) {
sum += (LDOUBLE)value;
++count;
}
#endif
} /* for (i ...) */
if (sum > DOUBLE_XMAX) {
avg = R_PosInf;
} else if (sum < -DOUBLE_XMAX) {
avg = R_NegInf;
} else {
avg = sum / count;
/* Extra precision by summing over residuals? */
#if X_TYPE == 'r'
if (refine && R_FINITE(avg)) {
for (i=0; i < nx; i++) {
value = x[i];
if (!narm || !ISNAN(value)) {
rsum += (LDOUBLE)(value - avg);
}
}
avg += (rsum / count);
}
#endif
}
} else {
for (i=0; i < nidxs; i++) {
idx = idxs[i];
if (idx <= 0) {
Rf_error("Argument \'idxs\' contains a non-positive index: %d", idx);
} else if (idx > nx) {
Rf_error("Argument \'idxs\' contains an index out of range [1,%d]: %d", nx, idx);
}
value = x[idx-1];
#if X_TYPE == 'i'
if (!X_ISNAN(value)) {
sum += (LDOUBLE)value;
++count;
} else if (!narm) {
sum = R_NaReal;
break;
}
#elif X_TYPE == 'r'
if (!narm || !ISNAN(value)) {
sum += (LDOUBLE)value;
++count;
}
#endif
} /* for (i ...) */
if (sum > DOUBLE_XMAX) {
avg = R_PosInf;
} else if (sum < -DOUBLE_XMAX) {
avg = R_NegInf;
} else {
avg = sum / count;
/* Extra precision by summing over residuals? */
#if X_TYPE == 'r'
if (refine && R_FINITE(avg)) {
for (i=0; i < nidxs; i++) {
idx = idxs[i];
value = x[idx-1];
if (!narm || !ISNAN(value)) {
rsum += (LDOUBLE)(value - avg);
}
}
avg += (rsum / count);
}
#endif
}
}
return (double)avg;
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Now meanOver_() uses only basic C types.
2014-11-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/sumOver_TYPE-template.h 0000644 0001751 0000144 00000004427 12542546311 017677 0 ustar hornik users /***********************************************************************
TEMPLATE:
double sumOver_(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode)
GENERATES:
double sumOver_Integer(int *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode)
double sumOver_Real(double *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
double METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, int *idxs, R_xlen_t nidxs, int narm, int mode) {
X_C_TYPE value;
R_xlen_t i, idx;
LDOUBLE sum = 0;
/* Sum over all element? */
if (!idxs) {
for (i=0; i < nx; i++) {
value = x[i];
#if X_TYPE == 'i'
if (!X_ISNAN(value)) {
sum += (LDOUBLE)value;
} else if (!narm) {
sum = R_NaReal;
break;
}
#elif X_TYPE == 'r'
if (!narm || !X_ISNAN(value)) {
sum += (LDOUBLE)value;
}
#endif
} /* for (i ...) */
} else {
for (i=0; i < nidxs; i++) {
idx = idxs[i];
if (idx <= 0) {
Rf_error("Argument \'idxs\' contains a non-positive index: %d", idx);
} else if (idx > nx) {
Rf_error("Argument \'idxs\' contains an index out of range [1,%d]: %d", nx, idx);
}
value = x[idx-1];
#if X_TYPE == 'i'
if (!X_ISNAN(value)) {
sum += (LDOUBLE)value;
} else if (!narm) {
sum = R_NaReal;
break;
}
#elif X_TYPE == 'r'
if (!narm || !X_ISNAN(value)) {
sum += (LDOUBLE)value;
}
#endif
} /* for (i ...) */
}
return (double)sum;
}
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Now sumOver_() uses only basic C types.
2014-11-02 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowRanksWithTies_TYPE_TIES-template.h 0000644 0001751 0000144 00000013547 12542546311 022355 0 ustar hornik users /***********************************************************************
TEMPLATE:
Ranks_Real_ties(...)
GENERATES:
void colRanks_Real_tiesMin(double *x, int nrow, int ncol, int byrow, double *ans)
void rowRanks_Real_tiesMin(double *x, int nrow, int ncol, int byrow, double *ans)
void colRanks_Real_tiesMax(double *x, int nrow, int ncol, int byrow, double *ans)
void rowRanks_Real_tiesMax(double *x, int nrow, int ncol, int byrow, double *ans)
void colRanks_Real_tiesAverage(double *x, int nrow, int ncol, int byrow, double *ans)
void rowRanks_Real_tiesAverage(double *x, int nrow, int ncol, int byrow, double *ans)
void colRanks_Integer_tiesMin(int *x, int nrow, int ncol, int byrow, int *ans)
void rowRanks_Integer_tiesMin(int *x, int nrow, int ncol, int byrow, int *ans)
void colRanks_Integer_tiesMax(int *x, int nrow, int ncol, int byrow, int *ans)
void rowRanks_Integer_tiesMax(int *x, int nrow, int ncol, int byrow, int *ans)
void colRanks_Integer_tiesAverage(int *x, int nrow, int ncol, int byrow, int *ans)
void rowRanks_Integer_tiesAverage(int *x, int nrow, int ncol, int byrow, int *ans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- MARGIN: 'r' (rows) or 'c' (columns).
- X_TYPE: 'i' or 'r'
- ANS_TYPE: 'i' or 'r'
- TIESMETHOD: '0' (min), '1' (max), 'a' (average)
Authors:
Hector Corrada Bravo [HCB]
Peter Langfelder [PL]
Henrik Bengtsson [HB]
***********************************************************************/
#include
#if TIESMETHOD == '0' /* min */
#define ANS_TYPE 'i'
#define RANK(firstTie, aboveTie) firstTie + 1
#elif TIESMETHOD == '1' /* max */
#define ANS_TYPE 'i'
#define RANK(firstTie, aboveTie) aboveTie
#elif TIESMETHOD == 'a' /* average */
#define ANS_TYPE 'r'
#define RANK(firstTie, aboveTie) ((double) (firstTie + aboveTie + 1))/2
#endif
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, X_ISNAN, [METHOD_NAME])
ANS_TYPE => (ANS_SXP, ANS_NA, ANS_C_TYPE, ANS_IN_C)
*/
#include "templates-types.h"
/* Indexing formula to compute the vector index of element j of vector i.
Should take arguments element, vector, nElements, nVectors. */
#if MARGIN == 'r' /* rows */
#define INDEX_OF(element, vector, nElements, nVectors) \
vector + element*nVectors
#elif MARGIN == 'c' /* columns */
#define INDEX_OF(element, vector, nElements, nVectors) \
element + vector*nElements
#endif
void METHOD_NAME(X_C_TYPE *x, int nrow, int ncol, int byrow, ANS_C_TYPE *ans) {
ANS_C_TYPE rank;
X_C_TYPE *values, current, tmp;
int ii, jj, kk;
int *I;
int lastFinite, firstTie, aboveTie;
int nvalues, nVec;
if (byrow) {
nvalues = ncol;
nVec = nrow;
} else {
nvalues = nrow;
nVec = ncol;
}
values = (X_C_TYPE *) R_alloc(nvalues, sizeof(X_C_TYPE));
I = (int *) R_alloc(nvalues, sizeof(int));
for (ii=0; ii < nVec; ii++) {
lastFinite = nvalues-1;
/* Put the NA/NaN elements at the end of the vector and update
the index vector appropriately.
This may be a bit faster since it only uses one loop over
the length of the vector, plus it shortens the sort in case
there are missing values. /PL (2012-12-14)
*/
for (jj = 0; jj <= lastFinite; jj++) {
tmp = x[ INDEX_OF(jj, ii, nvalues, nVec) ];
if (X_ISNAN(tmp)) {
while (lastFinite > jj && X_ISNAN(x[ INDEX_OF(lastFinite, ii, nvalues, nVec) ])) {
I[lastFinite] = lastFinite;
lastFinite--;
}
I[lastFinite] = jj;
I[jj] = lastFinite;
values[ jj ] = x[ INDEX_OF(lastFinite, ii, nvalues, nVec) ];
values[ lastFinite ] = tmp;
lastFinite--;
} else {
I[jj] = jj;
values[ jj ] = tmp;
}
} /* for (jj ...) */
// Diagnostic print-outs
/*
Rprintf("Swapped vector:\n");
for (jj=0; jj < nvalues; jj++)
{
Rprintf(" %8.4f,", values[jj]);
if (((jj+1) % 5==0) || (jj==nvalues-1)) Rprintf("\n");
}
Rprintf("Index vector:\n");
for (jj=0; jj 0) X_QSORT_I(values, I, 1, lastFinite + 1);
// Calculate the ranks.
for (jj=0; jj <= lastFinite;) {
firstTie = jj;
current = values[jj];
while ((jj <= lastFinite) && (values[jj] == current)) jj++;
aboveTie = jj;
// Depending on rank method, get maximum, average, or minimum rank
rank = RANK(firstTie, aboveTie);
for (kk=firstTie; kk < aboveTie; kk++) {
ans[ INDEX_OF(I[kk], ii, nvalues, nVec) ] = rank;
}
}
// At this point jj = lastFinite + 1, no need to re-initialize again.
for (; jj < nvalues; jj++) {
ans[ INDEX_OF(I[jj], ii, nvalues, nVec) ] = ANS_NA;
}
// Rprintf("\n");
}
}
/* Undo template macros */
#undef RANK
#undef INDEX_OF
#undef TIESMETHOD
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-11-06 [HB]
o CLEANUP: Moving away from R data types in low-level C functions.
2013-04-23 [HB]
o BUG FIX: Ranks did not work for integers with NAs; now using X_ISNAN().
2013-01-13 [HB]
o Template cleanup. Extened tempate to integer matrices.
o Added argument 'tiesMethod' to rowRanks().
2012-12-14 [PL]
o Added internal support for "min", "max" and "average" ties. Using
template to generate the various versions of the functions.
2013-01-13 [HCB]
o Created. Using "max" ties.
**************************************************************************/
matrixStats/src/diff2_TYPE-template.h 0000644 0001751 0000144 00000004524 12542546311 017227 0 ustar hornik users /***********************************************************************
TEMPLATE:
void diff2_(X_C_TYPE *x, R_xlen_t nx, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans)
Arguments:
The following macros ("arguments") should be defined for the
template to work as intended.
- METHOD_NAME: the name of the resulting function
- X_TYPE: 'i' or 'r'
Copyright: Henrik Bengtsson, 2014
***********************************************************************/
#include "types.h"
/* Expand arguments:
X_TYPE => (X_C_TYPE, X_IN_C, [METHOD_NAME])
*/
#include "templates-types.h"
#include
#if X_TYPE == 'i'
static R_INLINE int diff_int(int a, int b) {
if (X_ISNA(a) || X_ISNA(b)) return(NA_INTEGER);
return a-b;
}
#define X_DIFF diff_int
#elif X_TYPE == 'r'
#define X_DIFF(a,b) a-b
#endif
void METHOD_NAME(X_C_TYPE *x, R_xlen_t nx, R_xlen_t lag, R_xlen_t differences, X_C_TYPE *ans, R_xlen_t nans) {
int ii, tt, uu;
X_C_TYPE *tmp = NULL;
/* Nothing to do? */
if (nans <= 0) return;
/* Special case (difference == 1) */
if (differences == 1) {
uu = lag;
tt = 0;
for (ii=0; ii < nans; ii++) {
ans[ii] = X_DIFF(x[uu++], x[tt++]);
}
} else {
/* Allocate temporary work vector (to hold intermediate differences) */
tmp = Calloc(nx - lag, X_C_TYPE);
/* (a) First order of differences */
uu = lag;
tt = 0;
for (ii=0; ii < nx-lag; ii++) {
tmp[ii] = X_DIFF(x[uu++], x[tt++]);
}
nx -= lag;
/* (b) All other orders of differences but the last */
while (--differences > 1) {
uu = lag;
tt = 0;
for (ii=0; ii < nx-lag; ii++) {
tmp[ii] = X_DIFF(tmp[uu++], tmp[tt++]);
}
nx -= lag;
}
/* Sanity check */
/* if (nx-lag != nans) error("nx != nans: %d != %d\n", nx, nans); */
/* (c) Last order of differences */
uu = lag;
tt = 0;
for (ii=0; ii < nans; ii++) {
ans[ii] = X_DIFF(tmp[uu++], tmp[tt++]);
}
/* Deallocate temorary work vector */
Free(tmp);
} /* if (differences ...) */
}
#undef X_DIFF
/* Undo template macros */
#include "templates-types_undef.h"
/***************************************************************************
HISTORY:
2014-12-29 [HB]
o Created.
**************************************************************************/
matrixStats/src/rowMads.c 0000644 0001751 0000144 00000003721 12542546311 015170 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowMads(SEXP x, SEXP constant, SEXP naRm, SEXP hasNA)
SEXP colMads(SEXP x, SEXP constant, SEXP naRm, SEXP hasNA)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowMads
#define X_TYPE 'i'
#include "rowMads_TYPE-template.h"
#define X_TYPE 'r'
#include "rowMads_TYPE-template.h"
#undef METHOD
SEXP rowMads(SEXP x, SEXP dim, SEXP constant, SEXP naRm, SEXP hasNA, SEXP byRow) {
int narm, hasna, byrow;
SEXP ans;
R_xlen_t nrow, ncol;
double scale;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
/* Argument 'constant': */
if (!isNumeric(constant))
error("Argument 'constant' must be a numeric scale.");
scale = asReal(constant);
/* Argument 'naRm': */
narm = asLogicalNoNA(naRm, "na.rm");
/* Argument 'hasNA': */
hasna = asLogicalNoNA(hasNA, "hasNA");
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Get dimensions of 'x'. */
if (byrow) {
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
} else {
nrow = INTEGER(dim)[1];
ncol = INTEGER(dim)[0];
}
/* R allocate a double vector of length 'nrow'
Note that 'nrow' means 'ncol' if byrow=FALSE. */
PROTECT(ans = allocVector(REALSXP, nrow));
/* Double matrices are more common to use. */
if (isReal(x)) {
rowMads_Real(REAL(x), nrow, ncol, scale, narm, hasna, byrow, REAL(ans));
} else if (isInteger(x)) {
rowMads_Integer(INTEGER(x), nrow, ncol, scale, narm, hasna, byrow, REAL(ans));
}
UNPROTECT(1);
return(ans);
} /* rowMads() */
/***************************************************************************
HISTORY:
2014-11-17 [HB]
o Created from rowMedians.c.
**************************************************************************/
matrixStats/src/rowCumMinMaxs.c 0000644 0001751 0000144 00000004561 12542546311 016330 0 ustar hornik users /***************************************************************************
Public methods:
SEXP rowCummins(SEXP x, SEXP naRm, SEXP hasNA)
SEXP colCummins(SEXP x, SEXP naRm, SEXP hasNA)
Authors: Henrik Bengtsson
Copyright Henrik Bengtsson, 2014
**************************************************************************/
#include
#include "types.h"
#include "utils.h"
#define METHOD rowCummins
#define COMP '<'
#define X_TYPE 'i'
#include "rowCumMinMaxs_TYPE-template.h"
#define X_TYPE 'r'
#include "rowCumMinMaxs_TYPE-template.h"
#undef COMP
#undef METHOD
SEXP rowCummins(SEXP x, SEXP dim, SEXP byRow) {
int byrow;
SEXP ans = NILSXP;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowCummins_Real(REAL(x), nrow, ncol, byrow, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowCummins_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} /* rowCummins() */
#define METHOD rowCummaxs
#define COMP '>'
#define X_TYPE 'i'
#include "rowCumMinMaxs_TYPE-template.h"
#define X_TYPE 'r'
#include "rowCumMinMaxs_TYPE-template.h"
#undef COMP
#undef METHOD
SEXP rowCummaxs(SEXP x, SEXP dim, SEXP byRow) {
int byrow;
SEXP ans = NILSXP;
R_xlen_t nrow, ncol;
/* Argument 'x' and 'dim': */
assertArgMatrix(x, dim, (R_TYPE_INT | R_TYPE_REAL), "x");
nrow = INTEGER(dim)[0];
ncol = INTEGER(dim)[1];
/* Argument 'byRow': */
byrow = asLogical(byRow);
/* Double matrices are more common to use. */
if (isReal(x)) {
PROTECT(ans = allocMatrix(REALSXP, nrow, ncol));
rowCummaxs_Real(REAL(x), nrow, ncol, byrow, REAL(ans));
UNPROTECT(1);
} else if (isInteger(x)) {
PROTECT(ans = allocMatrix(INTSXP, nrow, ncol));
rowCummaxs_Integer(INTEGER(x), nrow, ncol, byrow, INTEGER(ans));
UNPROTECT(1);
}
return(ans);
} /* rowCummaxs() */
/***************************************************************************
HISTORY:
2014-11-26 [HB]
o Created from rowVars.c.
**************************************************************************/
matrixStats/src/x_OP_y_TYPE-template.h 0000644 0001751 0000144 00000017612 12542546311 017434 0 ustar hornik users #include "types.h"
#include "templates-types.h"
#if OP == '+'
#define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Add)
#define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T)
static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) {
#if X_TYPE == 'i'
if (X_ISNAN(x)) return NA_REAL;
#endif
#if Y_TYPE == 'i'
if (Y_ISNAN(y)) return NA_REAL;
#endif
return (double)x + (double)y;
}
#define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME_T)
static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) {
if (X_ISNAN(x)) {
return (double)y;
} else if (Y_ISNAN(y)) {
return (double)x;
} else {
return (double)x + (double)y;
}
}
#elif OP == '-'
#define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Sub)
#define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T)
static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) {
#if X_TYPE == 'i'
if (X_ISNAN(x)) return NA_REAL;
#endif
#if Y_TYPE == 'i'
if (Y_ISNAN(y)) return NA_REAL;
#endif
return (double)x - (double)y;
}
#define FUN_narm FUN_no_NA
#elif OP == '*'
#define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Mul)
#define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T)
static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) {
#if X_TYPE == 'i'
if (X_ISNAN(x)) return NA_REAL;
#endif
#if Y_TYPE == 'i'
if (Y_ISNAN(y)) return NA_REAL;
#endif
return (double)x * (double)y;
}
#define FUN_narm CONCAT_MACROS(FUN, METHOD_NAME_T)
static R_INLINE double FUN_narm(X_C_TYPE x, Y_C_TYPE y) {
if (X_ISNAN(x)) {
return (double)y;
} else if (Y_ISNAN(y)) {
return (double)x;
} else {
return (double)x * (double)y;
}
}
#elif OP == '/'
#define METHOD_NAME_T CONCAT_MACROS(METHOD_NAME, Div)
#define FUN_no_NA CONCAT_MACROS(FUN_no_NA, METHOD_NAME_T)
static R_INLINE double FUN_no_NA(X_C_TYPE x, Y_C_TYPE y) {
#if X_TYPE == 'i'
if (X_ISNAN(x)) return NA_REAL;
#endif
#if Y_TYPE == 'i'
if (Y_ISNAN(y)) return NA_REAL;
#endif
return (double)x / (double)y;
}
#define FUN_narm FUN_no_NA
#else
#error "INTERNAL ERROR: Failed to set C inline function FUN(x, y): Unknown OP"
#endif
void METHOD_NAME_T(X_C_TYPE *x, R_xlen_t nrow, R_xlen_t ncol,
Y_C_TYPE *y, R_xlen_t ny,
int byrow, int commute,
int narm, int hasna,
ANS_C_TYPE *ans, R_xlen_t n) {
R_xlen_t kk, xi, yi, nx = nrow * ncol;
R_xlen_t row, col, txi;
double value;
#if ANS_TYPE == 'i'
int ok = 1; /* OK, i.e. no integer overflow yet? */
double R_INT_MIN_d = (double)R_INT_MIN,
R_INT_MAX_d = (double)R_INT_MAX;
#endif
xi = 0;
txi = row = col = 0;
yi = 0;
if (byrow) {
if (commute) {
if (narm) {
for (kk=0; kk < n; kk++) {
value = FUN_narm(y[yi], x[xi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++row >= nrow) { /* Current index in t(x): */
row = 0; /* col = xi / nrow; */
col++; /* row = xi % nrow; */
txi = col; /* txi = row * ncol + col; */
} else {
txi += ncol;
}
yi = txi % ny;
}
} else {
for (kk=0; kk < n; kk++) {
value = FUN_no_NA(y[yi], x[xi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++row >= nrow) { /* Current index in t(x): */
row = 0; /* col = xi / nrow; */
col++; /* row = xi % nrow; */
txi = col; /* txi = row * ncol + col; */
} else {
txi += ncol;
}
yi = txi % ny;
}
}
} else {
if (narm) {
for (kk=0; kk < n; kk++) {
value = FUN_narm(x[xi], y[yi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++row >= nrow) { /* Current index in t(x): */
row = 0; /* col = xi / nrow; */
col++; /* row = xi % nrow; */
txi = col; /* txi = row * ncol + col; */
} else {
txi += ncol;
}
yi = txi % ny;
}
} else {
for (kk=0; kk < n; kk++) {
value = FUN_no_NA(x[xi], y[yi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++row >= nrow) { /* Current index in t(x): */
row = 0; /* col = xi / nrow; */
col++; /* row = xi % nrow; */
txi = col; /* txi = row * ncol + col; */
} else {
txi += ncol;
}
yi = txi % ny;
}
}
}
} else {
if (commute) {
if (narm) {
for (kk=0; kk < n; kk++) {
value = FUN_narm(y[yi], x[xi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++yi >= ny) yi = 0;
}
} else {
for (kk=0; kk < n; kk++) {
value = FUN_no_NA(y[yi], x[xi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++yi >= ny) yi = 0;
}
}
} else {
if (narm) {
for (kk=0; kk < n; kk++) {
value = FUN_narm(x[xi], y[yi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++yi >= ny) yi = 0;
}
} else {
for (kk=0; kk < n; kk++) {
value = FUN_no_NA(x[xi], y[yi]);
#if ANS_TYPE == 'i'
if (ok && (value < R_INT_MIN_d || value > R_INT_MAX_d)) {
ok = 0;
value = NA_REAL;
}
ans[kk] = ISNAN(value) ? NA_INTEGER : (ANS_C_TYPE) value;
#else
ans[kk] = (ANS_C_TYPE) value;
#endif
if (++xi >= nx) xi = 0;
if (++yi >= ny) yi = 0;
}
}
}
} /* if (byrow) */
#if ANS_TYPE == 'i'
/* Warn on integer overflow? */
if (!ok) {
warning("Integer overflow. Detected one or more elements whose absolute values were out of the range [%d,%d] that can be used to for integers. Such values are set to NA_integer_.", R_INT_MIN, R_INT_MAX);
}
#endif
}
#undef FUN
#undef FUN_narm
#undef METHOD_NAME_T
/* Undo template macros */
#include "templates-types_undef.h"
matrixStats/NAMESPACE 0000644 0001751 0000144 00000004667 12542546241 014054 0 ustar hornik users useDynLib("matrixStats")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# IMPORTS
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
importFrom("methods", "setMethod")
importFrom("methods", "setGeneric")
importFrom("methods", "loadMethod")
importFrom("methods", "signature")
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# EXPORTS
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
export("allocVector")
export("allocMatrix")
export("allocArray")
export("allValue")
export("anyMissing")
export("anyValue")
export("binCounts")
export("binMeans")
export("colAlls")
export("colAnys")
export("colAnyMissings")
export("colAvgsPerRowSet")
export("colCollapse")
export("colCounts")
export("colCummins")
export("colCummaxs")
export("colCumprods")
export("colCumsums")
export("colDiffs")
export("colIQRs")
export("colIQRDiffs")
export("colLogSumExps")
export("colMadDiffs")
export("colMads")
export("colMaxs")
export("colMedians")
export("colMins")
export("colOrderStats")
export("colProds")
export("colQuantiles")
export("colRanges")
export("colRanks")
export("colSdDiffs")
export("colSds")
export("colTabulates")
export("colVarDiffs")
export("colVars")
export("colWeightedMads")
export("colWeightedMeans")
export("colWeightedMedians")
export("colWeightedSds")
export("colWeightedVars")
export("count")
export("diff2")
export("indexByRow")
export("iqr")
export("iqrDiff")
export("logSumExp")
export("madDiff")
export("meanOver")
export("product")
export("rowAlls")
export("rowAnys")
export("rowAnyMissings")
export("rowAvgsPerColSet")
export("rowCollapse")
export("rowCounts")
export("rowCummins")
export("rowCummaxs")
export("rowCumprods")
export("rowCumsums")
export("rowDiffs")
export("rowIQRs")
export("rowIQRDiffs")
export("rowLogSumExps")
export("rowMadDiffs")
export("rowMads")
export("rowMaxs")
export("rowMedians")
export("rowMins")
export("rowOrderStats")
export("rowProds")
export("rowQuantiles")
export("rowRanges")
export("rowRanks")
export("rowSdDiffs")
export("rowSds")
export("rowTabulates")
export("rowVarDiffs")
export("rowVars")
export("rowWeightedMads")
export("rowWeightedMeans")
export("rowWeightedMedians")
export("rowWeightedSds")
export("rowWeightedVars")
export("sdDiff")
export("signTabulate")
export("sumOver")
export("x_OP_y")
export("t_tx_OP_y")
export("varDiff")
export("weightedMad")
export("weightedMean")
export("weightedMedian")
export("weightedSd")
export("weightedVar")
matrixStats/NEWS 0000644 0001751 0000144 00000067266 12542546241 013340 0 ustar hornik users Package: matrixStats
====================
Version: 0.14.2 [2015-06-23]
o BUG FIX: x_OP_y() and t_tx_OP_y() would return garbage on
Solaris SPARC (and possibly other architectures as well) when
input was integer and had missing values.
Version: 0.14.1 [2015-06-17]
o BUG FIX: product(x, na.rm=FALSE) for integer 'x' with both zeros
and NAs returned zero rather than NA.
o BUG FIX: weightedMean(x, w, na.rm=TRUE) did not handle missing
values in 'x' properly, if it was an integer. It would also
return NaN if there were weights 'w' with missing values, whereas
stats::weighted.mean() would skip such data points. Now
weightedMean() does the same.
o BUG FIX: (col|row)WeightedMedians() did not handle infinite weights
as weightedMedian() does.
o BUG FIX: x_OP_y(x, y, OP, na.rm=FALSE) returned garbage iff
'x' or 'y' had missing values of type integer.
o BUG FIX: rowQuantiles() and rowIQRs() did not work for single-row
matrices. Analogously for the corresponding column functions.
o BUG FIX: rowCumsums(), rowCumprods() rowCummins(), and rowCummaxs(),
accessed out-of-bound elements for Nx0 matrices where N > 0.
The corresponding column methods has similar memory errors for
0xK matrices where K > 0.
o BUG FIX: anyMissing(list(NULL)) returned NULL; now FALSE.
o BUG FIX: rowCounts() resulted in garbage if a previous column had NAs
(because forgot to update index kk in such cases).
o BUG FIX: rowCumprods(x) handled missing values and zeros incorrectly
for integer 'x (not double); a zero would trump an existing missing
value causing the following cumulative products to become zero.
It was only a zero that trumped NAs; any other integer would work
as expected. Also, this bug was not in colCumprods().
o BUG FIX: rowAnys(x, value, na.rm=FALSE) did not handle missing
values in a numeric 'x' properly. Similarly, for non-numeric
and non-logical 'x', row- and colAnys(), row- and colAlls(),
anyValue() and allValue() did not handle when 'value' was a
missing value.
o All of the above bugs were identified and fixed by Dongcan Jiang
(Peking University, China), who also added corresponding unit tests.
Version: 0.14.0 [2015-02-13]
o ROBUSTNESS/TESTS: Package tests cover 96% of the code (was 91%).
o CONSISTENCY: Renamed argument 'centers' of col- and rowMads() to
'center'. This is consistent with col- and rowVars().
o CONSISTENCY: col- and rowVars() now using na.rm=FALSE as the default
(na.rm=TRUE was mistakenly introduced as the default in v0.9.7).
o SPEEDUP: The check for user interrupts at the C level is now done less
frequently of the functions. It does every k:th iteration, where
k = 2^20, which is tested for using (iter % k == 0). It turns out, at
least with the default compiler optimization settings that I use, that
this test is 3 times faster if k = 2^n where n is an integer. The
following functions checks for user interrupts: logSumExp(),
(col|row)LogSumExps(), (col|row)Medians(),, (col|row)Mads(),
(col|row)Vars(), and (col|row)Cum(Min|Max|prod|sum)s().
o SPEEDUP: logSumExp(x) is now faster if 'x' does not contain any
missing values. It is also faster if all values are missing or
the maximum value is +Inf - in both cases it can skip the actual
summation step.
o BUG FIX: all() and any() flavored methods on non-numeric and
non-logical (e.g. character) vectors and matrices with na.rm=FALSE
did not give results consistent with all() and any() if there were
missing values. For example, with x <- c("a", NA, "b") we have
all(x == "a") == FALSE and any(x == "a") == TRUE whereas our
corresponding methods would return NA in those cases. The methods
fixed are allValue(), anyValue(), col- and rowAlls(), and
col- and rowAnys(). Added more package tests to cover these cases.
o BUG FIX: Now logSumExp(x, na.rm=TRUE) would return NA if all values
were NA and length(x) > 1. Now it returns -Inf for all length(x):s.
o CLEANUP: anyMissing() is no longer an S4 generic. This was done as
part of the migration of making all functions of matrixStats plain
R functions, which minimizes calling overhead and it will also allow
us to drop 'methods' from the package dependencies. I've scanned
all CRAN and Bioconductor packages depending on matrixStats and
none of them relied on anyMissing() dispatching on class, so
hopefully this move has little impact. The only remaining S4 methods
are now colMedians() and rowMedians().
o CLEANUP: Package no longer depends on R.methodsS3.
Version: 0.13.1 [2015-01-21]
o BUG FIX: diff2() with differences >= 3 would *read* spurious values
beyond the allocated memory. This error, introduced in 0.13.0, was
harmless in the sense that the returned value was unaffected and still
correct. Thanks to Brian Ripley and the CRAN check tools for catching
this. I could reproduce it locally with 'valgrind'.
Version: 0.13.0 [2015-01-20]
o Added iqrDiff() and (col|row)IqrDiffs().
o CONSISTENCY: Now rowQuantiles(x, na.rm=TRUE) returns all NAs for rows
with missing values. Analogously for colQuantiles(), colIQRs(),
rowIQRs() and iqr(). Previously, all these functions gave an error
saying missing values are not allowed.
o SPEEDUP: (col|row)Diffs() are now implemented in native code
and notably faster than diff() for matrices.
o SPEEDUP: Added diff2(), which is notably faster than base::diff()
for vectors, which it is designed for.
o DOCUMENTATION: Added vignette summarizing available functions.
o COMPLETENESS: Added corresponding "missing" vector functions for
already existing column and row functions. Similarly, added
"missing" column and row functions for already existing vector
functions, e.g. added iqr() and count() to complement already
existing (col|row)IQRs() and (col|row)Counts() functions.
o SPEEDUP: Made binCounts() and binMeans() a bit faster.
o SPEEDUP: Added count(x, value) which is a notably faster
than sum(x == value). This can also be used to count missing
values etc. Also, added allValue() and anyValue() for
all(x == value) and any(x == value).
o SPEEDUP: Implemented weightedMedian() in native code, which made
it ~3-10 times faster. Dropped support for ties="both", because
it would have to return two values in case of ties, which made
the API unnecessarily complicated. If really needed, then call
the function twice with ties="min" and ties="max".
o SPEEDUP: Added weightedMean(), which is ~10 times faster than
stats::weighted.mean().
o SPEEDUP: (col|row)Anys() and (col|row)Alls() is now notably
faster compared to previous versions.
o SPEEDUP/CLEANUP: Turned several S3 and S4 methods into plain R
functions, which decreases the overhead of calling the functions.
After this there are no longer any S3 methods. Remaining
S4 methods are anyMissing() and rowMedians().
o ROBUSTNESS: Now column and row methods give slightly more informative
error messages if a data.frame is passed instead of a matrix.
o CLEANUP: In the effort of migrating anyMissing() into a plain R
function, the specific anyMissing() implementations for data.frame:s
and and list:s were dropped and is now handled by anyMissing()
for "ANY", which is the only S4 method remaining now. In a near
future release, this remaining "ANY" method will turned into a plain
R function and the current S4 generic will be dropped. We know of
know CRAN and Bioconductor packages that relies on it being a generic
function. Note also that since R (>= 3.1.0) there is a base::anyNA()
function that does the exact same thing making anyMissing() obsolete.
o BUG FIX: weightedMedian(..., ties="both") would give an error
if there was a tie. Added package test for this case.
Version: 0.12.2 [2014-12-07]
o CODE FIX: The native code for product() on integer vector
incorrectly used C-level abs() on intermediate values despite
those being doubles requiring fabs(). Despite this, the calculated
product would still be correct (at least when validated on several
local setups as well as on the CRAN servers). Again, thanks to
Brian Ripley for pointing out another invalid integer-double
coersion at the C level.
Version: 0.12.1 [2014-12-06]
o ROBUSTNESS: Updated package tests to check methods in more
scenarios, especially with both integer and numeric input data.
o BUG FIX: (col|row)Cumsums(x) where 'x' is integer would return
garbage for columns (rows) containing missing values.
o BUG FIX: rowMads(x) where 'x' is numeric (not integer) would give
incorrect results for rows that had an *odd* number of values
(no ties). Analogously issues with colMads(). Added package tests
for such cases too. Thanks to Brian Ripley and the CRAN check tools
for (yet again) catching another coding mistake. Details: This was
because the C-level calculation of the absolute value of residuals
toward the median would use integer-based abs() rather than double-
based fabs(). Now it fabs() is used when the values are double and
abs() when they are integers.
Version: 0.12.0 [2014-12-05]
o Submitted to CRAN.
Version: 0.11.9 [2014-11-26]
o Added (col|row)Cumsums(), (col|row)Cumprods(), (col|row)Cummins(),
and (col|row)Cummaxs().
o BUG FIX: (col|row)WeightedMeans() with all zero weights gave
mean estimates with values 0 instead of NaN.
Version: 0.11.8 [2014-11-25]
o SPEEDUP: Implemented (col|row)Mads(), (col|row)Sds() and
(col|row)Vars() in native code.
o SPEEDUP: Made (col|row)Quantiles(x) faster for 'x' without
missing values (and default type=7L quantiles). It should
still be implemented in native code.
o SPEEDUP: Made rowWeightedMeans() faster.
o BUG FIX: (col|row)Medians(x) when 'x' is integer would give
invalid median values in case (a) it was calculated as the mean
of two values ("ties"), and (b) the sum of those values where
greater than .Machine$integer.max. Now such ties are calculated
using floating point precision. Add lots of package tests.
Version: 0.11.6 [2014-11-16]
o SPEEDUP: Now (col|row)Mins(), (col|row)Maxs() and (col|row)Ranges()
are implemented in native code providing a significant speedup.
o SPEEDUP: Now colOrderStats() also is implemented in native code,
which indirectly makes colMins(), colMaxs() and colRanges() faster.
o SPEEDUP: colTabulates(x) no longer uses rowTabulates(t(x)).
o SPEEDUP: colQuantiles(x) no longer uses rowQuantiles(t(x)).
o CLEANUP: Argument 'flavor' of (col|row)Ranks() is now ignored.
Version: 0.11.5 [2014-11-15]
o SPEEDUP: Now colCollapse(x) no longer utilizes rowCollapse(t(x)).
Added package tests for (col|row)Collapse().
o SPEEDUP: Now colDiffs(x) no longer uses rowDiffs(t(x)).
Added package tests for (col|row)Diffs().
o SPEEDUP: Package no longer utilizes match.arg() due to
its overhead; methods sumOver(), (col|row)Prods() and
(col|row)Ranks() were updated.
o (col|row)Prods() now uses default method="direct" (was "expSumLog").
Version: 0.11.4 [2014-11-14]
o Added support for vector input to several of the row- and column methods
as long as the "intended" matrix dimension is specified via argument
'dim'. For instance, rowCounts(x, dim=c(nrow, ncol)) is the same
as rowCounts(matrix(x, nrow, ncol)), but more efficient since it
avoids creating/allocating a temporary matrix.
o SPEEDUP: Now colCounts() is implemented in native code. Moreover,
(col|row)Counts() are now also implemented in native code for
logical input (previously only for integer and double input).
Added more package tests and benchmarks for these functions.
Version: 0.11.3 [2014-11-11]
o Turned sdDiff(), madDiff(), varDiff(), weightedSd(), weightedVar()
and weightedMad() into plain functions (were generic functions).
o Removed unnecessary usage of '::'.
Version: 0.11.2 [2014-11-09]
o SPEEDUP: Implemented indexByRow() in native code and it is no longer
a generic function, but a regular function, which is also faster to
call. The first argument of indexByRow() has been changed to 'dim'
such that one should use indexByRow(dim(X)) instead of indexByRow(X)
as in the past. The latter form is still supported, but deprecated.
o Added allocVector(), allocMatrix() and allocArray() for faster
allocation numeric vectors, matrices and arrays, particularly when
filled with non-missing values.
Version: 0.11.1 [2014-11-07]
o Better support for long vectors.
o ROBUSTNESS: Although unlikely, with long vectors support for binCounts()
and binMeans() it is possible that a bin gets a higher count than what
can be represented by an R integer (.Machine$integer.max=2^31-1). If
that happens, an informative warning is generated and the bin count is
set to .Machine$integer.max. If this happens for binMeans(), the
corresponding mean is still properly calculated and valid.
o PRECISION: Using greater floating-point precision in more internal
intermediate calculations, where possible.
o CLEANUP: Cleanup and harmonized the internal C API such there are two
well defined API levels. The high-level API is called by R via .Call()
and takes care of most of the argument validation and construction of
the return value. This function dispatch to functions in the low-level
API based on data type(s) and other arguments. The low-level API is
written to work with basic C data types only.
o BUG FIX: Package incorrectly redefined R_xlen_t on R (>= 3.0.0) systems
where LONG_VECTOR_SUPPORT is not supported.
Version: 0.11.0 [2014-11-02]
o Added sumOver() and meanOver(), which are notably faster versions
of sum(x[idxs]) and mean(x[idxs]). Moreover, instead of having to do
sum(as.numeric(x)) to avoid integer overflow when 'x' is an integer
vector, one can do sumOver(x, mode="numeric"), which avoids the extra
copy created when coercing to numeric (this numeric copy is also twice
as large as the integer vector). Added package tests and benchmark
reports for these functions.
Version: 0.10.4 [2014-11-01]
o SPEEDUP: Made anyMissing(), logSumExp(), (col|row)Medians(),
(col|row)Counts() slightly faster by making the native code assign
the results directly to the native vector instead of to the R vector,
e.g. ansp[i] = v where ansp=REAL(ans) instead of REAL(ans)[i] = v.
o Added benchmark reports for anyMissing() and logSumExp().
Version: 0.10.3 [2014-10-01]
o BUG FIX: binMeans() returned 0.0 instead of NA_real_ for empty bins.
Version: 0.10.2 [2014-09-01]
o BUG FIX: On some systems, the package failed to build on R (<= 2.15.3)
with compilation error: "redefinition of typedef 'R_xlen_t'".
Version: 0.10.1 [2014-06-09]
o Added benchmark reports for also non-matrixStats functions col/rowSums()
and col/rowMeans().
o Now all colNnn() and rowNnn() methods are benchmarked in a combined
report making it possible to also compare colNnn(x) with rowNnn(t(x)).
Version: 0.10.0 [2014-06-07]
o BUG FIX: The package tests for product() incorrectly assumed that the
value of prod(c(NaN, NA)) is uniquely defined. However, as documented
in help("is.nan"), it may be NA or NaN depending on R system/platform.
o Relaxed some packages tests such that they assert numerical
correctness via all.equal() rather than identical().
o Submitted to CRAN.
Version: 0.9.7 [2014-06-05]
o BUG FIX: Introduced a bug in v0.9.5 causing col- and rowVars() and
hence also col- and rowSds() to return garbage. Add package tests
for these now.
o Submitted to CRAN.
Version: 0.9.6 [2014-06-04]
o SPEEDUP: Now col- and rowProds() utilizes new product() function.
o SPEEDUP: Added product() for calculating the product of a numeric
vector via the logarithm.
o Added signTabulate() for tabulating the number of negatives, zeros,
positives and missing values. For doubles, the number of negative
and positive infinite values are also counted.
Version: 0.9.5 [2014-06-04]
o Added argument 'method' to col- and rowProds() for controlling how
the product is calculated.
o SPEEDUP: Package is now byte compiled.
o SPEEDUP: Made weightedMedian() a plain function (was an S3 method).
o SPEEDUP: Made rowProds() and rowTabulates() notably faster.
o SPEEDUP: Now rowCounts(), rowAnys(), rowAlls() and corresponding
column methods can search for any value in addition to the
default TRUE. The search for a matching integer or double value
is done in native code, which is notably faster (and more
memory efficient because it avoids creating any new objects).
o SPEEDUP: Made colVars() and colSds() notably faster and
rowVars() and rowSds() a slightly bit faster.
o SPEEDUP: Turned more S4 methods into S3 methods, e.g. rowCounts(),
rowAlls(), rowAnys(), rowTabulates() and rowCollapse().
o Added benchmark reports, e.g. matrixStats:::benchmark('colMins').
o CLEANUP: Now only exporting plain functions and generic functions.
Version: 0.9.4 [2014-05-23]
o SPEEDUP: Turned several S4 methods into S3 methods, e.g.
indexByRow(), madDiff(), sdDiff() and varDiff().
Version: 0.9.3 [2014-04-26]
o Added argument 'trim' to madDiff(), sdDiff() and varDiff().
Version: 0.9.2 [2014-04-04]
o BUG FIX: The native code of binMeans(x, bx) would try to access
an out-of-bounds value of argument 'y' iff 'x' contained elements
that are left of all bins in 'bx'. This bug had no impact on the
results and since no assignment was done it should also not crash/
core dump R. This was discovered thanks to new memtests (ASAN and
valgrind) provided by CRAN.
Version: 0.9.1 [2014-03-31]
o BUG FIX: rowProds() would throw "Error in rowSums(isNeg) : 'x' must
be an array of at least two dimensions" on matrices where all rows
contained at least on zero. Thanks to Roel Verbelen at KU Leuven
for the report.
Version: 0.9.0 [2014-03-26]
o Added weighedVar() and weightedSd().
Version: 0.8.14 [2013-11-23]
o MEMORY: Updated all functions to do a better job of cleaning out
temporarily allocated objects as soon as possible such that the
garbage collector can remove them sooner, iff wanted. This
increase the chance for a smaller memory footprint.
o Submitted to CRAN.
Version: 0.8.13 [2013-10-08]
o Added argument 'right' to binCounts() and binMeans() to specify
whether binning should be done by (u,v] or [u,v). Added system
tests validating the correctness of the two cases.
o Bumped up package dependencies.
Version: 0.8.12 [2013-09-26]
o SPEEDUP: Now utilizing anyMissing() everywhere possible.
Version: 0.8.11 [2013-09-21]
o ROBUSTNESS: Now importing 'loadMethod' from 'methods' package such
that 'matrixStats' S4-based methods also work when 'methods' is
not loaded, e.g. when 'Rscript' is used, cf. Section 'Default
packages' in 'R Installation and Administration'.
o ROBUSTNESS: Updates package system tests such that the can run
with only the 'base' package loaded.
Version: 0.8.10 [2013-09-15]
o CLEANUP: Now only importing two functions from the 'methods' package.
o Bumped up package dependencies.
Version: 0.8.9 [2013-08-29]
o CLEANUP: Now the package startup message acknowledges argument
'quietly' of library()/require().
Version: 0.8.8 [2013-07-29]
o DOCUMENTATION: The dimension of the return value was swapped
in help("rowQuantiles").
Version: 0.8.7 [2013-07-28]
o SPEEDUP: Made (col|row)Mins() and (col|row)Maxs() much faster.
o BUG FIX: rowRanges(x) on an Nx0 matrix would give an error.
Same for colRanges(x) on an 0xN matrix. Added system tests
for these and other special cases.
Version: 0.8.6 [2013-07-20]
o Forgot to declare S3 methods (col|row)WeightedMedians().
o Bumped up package dependencies.
Version: 0.8.5 [2013-05-25]
o Minor speedup of (col|row)Tabulates() by replacing rm() calls
with NULL assignments.
Version: 0.8.4 [2013-05-20]
o CRAN POLICY: Now all Rd \usage{} lines are at most 90 characters long.
Version: 0.8.3 [2013-05-10]
o SPEEDUP: binCounts() and binMeans() now uses Hoare's Quicksort
for presorting 'x' before counting/averaging. They also no longer
test in every iteration (=for every data point) whether the last
bin has been reached or not, but only after completing a bin.
Version: 0.8.2 [2013-05-02]
o DOCUMENTATION: Minor corrections and updates to help pages.
Version: 0.8.1 [2013-05-02]
o BUG FIX: Native code of logSumExp() used an invalid check for
missing value of an integer argument. Detected by Brian Ripley
upon CRAN submission.
Version: 0.8.0 [2013-05-01]
o Added logSumExp(lx) and (col|row)LogSumExps(lx) for accurately
computing of log(sum(exp(lx))) for standalone vectors, and
row and column vectors of matrices. Thanks to Nakayama (Japan)
for the suggestion and contributing a draft in R.
Version: 0.7.1 [2013-04-23]
o Added argument 'preserveShape' to colRanks(). For backward
compatibility the default is preserveShape=FALSE, but it may
change in the future.
o BUG FIX: Since v0.6.4, (col|row)Ranks() gave the incorrect
results for integer matrices with missing values.
o BUG FIX: Since v0.6.4, (col|row)Medians() for integers would
calculate ties as floor(tieAvg).
Version: 0.7.0 [2013-01-14]
o Now (col|row)Ranks() support "max" (default), "min" and
"average" for argument 'ties.method'. Added system tests
validation these cases. Thanks Peter Langfelder (UCLA) for
contributing this.
Version: 0.6.4 [2013-01-13]
o Added argument 'ties.method' to rowRanks() and colRanks(), but
still only support for "max" (as before).
o ROBUSTNESS: Lots of cleanup of the internal/native code. Native
code for integer and double cases have been harmonized and are
now generated from a common code template. This was inspired by
code contributions from Peter Langfelder (UCLA).
Version: 0.6.3 [2013-01-13]
o Added anyMissing() for data type 'raw', which always returns FALSE.
o ROBUSTNESS: Added system test for anyMissing().
o ROBUSTNESS: Now S3 methods are declared in the namespace.
Version: 0.6.2 [2012-11-15]
o CRAN POLICY: Made example(weightedMedian) faster.
Version: 0.6.1 [2012-10-10]
o BUG FIX: In some cases binCounts() and binMeans() could try to go
past the last bin resulting a core dump.
o BUG FIX: binCounts() and binMeans() would return random/garbage
values for bins that were beyond the last data point.
Version: 0.6.0 [2012-10-04]
o Added binMeans() for fast sample-mean calculation in bins.
Thanks to Martin Morgan at the Fred Hutchinson Cancer Research
Center, Seattle, for contributing the core code for this.
o Added binCounts() for fast element counting in bins.
Version: 0.5.3 [2012-09-10]
o CRAN POLICY: Replaced the .Internal(psort(...)) call with
a call to a new internal partial sorting function, which
utilizes the native rPsort() part of the R internals.
Version: 0.5.2 [2012-07-02]
o Updated package dependencies to match CRAN.
Version: 0.5.1 [2012-06-25]
o GENERALIZATION: Now (col|row)Prods() handle missing values.
o BUG FIX: In certain cases, (col|row)Prods() would return NA instead
of 0 for some elements. Added a redundancy test for the case.
Thanks Brenton Kenkel at University of Rochester for reporting on this.
o Now this package only imports methods.
Version: 0.5.0 [2012-04-16]
o Added weightedMad() from aroma.core v2.5.0.
o Added weightedMedian() from aroma.light v1.25.2.
o This package no longer depends on the aroma.light package for
any of its functions.
o Now this package only imports R.methodsS3, meaning it no
longer loads R.methodsS3 when it is loaded.
Version: 0.4.5 [2012-03-19]
o Updated the default argument 'centers' of rowMads()/colMads()
to explicitly be (col|row)Medians(x,...). The default
behavior has not changed.
Version: 0.4.4 [2012-03-05]
o BUG FIX: colMads() would return the incorrect estimates. This bug
was introduced in matrixStats v0.4.0 (2011-11-11).
o ROBUSTNESS: Added system/redundancy tests for rowMads()/colMads().
o CRAN: Made the system tests "lighter" by default, but full tests
can still be run, cf. tests/*.R scripts.
Version: 0.4.3 [2011-12-11]
o BUG FIX: rowMedians(..., na.rm=TRUE) did not handle NaN (only NA).
The reason for this was the the native code used ISNA() to test
for NA and NaN, but it should have been ISNAN(), which is opposite
to how is.na() and is.nan() at the R level work. Added system
tests for this case.
Version: 0.4.2 [2011-11-29]
o Added rowAvgsPerColSet() and colAvgsPerRowSet().
Version: 0.4.1 [2011-11-25]
o Added help pages with an example to rowIQRs() and colIQRs().
o Added example to rowQuantiles().
o BUG FIX: rowIQRs() and colIQRs() would return the 25% and the 75%
quantiles, not the difference between them. Thanks Pierre Neuvial
at CNRS, Evry, France for the report.
Version: 0.4.0 [2011-11-11]
o Added rowRanks() and colRanks(). Thanks Hector Corrada Bravo
(University of Maryland) and Harris Jaffee (John Hopkins).
o Dropped the previously introduced expansion of 'center' in rowMads()
and colMads(). It added unnecessary overhead if not needed.
Version: 0.3.0 [2011-10-13]
o SPEEDUP/LESS MEMORY: colMedians(x) no longer uses rowMedians(t(x));
instead there is now an optimized native-code implementation.
Also, colMads() utilizes the new colMedians() directly.
This improvement was kindly contributed by Harris Jaffee at
Biostatistics of John Hopkins, USA.
o Added additional unit tests for colMedians() and rowMedians().
Version: 0.2.2 [2010-10-06]
o Now the result of (col|row)Quantiles() contains column names.
Version: 0.2.1 [2010-04-05]
o Added a startup message when package is loaded.
o CLEAN UP: Removed obsolete internal .First.lib() and .Last.lib().
Version: 0.2.0 [2010-03-30]
o DOCUMENTATION: Fixed some incorrect cross references.
Version: 0.1.9 [2010-02-03]
o BUG FIX: (col|row)WeightedMeans(..., na.rm=TRUE) would incorrectly
treat missing values as zeros. Added corresponding redundancy tests
(also for the median case). Thanks Pierre Neuvial for reporting this.
Version: 0.1.8 [2009-11-13]
o BUG FIX: colRanges(x) would return a matrix of wrong dimension
if 'x' did not have any missing values. This would affect all
functions relying on colRanges(), e.g. colMins() and colMaxs().
Added a redundancy test for this case. Thanks Pierre Neuvial
at UC Berkeley for reporting this.
o BUG FIX: (col|row)Ranges() return a matrix with dimension names.
Version: 0.1.7 [2009-06-20]
WORKAROUND: Cannot use "%#x" in rowTabulates() when creating the column
names of the result matrix. It gave an error OSX with R v2.9.0 devel
(2009-01-13 r47593b) current the OSX server at R-forge.
Version: 0.1.6 [2009-06-17]
o Updated the Rdoc example for rowWeightedMedians() to run conditionally
on aroma.light, which is only a suggested package - not a required one.
This in order to prevent R CMD check to fail on CRAN, which prevents
it for building binaries (as it currently happens on their OSX servers).
Version: 0.1.5 [2009-02-04]
o BUG FIX: For some errors in rowOrderStats(), the stack would not
become UNPROTECTED before calling error.
Version: 0.1.4 [2009-02-02]
o Added methods (col|row)Weighted(Mean|Median)s() for weighted averaging.
o Added more Rdoc comments.
o Package passes R CMD check flawlessly.
Version: 0.1.3 [2008-07-30]
o Added (col|row)Tabulates() for integer and raw matrices.
o BUG FIX: rowCollapse(x) was broken and returned the wrong elements.
Version: 0.1.2 [2008-04-13]
o Added (col|row)Collapse().
o Added varDiff(), sdDiff() and madDiff().
o Added indexByRow().
Version: 0.1.1 [2008-03-25]
o Added (col|row)OrderStats().
o Added (col|row)Ranges() and (col|row)(Min|Max)s().
o Added colMedians().
o Now anyMissing() support most data types as structures.
Version: 0.1.0 [2007-11-26]
o Imported the rowNnn() methods from Biobase.
o Created.
matrixStats/R/ 0000755 0001751 0000144 00000000000 12542546242 013022 5 ustar hornik users matrixStats/R/signTabulate.R 0000644 0001751 0000144 00000002055 12542546242 015571 0 ustar hornik users ############################################################################/**
# @RdocFunction signTabulate
# @alias signTabulate
#
# @title "Calculates the number of negative, zero, positive and missing values"
#
# @synopsis
#
# \description{
# @get "title" in a @numeric vector. For @double vectors, the number of
# negative and positive infinite values are also counted.
# }
#
# \arguments{
# \item{x}{a @numeric @vector.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @named @numeric @vector.
# }
#
# \seealso{
# @see "base::sign".
# }
#
# @author "HB"
#
# @keyword internal
#*/############################################################################
signTabulate <- function(x, ...) {
res <- .Call("signTabulate", x, PACKAGE="matrixStats");
names(res) <- c("-1", "0", "+1", "NA", "-Inf", "+Inf")[1:length(res)];
res;
} # signTabulate()
############################################################################
# HISTORY:
# 2014-06-04 [HB]
# o Created.
############################################################################
matrixStats/R/pkgStartupMessage.R 0000644 0001751 0000144 00000003325 12542546242 016621 0 ustar hornik users ## covr: skip=all
pkgStartupMessage <- function(..., quietly=NA) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Infer 'quietly' from argument 'argument' in library() call?
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.na(quietly)) {
quietly <- FALSE
# Just in case the below won't work one day due to R updates...
tryCatch({
# The default, if not found
quietly <- formals(base::library)$quietly
# Identify the environment/frame of interest by making sure
# it at least contains all the arguments of source().
argsToFind <- names(formals(base::library))
# Scan the call frames/environments backwards...
srcfileList <- list()
for (ff in sys.nframe():0) {
env <- sys.frame(ff)
# Does the environment look like a library() environment?
exist <- sapply(argsToFind, FUN=exists, envir=env, inherits=FALSE)
if (!all(exist)) {
# Nope, then skip to the next one
next
}
# Was argument 'quietly' specified?
missing <- eval(expression(missing(quietly)), envir=env)
if (!missing) {
quietly <- get("quietly", envir=env, inherits=FALSE)
break
}
# ...otherwise keep searching due to nested library() calls.
} # for (ff ...)
}, error = function() {})
} # if (is.na(quietly)
# Output message?
if (!quietly) {
packageStartupMessage(...)
}
}
############################################################################
# HISTORY:
# 2015-01-27
# o Copied from R.methodsS3. Here it will only be used internally.
############################################################################
matrixStats/R/diff2.R 0000644 0001751 0000144 00000003001 12542546242 014131 0 ustar hornik users ############################################################################/**
# @RdocFunction diff2
#
# @title "Fast lagged differences"
#
# @synopsis
#
# \description{
# Computes the lagged and iterated differences.
# }
#
# \arguments{
# \item{x}{A @numeric @vector of length N.}
# \item{lag}{An @integer specifying the lag.}
# \item{differences}{An @integer specifying the order of difference.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length N - \code{differences}.
# }
#
# @examples "../incl/diff2.Rex"
#
# \seealso{
# @see "base::diff".
# }
#
# @author
#
# @keyword univar
# @keyword internal
#*/############################################################################
diff2 <- function(x, lag=1L, differences=1L, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'lag':
if (length(lag) != 1L) {
stop("Argument 'lag' is not a scalar: ", length(lag))
}
# Argument 'differences':
if (length(differences) != 1L) {
stop("Argument 'differences' is not a scalar: ", length(differences))
}
lag <- as.integer(lag)
differences <- as.integer(differences)
.Call("diff2", x, lag, differences, PACKAGE="matrixStats");
} # diff2()
############################################################################
# HISTORY:
# 2014-12-29
# o Created.
############################################################################
matrixStats/R/rowRanks.R 0000644 0001751 0000144 00000012056 12542546242 014757 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowRanks
# @alias colRanks
#
# @title "Gets the rank of each row (column) of a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowRanks
# @usage colRanks
# }
#
# \arguments{
# \item{x}{A @numeric or @integer NxK @matrix.}
# \item{ties.method}{A @character string specifying how ties are treated.
# For details, see below.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{preserveShape}{A @logical specifying whether the @matrix
# returned should preserve the input shape of \code{x}, or not.}
# \item{...}{Not used.}
# }
#
# \value{
# An @integer @matrix is returned.
# The \code{rowRanks()} function always returns an NxK @matrix,
# where N (K) is the number of rows (columns) whose ranks are calculated.
#
# The \code{colRanks()} function returns an NxK @matrix,
# if \code{preserveShape = TRUE}, otherwise a KxN @matrix.
#
# %% The mode of the returned matrix is @integer, except for
# %% \code{ties.method == "average"} when it is @double.
# }
#
# \details{
# The row ranks of \code{x} are collected as \emph{rows}
# of the result matrix.
#
# The column ranks of \code{x} are collected as \emph{rows}
# if \code{preserveShape = FALSE}, otherwise as \emph{columns}.
#
# The implementation is optimized for both speed and memory.
# To avoid coercing to @doubles (and hence memory allocation), there
# is a unique implementation for @integer matrices.
# It is more memory efficient to do
# \code{colRanks(x, preserveShape=TRUE)} than
# \code{t(colRanks(x, preserveShape=FALSE))}.
#
# Any @names of \code{x} are ignored and absent in the result.
# }
#
# \section{Missing and non- values}{
# These are ranked as \code{NA}, as with \code{na.last="keep"}
# in the @see "base::rank" function.
# }
#
# \section{Ties}{
# When some values are equal ("ties"), argument \code{ties.method}
# specifies what their ranks should be.
# If \code{ties.method} is \code{"max"}, ties
# are ranked as the maximum value.
# If \code{ties.method} is \code{"average"}, ties are ranked
# by their average.
# If \code{ties.method} is \code{"max"} (\code{"min"}), ties
# are ranked as the maximum (minimum) value.
# If \code{ties.method} is \code{"average"}, ties are ranked
# by their average.
# For further details, see @see "base::rank".
# }
#
# \author{
# Hector Corrada Bravo and Harris Jaffee.
# Peter Langfelder for adding 'ties.method' support.
# Henrik Bengtsson adapted the original native implementation
# of \code{rowRanks()} from Robert Gentleman's \code{rowQ()}
# in the \pkg{Biobase} package.
# }
#
# \seealso{
# @see "base::rank".
# For developers, see also Section 'Utility functions' in
# 'Writing R Extensions manual', particularly the native functions
# \code{R_qsort_I()} and \code{R_qsort_int_I()}.
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowRanks <- function(x, ties.method=c("max", "average", "min"), dim.=dim(x), ...) {
# Argument 'ties.method':
ties.method <- ties.method[1L]
if (is.element("flavor", names(list(...)))) {
.Deprecated(old="Argument 'flavor' of rowRanks()", package="matrixStats")
}
tiesMethod <- charmatch(ties.method, c("max", "average", "min"), nomatch=0L)
if (tiesMethod == 0L) {
stop("Unknown value of argument 'ties.method': ", ties.method)
}
dim. <- as.integer(dim.)
# byrow=TRUE
.Call("rowRanksWithTies", x, dim., tiesMethod, TRUE, PACKAGE="matrixStats")
}
colRanks <- function(x, ties.method=c("max", "average", "min"), dim.=dim(x), preserveShape=FALSE, ...) {
# Argument 'ties.method':
ties.method <- ties.method[1L]
if (is.element("flavor", names(list(...)))) {
.Deprecated(old="Argument 'flavor' of rowRanks()", package="matrixStats")
}
# Argument 'preserveShape'
preserveShape <- as.logical(preserveShape)
tiesMethod <- charmatch(ties.method, c("max", "average", "min"), nomatch=0L)
if (tiesMethod == 0L) {
stop("Unknown value of argument 'ties.method': ", ties.method)
}
dim. <- as.integer(dim.)
# byrow=FALSE
y <- .Call("rowRanksWithTies", x, dim., tiesMethod, FALSE, PACKAGE="matrixStats")
if (!preserveShape) y <- t(y)
y
}
############################################################################
# HISTORY:
# 2014-12-17 [HB]
# o CLEANUP: Made col- and rowRanks() plain R functions.
# 2014-11-15 [HB]
# o SPEEDUP: No longer using match.arg() due to its overhead.
# 2013-04-23 [HB]
# o Added argument 'preserveShape' to colRanks(), cf. private email
# 'row- and colRanks in package matrixStats' on 2012-10-05 until
# 2013-02-28.
# 2013-01-14 [HB]
# o Added internal support for rowRanks() with ties "max", "min" and
# "average".
# 2011-11-11 [HB]
# o Added '...' to generic functions rowRanks() and colRanks().
# 2011-10-17 [HJ]
# o Added rowRanks and colRanks().
############################################################################
matrixStats/R/rowCollapse.R 0000644 0001751 0000144 00000004563 12542546242 015447 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowCollapse
# @alias colCollapse
#
# @title "Extracts one cell per row (column) from a matrix"
#
# \description{
# @get "title".
# The implementation is optimized for memory and speed.
# }
#
# \usage{
# @usage rowCollapse
# @usage colCollapse
# }
#
# \arguments{
# \item{x}{An NxK @matrix.}
# \item{idxs}{An index @vector of (maximum) length N (K) specifying the
# columns (rows) to be extracted.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @vector of length N (K).
# }
#
# @examples "../incl/rowCollapse.Rex"
#
# @author "HB"
#
# \seealso{
# \emph{Matrix indexing} to index elements in matrices and arrays,
# cf. @see "base::[".
# }
#
# @keyword utilities
#*/###########################################################################
rowCollapse <- function(x, idxs, dim.=dim(x), ...) {
# Argument 'idxs':
idxs <- rep(idxs, length.out=dim.[1L])
# Columns of interest
cols <- 0:(dim.[2L]-1L)
cols <- cols[idxs]
# Calculate column-based indices
idxs <- dim.[1L] * cols + seq_len(dim.[1L])
cols <- NULL # Not needed anymore
x[idxs]
}
colCollapse <- function(x, idxs, dim.=dim(x), ...) {
# Argument 'idxs':
idxs <- rep(idxs, length.out=dim.[2L])
# Rows of interest
rows <- seq_len(dim.[1L])
rows <- rows[idxs]
# Calculate column-based indices
idxs <- dim.[1L] * 0:(dim.[2L]-1L) + rows
rows <- NULL # Not needed anymore
x[idxs]
}
############################################################################
# HISTORY:
# 2014-12-19 [HB]
# o CLEANUP: Made col- and rowCollapse() plain R functions.
# 2014-11-15
# o SPEEDUP: Made calculation of colOffsets faster.
# o SPEEDUP: Now colCollapse(x) no longer utilizes rowCollapse(t(x)).
# 2014-06-02
# o Made rowCollapse() an S3 method (was S4).
# 2013-11-23
# o MEMORY: rowCollapse() does a better job cleaning out allocated
# objects sooner.
# 2008-06-13
# o BUG FIX: rowCollapse(x) was broken and returned the wrong elements.
# 2008-04-13
# o Added Rdocs.
# o Added colCollapse().
# 2007-10-21
# o Created.
############################################################################
matrixStats/R/weightedMean.R 0000644 0001751 0000144 00000004740 12542546242 015553 0 ustar hornik users ############################################################################/**
# @RdocFunction weightedMean
#
# \encoding{latin1}
#
# @title "Weighted Arithmetic Mean"
#
# @synopsis
#
# \description{
# Computes the weighted sample mean of a numeric vector.
# }
#
# \arguments{
# \item{x}{a @numeric @vector containing the values whose weighted mean is
# to be computed.}
# \item{w}{a vector of weights the same length as \code{x} giving the weights
# to use for each element of \code{x}. Negative weights are treated
# as zero weights. Default value is equal weight to all values.}
# \item{na.rm}{a logical value indicating whether @NA values in
# \code{x} should be stripped before the computation proceeds,
# or not. If @NA, no check at all for @NAs is done.
# Default value is @NA (for efficiency).}
# \item{refine}{If @TRUE and \code{x} is @numeric, then extra effort is
# used to calculate the average with greater numerical precision,
# otherwise not.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# If \code{x} is of zero length, then \code{NaN} is returned,
# which is consistent with @see "base::mean".
# }
#
# @examples "../incl/weightedMean.Rex"
#
# \section{Missing values}{
# This function handles missing values consistently
# @see "stats::weighted.mean". More precisely,
# if \code{na.rm=FALSE}, then any missing values in either \code{x}
# or \code{w} will give result \code{NA_real_}.
# If \code{na.rm=TRUE}, then all \code{(x,w)} data points for which
# \code{x} is missing are skipped. Note that if both \code{x} and
# \code{w} are missing for a data points, then it is also skipped
# (by the same rule). However, if only \code{w} is missing, then
# the final results will always be \code{NA_real_} regardless of
# \code{na.rm}.
# }
#
# \seealso{
# @see "base::mean" and @see "stats::weighted.mean".
# }
#
# @author
#
# @keyword "univar"
# @keyword "robust"
#*/############################################################################
weightedMean <- function(x, w, na.rm=FALSE, refine=FALSE, ...) {
# Argument 'refine':
refine <- as.logical(refine)
w <- as.numeric(w)
.Call("weightedMean", x, w, na.rm, refine, PACKAGE="matrixStats")
} # weightedMean()
###############################################################################
# HISTORY:
# 2014-12-08
# o Created.
###############################################################################
matrixStats/R/rowProds.R 0000644 0001751 0000144 00000007643 12542546242 014776 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowProds
# @alias rowProds
# @alias colProds
# @alias product
#
# @title "Calculates the product for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowProds
# @usage colProds
# @usage product
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{na.rm}{If @TRUE, missing values are ignored, otherwise not.}
# \item{method}{A @character string specifying how each product
# is calculated.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \details{
# If \code{method="expSumLog"}, then then @see "product" function
# is used, which calculates the produce via the logarithmic transform
# (treating negative values specially). This improves the precision
# and lowers the risk for numeric overflow.
# If \code{method="direct"}, the direct product is calculated via
# the @see "base::prod" function.
# }
#
# \section{Missing values}{
# Note, if \code{method="expSumLog"}, \code{na.rm=FALSE}, and \code{x}
# contains missing values (@NA or @NaN), then the calculated value
# is also missing value.
# Note that it depends on platform whether @NaN or @NA is returned
# when an @NaN exists, cf. @see "base::is.nan".
# }
#
# @author "HB"
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowProds <- function(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) {
# Preallocate result (zero:ed by default)
n <- nrow(x)
y <- double(length=n)
# Nothing todo?
if (n == 0L) return(y)
# Argument 'method':
method <- method[1L]
# How to calculate product?
if (method == "expSumLog") {
prod <- product
} else if (method == "direct") {
} else {
stop("Unknown value of argument 'method': ", method)
}
for (ii in seq_len(n)) {
y[ii] <- prod(x[ii,,drop=TRUE], na.rm=na.rm)
}
y;
} # rowProds()
colProds <- function(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...) {
# Preallocate result (zero:ed by default)
n <- ncol(x)
y <- double(length=n)
# Nothing todo?
if (n == 0L) return(y)
# Argument 'method':
method <- method[1L]
# How to calculate product?
if (method == "expSumLog") {
prod <- product
} else if (method == "direct") {
} else {
stop("Unknown value of argument 'method': ", method)
}
for (ii in seq_len(n)) {
y[ii] <- prod(x[,ii,drop=TRUE], na.rm=na.rm)
}
y
} # colProds()
############################################################################
# HISTORY:
# 2014-11-15 [HB]
# o SPEEDUP: No longer using match.arg() due to its overhead.
# 2014-06-04 [HB]
# o Now col- and rowProds() utilizes new product() function.
# o Added argument 'method' to col- and rowProds().
# 2014-06-02 [HB]
# o Now rowProds() uses rowCounts(x) when 'x' is logical.
# o Now rowProds() avoids subsetting rows unless needed.
# 2014-03-31 [HB]
# o BUG FIX: rowProds() would throw "Error in rowSums(isNeg) : 'x' must
# be an array of at least two dimensions" on matrices where all rows
# contained at least on zero. Thanks to Roel Verbelen at KU Leuven
# for the report.
# 2013-11-23 [HB]
# o MEMORY: rowProbs() does a better job cleaning out allocated
# objects sooner.
# 2012-06-25 [HB]
# o GENERALIZATION: Now row- and colProds() handles missing values.
# o BUG FIX: In certain cases, row- and colProds() would return NA instead
# of 0 for some elements. Thanks Brenton Kenkel at University of
# Rochester for reporting on this.
# 2008-07-30 [HB]
# o Now it is only rows without zeros for which the calculation is
# actually performed.
# 2008-03-26 [HB]
# o Created.
############################################################################
matrixStats/R/x_OP_y.R 0000644 0001751 0000144 00000004316 12542546242 014346 0 ustar hornik users ############################################################################/**
# @RdocFunction x_OP_y
# @alias x_OP_y
# @alias t_tx_OP_y
#
# @title "Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'"
#
# \usage{
# @usage x_OP_y
# @usage t_tx_OP_y
# }
#
# \description{
# @get "title", where OP can be +, -, *, and /.
# For + and *, na.rm=TRUE will drop missing values first.
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{y}{A @numeric @vector of length L.}
# \item{OP}{A @character specifying which operator to use.}
# \item{commute}{If @TRUE, 'y OP x' ('t(y OP t(x))') is calculated,
# otherwise 'x OP y' ('t(t(x) OP y)').}
# \item{na.rm}{If @TRUE, missing values are ignored, otherwise not.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric NxK @matrix.
# }
#
# \section{Missing values}{
# If \code{na.rm=TRUE}, then missing values are "dropped" before applying
# the operator to each pair of values. For instance, if \code{x[1,1]} is
# a missing value, then the result of \code{x[1,1] + y[1]} equals
# \code{y[1]}. If also \code{y[1]} is a missing value, then the result
# is a missing value. This only applies to additions and multiplications.
# For subtractions and divisions, argument \code{na.rm} is ignored.
# }
#
# @examples "../incl/x_OP_y.Rex"
#
# @author
#
# @keyword internal
#*/############################################################################
x_OP_y <- function(x, y, OP, commute=FALSE, na.rm=FALSE) {
commute <- as.logical(commute)
na.rm <- as.logical(na.rm)
op <- charmatch(OP, c("+", "-", "*", "/"), nomatch=0L)
stopifnot(op > 0L)
.Call("x_OP_y", x, y, dim(x), op, commute, na.rm, TRUE, FALSE, package="matrixStats")
} # x_OP_y()
t_tx_OP_y <- function(x, y, OP, commute=FALSE, na.rm=FALSE) {
commute <- as.logical(commute)
na.rm <- as.logical(na.rm)
op <- charmatch(OP, c("+", "-", "*", "/"), nomatch=0L)
stopifnot(op > 0L)
.Call("x_OP_y", x, y, dim(x), op, commute, na.rm, TRUE, TRUE, package="matrixStats")
} # t_tx_OP_y()
############################################################################
# HISTORY:
# 2014-11-24 [HB]
# o Created.
############################################################################
matrixStats/R/rowRanges.R 0000644 0001751 0000144 00000006100 12542546242 015111 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowRanges
# @alias colRanges
# @alias rowMins
# @alias rowMaxs
# @alias colMins
# @alias colMaxs
#
# @title "Gets the range of values in each row (column) of a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowRanges
# @usage colRanges
# @usage rowMins
# @usage colMins
# @usage rowMaxs
# @usage colMaxs
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# \code{rowRanges()} (\code{colRanges()}) returns a
# @numeric Nx2 (Kx2) @matrix, where
# N (K) is the number of rows (columns) for which the ranges are
# calculated.
#
# \code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a
# @numeric @vector of length N (K).
# }
#
# @author "HB"
#
# \seealso{
# @see "rowOrderStats" and @see "base::pmin.int".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowRanges <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("rowRanges", x, dim., 2L, na.rm, TRUE, PACKAGE="matrixStats")
}
rowMins <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("rowRanges", x, dim., 0L, na.rm, TRUE, PACKAGE="matrixStats")
}
rowMaxs <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("rowRanges", x, dim., 1L, na.rm, TRUE, PACKAGE="matrixStats")
}
colRanges <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("colRanges", x, dim., 2L, na.rm, TRUE, PACKAGE="matrixStats")
}
colMins <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("colRanges", x, dim., 0L, na.rm, TRUE, PACKAGE="matrixStats")
}
colMaxs <- function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
.Call("colRanges", x, dim., 1L, na.rm, TRUE, PACKAGE="matrixStats")
}
############################################################################
# HISTORY:
# 2014-12-17 [HB]
# o CLEANUP: Made col- and rowRanges() plain R functions.
# 2014-11-16
# o SPEEDUP: Implemented in native code.
# 2013-07-28
# o SPEEDUP: Made (col|row)Mins() and (col|row)Maxs() faster.
# o BUG FIX: rowRanges(x) on an Nx0 matrix 'x' would give an error.
# Ditto for colRanges(x).
# 2009-02-01
# o BUG FIX: colRanges(x) would give an error if nrow(x) == 0.
# 2008-03-25
# o Since colOrderStats() cannot handle missing values we use the slower
# colRanges() for the case when na.rm=TRUE.
# o Added {row|col}{Min|Max}s().
# o Created {row|col}Ranges() for scratch. Handles NAs.
############################################################################
matrixStats/R/sumOver.R 0000644 0001751 0000144 00000006026 12542546242 014611 0 ustar hornik users ############################################################################/**
# @RdocFunction sumOver
#
# @title "Fast sum over subset of vector elements"
#
# @synopsis
#
# \description{
# Computes the sum of all or a subset of values.
# }
#
# \arguments{
# \item{x}{A @numeric @vector of length N.}
# \item{idxs}{A @numeric index @vector in [1,N] of elements to sum over.
# If @NULL, all elements are considered.}
# \item{na.rm}{If @TRUE, missing values are skipped, otherwise not.}
# \item{mode}{A @character string specifying the data type of the
# return value. Default is to use the same mode as argument
# \code{x}.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a scalar of the data type specified by argument \code{mode}.
# If \code{mode="integer"}, then integer overflow occurs if the
# \emph{sum} is outside the range of defined integer values.
# }
#
# \details{
# \code{sumOver(x, idxs)} gives equivalent results as
# \code{sum(x[idxs])}, but is faster and more memory efficient
# since it avoids the actual subsetting which requires copying
# of elements and garbage collection thereof.
#
# Furthermore, \code{sumOver(x, mode="double")} is equivalent to
# \code{sum(as.numeric(x))}, but is much more memory efficient when
# \code{x} is an @integer vector.
# }
#
# @examples "../incl/sumOver.Rex"
#
# \seealso{
# @see "base::sum".
# To efficiently average over a subset, see @see "meanOver".
# }
#
# @author
#
# @keyword univar
# @keyword internal
#*/############################################################################
sumOver <- function(x, idxs=NULL, na.rm=FALSE, mode=typeof(x), ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'x':
if (!is.numeric(x)) {
stop("Argument 'x' is not numeric: ", mode(x));
}
n <- length(x);
# Argument 'na.rm':
if (!is.logical(na.rm)) {
stop("Argument 'na.rm' is not logical: ", mode(na.rm));
}
# Argument 'idxs':
if (is.null(idxs)) {
} else if (is.integer(idxs)) {
} else if (is.logical(idxs)) {
if (length(idxs) != n) {
stop(sprintf("Lengths of arguments 'idxs' and 'x' do not match: %d != %d", length(idxs), n));
}
idxs <- which(idxs);
} else {
idxs <- as.integer(idxs);
}
# Argument 'mode':
mode <- mode[1L]
modeI <- charmatch(mode, c("integer", "double"), nomatch=0L)
if (modeI == 0L) {
stop("Unknown value of argument 'mode': ", mode)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Summing
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
.Call("sumOver", x, idxs, na.rm, modeI, PACKAGE="matrixStats");
} # sumOver()
############################################################################
# HISTORY:
# 2014-11-15 [HB]
# o SPEEDUP: No longer using match.arg() due to its overhead.
# 2014-11-02 [HB]
# o Created.
############################################################################
matrixStats/R/rowWeightedMeans.R 0000644 0001751 0000144 00000014300 12542546242 016417 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowWeightedMeans
# @alias colWeightedMeans
#
# @title "Calculates the weighted means for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowWeightedMeans
# @usage colWeightedMeans
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{w}{A @numeric @vector of length K (N).}
# \item{na.rm}{If @TRUE, missing values are excluded from the calculation,
# otherwise not.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \details{
# The implementations of these methods are optimized for both speed
# and memory.
# If no weights are given, the corresponding
# \code{rowMeans()}/\code{colMeans()} is used.
# }
#
# @examples "../incl/rowWeightedMeans.Rex"
#
# @author "HB"
#
# \seealso{
# See \code{rowMeans()} and \code{colMeans()} in @see "base::colSums"
# for non-weighted means.
# See also @see "stats::weighted.mean".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowWeightedMeans <- function(x, w=NULL, na.rm=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'w':
hasWeights <- !is.null(w);
if (hasWeights) {
n <- ncol(x);
if (length(w) != n) {
stop("The length of argument 'w' is does not match the number of column in 'x': ", length(w), " != ", n);
}
if (!is.numeric(w)) {
stop("Argument 'w' is not numeric: ", mode(w));
}
if (any(!is.na(w) & w < 0)) {
stop("Argument 'w' has negative weights.");
}
}
if (hasWeights) {
# Allocate results
m <- nrow(x);
if (m == 0L)
return(double(0L));
# Drop entries with zero weight? ...but keep NAs
idxs <- which(is.na(w) | w != 0);
nw <- length(idxs);
if (nw == 0L) {
return(rep(NaN, times=m));
} else if (nw < n) {
w <- w[idxs];
x <- x[,idxs,drop=FALSE];
}
idxs <- NULL; # Not needed anymore
# Has missing values?
if (na.rm) {
# Really?
na.rm <- anyMissing(x);
}
if (na.rm) {
# Indices of missing values
nas <- which(is.na(x));
# Weight matrix
W <- matrix(w, nrow=nrow(x), ncol=ncol(x), byrow=TRUE);
w <- NULL; # Not needed anymore
W[nas] <- NA;
wS <- rowSums(W, na.rm=TRUE);
# Standarized weights summing to one w/out missing values
W[nas] <- 0;
W <- W / wS;
x[nas] <- 0;
nas <- NULL; # Not needed anymore
x <- W * x;
W <- NULL; # Not needed anymore
} else {
wS <- sum(w);
# Standardize weights summing to one.
w <- w / wS;
# Weighted values
## SLOW: for (rr in 1:m) x[rr,] <- w * x[rr,,drop=TRUE];
## FAST:
x <- t_tx_OP_y(x, w, OP="*", na.rm=FALSE)
w <- NULL; # Not needed anymore
}
# Here we know there are no missing value in the new 'x'
res <- rowSums(x, na.rm=FALSE);
} else {
res <- rowMeans(x, na.rm=na.rm);
}
res;
} # rowWeightedMeans()
colWeightedMeans <- function(x, w=NULL, na.rm=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'w':
hasWeights <- !is.null(w);
if (hasWeights) {
n <- nrow(x);
if (length(w) != n) {
stop("The length of argument 'w' is does not match the number of rows in 'x': ", length(w), " != ", n);
}
if (!is.numeric(w)) {
stop("Argument 'w' is not numeric: ", mode(w));
}
if (any(!is.na(w) & w < 0)) {
stop("Argument 'w' has negative weights.");
}
}
if (hasWeights) {
# Allocate results
m <- ncol(x);
if (m == 0L)
return(double(0L));
# Drop entries with zero weight? ...but keep NAs
idxs <- which(is.na(w) | w != 0);
nw <- length(idxs);
if (nw == 0L) {
return(rep(NaN, times=m));
} else if (nw < n) {
w <- w[idxs];
x <- x[idxs,,drop=FALSE];
}
idxs <- NULL; # Not needed anymore
# Has missing values?
if (na.rm) {
# Really?
na.rm <- anyMissing(x);
}
if (na.rm) {
# Indices of missing values
nas <- which(is.na(x));
# Weight matrix
W <- matrix(w, nrow=nrow(x), ncol=ncol(x), byrow=FALSE);
w <- NULL; # Not needed anymore
W[nas] <- NA;
wS <- colSums(W, na.rm=TRUE);
# Standarized weights summing to one w/out missing values
W[nas] <- 0;
for (cc in 1:m) {
W[,cc] <- W[,cc,drop=TRUE] / wS[cc];
}
x[nas] <- 0;
nas <- NULL; # Not needed anymore
x <- W * x;
W <- NULL; # Not needed anymore
} else {
wS <- sum(w);
# Standardize weights summing to one.
w <- w / wS;
# Weighted values
x <- w*x;
## SLIGHTLY SLOWER: x <- x_OP_y(x, w, OP="*");
w <- NULL; # Not needed anymore
}
# Here we know there are no missing value in the new 'x'
res <- colSums(x, na.rm=FALSE);
} else {
res <- colMeans(x, na.rm=na.rm);
}
res;
} # colWeightedMeans()
##############################################################################
# HISTORY:
# 2014-12-19 [HB]
# o CLEANUP: Made col- and rowWeightedMeans() plain R functions.
# 2013-11-29
# o BUG FIX: (col|row)WeightedMeans() with all zero weights gave an
# invalid result.
# 2013-11-23
# o MEMORY: Now (col|row)WeightedMeans() clean out allocated objects sooner.
# 2010-02-03
# o BUG FIX: (col|row)WeightedMeans(..., na.rm=TRUE) would incorrectly treat
# missing values as zeros. Thanks Pierre Neuvial for reporting this.
# 2008-02-01
# o Added special implementation for column version.
# o Added Rdoc comments.
# o Created.
##############################################################################
matrixStats/R/meanOver.R 0000644 0001751 0000144 00000006035 12542546242 014725 0 ustar hornik users ############################################################################/**
# @RdocFunction meanOver
#
# @title "Fast averaging over subset of vector elements"
#
# @synopsis
#
# \description{
# Computes the sample mean of all or a subset of values.
# }
#
# \arguments{
# \item{x}{A @numeric @vector of length N.}
# \item{idxs}{A @numeric index @vector in [1,N] of elements to mean over.
# If @NULL, all elements are considered.}
# \item{na.rm}{If @TRUE, missing values are skipped, otherwise not.}
# \item{refine}{If @TRUE and \code{x} is @numeric, then extra effort is
# used to calculate the average with greater numerical precision,
# otherwise not.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# }
#
# \details{
# \code{meanOver(x, idxs)} gives equivalent results as
# \code{mean(x[idxs])}, but is faster and more memory efficient
# since it avoids the actual subsetting which requires copying
# of elements and garbage collection thereof.
#
# If \code{x} is @numeric and \code{refine=TRUE}, then a two-pass scan
# is used to calculate the average. The first scan calculates the total
# sum and divides by the number of (non-missing) values. In the second
# scan, this average is refined by adding the residuals towards the first
# average. The @see "base::mean" uses this approach.
# \code{meanOver(..., refine=FALSE)} is almost twice as fast as
# \code{meanOver(..., refine=TRUE)}.
# }
#
# @examples "../incl/meanOver.Rex"
#
# \seealso{
# @see "base::mean".
# To efficiently sum over a subset, see @see "sumOver".
# }
#
# @author
#
# @keyword univar
# @keyword internal
#*/############################################################################
meanOver <- function(x, idxs=NULL, na.rm=FALSE, refine=TRUE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'x':
if (!is.numeric(x)) {
stop("Argument 'x' is not numeric: ", mode(x));
}
n <- length(x);
# Argument 'na.rm':
if (!is.logical(na.rm)) {
stop("Argument 'na.rm' is not logical: ", mode(na.rm));
}
# Argument 'idxs':
if (is.null(idxs)) {
} else if (is.integer(idxs)) {
} else if (is.logical(idxs)) {
if (length(idxs) != n) {
stop(sprintf("Lengths of arguments 'idxs' and 'x' do not match: %d != %d", length(idxs), n));
}
idxs <- which(idxs);
} else {
idxs <- as.integer(idxs);
}
# Argument 'refine':
if (!is.logical(refine)) {
stop("Argument 'refine' is not logical: ", mode(refine));
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Averaging
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
.Call("meanOver", x, idxs, na.rm, refine, PACKAGE="matrixStats");
} # meanOver()
############################################################################
# HISTORY:
# 2014-11-02 [HB]
# o Created.
############################################################################
matrixStats/R/indexByRow.R 0000644 0001751 0000144 00000002573 12542546242 015246 0 ustar hornik users ###########################################################################/**
# @RdocFunction indexByRow
#
# @title "Translates matrix indices by rows into indices by columns"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage indexByRow
# }
#
# \arguments{
# \item{dim}{A @numeric @vector of length two specifying the length
# of the "template" matrix.}
# \item{idxs}{A @vector of indices. If @NULL, all indices are returned.}
# \item{...}{Not use.}
# }
#
# \value{
# Returns an @integer @vector of indices.
# }
#
# @examples "../incl/indexByRow.Rex"
#
# @author "HB"
#
# @keyword iteration
# @keyword logic
#*/###########################################################################
indexByRow <- function(dim, idxs=NULL, ...) {
if (is.matrix(dim)) {
# BACKWARD COMPATIBILITY: Keep for a while, but deprecate
# in the future.
dim <- dim(dim)
} else {
dim <- as.integer(dim)
}
if (!is.null(idxs)) idxs <- as.integer(idxs)
.Call("indexByRow", dim, idxs, package="matrixStats")
}
##############################################################################
# HISTORY:
# 2014-11-09
# o Now indexByRow() is a plain R function (was a generic function).
# o Implemented in C.
# 2014-05-23
# o CLEANUP: Made indexByRow() an S3 rather than S4 generic.
# 2007-04-12
# o Created.
##############################################################################
matrixStats/R/binCounts.R 0000644 0001751 0000144 00000010105 12542546242 015106 0 ustar hornik users ############################################################################/**
# @RdocFunction binCounts
#
# @title "Fast element counting in non-overlapping bins"
#
# @synopsis
#
# \description{
# Counts the number of elements in non-overlapping bins
# }
#
# \arguments{
# \item{x}{A @numeric @vector of K positions for to be binned and counted.}
# \item{bx}{A @numeric @vector of B+1 ordered positions specifying
# the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ...,
# \code{[bx[B],bx[B+1])}.}
# \item{right}{If @TRUE, the bins are right-closed (left open),
# otherwise left-closed (right open).}
# \item{...}{Not used.}
# }
#
# \value{
# Returns an @integer @vector of length B with non-negative integers.
# }
#
# \details{
# \code{binCounts(x, bx, right=TRUE)} gives equivalent results as
# \code{rev(binCounts(-x, bx=rev(-bx), right=FALSE))}, but is
# faster and more memory efficient.
# }
#
# \section{Missing and non-finite values}{
# Missing values in \code{x} are ignored/dropped.
# Missing values in \code{bx} are not allowed and gives an error.
# }
#
# \seealso{
# An alternative for counting occurrences within bins is
# @see "graphics::hist", e.g. \code{hist(x, breaks=bx, plot=FALSE)$counts}.
# That approach is ~30-60\% slower than \code{binCounts(..., right=TRUE)}.
#
# To count occurrences of indices \code{x} (positive @integers) in
# \code{[1,B]}, use \code{tabulate(x, nbins=B)}, where \code{x} does
# \emph{not} have to be sorted first.
# For details, see @see "base::tabulate".
#
# To average values within bins, see @see "binMeans".
# }
#
# @author "HB"
#
# @keyword "univar"
#*/############################################################################
binCounts <- function(x, bx, right=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'x':
if (!is.numeric(x)) {
stop("Argument 'x' is not numeric: ", mode(x));
}
# Argument 'bx':
if (!is.numeric(bx)) {
stop("Argument 'bx' is not numeric: ", mode(bx));
}
if (any(is.infinite(bx))) {
stop("Argument 'bx' must not contain Inf values.");
}
if (is.unsorted(bx)) {
stop("Argument 'bx' is not ordered.");
}
# Argument 'right':
right <- as.logical(right);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Preprocessing of x
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Drop missing values
keep <- which(!is.na(x));
if (length(keep) < length(x)) {
x <- x[keep];
}
keep <- NULL; # Not needed anymore
# Order x (by increasing x).
# If 'x' is already sorted, the overhead of (re)sorting is
# relatively small.
x <- sort.int(x, method="quick");
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Bin
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x <- as.numeric(x);
bx <- as.numeric(bx);
.Call("binCounts", x, bx, right, PACKAGE="matrixStats");
} # binCounts()
############################################################################
# HISTORY:
# 2014-12-29 [HB]
# o SPEEDUP: Now binCounts() and binMeans() uses is.unsorted() instead
# of o <- order(); any(diff(o) != 1L).
# 2014-12-17 [HB]
# o CLEANUP: Made binCounts() and binMeans() plain R functions.
# 2013-11-24 [HB]
# o DOCUMENTATION: Added reference to base::tabulate().
# 2013-11-23 [HB]
# o MEMORY: binCounts() cleans out more temporary variables as soon as
# possible such that the garbage collector can remove them sooner.
# 2012-05-10 [HB]
# o DOCUMENTATION: Now help(binCounts) cross references hist(), which is
# almost as fast. Thanks Ilari Scheinin (Finland) for pointing this out.
# o SPEEDUP: Now binMeans() and binCounts() use Hoare's Quicksort
# method for sorting 'x'.
# 2012-10-03 [HB]
# o Created.
############################################################################
matrixStats/R/rowOrderStats.R 0000644 0001751 0000144 00000004553 12542546242 015776 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowOrderStats
# @alias colOrderStats
#
# @title "Gets an order statistic for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowOrderStats
# @usage colOrderStats
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{which}{An @integer index in [1,K] ([1,N]) indicating which
# order statistic to be returned.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \details{
# The implementation of \code{rowOrderStats()} is optimized for both
# speed and memory.
# To avoid coercing to @doubles (and hence memory allocation), there
# is a unique implementation for @integer matrices.
# }
#
# \section{Missing values}{
# This method does \emph{not} handle missing values, that is, the result
# corresponds to having \code{na.rm=FALSE} (if such an argument would
# be available).
# }
#
# \author{
# The native implementation of \code{rowOrderStats()} was adopted
# by Henrik Bengtsson from Robert Gentleman's \code{rowQ()}
# in the \pkg{Biobase} package.
# }
#
# \seealso{
# See \code{rowMeans()} in @see "base::colSums".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowOrderStats <- function(x, which, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
which <- as.integer(which)
.Call("rowOrderStats", x, dim., which, PACKAGE="matrixStats");
}
colOrderStats <- function(x, which, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
which <- as.integer(which)
.Call("colOrderStats", x, dim., which, PACKAGE="matrixStats");
}
############################################################################
# HISTORY:
# 2014-12-19 [HB]
# o CLEANUP: Made col- and rowOrderStats() plain R functions.
# 2014-11-16
# o SPEEDUP: Now colOrderStats() also is implemented in native code.
# 2008-03-25
# o Added colOrderStats().
# o Renamed from rowQuantiles() to rowOrderStats(), especially because it
# is not returning quantiles like quantile().
# o Created (again?)
############################################################################
matrixStats/R/rowCumsums.R 0000644 0001751 0000144 00000004660 12542546242 015337 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowCumsums
# @alias rowCumsums
# @alias colCumsums
# @alias rowCumprods
# @alias colCumprods
# @alias rowCummins
# @alias colCummins
# @alias rowCummaxs
# @alias colCummaxs
#
# @title "Cumulative sums, products, minima and maxima for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowCumsums
# @usage colCumsums
# @usage rowCumprods
# @usage colCumprods
# @usage rowCummins
# @usage colCummins
# @usage rowCummaxs
# @usage colCummaxs
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric NxK @matrix of the same mode as \code{x}.
# }
#
# @examples "../incl/rowCumsums.Rex"
#
# @author "HB"
#
# \seealso{
# See @see "base::cumsum", @see "base::cumprod",
# @see "base::cummin", and @see "base::cummax".
# }
#
# @keyword array
# @keyword iteration
# @keyword univar
#*/###########################################################################
rowCumsums <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCumsums", x, dim, TRUE, PACKAGE="matrixStats")
}
colCumsums <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCumsums", x, dim, FALSE, PACKAGE="matrixStats")
}
rowCumprods <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCumprods", x, dim, TRUE, PACKAGE="matrixStats")
}
colCumprods <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCumprods", x, dim, FALSE, PACKAGE="matrixStats")
}
rowCummins <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCummins", x, dim, TRUE, PACKAGE="matrixStats")
}
colCummins <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCummins", x, dim, FALSE, PACKAGE="matrixStats")
}
rowCummaxs <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCummaxs", x, dim, TRUE, PACKAGE="matrixStats")
}
colCummaxs <- function(x, dim.=dim(x), ...) {
dim <- as.integer(dim.);
.Call("rowCummaxs", x, dim, FALSE, PACKAGE="matrixStats")
}
############################################################################
# HISTORY:
# 2014-11-26 [HB]
# o Created.
############################################################################
matrixStats/R/benchmark.R 0000644 0001751 0000144 00000001316 12542546242 015100 0 ustar hornik users benchmark <- function(fcn, tags=NULL, path=NULL, workdir="reports", envir=parent.frame(), ...) {
requireNamespace("R.rsp") || stop("R.rsp not installed.")
if (is.function(fcn)) {
fcn <- deparse(substitute(fcn))
}
if (is.null(path)) {
path <- system.file("benchmarking", package="matrixStats")
}
fullname <- paste(c(fcn, tags), collapse=",")
filename <- sprintf("%s.md.rsp", fullname)
pathname <- file.path(path, filename)
R.rsp::rfile(pathname, workdir=workdir, envir=envir, ...)
} # benchmark()
############################################################################
# HISTORY:
# 2014-06-02
# o Created.
############################################################################
matrixStats/R/rowMedians.S4.R 0000644 0001751 0000144 00000005364 12542546242 015552 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowMedians
# @alias colMedians
# \alias{rowMedians,matrix-method}
# \alias{colMedians,matrix-method}
#
# @title "Calculates the median for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowMedians
# @usage colMedians
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \details{
# The implementation of \code{rowMedians()} and \code{colMedians()}
# is optimized for both speed and memory.
# To avoid coercing to @doubles (and hence memory allocation), there
# is a special implementation for @integer matrices.
# That is, if \code{x} is an @integer @matrix, then
# \code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))})
# would require three times the memory of \code{rowMedians(x)}
# (\code{colMedians(x)}), but all this is avoided.
# }
#
# @author "HB, HJ"
#
# \seealso{
# See @see "rowMedians" and \code{colMedians()} for weighted medians.
# For mean estimates, see \code{rowMeans()} in @see "base::colSums".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
setGeneric("rowMedians", function(x, na.rm=FALSE, dim.=dim(x), ...) {
standardGeneric("rowMedians");
})
setMethod("rowMedians", signature(x="matrix"), function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm);
hasNAs <- TRUE; # Add as an argument? /2007-08-24
.Call("rowMedians", x, dim., na.rm, hasNAs, TRUE, PACKAGE="matrixStats");
})
setGeneric("colMedians", function(x, na.rm=FALSE, dim.=dim(x), ...) {
standardGeneric("colMedians");
})
setMethod("colMedians", signature(x="matrix"), function(x, na.rm=FALSE, dim.=dim(x), ...) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm);
hasNAs <- TRUE; # Add as an argument? /2007-08-24
.Call("rowMedians", x, dim., na.rm, hasNAs, FALSE, PACKAGE="matrixStats");
})
############################################################################
# HISTORY:
# 2011-10-13 [HJ]
# o In the past, colMedians(x) was accomplished as rowMedians(t(x));
# it is now done directly.
# 2008-03-25
# o Added colMedians() - a wrapper around rowMedians() for now.
# o Turned into a S4 method as it used to be in Biobase.
# 2007-08-14
# o Added argument 'hasNA'.
# 2005-11-25
# o Created.
############################################################################
matrixStats/R/rowAvgsPerColSet.R 0000644 0001751 0000144 00000013103 12542546242 016354 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowAvgsPerColSet
# @alias colAvgsPerRowSet
#
# @title "Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)"
#
# \description{
# @get "title".
# Each subset is averaged independently of the others.
# }
#
# @synopsis
#
# \arguments{
# \item{X}{A @numeric NxM @matrix.}
# \item{W}{An optional @numeric NxM @matrix of weights.}
# \item{S}{An @integer KxJ @matrix specifying the J subsets. Each
# column holds K column (row) indices for the corresponding subset.}
# \item{FUN}{The row-by-row (column-by-column) @function used to average
# over each subset of \code{X}. This function must accept a @numeric
# NxK (KxM) @matrix and the @logical argument \code{na.rm} (which is
# automatically set), and return a @numeric @vector of length N (M).}
# \item{...}{Additional arguments passed to then \code{FUN} @function.}
# \item{tFUN}{If @TRUE, the NxK (KxM) @matrix passed to \code{FUN()}
# is transposed first.}
# }
#
# \value{
# Returns a @numeric JxN (MxJ) @matrix,
# where row names equal \code{rownames(X)} (\code{colnames(S)})
# and column names \code{colnames(S)} (\code{colnames(X)}).
# }
#
# \details{
# If argument \code{S} is a single column vector with indices
# \code{1:N}, then \code{rowAvgsPerColSet(X, S=S, FUN=rowMeans)}
# gives the same result as \code{rowMeans(X)}.
# Analogously, for \code{rowAvgsPerColSet()}.
# }
#
# @examples "../incl/rowAvgsPerColSet.Rex"
#
# @author "HB"
#
# @keyword internal
# @keyword utilities
#*/###########################################################################
rowAvgsPerColSet <- function(X, W=NULL, S, FUN=rowMeans, ..., tFUN=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'X':
if (!is.matrix(X)) {
stop("Argument 'X' is not a matrix: ", class(X)[1L]);
}
dimX <- dim(X);
# Argument 'W':
hasW <- !is.null(W);
if (hasW) {
if (!is.matrix(W)) {
stop("Argument 'W' is not a matrix: ", class(W)[1L]);
}
if (any(dim(W) != dimX)) {
stop("Argument 'W' does not have the same dimension as 'X': ", paste(dim(W), collapse="x"), " != ", paste(dimX, collapse="x"));
}
if (!is.numeric(W)) {
stop("Argument 'W' is not numeric: ", mode(W));
}
}
# Argument 'S':
if (!is.matrix(S)) {
stop("Argument 'S' is not a matrix: ", class(S)[1L]);
}
nbrOfSets <- ncol(S);
setNames <- colnames(S);
# Argument 'FUN':
if (!is.function(FUN)) {
stop("Argument 'FUN' is not a function: ", mode(S));
}
# Argument 'tFUN':
tFUN <- as.logical(tFUN);
# Check if missing values have to be excluded while averaging
na.rm <- (anyMissing(X) || anyMissing(S));
# Record names of dimension
rownamesX <- rownames(X);
dimnames(X) <- NULL;
# Average in sets of columns of X.
Z <- apply(S, MARGIN=2L, FUN=function(jj) {
# Extract set of columns from X
jj <- jj[is.finite(jj)];
Zjj <- X[,jj,drop=FALSE];
jj <- NULL; # Not needed anymore
if (tFUN) {
Zjj <- t(Zjj);
}
# Average by weights
if (hasW) {
Wjj <- W[,jj,drop=FALSE];
Zjj <- FUN(Zjj, W=Wjj, ..., na.rm=na.rm);
Wjj <- NULL; # Not needed anymore
} else {
Zjj <- FUN(Zjj, ..., na.rm=na.rm);
}
# Sanity check
stopifnot(length(Zjj) == dimX[1L]);
# Return set average
Zjj;
});
# Sanity check
stopifnot(dim(Z) == c(dimX[1L], nbrOfSets));
# Set names
rownames(Z) <- rownamesX;
colnames(Z) <- setNames;
Z;
} # rowAvgsPerColSet()
colAvgsPerRowSet <- function(X, W=NULL, S, FUN=colMeans, tFUN=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'X':
if (!is.matrix(X)) {
stop("Argument 'X' is not a matrix: ", class(X)[1L]);
}
# Argument 'W':
# Argument 'S':
if (!is.matrix(S)) {
stop("Argument 'S' is not a matrix: ", class(S)[1L]);
}
# Argument 'FUN':
if (!is.function(FUN)) {
stop("Argument 'FUN' is not a function: ", mode(S));
}
# Argument 'tFUN':
tFUN <- as.logical(tFUN);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Transpose
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
tX <- t(X);
if (is.null(W)) {
tW <- NULL;
} else {
tW <- t(W);
}
# ...
tZ <- rowAvgsPerColSet(X=tX, W=tW, S=S, FUN=FUN, tFUN=!tFUN, ...);
tX <- tW <- NULL; # Not needed anymore
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Transpose back
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Z <- t(tZ);
tZ <- NULL; # Not needed anymore
Z;
} # colAvgsPerRowSet()
##############################################################################
# HISTORY:
# 2014-12-17 [HB]
# o CLEANUP: Made col- and rowAvgsPerColSet() plain R functions.
# 2013-11-23
# o MEMORY: rowAvgsPerColSet() and colAvgsPerRowSet() do a better job
# cleaning out allocated objects sooner.
# 2011-11-29
# o Added rowAvgsPerColSet() and colAvgsPerRowSet().
# o Created from blockAvg() in the aroma.cn.eval package.
##############################################################################
matrixStats/R/allocMatrix.R 0000644 0001751 0000144 00000003130 12542546242 015421 0 ustar hornik users ############################################################################/**
# @RdocFunction allocMatrix
# @alias allocVector
# @alias allocArray
#
# @title "Allocates an empty vector, matrix or array"
#
# \usage{
# @usage allocVector
# @usage allocMatrix
# @usage allocArray
# }
#
# \description{
# @get title faster than the corresponding function in R.
# }
#
# \arguments{
# \item{length, nrow, ncol, dim}{@numerics specifying the dimension of
# the created @vector, @matrix or @array.}
# \item{value}{A @numeric scalar that all elements will have as value.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @vector, @matrix and @array respectively of the same data
# type as \code{value}.
# }
#
# @author "HB"
#
# \seealso{
# See also @vector, @matrix and @array.
# }
#
# @keyword programming
# @keyword internal
#*/############################################################################
allocVector <- function(length, value=0.0, ...) {
length <- as.integer(length)
.Call("allocVector2", length, value, PACKAGE="matrixStats")
} # allocVector()
allocMatrix <- function(nrow, ncol, value=0.0, ...) {
nrow <- as.integer(nrow)
ncol <- as.integer(ncol)
.Call("allocMatrix2", nrow, ncol, value, PACKAGE="matrixStats")
} # allocMatrix()
allocArray <- function(dim, value=0.0, ...) {
dim <- as.integer(dim)
.Call("allocArray2", dim, value, PACKAGE="matrixStats")
} # allocArray()
############################################################################
# HISTORY:
# 2014-11-08 [HB]
# o Created.
############################################################################
matrixStats/R/rowWeightedMedians.R 0000644 0001751 0000144 00000010027 12542546242 016736 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowWeightedMedians
# @alias colWeightedMedians
#
# @title "Calculates the weighted medians for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowWeightedMedians
# @usage colWeightedMedians
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{w}{A @numeric @vector of length K (N).}
# \item{na.rm}{If @TRUE, missing values are excluded from the calculation,
# otherwise not.}
# \item{...}{Additional arguments passed to @see "weightedMedian".}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \details{
# The implementations of these methods are optimized for both speed
# and memory.
# If no weights are given, the corresponding
# @see "rowMedians"/\code{colMedians()} is used.
# }
#
# \examples{
# @include "../incl/rowWeightedMedians.Rex"
# }
#
# @author "HB"
#
# \seealso{
# See @see "rowMedians" and \code{colMedians()} for non-weighted medians.
# Internally, @see "weightedMedian" is used.
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowWeightedMedians <- function(x, w=NULL, na.rm=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'w':
hasWeights <- !is.null(w);
if (hasWeights) {
n <- ncol(x);
if (length(w) != n) {
stop("The length of argument 'w' is does not match the number of column in 'x': ", length(w), " != ", n);
}
if (!is.numeric(w)) {
stop("Argument 'w' is not numeric: ", mode(w));
}
if (any(!is.na(w) & w < 0)) {
stop("Argument 'w' has negative weights.");
}
}
if (hasWeights) {
# Allocate results
m <- nrow(x);
if (m == 0L)
return(double(0L));
res <- apply(x, MARGIN=1L, FUN=function(x) {
weightedMedian(x, w=w, na.rm=na.rm, ...);
});
w <- NULL; # Not needed anymore
} else {
res <- rowMedians(x, na.rm=na.rm);
}
res;
} # rowWeightedMedians()
colWeightedMedians <- function(x, w=NULL, na.rm=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'w':
hasWeights <- !is.null(w);
if (hasWeights) {
n <- nrow(x);
if (length(w) != n) {
stop("The length of argument 'w' is does not match the number of rows in 'x': ", length(w), " != ", n);
}
if (!is.numeric(w)) {
stop("Argument 'w' is not numeric: ", mode(w));
}
if (any(!is.na(w) & w < 0)) {
stop("Argument 'w' has negative weights.");
}
}
if (hasWeights) {
# Allocate results
m <- ncol(x);
if (m == 0L)
return(double(0L));
res <- apply(x, MARGIN=2L, FUN=function(x) {
weightedMedian(x, w=w, na.rm=na.rm, ...);
});
w <- NULL; # Not needed anymore
} else {
res <- colMedians(x, na.rm=na.rm);
}
res;
}
##############################################################################
# HISTORY:
# 2014-12-19 [HB]
# o CLEANUP: Made col- and rowWeightedMedians() plain R functions.
# 2013-11-23
# o MEMORY: Now (col|row)WeightedMedians() clean out allocated objects sooner.
# 2012-04-16
# o Now {col|row}WeightedMedians() no longer require aroma.light, because
# weightedMedian() is now in this package.
# 2009-06-17
# o Updated the Rdoc example to run conditionally on aroma.light, which is
# only a suggested package - not a required one. This in order to prevent
# R CMD check to fail on CRAN (as currently done on their OSX servers).
# 2008-02-02
# o Created from rowWeightedMeans.matrix.R.
##############################################################################
matrixStats/R/rowLogSumExps.R 0000644 0001751 0000144 00000005112 12542546242 015742 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowLogSumExps
# @alias colLogSumExps
# \alias{rowLogSumExps,matrix-method}
# \alias{colLogSumExps,matrix-method}
#
# @title "Accurately computes the logarithm of the sum of exponentials across rows or columns"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowLogSumExps
# @usage colLogSumExps
# }
#
# \arguments{
# \item{lx}{A @numeric NxK @matrix.
# Typically \code{lx} are \eqn{log(x)} values.}
# \item{na.rm}{If @TRUE, any missing values are ignored, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# A @numeric @vector of length N (K).
# }
#
# \section{Benchmarking}{
# These methods are implemented in native code and have been optimized
# for speed and memory.
# }
#
# \author{
# Native implementation by Henrik Bengtsson.
# Original R code by Nakayama ??? (Japan).
# }
#
# \seealso{
# To calculate the same on vectors, @see "logSumExp".
# }
#
# @keyword array
#*/###########################################################################
rowLogSumExps <- function(lx, na.rm=FALSE, dim.=dim(lx), ...) {
dim. <- as.integer(dim.)
hasNA <- TRUE;
res <- .Call("rowLogSumExps",
lx, dim.,
as.logical(na.rm), as.logical(hasNA), TRUE,
PACKAGE="matrixStats");
# Preserve names
names <- rownames(lx);
if (!is.null(names)) {
names(res) <- names;
}
res;
} # rowLogSumExps()
colLogSumExps <- function(lx, na.rm=FALSE, dim.=dim(lx), ...) {
dim. <- as.integer(dim.)
hasNA <- TRUE;
res <- .Call("rowLogSumExps",
lx, dim.,
as.logical(na.rm), as.logical(hasNA), FALSE,
PACKAGE="matrixStats");
# Preserve names
names <- colnames(lx);
if (!is.null(names)) {
names(res) <- names;
}
res;
} # rowLogSumExps()
############################################################################
# HISTORY:
# 2013-04-30 [HB]
# o SPEEDUP: (col|row)LogSumExps() are now implemented natively.
# o Renamed to (col|row)LogSumExps().
# 2013-04-29 [HB]
# o Added rowSumsInLogspace().
# o Renamed to colSumsInLogspace() which utilizes logSumExp().
# 2013-04-24 [HB]
# o Added colSumsP() adopted from log.colSums.exp() code contributed
# by Nakayama ??? (Japan) on 2013-01-08.
# o Created.
############################################################################
matrixStats/R/rowVars.R 0000644 0001751 0000144 00000007427 12542546242 014622 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowVars
# @alias rowVars
# @alias colVars
# \alias{rowVars,matrix-method}
# \alias{colVars,matrix-method}
#
# @title "Variance estimates for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowVars
# @usage colVars
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{center}{(optional) The center, defaults to the row means.}
# \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Additional arguments passed to \code{rowMeans()} and
# \code{rowSums()}.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# @examples "../incl/rowMethods.Rex"
#
# @author "HB"
#
# \seealso{
# See \code{rowMeans()} and \code{rowSums()} in @see "base::colSums".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowVars <- function(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) {
if (is.null(center)) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
sigma2 <- .Call("rowVars", x, dim., na.rm, hasNAs, TRUE, PACKAGE="matrixStats");
return(sigma2)
}
ncol <- ncol(x);
# Nothing to do?
if (ncol <= 1L) {
x <- rep(NA_real_, times=nrow(x));
return(x);
}
if (na.rm) {
# Count number of missing values in each row
nNA <- rowCounts(x, value=NA_real_, na.rm=FALSE);
# Number of non-missing values
n <- ncol - nNA;
hasNA <- any(nNA > 0L);
if (hasNA) {
# Set NA estimates for rows with less than two observations
n[n <= 1L] <- NA_integer_;
} else {
# No need to check for missing values below
na.rm <- FALSE;
}
} else {
# Assuming no missing values
n <- ncol;
}
# Spread
x <- x*x;
x <- rowMeans(x, na.rm=na.rm);
# Variance
x <- (x - center^2);
x * (n/(n-1));
}
colVars <- function(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...) {
if (is.null(center)) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
sigma2 <- .Call("rowVars", x, dim., na.rm, hasNAs, FALSE, PACKAGE="matrixStats");
return(sigma2)
}
nrow <- nrow(x);
# Nothing to do?
if (nrow <= 1L) {
x <- rep(NA_real_, times=ncol(x));
return(x);
}
if (na.rm) {
# Count number of missing values in each column
nNA <- colCounts(x, value=NA_real_, na.rm=FALSE);
# Number of non-missing values
n <- nrow - nNA;
hasNA <- any(nNA > 0L);
if (hasNA) {
# Set NA estimates for rows with less than two observations
n[n <= 1L] <- NA_integer_;
} else {
# No need to check for missing values below
na.rm <- FALSE;
}
} else {
# Assuming no missing values
n <- nrow;
}
# Spread
x <- x*x;
x <- colMeans(x, na.rm=na.rm);
# Variance
x <- (x - center^2);
x * (n/(n-1));
}
############################################################################
# HISTORY:
# 2015-02-09 [HB]
# o Now using na.rm=FALSE as the default.
# 2014-06-02 [HB]
# o Now rowVars() are utilizing rowCounts() instead of rowSums().
# o SPEEDUP: Made colVars() and colSds() significantly faster and
# rowVars() and rowSds() a slightly bit faster.
# o Now using NA_integer_ instead of NA.
# 2008-03-26 [HB]
# o Added argument 'center=NULL', cf. base::mad().
# o Created from genefilter::rowVars() by Wolfgang Huber.
############################################################################
matrixStats/R/product.R 0000644 0001751 0000144 00000000517 12542546242 014630 0 ustar hornik users product <- function(x, na.rm=FALSE, ...) {
.Call("productExpSumLog", x, as.logical(na.rm), TRUE, PACKAGE="matrixStats");
} # product()
############################################################################
# HISTORY:
# 2014-06-04 [HB]
# o Created.
############################################################################
matrixStats/R/weightedMedian.R 0000644 0001751 0000144 00000016553 12542546242 016075 0 ustar hornik users ############################################################################/**
# @RdocFunction weightedMedian
#
# \encoding{latin1}
#
# @title "Weighted Median Value"
#
# @synopsis
#
# \description{
# Computes a weighted median of a numeric vector.
# }
#
# \arguments{
# \item{x}{a @numeric @vector containing the values whose weighted median is
# to be computed.}
# \item{w}{a vector of weights the same length as \code{x} giving the weights
# to use for each element of \code{x}. Negative weights are treated
# as zero weights. Default value is equal weight to all values.}
# \item{na.rm}{a logical value indicating whether @NA values in
# \code{x} should be stripped before the computation proceeds,
# or not. If @NA, no check at all for @NAs is done.
# Default value is @NA (for efficiency).}
# \item{interpolate}{If @TRUE, linear interpolation is used to get a
# consistent estimate of the weighted median.}
# \item{ties}{If \code{interpolate == FALSE},
# a character string specifying how to solve ties between two
# \code{x}'s that are satisfying the weighted median criteria.
# Note that at most two values can satisfy the criteria.
# When \code{ties} is \code{"min"}, the smaller value of the two
# is returned and when it is \code{"max"}, the larger value is
# returned.
# If \code{ties} is \code{"mean"}, the mean of the two values is
# returned.
# Finally, if \code{ties} is \code{"weighted"} (or @NULL) a
# weighted average of the two are returned, where the weights are
# weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]},
# respectively.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# }
#
# \details{
# For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive
# weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S},
# the \emph{weighted median} is defined as the element \code{x[k]} for which
# the total weight of all elements \code{x[i] < x[k]} is less or equal to
# \code{S/2} and for which the total weight of all elements \code{x[i] > x[k]}
# is less or equal to \code{S/2} (c.f. [1]).
#
# If \code{w} is missing then all elements of \code{x} are given the same
# positive weight. If all weights are zero, @NA_real_ is returned.
#
# If one or more weights are \code{Inf}, it is the same as these weights
# have the same weight and the others has zero. This makes things easier for
# cases where the weights are result of a division with zero.
#
# The weighted median solves the following optimization problem:
#
# \deqn{\alpha^* = \arg_\alpha \min \sum_{k=1}{K} w_k |x_k-\alpha|}
# where \eqn{x=(x_1,x_2,\ldots,x_K)} are scalars and
# \eqn{w=(w_1,w_2,\ldots,w_K)} are the corresponding "weights" for
# each individual \eqn{x} value.
# }
#
# @examples "../incl/weightedMedian.Rex"
#
# \seealso{
# @see "stats::median", @see "base::mean" and @see "weightedMean".
# }
#
# \references{
# [1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms,
# The MIT Press, Massachusetts Institute of Technology, 1989.
# }
#
# \author{
# Henrik Bengtsson and Ola Hossjer, Centre for Mathematical
# Sciences, Lund University.
# Thanks to Roger Koenker, Econometrics, University of Illinois, for
# the initial ideas.
# }
#
# @keyword "univar"
# @keyword "robust"
#*/############################################################################
weightedMedian <- function(x, w=rep(1, times=length(x)), na.rm=FALSE, interpolate=is.null(ties), ties=NULL, ...) {
# Argument 'x':
# Argument 'w':
w <- as.double(w)
# Argument 'na.rm':
na.rm <- as.logical(na.rm)
if (is.na(na.rm)) na.rm <- FALSE
# Argument 'interpolate':
interpolate <- as.logical(interpolate)
# Argument 'ties':
if (is.null(ties)) {
tiesC <- 1L
} else {
if (ties == "weighted") {
tiesC <- 1L
} else if (ties == "min") {
tiesC <- 2L
} else if (ties == "max") {
tiesC <- 4L
} else if (ties == "mean") {
tiesC <- 8L
} else if (ties == "both") {
.Defunct("As of matrixStats (> 0.12.2), weightedMedian(..., interpolate=FALSE, ties=\"both\") is no longer supported. Use ties=\"min\" and then ties=\"max\" to achieve the same result.")
} else {
stop("Unknown value on 'ties': ", ties)
}
}
.Call("weightedMedian", x, w, na.rm, interpolate, tiesC, package="matrixStats")
} # weightedMedian()
###############################################################################
# HISTORY:
# 2015-01-26
# o CLEANUP: Drop old internally-renamed .weightedMedian().
# 2015-01-01
# o Dropped support for weightedMedian(..., ties="both").
# o BUG FIX: weightedMedian(..., ties="both") would give "Error in
# .subset(x, k, k + 1L) : incorrect number of dimensions" if there
# was a tie.
# 2014-06-03
# o SPEEDUP: Made weightedMedian() a plain function (was an S3 method).
# 2013-11-23
# o MEMORY: Now weightedMad() cleans out allocated objects sooner.
# 2013-09-26
# o Now utilizing anyMissing().
# 2012-09-10
# o Replaced an .Internal(psort(...)) call with new .psortKM().
# 2012-04-16
# o Added local function qsort() to weightedMedian(), which was adopted
# from calculateResidualSet() for ProbeLevelModel in aroma.affymatrix 2.5.0.
# o Added local function psortGet() to weightedMedian().
# 2011-04-08
# o Now weightedMedian() returns NA:s of the same mode as argument 'x'.
# 2006-04-21
# o Now negative weights are not check for, but instead treated as zero
# weights. This was done to minimize the overhead of the function.
# o Replace all "[[" and "[" with .subset2() and .subset() to minimize
# overhead from method dispatching.
# o Remove all calls to rm().
# o It is now possible to specify via na.rm=NA that there are no NAs.
# o Small speed up, especially when all weights were the same.
# 2006-01-31
# o Rdoc bug fix: 'reference' to 'references'.
# 2005-07-26
# o Argument 'interpolate' defaults to TRUE only if 'ties' is NULL.
# 2005-06-03
# o Renamed from weighted. median() to weightedMedian().
# o Made into a default function.
# o Now using setMethodS3().
# 2003-02-01
# o Update the Rdoc with comments about the method and interpolate argument.
# 2002-06-27
# * Reduced memory usage a little bit by calling rm() when possible; minimized
# the risk for automatic garbage collecting.
# * Improved speed when looking for largest index k s.t. wcum[k] < wmid.
# * Improved speed by making use of the R v1.5.0 internal quick sort.
# * Made na.rm=FALSE by default for efficiency.
# 2002-02-28
# * Calls plain median(x) in two cases: i) all weights are equal, ii) some of
# the weights are Inf's. See code for more information.
# 2002-02-14
# * BUG FIX: If interpolate==TRUE and sum(lows) == 0 then use k <- 1.
# * Added the interpolation version of the weighted median for consistent
# estimates.
# 2002-02-07
# * Optimized the code for speed.
# * Added support for zero and Inf weights.
# * Added the 'ties' argument.
# * Created!
# * Thanks to the following people for helping me out with this one:
# - David Brahm, brahm@alum.mit.edu
# - David Eppstein, eppstein@ics.uci.edu
# - Frank E Harrell Jr, fharrell@virginia.edu
# - Markus Jantti, markus.jantti@iki.fi
# - Roger Koenker, roger@ysidro.econ.uiuc.edu
###############################################################################
matrixStats/R/rowMads.R 0000644 0001751 0000144 00000005452 12542546242 014567 0 ustar hornik users rowMads <- function(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) {
## BACKWARD COMPATIBILITY:
## - Added to matrixStats 0.14.0.
## - Remove in matrixStats (>= 0.15.0)
if (!is.null(centers)) {
center <- centers
.Deprecated(msg="Argument 'centers' for matrixStats::rowMads() has been renamed to 'center'. Please update code accordingly.")
}
if (is.null(center)) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
constant = as.numeric(constant)
hasNAs <- TRUE
x <- .Call("rowMads", x, dim., constant, na.rm, hasNAs, TRUE, PACKAGE="matrixStats")
} else {
x <- x - center
x <- abs(x)
x <- rowMedians(x, na.rm=na.rm, ...)
x <- constant*x
}
x
} # rowMads()
colMads <- function(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...) {
## BACKWARD COMPATIBILITY:
## - Added to matrixStats 0.14.0.
## - Remove in matrixStats (>= 0.15.0)
if (!is.null(centers)) {
center <- centers
.Deprecated(msg="Argument 'centers' for matrixStats::colMads() has been renamed to 'center'. Please update code accordingly.")
}
if (is.null(center)) {
dim. <- as.integer(dim.)
na.rm <- as.logical(na.rm)
constant = as.numeric(constant)
hasNAs <- TRUE
x <- .Call("rowMads", x, dim., constant, na.rm, hasNAs, FALSE, PACKAGE="matrixStats")
} else {
## SLOW:
# for (cc in seq(length=ncol(x))) {
# x[,cc] <- x[,cc] - center[cc]
# }
## FAST:
x <- t_tx_OP_y(x, center, OP="-", na.rm=FALSE)
x <- abs(x)
x <- colMedians(x, na.rm=na.rm, ...)
x <- constant*x
}
x
} # colMads()
############################################################################
# HISTORY:
# 2015-02-10 [HB]
# o CONSISTENCY: Renamed argument 'centers' of col- and rowMads() to
# 'center'. This is consistent with col- and rowVars(). Added
# backward compatibility code/test for the migration.
# 2014-11-17 [HB]
# o SPEEDUP: Implemented (col|row)Mads(..., centers=NULL) in native code.
# 2012-03-19 [HJ]
# o Changed default value of centers to row/colMedians(x,...)
# 2012-03-04 [HC]
# o BUG FIX: colMads() would return the incorrect estimates. This bug
# was introduced in matrixStats v0.4.0 (2011-11-11).
# 2011-11-11 [HB]
# o Dropped the previously introduced expansion of 'center' in rowMads()
# and colMads(). It added unnecessary overhead if not needed.
# 2011-10-13 [HJ]
# o Implemented colMads() as rowMads() by using the improved colMedians().
# o Now rowMads() expands 'center' to a matrix of the same dimensions as
# 'x'. This is not actually necessary, but it enforces that 'x' must be
# a matrix.
# 2008-03-26 [HB]
# o Created.
############################################################################
matrixStats/R/varDiff.R 0000644 0001751 0000144 00000016727 12542546242 014543 0 ustar hornik users ###########################################################################/**
# @RdocFunction varDiff
# @alias sdDiff
# @alias madDiff
# @alias iqrDiff
# @alias colVarDiffs
# @alias rowVarDiffs
# @alias colSdDiffs
# @alias rowSdDiffs
# @alias colMadDiffs
# @alias rowMadDiffs
# @alias colIQRDiffs
# @alias rowIQRDiffs
#
# @title "Estimation of scale based on sequential-order differences"
#
# \description{
# @get "title", corresponding to the scale estimates provided by
# @see "stats::var", @see "stats::sd", @see "stats::mad" and
# @see "stats::IQR".
# }
#
# \usage{
# @usage varDiff
# @usage colVarDiffs
# @usage rowVarDiffs
#
# @usage sdDiff
# @usage colSdDiffs
# @usage rowSdDiffs
#
# @usage madDiff
# @usage colMadDiffs
# @usage rowMadDiffs
#
# @usage iqrDiff
# @usage colIQRDiffs
# @usage rowIQRDiffs
# }
#
# \arguments{
# \item{x}{A @numeric @vector of length N or a @numeric NxK @matrix.}
# \item{na.rm}{If @TRUE, @NAs are excluded, otherwise not.}
# \item{diff}{The positional distance of elements for which the
# difference should be calculated.}
# \item{trim}{A @double in [0,1/2] specifying the fraction of
# observations to be trimmed from each end of (sorted) \code{x}
# before estimation.}
# \item{constant}{A scale factor adjusting for asymptotically
# normal consistency.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length 1, length N, or length K.
# }
#
# @author "HB"
#
# \seealso{
# For the corresponding non-differentiated estimates, see
# @see "stats::var", @see "stats::sd", @see "stats::mad" and
# @see "stats::IQR".
# Internally, @see "diff2" is used which is a faster version
# of @see "base::diff".
# }
#
# \details{
# Note that n-order difference MAD estimates, just like the ordinary
# MAD estimate by @see "stats::mad", apply a correction factor such
# that the estimates are consistent with the standard deviation
# under Gaussian distributions.
#
# The interquartile range (IQR) estimates does \emph{not} apply such
# a correction factor. If asymptotically normal consistency is wanted,
# the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))},
# which is half of that used for MAD estimates, which is
# \code{1 / qnorm(3/4)}. This correction factor needs to be applied
# manually, i.e. there is no \code{constant} argument for the IQR
# functions.
# }
#
# \references{
# [1] J. von Neumann et al., \emph{The mean square successive difference}.
# Annals of Mathematical Statistics, 1941, 12, 153-162.\cr
# }
#
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
varDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
if (na.rm)
x <- x[!is.na(x)]
# Nothing to do?
n <- length(x)
if (n <= 1L)
return(NA_real_)
# Calculate differences?
if (diff > 0L) {
x <- diff2(x, differences=diff)
# Nothing to do?
n <- length(x)
if (n == 1L)
return(NA_real_)
}
# Trim?
if (trim > 0 && n > 0L) {
if (anyMissing(x)) return(NA_real_)
lo <- floor(n*trim)+1
hi <- (n+1)-lo
partial <- unique(c(lo, hi))
x <- sort.int(x, partial=partial)
x <- x[lo:hi]
}
# Estimate
var <- var(x, na.rm=FALSE)
x <- NULL # Not needed anymore
# Correction for the differentiation
var / (2^diff)
} # varDiff()
sdDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
if (na.rm)
x <- x[!is.na(x)]
# Nothing to do?
n <- length(x)
if (n <= 1L)
return(NA_real_)
# Calculate differences?
if (diff > 0L) {
x <- diff2(x, differences=diff)
# Nothing to do?
n <- length(x)
if (n == 1L)
return(NA_real_)
}
# Trim?
if (trim > 0 && n > 0L) {
if (anyMissing(x)) return(NA_real_)
lo <- floor(n*trim)+1
hi <- (n+1)-lo
partial <- unique(c(lo, hi))
x <- sort.int(x, partial=partial)
x <- x[lo:hi]
}
# Estimate
sd <- sd(x, na.rm=FALSE)
x <- NULL # Not needed anymore
# Correction for the differentiation
sd / (sqrt(2)^diff)
} # sdDiff()
madDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, constant=1.4826, ...) {
if (na.rm)
x <- x[!is.na(x)]
# Nothing to do?
n <- length(x)
if (n <= 0L)
return(NA_real_)
# Calculate differences?
if (diff > 0L) {
x <- diff2(x, differences=diff)
# Nothing to do?
n <- length(x)
if (n == 1L)
return(NA_real_)
}
# Trim?
if (trim > 0 && n > 0L) {
if (anyMissing(x)) return(NA_real_)
lo <- floor(n*trim)+1
hi <- (n+1)-lo
partial <- unique(c(lo, hi))
x <- sort.int(x, partial=partial)
x <- x[lo:hi]
}
# Estimate
sd <- mad(x, na.rm=FALSE, constant=constant, ...)
x <- NULL # Not needed anymore
# Correction for the differentiation
sd / (sqrt(2)^diff)
} # madDiff()
iqrDiff <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
if(na.rm) {
x <- x[!is.na(x)]
} else if (anyMissing(x)) {
return(NA_real_)
}
# At this point, there should be no missing values
# Nothing to do?
n <- length(x)
if (n == 0L) {
return(NA_real_)
} else if (n == 1L) {
return(0)
}
# Calculate differences?
if (diff > 0L) {
x <- diff2(x, differences=diff)
# Nothing to do?
n <- length(x)
if (n == 1L)
return(0)
}
# Trim?
if (trim > 0 && n > 0L) {
lo <- floor(n*trim)+1
hi <- (n+1)-lo
partial <- unique(c(lo, hi))
x <- sort.int(x, partial=partial)
x <- x[lo:hi]
}
# Estimate
qs <- quantile(x, probs=c(0.25, 0.75), na.rm=FALSE, names=FALSE, ...)
x <- NULL # Not needed anymore
iqr <- (qs[2L] - qs[1L])
# Correction for the differentiation
iqr / (sqrt(2)^diff)
} # iqrDiff()
rowVarDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=1L, FUN=varDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
colVarDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=2L, FUN=varDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
rowSdDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=1L, FUN=sdDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
colSdDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=2L, FUN=sdDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
rowMadDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=1L, FUN=madDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
colMadDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=2L, FUN=madDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
rowIQRDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=1L, FUN=iqrDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
colIQRDiffs <- function(x, na.rm=FALSE, diff=1L, trim=0, ...) {
apply(x, MARGIN=2L, FUN=iqrDiff, na.rm=na.rm, diff=diff, trim=trim, ...)
}
############################################################################
# HISTORY:
# 2015-01-16
# o Added iqrDiff() and (col|row)IQRDiffs().
# 2014-14-19
# o Added (col|row)(Var|Sd|Mad)Diffs() for completeness.
# 2014-11-10
# o Turned *Diff() into a function.
# 2014-05-24
# o Turned *Diff() into an S3 method (was S4).
# 2014-04-26
# o Added argument 'trim' to madDiff(), sdDiff() and varDiff().
# 2013-11-23
# o MEMORY: Now *Diff() cleans out allocated objects sooner.
# 2012-07-17
# o Added the reference to von Neumann et al. (1941).
# 2009-02-02
# o Added Rdoc comments.
# 2008-04-13
# o Added varDiff(), sdDiff() and madDiff().
# 2008-04-10 [on UA930 SFO-LHR]
# o Created.
############################################################################
matrixStats/R/rowTabulates.R 0000644 0001751 0000144 00000010635 12542546242 015626 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowTabulates
# @alias colTabulates
#
# @title "Tabulates the values in a matrix by row (column)"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowTabulates
# @usage colTabulates
# }
#
# \arguments{
# \item{x}{An @integer or @raw NxK @matrix.}
# \item{values}{An @vector of J values of count. If @NULL, all (unique)
# values are counted.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a NxJ (KxJ) @matrix where
# N (K) is the number of row (column) @vectors tabulated and
# J is the number of values counted.
# }
#
# @examples "../incl/rowTabulates.Rex"
#
# @author "HB"
#
# @keyword utilities
#*/###########################################################################
rowTabulates <- function(x, values=NULL, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'x':
if (is.integer(x)) {
} else if (is.raw(x)) {
} else {
stop("Argument 'x' is not of type integer or raw: ", class(x)[1]);
}
# Argument 'values':
if (is.null(values)) {
values <- as.vector(x);
values <- unique(values);
if (is.raw(values)) {
values <- as.integer(values);
values <- sort(values);
# WORKAROUND: Cannot use "%#x" because it gives an error OSX with
# R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20
names <- sprintf("%x", values);
names <- paste("0x", names, sep="");
values <- as.raw(values);
} else {
values <- sort(values);
names <- as.character(values);
}
} else {
if (is.raw(values)) {
names <- sprintf("%x", as.integer(values));
names <- paste("0x", names, sep="");
} else {
names <- as.character(values);
}
}
nbrOfValues <- length(values);
counts <- matrix(0L, nrow=nrow(x), ncol=nbrOfValues);
colnames(counts) <- names;
for (kk in seq(length=nbrOfValues)) {
counts[,kk] <- rowCounts(x, value=values[kk], ...);
}
counts;
}
colTabulates <- function(x, values=NULL, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'x':
if (is.integer(x)) {
} else if (is.raw(x)) {
} else {
stop("Argument 'x' is not of type integer or raw: ", class(x)[1]);
}
# Argument 'values':
if (is.null(values)) {
values <- as.vector(x);
values <- unique(values);
if (is.raw(values)) {
values <- as.integer(values);
values <- sort(values);
# WORKAROUND: Cannot use "%#x" because it gives an error OSX with
# R v2.9.0 devel (2009-01-13 r47593b) at R-forge. /HB 2009-06-20
names <- sprintf("%x", values);
names <- paste("0x", names, sep="");
values <- as.raw(values);
} else {
values <- sort(values);
names <- as.character(values);
}
} else {
if (is.raw(values)) {
names <- sprintf("%x", as.integer(values));
names <- paste("0x", names, sep="");
} else {
names <- as.character(values);
}
}
transpose <- FALSE
if (transpose) {
## nbrOfValues <- length(values);
## counts <- matrix(0L, nrow=nbrOfValues, ncol=ncol(x));
## rownames(counts) <- names;
## for (kk in seq(length=nbrOfValues)) {
## counts[kk,] <- colCounts(x, value=values[kk], ...);
## }
} else {
nbrOfValues <- length(values);
counts <- matrix(0L, nrow=ncol(x), ncol=nbrOfValues);
colnames(counts) <- names;
for (kk in seq(length=nbrOfValues)) {
counts[,kk] <- colCounts(x, value=values[kk], ...);
}
}
counts;
}
############################################################################
# HISTORY:
# 2014-12-19 [HB]
# o CLEANUP: Made col- and rowTabulates() plain R functions.
# 2014-11-16
# o Now colTabulates(x) no longer calls rowTabulates(t(x)).
# 2014-06-02
# o Made rowTabulates() an S3 method (was S4).
# o SPEEDUP: Now rowTabulates() utilizes rowCounts().
# 2009-06-20
# WORKAROUND: Cannot use "%#x" in rowTabulates() when creating the column
# names of the result matrix. It gav an error OSX with R v2.9.0 devel
# (2009-01-13 r47593b) current the OSX server at R-forge.
# 2009-02-02
# o Fixed Rdoc comments.
# 2008-07-01
# o Created.
############################################################################
matrixStats/R/weightedMad.R 0000644 0001751 0000144 00000011636 12542546242 015376 0 ustar hornik users ############################################################################/**
# @RdocFunction weightedMad
# @alias rowWeightedMads
# @alias colWeightedMads
#
# @title "Weighted Median Absolute Deviation (MAD)"
#
# \usage{
# @usage weightedMad
# @usage colWeightedMads
# @usage rowWeightedMads
# }
#
# \description{
# Computes a weighted MAD of a numeric vector.
# }
#
# \arguments{
# \item{x}{a @numeric @vector containing the values whose weighted MAD is
# to be computed.}
# \item{w}{a vector of weights the same length as \code{x} giving the weights
# to use for each element of \code{x}. Negative weights are treated
# as zero weights. Default value is equal weight to all values.}
# \item{na.rm}{a logical value indicating whether @NA values in
# \code{x} should be stripped before the computation proceeds,
# or not. If @NA, no check at all for @NAs is done.
# Default value is @NA (for efficiency).}
# \item{constant}{A @numeric scale factor, cf. @see "stats::mad".}
# \item{center}{Optional @numeric scalar specifying the center
# location of the data. If @NULL, it is estimated from data.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# }
#
# \section{Missing values}{
# Missing values are dropped at the very beginning, if argument
# \code{na.rm} is @TRUE, otherwise not.
# }
#
# @examples "../incl/weightedMad.Rex"
#
# \seealso{
# For the non-weighted MAD, see @see "stats::mad".
# Internally @see "weightedMedian" is used to
# calculate the weighted median.
# }
#
# @author "HB"
#
# @keyword "univar"
# @keyword "robust"
#*/############################################################################
weightedMad <- function(x, w, na.rm=FALSE, constant=1.4826, center=NULL, ...) {
# Argument 'x':
n <- length(x);
# Argument 'w':
if (missing(w)) {
# By default use weights that are one.
w <- rep(1, times=n);
} else if (length(w) != n) {
stop("The number of elements in arguments 'w' and 'x' does not match: ", length(w), " != ", n);
}
# Argument 'na.rm':
naValue <- NA;
storage.mode(naValue) <- storage.mode(x);
# Remove values with zero (and negative) weight. This will:
# 1) take care of the case when all weights are zero,
# 2) it will most likely speed up the sorting.
tmp <- (w > 0);
if (!all(tmp)) {
x <- .subset(x, tmp);
w <- .subset(w, tmp);
n <- length(x);
}
tmp <- NULL; # Not needed anymore
# Drop missing values?
if (na.rm) {
keep <- which(!is.na(x) & !is.na(w));
x <- .subset(x, keep);
w <- .subset(w, keep);
n <- length(x);
keep <- NULL; # Not needed anymore
} else if (anyMissing(x)) {
return(naValue);
}
# Are any weights Inf? Then treat them with equal weight and all others
# with weight zero.
tmp <- is.infinite(w);
if (any(tmp)) {
keep <- tmp;
x <- .subset(x, keep);
n <- length(x);
w <- rep(1, times=n);
keep <- NULL; # Not needed anymore
}
tmp <- NULL; # Not needed anymore
# Are there any values left to calculate the weighted median of?
# This is consistent with how stats::mad() works.
if (n == 0L) {
return(naValue);
} else if (n == 1L) {
zeroValue <- 0;
storage.mode(zeroValue) <- storage.mode(x);
return(zeroValue);
}
# Estimate the mean?
if (is.null(center)) {
center <- weightedMedian(x, w=w, na.rm=NA);
}
# Estimate the standard deviation
x <- abs(x - center);
sigma <- weightedMedian(x, w=w, na.rm=NA);
x <- w <- NULL; # Not needed anymore
# Rescale for normal distributions
sigma <- constant * sigma;
sigma;
} # weightedMad()
rowWeightedMads <- function(x, w=NULL, na.rm=FALSE, ...) {
apply(x, MARGIN=1L, FUN=weightedMad, w=w, na.rm=na.rm, ...)
} # rowWeightedMads()
colWeightedMads <- function(x, w=NULL, na.rm=FALSE, ...) {
apply(x, MARGIN=2L, FUN=weightedMad, w=w, na.rm=na.rm, ...)
} # colWeightedMads()
############################################################################
# HISTORY:
# 2014-11-10
# o Turned weightedMad() into a plain function.
# 2013-11-23
# o MEMORY: Now weightedMad() cleans out allocated objects sooner.
# 2013-09-26
# o Now utilizing anyMissing().
# 2012-03-22
# o Added an Rdoc example, which also serves as a redundancy test.
# o SPEEDUP: Now weightedMad() lets weightedMedian() know that there are
# now missing values remaining.
# o Now weightedMad() is smarter about returning early, e.g. if missing
# values are not removed, there are none or only one value left.
# o Added validation of argument 'w' for weightedMad().
# o Added Rdoc comments to weightedMad().
# o Made weightedMad() into a default method.
# 2009-05-13
# o Added weightedMad().
# o Created.
############################################################################
matrixStats/R/rowCounts.R 0000644 0001751 0000144 00000016663 12542546242 015164 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowCounts
# @alias colCounts
# @alias count
# @alias allValue
# @alias anyValue
# @alias rowAnys
# @alias colAnys
# @alias rowAlls
# @alias colAlls
#
# @title "Counts the number of TRUE values in each row (column) of a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage count
# @usage rowCounts
# @usage colCounts
# @usage rowAlls
# @usage colAlls
# @usage rowAnys
# @usage colAnys
# }
#
# \arguments{
# \item{x}{An NxK @matrix or an N*K @vector.}
# \item{value}{A value to search for.}
# \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Not used.}
# }
#
# \value{
# \code{rowCounts()} (\code{colCounts()}) returns an @integer @vector
# of length N (K).
# The other methods returns a @logical @vector of length N (K).
# }
#
# \details{
# @include "../incl/rowNNN_by_vector.Rd"
# }
#
# @examples "../incl/rowCounts.Rex"
#
# @author "HB"
#
# @keyword array
# @keyword logic
# @keyword iteration
# @keyword univar
#*/###########################################################################
rowCounts <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
# Argument 'x':
if (is.matrix(x)) {
} else if (is.vector(x)) {
} else {
stop("Argument 'x' must be a matrix or a vector: ", mode(x)[1L])
}
# Argument 'dim.':
dim. <- as.integer(dim.)
# Argument 'value':
if (length(value) != 1L) {
stop("Argument 'value' has to be a single value: ", length(value))
}
# Coerce 'value' to matrix
storage.mode(value) <- storage.mode(x)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Count
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("rowCounts", x, dim., value, 2L, na.rm, hasNAs, PACKAGE="matrixStats")
} else {
if (is.vector(x)) dim(x) <- dim.
if (is.na(value)) {
counts <- apply(x, MARGIN=1L, FUN=function(x) sum(is.na(x)))
} else {
counts <- apply(x, MARGIN=1L, FUN=function(x) sum(x == value, na.rm=na.rm))
}
}
as.integer(counts)
} # rowCounts()
colCounts <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
# Argument 'x':
if (is.matrix(x)) {
} else if (is.vector(x)) {
} else {
stop("Argument 'x' must be a matrix or a vector: ", mode(x)[1L])
}
# Argument 'dim.':
dim. <- as.integer(dim.)
# Argument 'value':
if (length(value) != 1L) {
stop("Argument 'value' has to be a single value: ", length(value))
}
# Coerce 'value' to matrix
storage.mode(value) <- storage.mode(x)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Count
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("colCounts", x, dim., value, 2L, na.rm, hasNAs, PACKAGE="matrixStats")
} else {
if (is.vector(x)) dim(x) <- dim.
if (is.na(value)) {
counts <- apply(x, MARGIN=2L, FUN=function(x) sum(is.na(x)))
} else {
counts <- apply(x, MARGIN=2L, FUN=function(x) sum(x == value, na.rm=na.rm))
}
}
as.integer(counts)
} # colCounts()
count <- function(x, value=TRUE, na.rm=FALSE, ...) {
# Argument 'x':
if (!is.vector(x)) {
stop("Argument 'x' must be a vector: ", mode(x)[1L])
}
# Argument 'value':
if (length(value) != 1L) {
stop("Argument 'value' has to be a single value: ", length(value))
}
# Coerce 'value' to matrix
storage.mode(value) <- storage.mode(x)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Count
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("count", x, value, 2L, na.rm, hasNAs, PACKAGE="matrixStats")
} else {
if (is.na(value)) {
counts <- sum(is.na(x))
} else {
counts <- sum(x == value, na.rm=na.rm)
}
}
as.integer(counts)
} # count()
rowAlls <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("rowCounts", x, dim., value, 0L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
rowAlls(is.na(x), na.rm=na.rm, dim.=dim., ...)
} else {
rowAlls(x == value, na.rm=na.rm, dim.=dim., ...)
}
}
}
colAlls <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("colCounts", x, dim., value, 0L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
colAlls(is.na(x), na.rm=na.rm, dim.=dim., ...)
} else {
colAlls(x == value, na.rm=na.rm, dim.=dim., ...)
}
}
}
allValue <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("count", x, value, 0L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
allValue(is.na(x), na.rm=na.rm, ...)
} else {
allValue(x == value, na.rm=na.rm, ...)
}
}
}
rowAnys <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("rowCounts", x, dim., value, 1L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
rowAnys(is.na(x), na.rm=na.rm, dim.=dim., ...)
} else {
rowAnys(x == value, na.rm=na.rm, dim.=dim., ...)
}
}
}
colAnys <- function(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("colCounts", x, dim., value, 1L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
colAnys(is.na(x), na.rm=na.rm, dim.=dim., ...)
} else {
colAnys(x == value, na.rm=na.rm, dim.=dim., ...)
}
}
}
anyValue <- function(x, value=TRUE, na.rm=FALSE, ...) {
if (is.numeric(x) || is.logical(x)) {
na.rm <- as.logical(na.rm)
hasNAs <- TRUE
counts <- .Call("count", x, value, 1L, na.rm, hasNAs, PACKAGE="matrixStats")
as.logical(counts)
} else {
if (is.na(value)) {
anyValue(is.na(x), na.rm=na.rm, ...)
} else {
anyValue(x == value, na.rm=na.rm, ...)
}
}
}
############################################################################
# HISTORY:
# 2014-11-14 [HB]
# o SPEEDUP: Now colCounts() is implemented in native code.
# o CLEANUP: Now (col|row)Count(x) when x is logical utilizes the
# same code as as.integer(x).
# o As a part of transitioning to plain functions, rowCounts() for matrix
# was turned into a default method that understands matrix inputs.
# It also understand vector input, if argument 'dim' is given.
# 2014-06-02 [HB]
# o Made rowCounts() an S3 method (was S4).
# o Added argument 'value' to col- and rowCounts().
# 2008-03-25 [HB]
# o Created.
############################################################################
matrixStats/R/psortKM.R 0000644 0001751 0000144 00000000607 12542546242 014547 0 ustar hornik users .psortKM <- function(x, k=length(x), m=1L, ...) {
.Call("psortKM", as.numeric(x), k=as.integer(k), m=as.integer(m), PACKAGE="matrixStats");
} # .psortKM()
############################################################################
# HISTORY:
# 2012-09-10
# o Added internal .psortKM() method.
# o Created.
############################################################################
matrixStats/R/rowDiffs.R 0000644 0001751 0000144 00000002661 12542546242 014735 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowDiffs
# @alias colDiffs
#
# @title "Calculates difference for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowDiffs
# @usage colDiffs
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{lag}{An @integer specifying the lag.}
# \item{differences}{An @integer specifying the order of difference.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric Nx(K-1) or (N-1)xK @matrix.
# }
#
# @examples "../incl/rowDiffs.Rex"
#
# @author "HB"
#
# \seealso{
# See also @see "diff2".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowDiffs <- function(x, lag=1L, differences=1L, ...) {
.Call("rowDiffs", x, dim(x), as.integer(lag), as.integer(differences), TRUE, PACKAGE="matrixStats")
}
colDiffs <- function(x, lag=1L, differences=1L, ...) {
.Call("rowDiffs", x, dim(x), as.integer(lag), as.integer(differences), FALSE, PACKAGE="matrixStats")
}
############################################################################
# HISTORY:
# 2014-11-15 [HB]
# o SPEEDUP: Now colDiffs(x) no longer uses rowDiffs(t(x)).
# 2008-03-26 [HB]
# o Created.
############################################################################
matrixStats/R/logSumExp.R 0000644 0001751 0000144 00000006522 12542546242 015075 0 ustar hornik users ###########################################################################/**
# @RdocFunction logSumExp
#
# @title "Accurately computes the logarithm of the sum of exponentials"
#
# \description{
# @get "title", that is, \eqn{log(sum(exp(lx)))}.
# If \eqn{lx = log(x)}, then this is equivalently to calculating
# \eqn{log(sum(x))}.
#
# This function, which avoid numerical underflow, is often used when
# computing the logarithm of the sum of small numbers (\eqn{|x| << 1})
# such as probabilities.
# }
#
# @synopsis
#
# \arguments{
# \item{lx}{A @numeric @vector.
# Typically \code{lx} are \eqn{log(x)} values.}
# \item{na.rm}{If @TRUE, any missing values are ignored, otherwise not.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# }
#
# \details{
# This is function is more accurate than \code{log(sum(exp(lx)))}
# when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}.
# The implementation of this function is based on the observation that
# \deqn{
# log(a + b)
# = [ la = log(a), lb = log(b) ]
# = log( exp(la) + exp(lb) )
# = la + log ( 1 + exp(lb - la) )
# }
# Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is
# less likely that the computation of \eqn{1 + exp(lb - la)} will
# not underflow/overflow numerically. Because of this, the overall
# result from this function should be more accurate.
# Analogously to this, the implementation of this function finds the
# maximum value of \code{lx} and subtracts it from the remaining values
# in \code{lx}.
# }
#
# \section{Benchmarking}{
# This method is optimized for correctness, that avoiding underflowing.
# It is implemented in native code that is optimized for speed and memory.
# }
#
# @examples "../incl/logSumExp.Rex"
#
# @author "HB"
#
# \seealso{
# To compute this function on rows or columns of a matrix,
# see @see "rowLogSumExps".
#
# For adding \emph{two} double values in native code, R provides
# the C function \code{logspace_add()} [1].
# For properties of the log-sum-exponential function, see [2].
# }
#
# \references{
# [1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr
# [2] Laurent El Ghaoui,
# \emph{Hyper-Textbook: Optimization Models and Applications},
# University of California at Berkeley, August 2012.
# (Chapter 'Log-Sum-Exp (LSE) Function and Properties',
# \url{http://inst.eecs.berkeley.edu/~ee127a/book/login/def_lse_fcn.html})
# \cr
# [3] R-help thread \emph{logsumexp function in R}, 2011-02-17.
# \url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr
# }
#
#*/###########################################################################
logSumExp <- function(lx, na.rm=FALSE, ...) {
hasNA <- TRUE;
.Call("logSumExp", as.numeric(lx), as.logical(na.rm), as.logical(hasNA),
PACKAGE="matrixStats");
} # logSumExp()
##############################################################################
# HISTORY:
# 2013-04-30 [HB]
# o Added native implementation.
# o Renamed to logSumExp(), because that seems to be the naming convention
# elsewhere, e.g. Python.
# 2013-04-29 [HB]
# o Added sumInLogspace().
# o Created.
##############################################################################
matrixStats/R/binMeans.R 0000644 0001751 0000144 00000012474 12542546242 014711 0 ustar hornik users ############################################################################/**
# @RdocFunction binMeans
#
# @title "Fast mean calculations in non-overlapping bins"
#
# @synopsis
#
# \description{
# Computes the sample means in non-overlapping bins
# }
#
# \arguments{
# \item{y}{A @numeric @vector of K values to calculate means on.}
# \item{x}{A @numeric @vector of K positions for to be binned.}
# \item{bx}{A @numeric @vector of B+1 ordered positions specifying
# the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ...,
# \code{[bx[B],bx[B+1])}.}
# \item{na.rm}{If @TRUE, missing values in \code{y} are dropped
# before calculating the mean, otherwise not.}
# \item{count}{If @TRUE, the number of data points in each bins is
# returned as attribute \code{count}, which is an @integer @vector
# of length B.}
# \item{right}{If @TRUE, the bins are right-closed (left open),
# otherwise left-closed (right open).}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric @vector of length B.
# }
#
# \details{
# \code{binMeans(x, bx, right=TRUE)} gives equivalent results as
# \code{rev(binMeans(-x, bx=sort(-bx), right=FALSE))}, but is faster.
# }
#
# \section{Missing and non-finite values}{
# Data points where either of \code{y} and \code{x} is missing are
# dropped (and therefore are also not counted).
# Non-finite values in \code{y} are not allowed and gives an error.
# Missing values in \code{bx} are not allowed and gives an error.
# }
#
# \section{Empty bins}{
# Empty bins will get value @NaN.
# }
#
# @examples "../incl/binMeans.Rex"
#
# \seealso{
# @see "binCounts".
# @see "stats::aggregate" and @see "base::mean".
# }
#
# \references{
# [1] R-devel thread \emph{Fastest non-overlapping binning mean function
# out there?} on Oct 3, 2012\cr
# }
#
# \author{
# Henrik Bengtsson with initial code contributions by Martin Morgan [1].
# }
#
# @keyword "univar"
#*/############################################################################
binMeans <- function(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE, ...) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'y':
if (!is.numeric(y)) {
stop("Argument 'y' is not numeric: ", mode(y));
}
if (any(is.infinite(y))) {
stop("Argument 'y' must not contain Inf values.");
}
n <- length(y);
# Argument 'x':
if (!is.numeric(x)) {
stop("Argument 'x' is not numeric: ", mode(x));
}
if (length(x) != n) {
stop("Argument 'y' and 'x' are of different lengths: ", length(y), " != ", length(x));
}
# Argument 'bx':
if (!is.numeric(bx)) {
stop("Argument 'bx' is not numeric: ", mode(bx));
}
if (any(is.infinite(bx))) {
stop("Argument 'bx' must not contain Inf values.");
}
if (is.unsorted(bx)) {
stop("Argument 'bx' is not ordered.");
}
# Argument 'na.rm':
if (!is.logical(na.rm)) {
stop("Argument 'na.rm' is not logical: ", mode(na.rm));
}
# Argument 'count':
if (!is.logical(count)) {
stop("Argument 'count' is not logical: ", mode(count));
}
# Argument 'right':
right <- as.logical(right);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Preprocessing of (x,y)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Drop missing values in 'x'
keep <- which(!is.na(x));
if (length(keep) < n) {
x <- x[keep];
y <- y[keep];
n <- length(y);
}
keep <- NULL; # Not needed anymore
# Drop missing values in 'y'?
if (na.rm) {
keep <- which(!is.na(y));
if (length(keep) < n) {
x <- x[keep];
y <- y[keep];
}
keep <- NULL; # Not needed anymore
}
# Order (x,y) by increasing x.
# If 'x' is already sorted, the overhead of (re)sorting is
# relatively small.
x <- sort.int(x, method="quick", index.return=TRUE);
y <- y[x$ix];
x <- x$x;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Bin
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
y <- as.numeric(y);
x <- as.numeric(x);
bx <- as.numeric(bx);
count <- as.logical(count);
.Call("binMeans", y, x, bx, count, right, PACKAGE="matrixStats");
} # binMeans()
############################################################################
# HISTORY:
# 2014-12-29 [HB]
# o SPEEDUP: Now binCounts() and binMeans() uses is.unsorted() instead
# of o <- order(); any(diff(o) != 1L).
# 2014-12-17 [HB]
# o CLEANUP: Made binCounts() and binMeans() plain R functions.
# 2013-11-23 [HB]
# o MEMORY: binMeans() cleans out more temporary variables as soon as
# possible such that the garbage collector can remove them sooner.
# 2013-05-10 [HB]
# o SPEEDUP: Now binMeans() and binCounts() use Hoare's Quicksort
# method for sorting 'x'.
# 2012-10-04 [HB in Anahola]
# o Added argument 'na.rm' to binMeans().
# o Updated Rdocs.
# 2012-10-03 [HB]
# o Added binMeans() based on native code adopted from code by
# Martin Morgan, Fred Hutchinson Cancer Research Center, Seattle.
# o Created.
############################################################################
matrixStats/R/999.package.R 0000644 0001751 0000144 00000001263 12542546242 015073 0 ustar hornik users #########################################################################/**
# @RdocPackage matrixStats
#
# \description{
# @eval "packageDescription('matrixStats')$Description"
# }
#
# \section{Installation}{
# To install this package, please do:
# \preformatted{
# install.packages("matrixStats")
# }
# }
#
# \section{Vignettes}{
# For an overview of the package, see the '\href{../doc/index.html}{vignettes}';
# \enumerate{
# \item Summary of functions.
# }
# }
#
# \section{How to cite this package}{
# @eval "x <- citation('matrixStats'); format(x, 'textVersion')"
# }
#
# @author "*"
#*/#########################################################################
matrixStats/R/weightedVar.R 0000644 0001751 0000144 00000011272 12542546242 015421 0 ustar hornik users ############################################################################/**
# @RdocFunction weightedVar
# @alias weightedSd
# @alias colWeightedVars
# @alias rowWeightedVars
# @alias colWeightedSds
# @alias rowWeightedSds
#
# @title "Weighted variance and weighted standard deviation"
#
# \usage{
# @usage weightedVar
# @usage colWeightedVars
# @usage rowWeightedVars
#
# @usage weightedSd
# @usage colWeightedSds
# @usage rowWeightedSds
# }
#
#
# \description{
# Computes a weighted variance / standard deviation of a numeric
# vector or across rows or columns of a matrix.
# }
#
# \arguments{
# \item{x}{a @numeric @vector containing the values whose
# weighted variance is to be computed.}
# \item{w}{a vector of weights the same length as \code{x} giving the weights
# to use for each element of \code{x}. Negative weights are treated
# as zero weights. Default value is equal weight to all values.}
# \item{na.rm}{a logical value indicating whether @NA values in
# \code{x} should be stripped before the computation proceeds,
# or not. If @NA, no check at all for @NAs is done.
# Default value is @NA (for efficiency).}
# \item{center}{Optional @numeric scalar specifying the center
# location of the data. If @NULL, it is estimated from data.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns a @numeric scalar.
# }
#
# \section{Missing values}{
# Missing values are dropped at the very beginning, if argument
# \code{na.rm} is @TRUE, otherwise not.
# }
#
# \seealso{
# For the non-weighted variance, see @see "stats::var".
# }
#
# @author "HB"
#
# @keyword "univar"
# @keyword "robust"
#*/############################################################################
weightedVar <- function(x, w, na.rm=FALSE, center=NULL, ...) {
# Argument 'x':
n <- length(x);
# Argument 'w':
if (missing(w)) {
# By default use weights that are one.
w <- rep(1, times=n);
} else if (length(w) != n) {
stop("The number of elements in arguments 'w' and 'x' does not match: ", length(w), " != ", n);
}
# Argument 'na.rm':
naValue <- NA;
storage.mode(naValue) <- storage.mode(x);
# Remove values with zero (and negative) weight. This will:
# 1) take care of the case when all weights are zero,
# 2) it will most likely speed up the sorting.
tmp <- (w > 0);
if (!all(tmp)) {
x <- .subset(x, tmp);
w <- .subset(w, tmp);
n <- length(x);
}
tmp <- NULL; # Not needed anymore
# Drop missing values?
if (na.rm) {
keep <- which(!is.na(x) & !is.na(w));
x <- .subset(x, keep);
w <- .subset(w, keep);
n <- length(x);
keep <- NULL; # Not needed anymore
} else if (anyMissing(x)) {
return(naValue);
}
# Are any weights Inf? Then treat them with equal weight and all others
# with weight zero.
tmp <- is.infinite(w);
if (any(tmp)) {
keep <- tmp;
x <- .subset(x, keep);
n <- length(x);
w <- rep(1, times=n);
keep <- NULL; # Not needed anymore
}
tmp <- NULL; # Not needed anymore
# Are there any values left to calculate the weighted median of?
# This is consistent with how stats::mad() works.
if (n == 0L) {
return(naValue);
} else if (n == 1L) {
zeroValue <- 0;
storage.mode(zeroValue) <- storage.mode(x);
return(zeroValue);
}
# Standardize weights to sum to one
w <- w / sum(w);
# Estimate the mean?
if (is.null(center)) {
center <- sum(w*x);
}
# Estimate the variance
x <- x - center; # Residuals
x <- x^2; # Squared residuals
sigma2 <- sum(w*x) * (n / (n-1L))
x <- w <- NULL; # Not needed anymore
sigma2;
} # weightedVar()
weightedSd <- function(...) {
sqrt(weightedVar(...))
} # weightedSd()
rowWeightedVars <- function(x, w=NULL, na.rm=FALSE, ...) {
apply(x, MARGIN=1L, FUN=weightedVar, w=w, na.rm=na.rm, ...)
} # rowWeightedVars()
colWeightedVars <- function(x, w=NULL, na.rm=FALSE, ...) {
apply(x, MARGIN=2L, FUN=weightedVar, w=w, na.rm=na.rm, ...)
} # colWeightedVars()
rowWeightedSds <- function(x, w=NULL, na.rm=FALSE, ...) {
sqrt(rowWeightedVars(x=x, w=w, na.rm=na.rm, ...))
} # rowWeightedSds()
colWeightedSds <- function(x, w=NULL, na.rm=FALSE, ...) {
sqrt(colWeightedVars(x=x, w=w, na.rm=na.rm, ...))
} # colWeightedSds()
############################################################################
# HISTORY:
# 2014-11-10
# o Turned weightedSd() and weightedVar() into plain function.
# 2014-03-26
# o Created from weightedMad.R.
############################################################################
matrixStats/R/rowSds.R 0000644 0001751 0000144 00000003634 12542546242 014434 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowSds
# @alias rowSds
# @alias colSds
# @alias rowMads
# @alias colMads
# \alias{rowSds,matrix-method}
# \alias{colSds,matrix-method}
#
# @title "Standard deviation estimates for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowSds
# @usage colSds
# @usage rowMads
# @usage colMads
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{center}{A optional @numeric @vector of length N (K) with centers.
# By default, they are calculated using @see "rowMedians".}
# \item{constant}{A scale factor. See @see "stats::mad" for details.}
# \item{na.rm}{If @TRUE, missing values are removed first, otherwise not.}
# \item{dim.}{An @integer @vector of length two specifying the
# dimension of \code{x}, also when not a @matrix.}
# \item{...}{Additional arguments passed to @see "rowVars" and
# @see "rowMedians", respectively.}
# \item{centers}{(deprectated) use \code{center} instead.}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# @author "HB"
#
# \seealso{
# @see "stats::sd", @see "stats::mad" and \code{\link[stats:cor]{var}}.
# @see "rowIQRs".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowSds <- function(x, ...) {
x <- rowVars(x, ...);
sqrt(x);
}
colSds <- function(x, ...) {
x <- colVars(x, ...);
sqrt(x);
}
############################################################################
# HISTORY:
# 2012-03-19 [HC]
# o Changed description of centers argument to rowMads and colMads
# 2008-03-26 [HB]
# o Created from genefilter::rowVars() by Wolfgang Huber.
############################################################################
matrixStats/R/zzz.R 0000644 0001751 0000144 00000000470 12542546242 014003 0 ustar hornik users .onUnload <- function (libpath) {
library.dynam.unload("matrixStats", libpath)
}
.onAttach <- function(libname, pkgname) {
## covr: skip=3
pd <- utils::packageDescription(pkgname);
pkgStartupMessage(pkgname, " v", pd$Version, " (",
pd$Date, ") successfully loaded. See ?", pkgname, " for help.");
}
matrixStats/R/anyMissing.R 0000644 0001751 0000144 00000005060 12542546242 015267 0 ustar hornik users ###########################################################################/**
# @RdocFunction anyMissing
# \alias{colAnyMissings}
# \alias{rowAnyMissings}
#
# @title "Checks if there are any missing values in an object or not"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage anyMissing
# @usage colAnyMissings
# @usage rowAnyMissings
# }
#
# \arguments{
# \item{x}{A @vector, a @list, a @matrix, a @data.frame, or @NULL.}
# \item{...}{Not used.}
# }
#
# \value{
# Returns @TRUE if a missing value was detected, otherwise @FALSE.
# }
#
# \details{
# The implementation of this method is optimized for both speed and memory.
# The method will return @TRUE as soon as a missing value is detected.
# }
#
# \examples{
# x <- rnorm(n=1000)
# x[seq(300,length(x),by=100)] <- NA
# stopifnot(anyMissing(x) == any(is.na(x)))
# }
#
# @author "HB"
#
# \seealso{
# Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base},
# which provides the same functionality as this function.
# }
#
# @keyword iteration
# @keyword logic
#*/###########################################################################
anyMissing <- function(x, ...) {
## All list or a data.frame?
if (is.list(x)) {
for (kk in seq(along=x)) {
if (.Call("anyMissing", x[[kk]], PACKAGE="matrixStats"))
return(TRUE)
}
return(FALSE)
} else {
## All other data types
.Call("anyMissing", x, PACKAGE="matrixStats")
}
}
colAnyMissings <- function(x, ...) {
colAnys(x, value=NA, ...)
}
rowAnyMissings <- function(x, ...) {
rowAnys(x, value=NA, ...)
}
############################################################################
# HISTORY:
# 2015-02-10
# o CLEANUP: anyMissing() is no longer an S4 generic, cf. base::anyNA().
# 2015-01-20
# o CLEANUP: In the effort of migrating anyMissing() into a plain R
# function, specific anyMissing() implementations for data.frame:s and
# and list:s were dropped and is now handled by anyMissing() for "ANY".
# 2014-12-08
# o Added (col|row)AnyMissings().
# 2013-09-26
# o Added help reference to base::anyNA().
# 2013-01-13
# o Added anyMissing() for raw, which always returns FALSE.
# 2008-03-25
# o Added anyMissing() for matrices, data.frames, lists and NULL.
# o Added anyMissing() for numeric, logical, complex and character.
# o Made anyMissing() into an S4 method.
# 2007-08-14
# o Created. See also R-devel thread "hasNA()/anyNA()?" on 2007-08-13.
############################################################################
matrixStats/R/rowIQRs.R 0000644 0001751 0000144 00000005206 12542546242 014516 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowIQRs
# @alias colIQRs
# @alias iqr
#
# @title "Estimates of the interquartile range for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowIQRs
# @usage colIQRs
# @usage iqr
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix.}
# \item{na.rm}{If @TRUE, missing values are dropped first, otherwise not.}
# \item{...}{Additional arguments passed to @see "rowQuantiles"
# (\code{colQuantiles()}).}
# }
#
# \value{
# Returns a @numeric @vector of length N (K).
# }
#
# \section{Missing values}{
# Contrary to @see "stats::IQR", which gives an error if there are missing
# values and \code{na.rm=FALSE}, \code{iqr()} and its corresponding row and
# column-specific functions return @NA_real_.
# }
#
# @examples "../incl/rowIQRs.Rex"
#
# @author "HB"
#
# \seealso{
# See @see "stats::IQR".
# See @see "rowSds".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowIQRs <- function(x, na.rm=FALSE, ...) {
Q <- rowQuantiles(x, probs=c(0.25, 0.75), na.rm=na.rm, drop=FALSE, ...)
ans <- Q[,2L,drop=TRUE] - Q[,1L,drop=TRUE]
# Remove attributes
attributes(ans) <- NULL
ans
}
colIQRs <- function(x, na.rm=FALSE, ...) {
Q <- colQuantiles(x, probs=c(0.25, 0.75), na.rm=na.rm, drop=FALSE, ...)
ans <- Q[,2L,drop=TRUE] - Q[,1L,drop=TRUE]
# Remove attributes
attributes(ans) <- NULL
ans
}
iqr <- function(x, na.rm=FALSE, ...) {
if(na.rm) {
x <- x[!is.na(x)]
} else if (anyMissing(x)) {
return(NA_real_)
}
# At this point, there should be no missing values
# Nothing to do?
n <- length(x)
if (n == 0L) {
return(NA_real_)
} else if (n == 1L) {
return(0)
}
q <- quantile(x, probs=c(0.25, 0.75), names=FALSE, na.rm=FALSE, ...)
q[2L] - q[1L]
}
############################################################################
# HISTORY:
# 2015-01-16
# o Now iqr(..., na.rm=FALSE) returns NA_real_ if there are missing values.
# 2015-01-11
# o Now iqr() no longer returns a named value.
# 2014-12-19
# o Added iqr().
# 2011-11-25
# o Added help and example to rowIQRs() and colIQRs().
# o BUG FIX: rowIQRs() and colIQRs() would return the 25% and the 75%
# quantiles, not the difference between them. Thanks Pierre Neuvial
# at CNRS, Evry, France for the report.
# 2008-03-26 [HB]
# o Created.
############################################################################
matrixStats/R/rowQuantiles.R 0000644 0001751 0000144 00000015153 12542546242 015647 0 ustar hornik users ###########################################################################/**
# @RdocFunction rowQuantiles
# @alias colQuantiles
#
# @title "Estimates quantiles for each row (column) in a matrix"
#
# \description{
# @get "title".
# }
#
# \usage{
# @usage rowQuantiles
# @usage colQuantiles
# }
#
# \arguments{
# \item{x}{A @numeric NxK @matrix with N >= 0.}
# \item{probs}{A @numeric @vector of J probabilities in [0,1].}
# \item{na.rm}{If @TRUE, @NAs are excluded first, otherwise not.}
# \item{type}{An @integer specify the type of estimator.
# See @see "stats::quantile" for more details.}
# \item{...}{Additional arguments passed to @see "stats::quantile".}
# \item{drop}{If TRUE, singleton dimensions in the result are dropped,
# otherwise not.}
# }
#
# \value{
# Returns a @numeric NxJ (KxJ) @matrix, where
# N (K) is the number of rows (columns) for which the J quantiles are
# calculated.
# }
#
# @examples "../incl/rowQuantiles.Rex"
#
# @author "HB"
#
# \seealso{
# @see "stats::quantile".
# }
#
# @keyword array
# @keyword iteration
# @keyword robust
# @keyword univar
#*/###########################################################################
rowQuantiles <- function(x, probs=seq(from=0, to=1, by=0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) {
# Argument 'probs':
if (anyMissing(probs)) {
stop("Argument 'probs' contains missing values")
}
eps <- 100 * .Machine$double.eps
if (any((probs < -eps | probs > 1 + eps))) {
stop("Argument 'probs' is out of range [0-eps,1+eps]")
}
# Argument 'x':
nrow <- nrow(x)
ncol <- ncol(x)
if (nrow > 0L && ncol > 0L) {
naRows <- rowAnyMissings(x)
hasNA <- any(naRows)
if (!hasNA) na.rm <- FALSE
if (!hasNA && type == 7L) {
n <- ncol
idxs <- 1 + (n-1) * probs
idxs_lo <- floor(idxs)
idxs_hi <- ceiling(idxs)
partial <- sort(unique(c(idxs_lo, idxs_hi)))
xp <- apply(x, MARGIN=1L, FUN=sort, partial=partial)
if (is.null(dim(xp))) dim(xp) <- c(1L, length(xp))
q <- apply(xp, MARGIN=2L, FUN=.subset, idxs_lo)
if (is.null(dim(q))) dim(q) <- c(1L, length(q))
# Adjust
idxs_adj <- which(idxs > idxs_lo)
if (length(idxs_adj) > 0L) {
qL <- q[idxs_adj,,drop=FALSE]
idxs_hi <- idxs_hi[idxs_adj]
qH <- apply(xp, MARGIN=2L, FUN=.subset, idxs_hi)
w <- (idxs - idxs_lo)[idxs_adj]
q[idxs_adj,] <- (1-w)*qL + w*qH
# Not needed anymore
xp <- qL <- qH <- NULL
}
# Backward compatibility
q <- t(q)
} else {
# Allocate result
naValue <- NA_real_
storage.mode(naValue) <- storage.mode(x)
q <- matrix(naValue, nrow=nrow, ncol=length(probs))
# For each row...
rows <- seq_len(nrow)
# Rows with NAs should return all NAs (so skip those)
if (hasNA && !na.rm) rows <- rows[!naRows]
for (kk in rows) {
xkk <- x[kk,]
if (na.rm) xkk <- xkk[!is.na(xkk)]
q[kk,] <- quantile(xkk, probs=probs, na.rm=FALSE, type=type, ...)
}
} # if (type ...)
} else {
naValue <- NA_real_
storage.mode(naValue) <- storage.mode(x)
q <- matrix(naValue, nrow=nrow, ncol=length(probs))
}
# Add names
digits <- max(2L, getOption("digits"))
colnames(q) <- sprintf("%.*g%%", digits, 100*probs)
# Drop singleton dimensions?
if (drop) {
q <- drop(q)
}
q
} # rowQuantiles()
colQuantiles <- function(x, probs=seq(from=0, to=1, by=0.25), na.rm=FALSE, type=7L, ..., drop=TRUE) {
# Argument 'probs':
if (anyMissing(probs)) {
stop("Argument 'probs' contains missing values")
}
eps <- 100 * .Machine$double.eps
if (any((probs < -eps | probs > 1 + eps))) {
stop("Argument 'probs' is out of range [0-eps,1+eps]")
}
# Argument 'x':
nrow <- nrow(x)
ncol <- ncol(x)
if (nrow > 0L && ncol > 0L) {
naCols <- colAnyMissings(x)
hasNA <- any(naCols)
if (!hasNA) na.rm <- FALSE
if (!hasNA && type == 7L) {
n <- nrow
idxs <- 1 + (n-1) * probs
idxs_lo <- floor(idxs)
idxs_hi <- ceiling(idxs)
partial <- sort(unique(c(idxs_lo, idxs_hi)))
xp <- apply(x, MARGIN=2L, FUN=sort, partial=partial)
if (is.null(dim(xp))) dim(xp) <- c(1L, length(xp))
q <- apply(xp, MARGIN=2L, FUN=.subset, idxs_lo)
if (is.null(dim(q))) dim(q) <- c(1L, length(q))
# Adjust
idxs_adj <- which(idxs > idxs_lo)
if (length(idxs_adj) > 0L) {
qL <- q[idxs_adj,,drop=FALSE]
idxs_hi <- idxs_hi[idxs_adj]
qH <- apply(xp, MARGIN=2L, FUN=.subset, idxs_hi)
w <- (idxs - idxs_lo)[idxs_adj]
q[idxs_adj,] <- (1-w)*qL + w*qH
# Not needed anymore
xp <- qL <- qH <- NULL
}
# Backward compatibility
q <- t(q)
} else {
# Allocate result
naValue <- NA_real_
storage.mode(naValue) <- storage.mode(x)
q <- matrix(naValue, nrow=ncol, ncol=length(probs))
# For each column...
cols <- seq_len(ncol)
# Columns with NAs should return all NAs (so skip those)
if (hasNA && !na.rm) cols <- cols[!naCols]
for (kk in cols) {
xkk <- x[,kk]
if (na.rm) xkk <- xkk[!is.na(xkk)]
q[kk,] <- quantile(xkk, probs=probs, na.rm=FALSE, type=type, ...)
}
} # if (type ...)
} else {
naValue <- NA_real_
storage.mode(naValue) <- storage.mode(x)
q <- matrix(naValue, nrow=ncol, ncol=length(probs))
}
# Add names
digits <- max(2L, getOption("digits"))
colnames(q) <- sprintf("%.*g%%", digits, 100*probs)
# Drop singleton dimensions?
if (drop) {
q <- drop(q)
}
q
}
############################################################################
# HISTORY:
# 2015-01-26
# o CONSISTENCY: Now rowQuantiles(x, na.rm=TRUE) returns all NAs for rows
# with missing values. Analogously for colQuantiles(). Previously, an
# error was thrown saying missing values are not allowed.
# 2014-11-18 [HB]
# o SPEEDUP: Made (col|row)Quantiles(x) faster for 'x' without missing
# values (and default type=7L quantiles).
# 2014-11-16 [HB]
# o SPEEDUP: colQuantiles(x) is no longer using colQuantiles(t(x)).
# 2013-07-29 [HB]
# o DOCUMENTATION: The dimension of the return value was swapped
# in help("rowQuantiles"). Noticed by PL.
# 2011-11-29 [HB]
# o Added an Rdoc example.
# 2010-10-06 [HB]
# o Now the result of {row|col}Quantiles() contains column names.
# 2008-03-26 [HB]
# o Created.
############################################################################
matrixStats/vignettes/ 0000755 0001751 0000144 00000000000 12542546311 014626 5 ustar hornik users matrixStats/vignettes/matrixStats-methods.md.rsp 0000644 0001751 0000144 00000020606 12542546242 021746 0 ustar hornik users <%@meta language="R-vignette" content="--------------------------------
DIRECTIVES FOR R:
%\VignetteIndexEntry{matrixStats: Summary of functions}
%\VignetteAuthor{Henrik Bengtsson}
%\VignetteKeyword{matrix}
%\VignetteKeyword{vector}
%\VignetteKeyword{apply}
%\VignetteKeyword{rows}
%\VignetteKeyword{columns}
%\VignetteKeyword{memory}
%\VignetteKeyword{speed}
%\VignetteEngine{R.rsp::rsp}
%\VignetteTangle{FALSE}
--------------------------------------------------------------------"%>
<%
pkgName <- "matrixStats"
library(pkgName, character.only=TRUE)
ns <- getNamespace(pkgName)
env <- as.environment(sprintf("package:%s", pkgName))
R.utils::use("R.utils")
kable <- function(df, ...) {
fcns <- as.character(df$Functions)
fcns <- strsplit(fcns, split=",")
fcns <- sapply(fcns, FUN=function(names) {
names <- trim(names)
ok <- sapply(names, FUN=exists, envir=ns, mode="function")
names[ok] <- sprintf("%s()", names[ok])
names[!ok] <- sprintf("~~%s()~~", names[!ok])
names <- paste(names, collapse=", ")
})
df$Functions <- fcns
df$Example <- sprintf("`%s`", df$Example)
print(knitr::kable(df, ..., format="markdown"))
}
# Find all functions
all <- ls(envir=env)
keep <- sapply(all, FUN=function(name) {
is.function(get(name, envir=env))
})
all <- all[keep]
keep <- !grepl("[.]([^.]*)$", all)
all <- all[keep]
# Hidden functions
skip <- c("rowAvgsPerColSet", "colAvgsPerRowSet")
skip <- c(skip, "allocArray", "allocMatrix", "allocVector")
all <- setdiff(all, skip)
# Column and row functions
crfcns <- grep("^(col|row)", all, value=TRUE)
# Vector functions
vfcns <- setdiff(all, crfcns)
%>
# <%@meta name="title"%>
<% pkg <- R.oo::Package(pkgName) %>
<%@meta name="author"%> on <%=format(as.Date(pkg$date), format="%B %d, %Y")%>
<%
fcns <- crfcns
base <- gsub("^(col|row)", "", fcns)
groups <- tapply(fcns, base, FUN=list)
stopifnot(all(sapply(groups, FUN=length) == 2L))
groups <- matrix(unlist(groups, use.names=FALSE), nrow=2L)
%>
<%---
## Functions that apply to column and rows of matrices
```
<% print(fcns) %>
```
---%>
<%
fcns <- vfcns
%>
<%---
## Functions that apply to vectors
```
<% print(fcns) %>
```
---%>
## Location and scale estimators
<%
tbl <- NULL
row <- data.frame(
"Estimator" = "Weighted sample mean",
"Functions" = "weightedMean, colWeightedMeans, rowWeightedMeans",
"Example" = "weightedMean(x, w); rowWeightedMeans(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median",
"Functions" = "median, colMedians, rowMedians",
"Example" = "median(x); rowMedians(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted median",
"Functions" = "weightedMedian, colWeightedMedians, rowWeightedMedians",
"Example" = "weightedMedian(x, w); rowWeightedMedians(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample variance",
"Functions" = "var, colVars, rowVars",
"Example" = "var(x); rowVars(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted sample variance",
"Functions" = "weightedVar, colWeightedVars, rowWeightedVars",
"Example" = "weightedVar(x, w), rowWeightedVars(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample variance by n-order differences",
"Functions" = "varDiff, colVarDiffs, rowVarDiffs",
"Example" = "varDiff(x); rowVarDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample standard deviation",
"Functions" = "sd, colSds, rowSds",
"Example" = "sd(x); rowSds(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted sample deviation",
"Functions" = "weightedSd, colWeightedSds, rowWeightedSds",
"Example" = "weightedSd(x, w), rowWeightedSds(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample standard deviation by n-order differences",
"Functions" = "sdDiff, colSdDiffs, rowSdDiffs",
"Example" = "sdDiff(x); rowSdDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median absolute deviation (MAD)",
"Functions" = "mad, colMads, rowMads",
"Example" = "mad(x); rowMads(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Weighted median absolute deviation (MAD)",
"Functions" = "weightedMad, colWeightedMads, rowWeightedMads",
"Example" = "weightedMad(x, w), rowWeightedMads(x, w)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Median absolute deviation (MAD) by n-order differences",
"Functions" = "madDiff, colMadDiffs, rowMadDiffs",
"Example" = "madDiff(x); rowMadDiffs()"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Quantile",
"Functions" = "quantile, colQuantiles, rowQuantiles",
"Example" = "quantile(x, probs); rowQuantiles(x, probs)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Interquartile range (IQR)",
"Functions" = "iqr, colIQRs, rowIQRs",
"Example" = "iqr(x); rowIQRs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Interquartile range (IQR) by n-order differences",
"Functions" = "iqrDiff, colIQRDiffs, rowIQRDiffs",
"Example" = "iqrDiff(x); rowIQRDiffs(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Range",
"Functions" = "range, colRanges, rowRanges",
"Example" = "range(x); rowRanges(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Minimum",
"Functions" = "min, colMins, rowMins",
"Example" = "min(x); rowMins(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Maximum",
"Functions" = "max, colMaxs, rowMaxs",
"Example" = "max(x); rowMaxs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Testing for and counting values
<%
tbl <- NULL
row <- data.frame(
"Operator" = "Are there any missing values?",
"Functions" = "anyMissing, colAnyMissings, rowAnyMissings",
"Example" = "anyMissing(x); rowAnyMissings(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Does TRUE exists?",
"Functions" = "any, colAnys, rowAnys",
"Example" = "any(x); rowAnys(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Are all values TRUE?",
"Functions" = "all, colAlls, rowAlls",
"Example" = "all(x); rowAlls(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Does value exists?",
"Functions" = "anyValue, colAnys, rowAnys",
"Example" = "anyValue(x, value); rowAnys(x, value)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Do all element have a given value?",
"Functions" = "allValue, colAlls, rowAlls",
"Example" = "allValue(x, value); rowAlls(x, value)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Number of occurrences of a value?",
"Functions" = "count, colCounts, rowCounts",
"Example" = "count(x, value); rowCounts(x, value)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Cumulative functions
<%
tbl <- NULL
row <- data.frame(
"Operator" = "Cumulative sum",
"Functions" = "cumsum, colCumsums, rowCumsums",
"Example" = "cumsum(x); rowCumsums(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative product",
"Functions" = "cumprod, colCumprods, rowCumprods",
"Example" = "cumprod(x); rowCumprods(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative minimum",
"Functions" = "cummin, colCummins, rowCummins",
"Example" = "cummin(x); rowCummins(x)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Operator" = "Cumulative maximum",
"Functions" = "cummax, colCummaxs, rowCummaxs",
"Example" = "cummax(x); rowCummaxs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Binning
<%
tbl <- NULL
row <- data.frame(
"Estimator" = "Counts in disjoint bins",
"Functions" = "binCounts",
"Example" = "binCounts(x, bx)"
)
tbl <- rbind(tbl, row)
row <- data.frame(
"Estimator" = "Sample means (and counts) in disjoint bins",
"Functions" = "binMeans",
"Example" = "binMeans(y, x, bx)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
## Miscellaneous
<%
tbl <- NULL
row <- data.frame(
"Operation" = "Lagged differences",
"Functions" = c("diff2, colDiffs, rowDiffs"),
"Example" = "diff2(x), rowDiffs(x)"
)
tbl <- rbind(tbl, row)
%>
<% kable(tbl) %>
-------------------------------------------------------------
<%=pkgName%> v<%=getVersion(pkg)%>. Release: [CRAN](http://cran.r-project.org/package=<%=pkgName%>), Development: [GitHub](<%=getUrl(pkg)%>).
matrixStats/MD5 0000644 0001751 0000144 00000032524 12542554541 013140 0 ustar hornik users dab7f205c5bcaeaad15e117d08c73599 *DESCRIPTION
42f6507f74264f16d54851706a146a8e *NAMESPACE
1049916234ad2c20de78d7ecdfdc99f8 *NEWS
e11dfd66d9a104fbc5598c52bd2f516d *R/999.package.R
bc8d37bc4988831234eea6d7d6066e4a *R/allocMatrix.R
73feb0716c01b707b332fb56b0ea14c9 *R/anyMissing.R
93d3ad6fb517ebf2f4011fd0f21903d9 *R/benchmark.R
fa58bfb1e01279b6f9c8442463bbee0a *R/binCounts.R
9815c4e21f5374bc7568fc983f46dc3f *R/binMeans.R
00f9f170fcc64e4d4019b8df23b961e6 *R/diff2.R
3ae809e6c90777039d408aba05934f7e *R/indexByRow.R
351c0d25c7870c255224b76477f9414b *R/logSumExp.R
907f12741c8541f5eda282e69881cae9 *R/meanOver.R
51602457a9df5aecf0ef4755ec0cf546 *R/pkgStartupMessage.R
a4582e300fa9ef7a75493edfa0ef528b *R/product.R
934740512f4b37d67740ae31ab288251 *R/psortKM.R
8801c5bb7366daede4acd34d34fe384a *R/rowAvgsPerColSet.R
9a483696d5b2b81b0be2a646b2a448f5 *R/rowCollapse.R
ef31b87fb36b51431cc5e8bf8f6fe41f *R/rowCounts.R
738a09811f93fecc48c6d6a430795820 *R/rowCumsums.R
30ee83aa0f04a8437a20578153b5e0db *R/rowDiffs.R
054299075b22b9c24b9c5eb7f5e62271 *R/rowIQRs.R
a0bad869fa6811da08df8d4e2aef98f0 *R/rowLogSumExps.R
75cf6345a14478c4bae92347fd9bc558 *R/rowMads.R
2cc6857bb0199e06abecb738e956eb40 *R/rowMedians.S4.R
22942d09b53a6e830eb6aa87252fa6bb *R/rowOrderStats.R
5df083adc25dd0cacfee4813328edd4f *R/rowProds.R
e4bdeeee5c7aaf98cf841553245bc058 *R/rowQuantiles.R
2c9e5f27450a8814a21a1d92465fbad8 *R/rowRanges.R
f270ced4c3ea9fa78f3464b6838dda26 *R/rowRanks.R
640689af849a8249df850fc4ed105dce *R/rowSds.R
5423ef502727587e71135b2efa8392d9 *R/rowTabulates.R
bcebec7173597479ddcdd6534d5a4310 *R/rowVars.R
15998fc70fa30c96861e93a96393b2b4 *R/rowWeightedMeans.R
c174648188253fb8b6a955e973b5d44b *R/rowWeightedMedians.R
8bb515dea9f9fd9a09756c926be9f023 *R/signTabulate.R
5ded10f28438fc575f28aadb3329fb40 *R/sumOver.R
b1b14cc21e3acdd5bc4cf12359ced39d *R/varDiff.R
f3e211bee40bb36de2cb0bebfa957232 *R/weightedMad.R
8d62aa96333f44a71f88988c0bdd0a84 *R/weightedMean.R
c2a4666b83e496e70ae705c24c4a1a2c *R/weightedMedian.R
97954e9842b4395ae9b3f870770849e7 *R/weightedVar.R
4b547323019375a9fc89ff14675675cc *R/x_OP_y.R
c07b43bd895eab9749e812b1870baf95 *R/zzz.R
dfc2bd98cb36498deed1e78cd414d3d2 *build/vignette.rds
ca0c103b1be9f97f60c0623aea65f5c2 *inst/benchmarking/R/random-matrices.R
4a38dc3c38475a36786249204cf20588 *inst/benchmarking/R/random-vectors.R
7256378a1eb54d9d41e0d075a11b3612 *inst/benchmarking/allocMatrix.md.rsp
e5a5e47c87dad6280a47d91e1cbdf46a *inst/benchmarking/allocVector.md.rsp
c4e95805c1acbd37aab9d7ec7de023d7 *inst/benchmarking/anyMissing.md.rsp
330dac464260cce1864cacd0ecf73f26 *inst/benchmarking/binCounts.md.rsp
7b71826081ac00e187f63ab9821a8d8e *inst/benchmarking/binMeans.md.rsp
2955e6a91517fd43ad799e52824176ba *inst/benchmarking/colRowAlls.md.rsp
861057a45857a4626c6179dba6cadd0c *inst/benchmarking/colRowAnyMissings.md.rsp
98a3d92a9b8217cb3d4eb73c3ae91eed *inst/benchmarking/colRowAnys.md.rsp
4c5523efdcf9c92b073a5ade831c331a *inst/benchmarking/colRowCounts.md.rsp
82c16b2f37c797888940e4bd02d866ce *inst/benchmarking/colRowCummins.md.rsp
aa66215e97c52a6d18f89775e3b27fca *inst/benchmarking/colRowCumprods.md.rsp
590182276674b77503d28a6be8d1ccfa *inst/benchmarking/colRowCumsums.md.rsp
3c876337a72df11a91441bea1761f115 *inst/benchmarking/colRowDiffs.md.rsp
a6523b452344ef2b78073d50fe9a534c *inst/benchmarking/colRowLogSumExps.md.rsp
f3baebca9ee8ad627605269e996dffb4 *inst/benchmarking/colRowMads.md.rsp
eccd2ee0c78f2eb286aa4d110a0558a1 *inst/benchmarking/colRowMeans.md.rsp
68e0230c9aa586c216cf1a83094c3acc *inst/benchmarking/colRowMedians.md.rsp
f870694f8d86741fb3194661879db142 *inst/benchmarking/colRowMins.md.rsp
dd1f69270726961bc78cb2e723495610 *inst/benchmarking/colRowOrderStats.md.rsp
f0647a93c27525f62e8b5250001b2a37 *inst/benchmarking/colRowProds.md.rsp
45789c523622419efbc47059b6574273 *inst/benchmarking/colRowQuantiles.md.rsp
90e3f4c1d92a47457ca70bbc52c1c145 *inst/benchmarking/colRowRanges.md.rsp
07a64514251f5f55a62951a871e23d21 *inst/benchmarking/colRowRanks.md.rsp
409e4c37addbf7a2c15cb0b276692ca7 *inst/benchmarking/colRowSums.md.rsp
ac5da10e80f1ff0ca8722176b7b41cf9 *inst/benchmarking/colRowTabulates.md.rsp
72cacc64cf3e106dadfe865650c0c57f *inst/benchmarking/colRowVars.md.rsp
7a8c58c7090624f9536603917180509f *inst/benchmarking/colRowWeightedMeans.md.rsp
beb6d3d46a9903d3cdc37bba972030f4 *inst/benchmarking/colRowWeightedMedians.md.rsp
27de27a4248555e4fad67ee53e8d622e *inst/benchmarking/count.md.rsp
ab9f5049c780d39eca80e084f60cf68e *inst/benchmarking/includes/appendix.md.rsp
75a4dbe1cebc11442ce3f8626cb2f786 *inst/benchmarking/includes/footer.md.rsp
80fec8731611547e148434c4a8af80a7 *inst/benchmarking/includes/header.md.rsp
d4fe14ce0a8fe23b829bd5d40ecb8638 *inst/benchmarking/includes/references.md.rsp
0ede7b741c87e3b9552562b25aa84950 *inst/benchmarking/includes/results.md.rsp
1127a9839354eb9de5fd6641e0d2949e *inst/benchmarking/includes/setup.md.rsp
8744fca6100198e786187172e3e04beb *inst/benchmarking/index.md.rsp
672790929425f88eb1235e579c7ae487 *inst/benchmarking/indexByRow.md.rsp
713004c069039309a947587cae09b9c4 *inst/benchmarking/logSumExp.md.rsp
03268c8274ecc261af5a583484679b94 *inst/benchmarking/madDiff.md.rsp
20b9686d1ccade9286f73e8a9a80e96c *inst/benchmarking/meanOver.md.rsp
87bd2dbcf60d75ca69eaf37f52c4b436 *inst/benchmarking/product.md.rsp
5e0977113a979580db793bce8c0bdbda *inst/benchmarking/sumOver.md.rsp
4438b091dd392c8a0c305cbf48e1b354 *inst/benchmarking/t_tx_OP_y.md.rsp
2e5cf31ad74af1915311a48f5c83732a *inst/benchmarking/varDiff.md.rsp
3e75a042f402c6d60fbdb4c236bdb0a9 *inst/benchmarking/weightedMean.md.rsp
8e98dd36afed6de19c3f9cee6fd6516f *inst/benchmarking/weightedMedian.md.rsp
7b063158ba0bca2a2550bd7c4f9524c4 *inst/benchmarking/x_OP_y.md.rsp
4c60cf123ab3a6f4bbc1fa104dd340e9 *inst/doc/matrixStats-methods.html
582b682419b6ba0c71915913bfb93be4 *inst/doc/matrixStats-methods.md.rsp
3be5b91c497cfc857af456c5458bb6e0 *man/allocMatrix.Rd
6802f901274f465d85960be20c594bc6 *man/anyMissing.Rd
b8b516f170c13a1eae9720526c6572f9 *man/binCounts.Rd
d0baceff9a154bd804fd4f649ee49f57 *man/binMeans.Rd
9630ce05eefdd0ca82adcdf85dd71a52 *man/diff2.Rd
553286071b44b0c10001ec59d437642a *man/indexByRow.Rd
45c48dae20f6411127fffac4b2fa0cc6 *man/logSumExp.Rd
6be5a9ac1eb991947dc901e4608e1909 *man/matrixStats-package.Rd
aba0b7e73733a5832840a144413e05c8 *man/meanOver.Rd
3c28f9576492c952ec24547672ff423e *man/rowAvgsPerColSet.Rd
151d60d30326172538f26016c92d570d *man/rowCollapse.Rd
20420a3b0dacfc58a8ec0164609e3b73 *man/rowCounts.Rd
67e0da9580f75d0b0a5ffdb1945b355e *man/rowCumsums.Rd
9cf589f37acbbf753c2a07f727ad75bb *man/rowDiffs.Rd
1d670261d4c847d534b97fb20a2fa391 *man/rowIQRs.Rd
ce1971ace17567d261da000b417b6d61 *man/rowLogSumExps.Rd
b32e2de3b302f0d8b035a035fd28672b *man/rowMedians.Rd
68c7d2b6c4648f46430e47aa074d5a8e *man/rowOrderStats.Rd
bb938ccc32375f5972dd63aba0aa2672 *man/rowProds.Rd
55c7420af5c986a765ce762906495131 *man/rowQuantiles.Rd
230807a1ab0e186ce9d6233f630a4c58 *man/rowRanges.Rd
ef6dad5770bd7c5dbe50a2d04c53ced6 *man/rowRanks.Rd
ec9c07afc5da5adba37f0485cd0a5ef5 *man/rowSds.Rd
6ea08c053265b8ce0adc815d92229f5a *man/rowTabulates.Rd
2ae459ef917ef4564100e842ecc46a07 *man/rowVars.Rd
aaa62497523fd11029ec56cc5b449b3f *man/rowWeightedMeans.Rd
13d24b46725782d3a536480d12b0c569 *man/rowWeightedMedians.Rd
e64eb1340b963b776807cfcd45cb49f6 *man/signTabulate.Rd
f78cf336fa52437301f854bc1372e427 *man/sumOver.Rd
51f34f8007755bc3c4376a56c1cb4701 *man/varDiff.Rd
11c6c137c9e9473d02acbe9a99c26140 *man/weightedMad.Rd
927e23972d04b71d7acff9edc2e6feed *man/weightedMean.Rd
57e5484018f9adf4b19a659c709469d8 *man/weightedMedian.Rd
1726d21237faa5093379ea6edc9ea1cd *man/weightedVar.Rd
01c6a83158307172f1d51d45e69bf8b6 *man/xUNDERSCOREOPUNDERSCOREy.Rd
1c6ff48974c75f5640d09f0919304d35 *src/allocMatrix2.c
dd0a5a51a4beb5f240611b379355974e *src/anyMissing.c
7eb8fcd98f39e47623a2b85583906632 *src/binCounts-BINBY-template.h
4ced9b328349ef007414044632c398ea *src/binCounts.c
553e87b0967e23606116ef44054836fd *src/binMeans-BINBY-template.h
b73e99b94350876ba274b7964499ec35 *src/binMeans.c
82a634bcb3722cb35a1460185bdb2dc5 *src/colCounts.c
e5f276f3c58aa0fc2427e4776305655a *src/colCounts_TYPE-template.h
c148b21db92c74008311c15b4720e9b7 *src/colOrderStats.c
d547a6be4752200e2f0f42eb3fb64f89 *src/colOrderStats_TYPE-template.h
02ba21b3917368d337adf7add87625a7 *src/colRanges.c
a867a5496ab04e8ab5d4a49262dbd73e *src/colRanges_TYPE-template.h
2c871e6b2476286f48719a205f3a9c5e *src/diff2.c
b1a80b6033ed39718422109a0bab98b1 *src/diff2_TYPE-template.h
122818a15b1026f4617413b21a43f1a1 *src/indexByRow.c
dff19ccbacd1467c4762d78bf1e28943 *src/logSumExp.c
c84dba16e698ca588e987679937c4d19 *src/logSumExp_internal.c
10e83fcf5abc6f470b78866955cd3eb5 *src/logSumExp_internal.h
cea76871ca495cdaf172a0324091fdcd *src/meanOver.c
d20995d2f2b6106d5c89857791e8fa23 *src/meanOver_TYPE-template.h
8b7240246168563c8ff34c7a716d2908 *src/productExpSumLog.c
ebe837c6edd137ddd45593f75c12b7d9 *src/productExpSumLog_TYPE-template.h
b0e357e87f4826e34602a1e739b6c1b0 *src/psortKM.c
427e0e0b915a2131bbaf7b0ea8e9fa23 *src/rowCounts.c
0e2d0d426fbbe10ba3afdf4ae5c1bfcf *src/rowCounts_TYPE-template.h
b4d9d1c221e27a87af7f681354a855b2 *src/rowCumMinMaxs.c
9251465451b640135bb3052c8f701e50 *src/rowCumMinMaxs_TYPE-template.h
8a68febb8b652a3b815f81d317d9efaf *src/rowCumprods.c
3657ab012153b21b7b2735e8085c8ea6 *src/rowCumprods_TYPE-template.h
f55ef3d1110b2f3308ea57f6fbd022d7 *src/rowCumsums.c
23aa90044e6e700da513e01ba8d917d0 *src/rowCumsums_TYPE-template.h
8dbd2448b2bf9332aa6d365b7f14602a *src/rowDiffs.c
26c82aefba665e025ae5491716c7c7db *src/rowDiffs_TYPE-template.h
e5ef7b7ad5e06479598f237b73b78367 *src/rowLogSumExp.c
04849d41cd620515daa2cc7ac2790255 *src/rowMads.c
48bb539cf54ec1d4b37c11703e7c1b24 *src/rowMads_TYPE-template.h
692e21c160d2a6f3aebe14b5180a0ede *src/rowMedians.c
e8caf7ee5ba1afe8fc678dfb749ca6ca *src/rowMedians_TYPE-template.h
fbe2f52fa8d1f55bffb3f0bc858662b8 *src/rowOrderStats.c
bc888497ca6e4192e05d2c4b54dbfbeb *src/rowOrderStats_TYPE-template.h
09ab341736ee3ae5a5440214c46086d9 *src/rowRanges.c
253e54b4039e4d22d57cb616f8a27cf5 *src/rowRanges_TYPE-template.h
559d563d7933cc7812e330b05b8fae8a *src/rowRanksWithTies.c
73037c62b5d39bbc61e50eb4c0ef5d8e *src/rowRanksWithTies_TYPE_TIES-template.h
f03ebb783ef8e534cdc582dda37f9d84 *src/rowVars.c
c1d9ee48aa2ed322993b5034e67e9336 *src/rowVars_TYPE-template.h
1d49333c1c268d62b0fd0a2f8082f172 *src/signTabulate.c
a52a4563500b0342c567eef1610f07ce *src/signTabulate_TYPE-template.h
f7bdb2b04143fc5653e1ea354c6bb730 *src/sumOver.c
bfebc523926a7c74df61a83c93e338b8 *src/sumOver_TYPE-template.h
99698bd63ccb7c17cb4e5dc7b07c9e97 *src/templates-types.h
7d48cfb1024e60465d7c6964206e310e *src/templates-types_undef.h
ec49eaadeaacde7c6c624caf640960d9 *src/types.h
b122d69ccf94104d5b3011d985b78b13 *src/utils.h
ea3621211c22893848083535e4eda33e *src/weightedMean.c
4f25f36f4da2341e8242d9523fbedd2a *src/weightedMean_TYPE-template.h
af4c80e388864cf4790700f347b1a197 *src/weightedMedian.c
b0290ced0f75f7ab1d605a7d5d031522 *src/weightedMedian_TYPE-template.h
3d4e2f4571c5ab285a3173eaeafc6cec *src/x_OP_y.c
47a57b15b5e985c1a233491fbf8b9b81 *src/x_OP_y_TYPE-template.h
6b09aca9e9b79f8fcb85cdca51b305ad *tests/allocArray.R
0844bf090717347c46eeae7e7761fd31 *tests/allocMatrix.R
21b5cbdf48874928843e56f60ee8c449 *tests/allocVector.R
f3f5505626d59ecc6c8d498bf6885de0 *tests/anyMissing.R
33ccf3764c08913bc1fa6ed4fa00ada6 *tests/benchmark.R
fd636ec43f81ad7f1ecef7bb12062450 *tests/binCounts.R
0ed9bb218de4455a7ba63036d18cdfb1 *tests/binMeans,binCounts.R
53679ebbeee7d4ef5e7217ff8076ddce *tests/count.R
851e382ed2c520ccfe239a1edb720200 *tests/diff2.R
323eff3b3c96e679600ed54cbc281bb2 *tests/indexByRow.R
d229e4e189c7de0db2fd280c6bd4fde6 *tests/logSumExp.R
361afd9da7276beab8ed7999d92c9e32 *tests/meanOver.R
0890c6dab2f723c5194917924d817066 *tests/product.R
e996793a93f55357ddea79aa865c45f3 *tests/psortKM.R
1e61292da20a213bbccc7461421b3c48 *tests/rowAllAnys.R
6b645494332ef6d9e128bfa98cc73960 *tests/rowAvgsPerColSet.R
1408f031afac3258638f8e91cbac75bd *tests/rowCollapse.R
993e34a7064fe8bb019bef8288336231 *tests/rowCounts.R
ddaf3ca847d84668b552e4f2b8911cd3 *tests/rowCumMinMaxs.R
8fa680f875c13e4e6739ac9a626a6e30 *tests/rowCumprods.R
984184fbadd7344bdd5ccbe93a2dfaa0 *tests/rowCumsums.R
e20a6674d9c33035c2bed8b7eb4193a2 *tests/rowDiffs.R
9dc45d060ce75b9ae14eec0fadf1dc36 *tests/rowIQRs.R
89451022b4573db0619d7c3a98a7afdd *tests/rowLogSumExps.R
3ce723b5a981c34b8c4f892a5db9bfd5 *tests/rowMads.R
51155625a0ac69cb17e3e42f2ff7fab4 *tests/rowMedians.R
08dfbd77077dad6ceb84e76547464999 *tests/rowOrderStats.R
6ac776e7386e3798ec63371259e2815d *tests/rowProds.R
6fa79a760b002afb3801d810d6a919a8 *tests/rowQuantiles.R
267dc45a8a893e1461425f1975702d37 *tests/rowRanges.R
6d7f9ee3fdf75cb8533f2170e101c2a3 *tests/rowRanks.R
09eef93a3460acac4ce33fccca26fdef *tests/rowSds.R
b4fa113751df716127c5e3edda1e6153 *tests/rowTabulates.R
8694849a466b91f95fe7540c24e6d78c *tests/rowVarDiffs.R
410338392cd50c0c953263be0063fa38 *tests/rowVars.R
0cbe7b6379097b2ea7aa1c14d980339a *tests/rowWeightedMeans.R
65d1b8c60a95d11c90a2ace6679017ec *tests/rowWeightedMedians.R
6bac1702606b78c8cd82697494e74bb1 *tests/rowWeightedVars.R
824536a7e210775091eddc2fc7f5aac1 *tests/signTabulate.R
3d8e7ebbc2249cb0249856b4a6ab8f25 *tests/sumOver.R
8e4cf994f2ecd7f21347931fd03c4392 *tests/varDiff_etal.R
f681d2b636aadef71934fb37eaa3fb2c *tests/weightedMean.R
7e51437418f75869a9d7b8759f9b6b6c *tests/weightedMedian.R
93e729b911e5844a2d8d715ecf1f4367 *tests/weightedVar_etal.R
8a54c1e37d221d2ada391942bef3a760 *tests/x_OP_y.R
5087a4cbb2884533520210c7bb1fe20d *tests/zzz.package-unload.R
582b682419b6ba0c71915913bfb93be4 *vignettes/matrixStats-methods.md.rsp
matrixStats/build/ 0000755 0001751 0000144 00000000000 12542546263 013723 5 ustar hornik users matrixStats/build/vignette.rds 0000644 0001751 0000144 00000000415 12542546263 016262 0 ustar hornik users ‹ mPËNÃ0tMR/ˆSø ò=W½pA”W+Þ¨‘ü’½¡äÆ—S6$®ZKkïÚ³³ãùX1ÆR–¥ K3J³5mÅ=EÂr¶¤óQqtí×9úgx0ÂWJTÎÛùtÜ”ûN)îúÒ4eÓé[£}Ôððõ•ŒpƒÊõ_=ÞÓÊÓ‹÷™æ
bòù,h1\ÿDýó RŒóCõ 58¹µ²ŸŠÜ™£¼µ‘ÒþÌʸ œy FyÉ)R´xþh\t…)¤]+!½·x.²×ínJ“·ø‹7üWö,I},ºÌü¦íD+ö±–ÜÇ>®G^5ŽúÝ¿)„4 matrixStats/DESCRIPTION 0000644 0001751 0000144 00000002736 12542554541 014340 0 ustar hornik users Package: matrixStats
Version: 0.14.2
Depends: R (>= 2.9.0)
Imports: methods
Suggests: base64enc, ggplot2, knitr, microbenchmark, R.devices, R.rsp
VignetteBuilder: R.rsp
Date: 2015-06-23
Title: Methods that Apply to Rows and Columns of Matrices (and to
Vectors)
Authors@R: c(
person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"),
email="henrikb@braju.com"),
person("Hector", "Corrada Bravo", role="ctb"),
person("Robert", "Gentleman", role="ctb"),
person("Ola", "Hossjer", role="ctb"),
person("Harris", "Jaffee", role="ctb"),
person("Dongcan", "Jiang", role="ctb"),
person("Peter", "Langfelder", role="ctb"))
Author: Henrik Bengtsson [aut, cre, cph], Hector Corrada Bravo [ctb], Robert Gentleman [ctb], Ola Hossjer [ctb], Harris Jaffee [ctb], Dongcan Jiang [ctb], Peter Langfelder [ctb]
Maintainer: Henrik Bengtsson
Description: Methods operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). There are also some vector-based methods, e.g. binMeans(), madDiff() and weightedMedians(). All methods have been optimized for speed and memory usage.
License: Artistic-2.0
LazyLoad: TRUE
NeedsCompilation: yes
ByteCompile: TRUE
biocViews: Infrastructure, Statistics
URL: https://github.com/HenrikBengtsson/matrixStats
BugReports: https://github.com/HenrikBengtsson/matrixStats/issues
Packaged: 2015-06-24 15:29:13 UTC; hb
Repository: CRAN
Date/Publication: 2015-06-24 18:22:57
matrixStats/man/ 0000755 0001751 0000144 00000000000 12542546241 013373 5 ustar hornik users matrixStats/man/rowOrderStats.Rd 0000644 0001751 0000144 00000003537 12542546241 016514 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowOrderStats.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowOrderStats}
\alias{rowOrderStats}
\alias{colOrderStats}
\title{Gets an order statistic for each row (column) in a matrix}
\description{
Gets an order statistic for each row (column) in a matrix.
}
\usage{
rowOrderStats(x, which, dim.=dim(x), ...)
colOrderStats(x, which, dim.=dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{which}{An \code{\link[base]{integer}} index in [1,K] ([1,N]) indicating which
order statistic to be returned.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\details{
The implementation of \code{rowOrderStats()} is optimized for both
speed and memory.
To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there
is a unique implementation for \code{\link[base]{integer}} matrices.
}
\section{Missing values}{
This method does \emph{not} handle missing values, that is, the result
corresponds to having \code{na.rm=FALSE} (if such an argument would
be available).
}
\author{
The native implementation of \code{rowOrderStats()} was adopted
by Henrik Bengtsson from Robert Gentleman's \code{rowQ()}
in the \pkg{Biobase} package.
}
\seealso{
See \code{rowMeans()} in \code{\link[base]{colSums}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/rowWeightedMedians.Rd 0000644 0001751 0000144 00000004473 12542546241 017463 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowWeightedMedians.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowWeightedMedians}
\alias{rowWeightedMedians}
\alias{colWeightedMedians}
\title{Calculates the weighted medians for each row (column) in a matrix}
\description{
Calculates the weighted medians for each row (column) in a matrix.
}
\usage{
rowWeightedMedians(x, w=NULL, na.rm=FALSE, ...)
colWeightedMedians(x, w=NULL, na.rm=FALSE, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded from the calculation,
otherwise not.}
\item{...}{Additional arguments passed to \code{\link{weightedMedian}}().}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\details{
The implementations of these methods are optimized for both speed
and memory.
If no weights are given, the corresponding
\code{\link{rowMedians}}()/\code{colMedians()} is used.
}
\examples{
x <- matrix(rnorm(20), nrow=5, ncol=4)
print(x)
# Non-weighted row averages
xM0 <- rowMedians(x)
xM <- rowWeightedMedians(x)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (uniform weights)
w <- rep(2.5, ncol(x))
xM <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowMedians(x[,(w == 1),drop=FALSE]);
xM <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (excluding some columns)
w <- c(0,1,0,0)
xM0 <- rowMedians(x[,(w == 1),drop=FALSE]);
xM <- rowWeightedMedians(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted averages by rows and columns
w <- 1:4
xM1 <- rowWeightedMedians(x, w=w)
xM2 <- colWeightedMedians(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
}
\author{Henrik Bengtsson}
\seealso{
See \code{\link{rowMedians}}() and \code{colMedians()} for non-weighted medians.
Internally, \code{\link{weightedMedian}}() is used.
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/weightedMedian.Rd 0000644 0001751 0000144 00000012332 12542546241 016601 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% weightedMedian.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{weightedMedian}
\alias{weightedMedian}
\encoding{latin1}
\title{Weighted Median Value}
\usage{
weightedMedian(x, w=rep(1, times = length(x)), na.rm=FALSE, interpolate=is.null(ties),
ties=NULL, ...)
}
\description{
Computes a weighted median of a numeric vector.
}
\arguments{
\item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted median is
to be computed.}
\item{w}{a vector of weights the same length as \code{x} giving the weights
to use for each element of \code{x}. Negative weights are treated
as zero weights. Default value is equal weight to all values.}
\item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in
\code{x} should be stripped before the computation proceeds,
or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.
Default value is \code{\link[base]{NA}} (for efficiency).}
\item{interpolate}{If \code{\link[base:logical]{TRUE}}, linear interpolation is used to get a
consistent estimate of the weighted median.}
\item{ties}{If \code{interpolate == FALSE},
a character string specifying how to solve ties between two
\code{x}'s that are satisfying the weighted median criteria.
Note that at most two values can satisfy the criteria.
When \code{ties} is \code{"min"}, the smaller value of the two
is returned and when it is \code{"max"}, the larger value is
returned.
If \code{ties} is \code{"mean"}, the mean of the two values is
returned.
Finally, if \code{ties} is \code{"weighted"} (or \code{\link[base]{NULL}}) a
weighted average of the two are returned, where the weights are
weights of all values \code{x[i] <= x[k]} and \code{x[i] >= x[k]},
respectively.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
}
\details{
For the \code{n} elements \code{x = c(x[1], x[2], ..., x[n])} with positive
weights \code{w = c(w[1], w[2], ..., w[n])} such that \code{sum(w) = S},
the \emph{weighted median} is defined as the element \code{x[k]} for which
the total weight of all elements \code{x[i] < x[k]} is less or equal to
\code{S/2} and for which the total weight of all elements \code{x[i] > x[k]}
is less or equal to \code{S/2} (c.f. [1]).
If \code{w} is missing then all elements of \code{x} are given the same
positive weight. If all weights are zero, \code{\link[base]{NA}}_real_ is returned.
If one or more weights are \code{Inf}, it is the same as these weights
have the same weight and the others has zero. This makes things easier for
cases where the weights are result of a division with zero.
The weighted median solves the following optimization problem:
\deqn{\alpha^* = \arg_\alpha \min \sum_{k=1}{K} w_k |x_k-\alpha|}
where \eqn{x=(x_1,x_2,\ldots,x_K)} are scalars and
\eqn{w=(w_1,w_2,\ldots,w_K)} are the corresponding "weights" for
each individual \eqn{x} value.
}
\examples{
x <- 1:10
n <- length(x)
m1 <- median(x) # 5.5
m2 <- weightedMedian(x) # 5.5
stopifnot(identical(m1, m2))
w <- rep(1, n)
m1 <- weightedMedian(x, w) # 5.5 (default)
m2 <- weightedMedian(x, ties="weighted") # 5.5 (default)
m3 <- weightedMedian(x, ties="min") # 5
m4 <- weightedMedian(x, ties="max") # 6
stopifnot(identical(m1,m2))
# Pull the median towards zero
w[1] <- 5
m1 <- weightedMedian(x, w) # 3.5
y <- c(rep(0,w[1]), x[-1]) # Only possible for integer weights
m2 <- median(y) # 3.5
stopifnot(identical(m1,m2))
# Put even more weight on the zero
w[1] <- 8.5
weightedMedian(x, w) # 2
# All weight on the first value
w[1] <- Inf
weightedMedian(x, w) # 1
# All weight on the last value
w[1] <- 1
w[n] <- Inf
weightedMedian(x, w) # 10
# All weights set to zero
w <- rep(0, n)
weightedMedian(x, w) # NA
# Simple benchmarking
bench <- function(N=1e5, K=10) {
x <- rnorm(N)
gc()
t <- c()
t[1] <- system.time(for (k in 1:K) median(x))[3]
t[2] <- system.time(for (k in 1:K) weightedMedian(x))[3]
t <- t / t[1]
names(t) <- c("median", "weightedMedian")
t
}
print(bench(N= 5, K=100))
print(bench(N= 50, K=100))
print(bench(N= 200, K=100))
print(bench(N= 1000, K=100))
print(bench(N= 10e3, K= 20))
print(bench(N=100e3, K= 20))
}
\seealso{
\code{\link[stats]{median}}, \code{\link[base]{mean}}() and \code{\link{weightedMean}}().
}
\references{
[1] T.H. Cormen, C.E. Leiserson, R.L. Rivest, Introduction to Algorithms,
The MIT Press, Massachusetts Institute of Technology, 1989.
}
\author{
Henrik Bengtsson and Ola Hossjer, Centre for Mathematical
Sciences, Lund University.
Thanks to Roger Koenker, Econometrics, University of Illinois, for
the initial ideas.
}
\keyword{univar}
\keyword{robust}
matrixStats/man/rowCollapse.Rd 0000644 0001751 0000144 00000003126 12542546241 016156 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowCollapse.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowCollapse}
\alias{rowCollapse}
\alias{colCollapse}
\title{Extracts one cell per row (column) from a matrix}
\description{
Extracts one cell per row (column) from a matrix.
The implementation is optimized for memory and speed.
}
\usage{
rowCollapse(x, idxs, dim.=dim(x), ...)
colCollapse(x, idxs, dim.=dim(x), ...)
}
\arguments{
\item{x}{An NxK \code{\link[base]{matrix}}.}
\item{idxs}{An index \code{\link[base]{vector}} of (maximum) length N (K) specifying the
columns (rows) to be extracted.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{vector}} of length N (K).
}
\examples{
x <- matrix(1:27, ncol=3)
y <- rowCollapse(x, 1)
stopifnot(identical(y, x[,1]))
y <- rowCollapse(x, 2)
stopifnot(identical(y, x[,2]))
y <- rowCollapse(x, c(1,1,1,1,1,3,3,3,3))
stopifnot(identical(y, c(x[1:5,1], x[6:9,3])))
y <- rowCollapse(x, 1:3)
print(y)
yT <- c(x[1,1],x[2,2],x[3,3],x[4,1],x[5,2],x[6,3],x[7,1],x[8,2],x[9,3])
stopifnot(identical(y, yT))
}
\author{Henrik Bengtsson}
\seealso{
\emph{Matrix indexing} to index elements in matrices and arrays,
cf. \code{\link[base]{[}}().
}
\keyword{utilities}
matrixStats/man/xUNDERSCOREOPUNDERSCOREy.Rd 0000644 0001751 0000144 00000004112 12542546241 017543 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% x_OP_y.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{xUNDERSCOREOPUNDERSCOREy}
\alias{xUNDERSCOREOPUNDERSCOREy}
\alias{x_OP_y}
\alias{t_tx_OP_y}
\title{Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)'}
\usage{
x_OP_y(x, y, OP, commute=FALSE, na.rm=FALSE)
t_tx_OP_y(x, y, OP, commute=FALSE, na.rm=FALSE)
}
\description{
Fast calculation of 'z <- x OP y' and 'z <- t(t(x) OP y)', where OP can be +, -, *, and /.
For + and *, na.rm=TRUE will drop missing values first.
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{y}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length L.}
\item{OP}{A \code{\link[base]{character}} specifying which operator to use.}
\item{commute}{If \code{\link[base:logical]{TRUE}}, 'y OP x' ('t(y OP t(x))') is calculated,
otherwise 'x OP y' ('t(t(x) OP y)').}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are ignored, otherwise not.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.
}
\section{Missing values}{
If \code{na.rm=TRUE}, then missing values are "dropped" before applying
the operator to each pair of values. For instance, if \code{x[1,1]} is
a missing value, then the result of \code{x[1,1] + y[1]} equals
\code{y[1]}. If also \code{y[1]} is a missing value, then the result
is a missing value. This only applies to additions and multiplications.
For subtractions and divisions, argument \code{na.rm} is ignored.
}
\examples{
x <- matrix(c(1,2,3,NA,5,6), nrow=3, ncol=2)
# Add 'y' to each column
y <- 1:2
z0 <- x + y
z1 <- x_OP_y(x, y, OP="+")
print(z1)
stopifnot(all.equal(z1, z0))
# Add 'y' to each row
y <- 1:3
z0 <- t(t(x) + y)
z1 <- t_tx_OP_y(x, y, OP="+")
print(z1)
stopifnot(all.equal(z1, z0))
}
\author{Henrik Bengtsson}
\keyword{internal}
matrixStats/man/binCounts.Rd 0000644 0001751 0000144 00000003755 12542546241 015640 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% binCounts.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{binCounts}
\alias{binCounts}
\title{Fast element counting in non-overlapping bins}
\usage{
binCounts(x, bx, right=FALSE, ...)
}
\description{
Counts the number of elements in non-overlapping bins
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned and counted.}
\item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B+1 ordered positions specifying
the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ...,
\code{[bx[B],bx[B+1])}.}
\item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open),
otherwise left-closed (right open).}
\item{...}{Not used.}
}
\value{
Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of length B with non-negative integers.
}
\details{
\code{binCounts(x, bx, right=TRUE)} gives equivalent results as
\code{rev(binCounts(-x, bx=rev(-bx), right=FALSE))}, but is
faster and more memory efficient.
}
\section{Missing and non-finite values}{
Missing values in \code{x} are ignored/dropped.
Missing values in \code{bx} are not allowed and gives an error.
}
\seealso{
An alternative for counting occurrences within bins is
\code{\link[graphics]{hist}}, e.g. \code{hist(x, breaks=bx, plot=FALSE)$counts}.
That approach is ~30-60\% slower than \code{binCounts(..., right=TRUE)}.
To count occurrences of indices \code{x} (positive \code{\link[base]{integer}}s) in
\code{[1,B]}, use \code{tabulate(x, nbins=B)}, where \code{x} does
\emph{not} have to be sorted first.
For details, see \code{\link[base]{tabulate}}().
To average values within bins, see \code{\link{binMeans}}().
}
\author{Henrik Bengtsson}
\keyword{univar}
matrixStats/man/rowProds.Rd 0000644 0001751 0000144 00000003731 12542546241 015505 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowProds.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowProds}
\alias{rowProds}
\alias{rowProds}
\alias{colProds}
\alias{product}
\title{Calculates the product for each row (column) in a matrix}
\description{
Calculates the product for each row (column) in a matrix.
}
\usage{
rowProds(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...)
colProds(x, na.rm=FALSE, method=c("direct", "expSumLog"), ...)
product(x, na.rm=FALSE, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are ignored, otherwise not.}
\item{method}{A \code{\link[base]{character}} string specifying how each product
is calculated.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\details{
If \code{method="expSumLog"}, then then \code{\link{product}}() function
is used, which calculates the produce via the logarithmic transform
(treating negative values specially). This improves the precision
and lowers the risk for numeric overflow.
If \code{method="direct"}, the direct product is calculated via
the \code{\link[base]{prod}}() function.
}
\section{Missing values}{
Note, if \code{method="expSumLog"}, \code{na.rm=FALSE}, and \code{x}
contains missing values (\code{\link[base]{NA}} or \code{\link[base:is.finite]{NaN}}), then the calculated value
is also missing value.
Note that it depends on platform whether \code{\link[base:is.finite]{NaN}} or \code{\link[base]{NA}} is returned
when an \code{\link[base:is.finite]{NaN}} exists, cf. \code{\link[base]{is.nan}}().
}
\author{Henrik Bengtsson}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/binMeans.Rd 0000644 0001751 0000144 00000005211 12542546241 015415 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% binMeans.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{binMeans}
\alias{binMeans}
\title{Fast mean calculations in non-overlapping bins}
\usage{
binMeans(y, x, bx, na.rm=TRUE, count=TRUE, right=FALSE, ...)
}
\description{
Computes the sample means in non-overlapping bins
}
\arguments{
\item{y}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K values to calculate means on.}
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of K positions for to be binned.}
\item{bx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of B+1 ordered positions specifying
the B > 0 bins \code{[bx[1],bx[2])}, \code{[bx[2],bx[3])}, ...,
\code{[bx[B],bx[B+1])}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values in \code{y} are dropped
before calculating the mean, otherwise not.}
\item{count}{If \code{\link[base:logical]{TRUE}}, the number of data points in each bins is
returned as attribute \code{count}, which is an \code{\link[base]{integer}} \code{\link[base]{vector}}
of length B.}
\item{right}{If \code{\link[base:logical]{TRUE}}, the bins are right-closed (left open),
otherwise left-closed (right open).}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length B.
}
\details{
\code{binMeans(x, bx, right=TRUE)} gives equivalent results as
\code{rev(binMeans(-x, bx=sort(-bx), right=FALSE))}, but is faster.
}
\section{Missing and non-finite values}{
Data points where either of \code{y} and \code{x} is missing are
dropped (and therefore are also not counted).
Non-finite values in \code{y} are not allowed and gives an error.
Missing values in \code{bx} are not allowed and gives an error.
}
\section{Empty bins}{
Empty bins will get value \code{\link[base:is.finite]{NaN}}.
}
\examples{
x <- 1:200
mu <- double(length(x))
mu[1:50] <- 5
mu[101:150] <- -5
y <- mu + rnorm(length(x))
# Binning
bx <- c(0,50,100,150,200)+0.5
yS <- binMeans(y, x=x, bx=bx)
plot(x,y)
for (kk in seq(along=yS)) {
lines(bx[c(kk,kk+1)], yS[c(kk,kk)], col="blue", lwd=2)
}
}
\seealso{
\code{\link{binCounts}}().
\code{\link[stats]{aggregate}} and \code{\link[base]{mean}}().
}
\references{
[1] R-devel thread \emph{Fastest non-overlapping binning mean function
out there?} on Oct 3, 2012\cr
}
\author{
Henrik Bengtsson with initial code contributions by Martin Morgan [1].
}
\keyword{univar}
matrixStats/man/anyMissing.Rd 0000644 0001751 0000144 00000002637 12542546241 016013 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% anyMissing.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{anyMissing}
\alias{anyMissing}
\alias{colAnyMissings}
\alias{rowAnyMissings}
\title{Checks if there are any missing values in an object or not}
\description{
Checks if there are any missing values in an object or not.
}
\usage{
anyMissing(x, ...)
colAnyMissings(x, ...)
rowAnyMissings(x, ...)
}
\arguments{
\item{x}{A \code{\link[base]{vector}}, a \code{\link[base]{list}}, a \code{\link[base]{matrix}}, a \code{\link[base]{data.frame}}, or \code{\link[base]{NULL}}.}
\item{...}{Not used.}
}
\value{
Returns \code{\link[base:logical]{TRUE}} if a missing value was detected, otherwise \code{\link[base:logical]{FALSE}}.
}
\details{
The implementation of this method is optimized for both speed and memory.
The method will return \code{\link[base:logical]{TRUE}} as soon as a missing value is detected.
}
\examples{
x <- rnorm(n=1000)
x[seq(300,length(x),by=100)] <- NA
stopifnot(anyMissing(x) == any(is.na(x)))
}
\author{Henrik Bengtsson}
\seealso{
Starting with R v3.1.0, there is \code{anyNA()} in the \pkg{base},
which provides the same functionality as this function.
}
\keyword{iteration}
\keyword{logic}
matrixStats/man/logSumExp.Rd 0000644 0001751 0000144 00000006560 12542546241 015614 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% logSumExp.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{logSumExp}
\alias{logSumExp}
\title{Accurately computes the logarithm of the sum of exponentials}
\description{
Accurately computes the logarithm of the sum of exponentials, that is, \eqn{log(sum(exp(lx)))}.
If \eqn{lx = log(x)}, then this is equivalently to calculating
\eqn{log(sum(x))}.
This function, which avoid numerical underflow, is often used when
computing the logarithm of the sum of small numbers (\eqn{|x| << 1})
such as probabilities.
}
\usage{
logSumExp(lx, na.rm=FALSE, ...)
}
\arguments{
\item{lx}{A \code{\link[base]{numeric}} \code{\link[base]{vector}}.
Typically \code{lx} are \eqn{log(x)} values.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
}
\details{
This is function is more accurate than \code{log(sum(exp(lx)))}
when the values of \eqn{x = exp(lx)} are \eqn{|x| << 1}.
The implementation of this function is based on the observation that
\deqn{
log(a + b)
= [ la = log(a), lb = log(b) ]
= log( exp(la) + exp(lb) )
= la + log ( 1 + exp(lb - la) )
}
Assuming \eqn{la > lb}, then \eqn{|lb - la| < |lb|}, and it is
less likely that the computation of \eqn{1 + exp(lb - la)} will
not underflow/overflow numerically. Because of this, the overall
result from this function should be more accurate.
Analogously to this, the implementation of this function finds the
maximum value of \code{lx} and subtracts it from the remaining values
in \code{lx}.
}
\section{Benchmarking}{
This method is optimized for correctness, that avoiding underflowing.
It is implemented in native code that is optimized for speed and memory.
}
\examples{
## EXAMPLE #1
lx <- c(1000.01, 1000.02)
y0 <- log(sum(exp(lx)))
print(y0) ## Inf
y1 <- logSumExp(lx)
print(y1) ## 1000.708
## EXAMPLE #2
lx <- c(-1000.01, -1000.02)
y0 <- log(sum(exp(lx)))
print(y0) ## -Inf
y1 <- logSumExp(lx)
print(y1) ## -999.3218
## EXAMPLE #3
## R-help thread 'Beyond double-precision?' on May 9, 2009.
set.seed(1)
x <- runif(50)
## The logarithm of the harmonic mean
y0 <- log(1/mean(1/x))
print(y0) ## -1.600885
lx <- log(x)
y1 <- log(length(x)) - logSumExp(-lx)
print(y1) ## [1] -1.600885
# Sanity check
stopifnot(all.equal(y1, y0))
}
\author{Henrik Bengtsson}
\seealso{
To compute this function on rows or columns of a matrix,
see \code{\link{rowLogSumExps}}().
For adding \emph{two} double values in native code, R provides
the C function \code{logspace_add()} [1].
For properties of the log-sum-exponential function, see [2].
}
\references{
[1] R Core Team, \emph{Writing R Extensions}, v3.0.0, April 2013. \cr
[2] Laurent El Ghaoui,
\emph{Hyper-Textbook: Optimization Models and Applications},
University of California at Berkeley, August 2012.
(Chapter 'Log-Sum-Exp (LSE) Function and Properties',
\url{http://inst.eecs.berkeley.edu/~ee127a/book/login/def_lse_fcn.html})
\cr
[3] R-help thread \emph{logsumexp function in R}, 2011-02-17.
\url{https://stat.ethz.ch/pipermail/r-help/2011-February/269205.html}\cr
}
matrixStats/man/varDiff.Rd 0000644 0001751 0000144 00000006516 12542546241 015253 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% varDiff.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{varDiff}
\alias{varDiff}
\alias{sdDiff}
\alias{madDiff}
\alias{iqrDiff}
\alias{colVarDiffs}
\alias{rowVarDiffs}
\alias{colSdDiffs}
\alias{rowSdDiffs}
\alias{colMadDiffs}
\alias{rowMadDiffs}
\alias{colIQRDiffs}
\alias{rowIQRDiffs}
\title{Estimation of scale based on sequential-order differences}
\description{
Estimation of scale based on sequential-order differences, corresponding to the scale estimates provided by
\code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and
\code{\link[stats]{IQR}}.
}
\usage{
varDiff(x, na.rm=FALSE, diff=1L, trim=0, ...)
colVarDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
rowVarDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
sdDiff(x, na.rm=FALSE, diff=1L, trim=0, ...)
colSdDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
rowSdDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
madDiff(x, na.rm=FALSE, diff=1L, trim=0, constant=1.4826, ...)
colMadDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
rowMadDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
iqrDiff(x, na.rm=FALSE, diff=1L, trim=0, ...)
colIQRDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
rowIQRDiffs(x, na.rm=FALSE, diff=1L, trim=0, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N or a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded, otherwise not.}
\item{diff}{The positional distance of elements for which the
difference should be calculated.}
\item{trim}{A \code{\link[base]{double}} in [0,1/2] specifying the fraction of
observations to be trimmed from each end of (sorted) \code{x}
before estimation.}
\item{constant}{A scale factor adjusting for asymptotically
normal consistency.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length 1, length N, or length K.
}
\author{Henrik Bengtsson}
\seealso{
For the corresponding non-differentiated estimates, see
\code{\link[stats]{var}}, \code{\link[stats]{sd}}, \code{\link[stats]{mad}} and
\code{\link[stats]{IQR}}.
Internally, \code{\link{diff2}}() is used which is a faster version
of \code{\link[base]{diff}}().
}
\details{
Note that n-order difference MAD estimates, just like the ordinary
MAD estimate by \code{\link[stats]{mad}}, apply a correction factor such
that the estimates are consistent with the standard deviation
under Gaussian distributions.
The interquartile range (IQR) estimates does \emph{not} apply such
a correction factor. If asymptotically normal consistency is wanted,
the correction factor for IQR estimate is \code{1 / (2 * qnorm(3/4))},
which is half of that used for MAD estimates, which is
\code{1 / qnorm(3/4)}. This correction factor needs to be applied
manually, i.e. there is no \code{constant} argument for the IQR
functions.
}
\references{
[1] J. von Neumann et al., \emph{The mean square successive difference}.
Annals of Mathematical Statistics, 1941, 12, 153-162.\cr
}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/weightedVar.Rd 0000644 0001751 0000144 00000004166 12542546241 016142 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% weightedVar.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{weightedVar}
\alias{weightedVar}
\alias{weightedSd}
\alias{colWeightedVars}
\alias{rowWeightedVars}
\alias{colWeightedSds}
\alias{rowWeightedSds}
\title{Weighted variance and weighted standard deviation}
\usage{
weightedVar(x, w, na.rm=FALSE, center=NULL, ...)
colWeightedVars(x, w=NULL, na.rm=FALSE, ...)
rowWeightedVars(x, w=NULL, na.rm=FALSE, ...)
weightedSd(...)
colWeightedSds(x, w=NULL, na.rm=FALSE, ...)
rowWeightedSds(x, w=NULL, na.rm=FALSE, ...)
}
\description{
Computes a weighted variance / standard deviation of a numeric
vector or across rows or columns of a matrix.
}
\arguments{
\item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose
weighted variance is to be computed.}
\item{w}{a vector of weights the same length as \code{x} giving the weights
to use for each element of \code{x}. Negative weights are treated
as zero weights. Default value is equal weight to all values.}
\item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in
\code{x} should be stripped before the computation proceeds,
or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.
Default value is \code{\link[base]{NA}} (for efficiency).}
\item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center
location of the data. If \code{\link[base]{NULL}}, it is estimated from data.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
}
\section{Missing values}{
Missing values are dropped at the very beginning, if argument
\code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not.
}
\seealso{
For the non-weighted variance, see \code{\link[stats]{var}}.
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{robust}
matrixStats/man/rowDiffs.Rd 0000644 0001751 0000144 00000002271 12542546241 015447 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowDiffs.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowDiffs}
\alias{rowDiffs}
\alias{colDiffs}
\title{Calculates difference for each row (column) in a matrix}
\description{
Calculates difference for each row (column) in a matrix.
}
\usage{
rowDiffs(x, lag=1L, differences=1L, ...)
colDiffs(x, lag=1L, differences=1L, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{lag}{An \code{\link[base]{integer}} specifying the lag.}
\item{differences}{An \code{\link[base]{integer}} specifying the order of difference.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} Nx(K-1) or (N-1)xK \code{\link[base]{matrix}}.
}
\examples{
x <- matrix(1:27, ncol=3)
d1 <- rowDiffs(x)
print(d1)
d2 <- t(colDiffs(t(x)))
stopifnot(all.equal(d2, d1))
}
\author{Henrik Bengtsson}
\seealso{
See also \code{\link{diff2}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/diff2.Rd 0000644 0001751 0000144 00000001730 12542546241 014655 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% diff2.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{diff2}
\alias{diff2}
\title{Fast lagged differences}
\usage{
diff2(x, lag=1L, differences=1L, ...)
}
\description{
Computes the lagged and iterated differences.
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.}
\item{lag}{An \code{\link[base]{integer}} specifying the lag.}
\item{differences}{An \code{\link[base]{integer}} specifying the order of difference.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N - \code{differences}.
}
\examples{
diff2(1:10)
}
\seealso{
\code{\link[base]{diff}}().
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{internal}
matrixStats/man/rowMedians.Rd 0000644 0001751 0000144 00000003721 12542546241 015775 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowMedians.S4.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowMedians}
\alias{rowMedians}
\alias{colMedians}
\alias{rowMedians,matrix-method}
\alias{colMedians,matrix-method}
\title{Calculates the median for each row (column) in a matrix}
\description{
Calculates the median for each row (column) in a matrix.
}
\usage{
rowMedians(x, na.rm=FALSE, dim.=dim(x), ...)
colMedians(x, na.rm=FALSE, dim.=dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\details{
The implementation of \code{rowMedians()} and \code{colMedians()}
is optimized for both speed and memory.
To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there
is a special implementation for \code{\link[base]{integer}} matrices.
That is, if \code{x} is an \code{\link[base]{integer}} \code{\link[base]{matrix}}, then
\code{rowMedians(as.double(x))} (\code{rowMedians(as.double(x))})
would require three times the memory of \code{rowMedians(x)}
(\code{colMedians(x)}), but all this is avoided.
}
\author{Henrik Bengtsson, Harris Jaffee}
\seealso{
See \code{\link{rowMedians}}() and \code{colMedians()} for weighted medians.
For mean estimates, see \code{rowMeans()} in \code{\link[base]{colSums}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/meanOver.Rd 0000644 0001751 0000144 00000004765 12542546241 015452 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% meanOver.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{meanOver}
\alias{meanOver}
\title{Fast averaging over subset of vector elements}
\usage{
meanOver(x, idxs=NULL, na.rm=FALSE, refine=TRUE, ...)
}
\description{
Computes the sample mean of all or a subset of values.
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.}
\item{idxs}{A \code{\link[base]{numeric}} index \code{\link[base]{vector}} in [1,N] of elements to mean over.
If \code{\link[base]{NULL}}, all elements are considered.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are skipped, otherwise not.}
\item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is
used to calculate the average with greater numerical precision,
otherwise not.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
}
\details{
\code{meanOver(x, idxs)} gives equivalent results as
\code{mean(x[idxs])}, but is faster and more memory efficient
since it avoids the actual subsetting which requires copying
of elements and garbage collection thereof.
If \code{x} is \code{\link[base]{numeric}} and \code{refine=TRUE}, then a two-pass scan
is used to calculate the average. The first scan calculates the total
sum and divides by the number of (non-missing) values. In the second
scan, this average is refined by adding the residuals towards the first
average. The \code{\link[base]{mean}}() uses this approach.
\code{meanOver(..., refine=FALSE)} is almost twice as fast as
\code{meanOver(..., refine=TRUE)}.
}
\examples{
x <- 1:10
n <- length(x)
idxs <- seq(from=1, to=n, by=2)
s1 <- mean(x[idxs]) # 25
s2 <- meanOver(x, idxs=idxs) # 25
stopifnot(identical(s1, s2))
idxs <- seq(from=n, to=1, by=-2)
s1 <- mean(x[idxs]) # 25
s2 <- meanOver(x, idxs=idxs) # 25
stopifnot(identical(s1, s2))
s1 <- mean(x) # 55
s2 <- meanOver(x) # 55
stopifnot(identical(s1, s2))
}
\seealso{
\code{\link[base]{mean}}().
To efficiently sum over a subset, see \code{\link{sumOver}}().
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{internal}
matrixStats/man/weightedMean.Rd 0000644 0001751 0000144 00000006120 12542546241 016262 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% weightedMean.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{weightedMean}
\alias{weightedMean}
\encoding{latin1}
\title{Weighted Arithmetic Mean}
\usage{
weightedMean(x, w, na.rm=FALSE, refine=FALSE, ...)
}
\description{
Computes the weighted sample mean of a numeric vector.
}
\arguments{
\item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted mean is
to be computed.}
\item{w}{a vector of weights the same length as \code{x} giving the weights
to use for each element of \code{x}. Negative weights are treated
as zero weights. Default value is equal weight to all values.}
\item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in
\code{x} should be stripped before the computation proceeds,
or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.
Default value is \code{\link[base]{NA}} (for efficiency).}
\item{refine}{If \code{\link[base:logical]{TRUE}} and \code{x} is \code{\link[base]{numeric}}, then extra effort is
used to calculate the average with greater numerical precision,
otherwise not.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
If \code{x} is of zero length, then \code{NaN} is returned,
which is consistent with \code{\link[base]{mean}}().
}
\examples{
x <- 1:10
n <- length(x)
w <- rep(1, times=n)
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
# Pull the mean towards zero
w[1] <- 5
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
# Put even more weight on the zero
w[1] <- 8.5
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
# All weight on the first value
w[1] <- Inf
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
# All weight on the last value
w[1] <- 1
w[n] <- Inf
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
# All weights set to zero
w <- rep(0, n)
m0 <- weighted.mean(x, w)
m1 <- weightedMean(x, w)
stopifnot(identical(m1,m0))
}
\section{Missing values}{
This function handles missing values consistently
\code{\link[stats]{weighted.mean}}. More precisely,
if \code{na.rm=FALSE}, then any missing values in either \code{x}
or \code{w} will give result \code{NA_real_}.
If \code{na.rm=TRUE}, then all \code{(x,w)} data points for which
\code{x} is missing are skipped. Note that if both \code{x} and
\code{w} are missing for a data points, then it is also skipped
(by the same rule). However, if only \code{w} is missing, then
the final results will always be \code{NA_real_} regardless of
\code{na.rm}.
}
\seealso{
\code{\link[base]{mean}}() and \code{\link[stats]{weighted.mean}}.
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{robust}
matrixStats/man/rowQuantiles.Rd 0000644 0001751 0000144 00000003745 12542546241 016370 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowQuantiles.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowQuantiles}
\alias{rowQuantiles}
\alias{colQuantiles}
\title{Estimates quantiles for each row (column) in a matrix}
\description{
Estimates quantiles for each row (column) in a matrix.
}
\usage{
rowQuantiles(x, probs=seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type=7L, ...,
drop=TRUE)
colQuantiles(x, probs=seq(from = 0, to = 1, by = 0.25), na.rm=FALSE, type=7L, ...,
drop=TRUE)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} with N >= 0.}
\item{probs}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of J probabilities in [0,1].}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.}
\item{type}{An \code{\link[base]{integer}} specify the type of estimator.
See \code{\link[stats]{quantile}} for more details.}
\item{...}{Additional arguments passed to \code{\link[stats]{quantile}}.}
\item{drop}{If TRUE, singleton dimensions in the result are dropped,
otherwise not.}
}
\value{
Returns a \code{\link[base]{numeric}} NxJ (KxJ) \code{\link[base]{matrix}}, where
N (K) is the number of rows (columns) for which the J quantiles are
calculated.
}
\examples{
set.seed(1)
x <- matrix(rnorm(50*40), nrow=50, ncol=40)
str(x)
probs <- c(0.25,0.5,0.75)
# Row quantiles
q <- rowQuantiles(x, probs=probs)
print(q)
q0 <- apply(x, MARGIN=1, FUN=quantile, probs=probs)
stopifnot(all.equal(q0, t(q)))
# Column IQRs
q <- colQuantiles(x, probs=probs)
print(q)
q0 <- apply(x, MARGIN=2, FUN=quantile, probs=probs)
stopifnot(all.equal(q0, t(q)))
}
\author{Henrik Bengtsson}
\seealso{
\code{\link[stats]{quantile}}.
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/allocMatrix.Rd 0000644 0001751 0000144 00000002440 12542546241 016141 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% allocMatrix.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{allocMatrix}
\alias{allocMatrix}
\alias{allocVector}
\alias{allocArray}
\title{Allocates an empty vector, matrix or array}
\usage{
allocVector(length, value=0, ...)
allocMatrix(nrow, ncol, value=0, ...)
allocArray(dim, value=0, ...)
}
\description{
Allocates an empty vector, matrix or array faster than the corresponding function in R.
}
\arguments{
\item{length, nrow, ncol, dim}{\code{\link[base]{numeric}}s specifying the dimension of
the created \code{\link[base]{vector}}, \code{\link[base]{matrix}} or \code{\link[base]{array}}.}
\item{value}{A \code{\link[base]{numeric}} scalar that all elements will have as value.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}} respectively of the same data
type as \code{value}.
}
\author{Henrik Bengtsson}
\seealso{
See also \code{\link[base]{vector}}, \code{\link[base]{matrix}} and \code{\link[base]{array}}.
}
\keyword{programming}
\keyword{internal}
matrixStats/man/rowTabulates.Rd 0000644 0001751 0000144 00000002457 12542546241 016346 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowTabulates.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowTabulates}
\alias{rowTabulates}
\alias{colTabulates}
\title{Tabulates the values in a matrix by row (column)}
\description{
Tabulates the values in a matrix by row (column).
}
\usage{
rowTabulates(x, values=NULL, ...)
colTabulates(x, values=NULL, ...)
}
\arguments{
\item{x}{An \code{\link[base]{integer}} or \code{\link[base]{raw}} NxK \code{\link[base]{matrix}}.}
\item{values}{An \code{\link[base]{vector}} of J values of count. If \code{\link[base]{NULL}}, all (unique)
values are counted.}
\item{...}{Not used.}
}
\value{
Returns a NxJ (KxJ) \code{\link[base]{matrix}} where
N (K) is the number of row (column) \code{\link[base]{vector}}s tabulated and
J is the number of values counted.
}
\examples{
x <- matrix(1:5, nrow=10, ncol=5)
print(x)
print(rowTabulates(x))
print(colTabulates(x))
# Count only certain values
print(rowTabulates(x, values=1:3))
y <- as.raw(x)
dim(y) <- dim(x)
print(y)
print(rowTabulates(y))
print(colTabulates(y))
}
\author{Henrik Bengtsson}
\keyword{utilities}
matrixStats/man/rowVars.Rd 0000644 0001751 0000144 00000004537 12542546241 015336 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowVars.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowVars}
\alias{rowVars}
\alias{rowVars}
\alias{colVars}
\alias{rowVars,matrix-method}
\alias{colVars,matrix-method}
\title{Variance estimates for each row (column) in a matrix}
\description{
Variance estimates for each row (column) in a matrix.
}
\usage{
rowVars(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...)
colVars(x, na.rm=FALSE, center=NULL, dim.=dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{center}{(optional) The center, defaults to the row means.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Additional arguments passed to \code{rowMeans()} and
\code{rowSums()}.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\examples{
set.seed(1)
x <- matrix(rnorm(20), nrow=5, ncol=4)
print(x)
# Row averages
print(rowMeans(x))
print(rowMedians(x))
# Column averages
print(colMeans(x))
print(colMedians(x))
# Row variabilities
print(rowVars(x))
print(rowSds(x))
print(rowMads(x))
print(rowIQRs(x))
# Column variabilities
print(rowVars(x))
print(colSds(x))
print(colMads(x))
print(colIQRs(x))
# Row ranges
print(rowRanges(x))
print(cbind(rowMins(x), rowMaxs(x)))
print(cbind(rowOrderStats(x, 1), rowOrderStats(x, ncol(x))))
# Column ranges
print(colRanges(x))
print(cbind(colMins(x), colMaxs(x)))
print(cbind(colOrderStats(x, 1), colOrderStats(x, nrow(x))))
x <- matrix(rnorm(2400), nrow=50, ncol=40)
# Row standard deviations
d <- rowDiffs(x)
s1 <- rowSds(d)/sqrt(2)
s2 <- rowSds(x)
print(summary(s1-s2))
# Column standard deviations
d <- colDiffs(x)
s1 <- colSds(d)/sqrt(2)
s2 <- colSds(x)
print(summary(s1-s2))
}
\author{Henrik Bengtsson}
\seealso{
See \code{rowMeans()} and \code{rowSums()} in \code{\link[base]{colSums}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/rowWeightedMeans.Rd 0000644 0001751 0000144 00000004346 12542546241 017145 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowWeightedMeans.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowWeightedMeans}
\alias{rowWeightedMeans}
\alias{colWeightedMeans}
\title{Calculates the weighted means for each row (column) in a matrix}
\description{
Calculates the weighted means for each row (column) in a matrix.
}
\usage{
rowWeightedMeans(x, w=NULL, na.rm=FALSE, ...)
colWeightedMeans(x, w=NULL, na.rm=FALSE, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{w}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length K (N).}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are excluded from the calculation,
otherwise not.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\details{
The implementations of these methods are optimized for both speed
and memory.
If no weights are given, the corresponding
\code{rowMeans()}/\code{colMeans()} is used.
}
\examples{
x <- matrix(rnorm(20), nrow=5, ncol=4)
print(x)
# Non-weighted row averages
xM0 <- rowMeans(x)
xM <- rowWeightedMeans(x)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (uniform weights)
w <- rep(2.5, ncol(x))
xM <- rowWeightedMeans(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (excluding some columns)
w <- c(1,1,0,1)
xM0 <- rowMeans(x[,(w == 1),drop=FALSE]);
xM <- rowWeightedMeans(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted row averages (excluding some columns)
w <- c(0,1,0,0)
xM0 <- rowMeans(x[,(w == 1),drop=FALSE]);
xM <- rowWeightedMeans(x, w=w)
stopifnot(all.equal(xM, xM0))
# Weighted averages by rows and columns
w <- 1:4
xM1 <- rowWeightedMeans(x, w=w)
xM2 <- colWeightedMeans(t(x), w=w)
stopifnot(all.equal(xM2, xM1))
}
\author{Henrik Bengtsson}
\seealso{
See \code{rowMeans()} and \code{colMeans()} in \code{\link[base]{colSums}}()
for non-weighted means.
See also \code{\link[stats]{weighted.mean}}.
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/signTabulate.Rd 0000644 0001751 0000144 00000001765 12542546241 016315 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% signTabulate.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{signTabulate}
\alias{signTabulate}
\alias{signTabulate}
\title{Calculates the number of negative, zero, positive and missing values}
\usage{
signTabulate(x, ...)
}
\description{
Calculates the number of negative, zero, positive and missing values in a \code{\link[base]{numeric}} vector. For \code{\link[base]{double}} vectors, the number of
negative and positive infinite values are also counted.
}
\arguments{
\item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}}.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{name}}d \code{\link[base]{numeric}} \code{\link[base]{vector}}.
}
\seealso{
\code{\link[base]{sign}}().
}
\author{Henrik Bengtsson}
\keyword{internal}
matrixStats/man/rowSds.Rd 0000644 0001751 0000144 00000003621 12542546241 015145 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowSds.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowSds}
\alias{rowSds}
\alias{rowSds}
\alias{colSds}
\alias{rowMads}
\alias{colMads}
\alias{rowSds,matrix-method}
\alias{colSds,matrix-method}
\title{Standard deviation estimates for each row (column) in a matrix}
\description{
Standard deviation estimates for each row (column) in a matrix.
}
\usage{
rowSds(x, ...)
colSds(x, ...)
rowMads(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...)
colMads(x, center=NULL, constant=1.4826, na.rm=FALSE, dim.=dim(x), centers=NULL, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{center}{A optional \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K) with centers.
By default, they are calculated using \code{\link{rowMedians}}().}
\item{constant}{A scale factor. See \code{\link[stats]{mad}} for details.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are removed first, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Additional arguments passed to \code{\link{rowVars}}() and
\code{\link{rowMedians}}(), respectively.}
\item{centers}{(deprectated) use \code{center} instead.}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\author{Henrik Bengtsson}
\seealso{
\code{\link[stats]{sd}}, \code{\link[stats]{mad}} and \code{\link[stats:cor]{var}}.
\code{\link{rowIQRs}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/rowAvgsPerColSet.Rd 0000644 0001751 0000144 00000007475 12542546241 017110 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowAvgsPerColSet.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowAvgsPerColSet}
\alias{rowAvgsPerColSet}
\alias{colAvgsPerRowSet}
\title{Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows)}
\description{
Applies a row-by-row (column-by-column) averaging function to equally-sized subsets of matrix columns (rows).
Each subset is averaged independently of the others.
}
\usage{
rowAvgsPerColSet(X, W=NULL, S, FUN=rowMeans, ..., tFUN=FALSE)
}
\arguments{
\item{X}{A \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}}.}
\item{W}{An optional \code{\link[base]{numeric}} NxM \code{\link[base]{matrix}} of weights.}
\item{S}{An \code{\link[base]{integer}} KxJ \code{\link[base]{matrix}} specifying the J subsets. Each
column holds K column (row) indices for the corresponding subset.}
\item{FUN}{The row-by-row (column-by-column) \code{\link[base]{function}} used to average
over each subset of \code{X}. This function must accept a \code{\link[base]{numeric}}
NxK (KxM) \code{\link[base]{matrix}} and the \code{\link[base]{logical}} argument \code{na.rm} (which is
automatically set), and return a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (M).}
\item{...}{Additional arguments passed to then \code{FUN} \code{\link[base]{function}}.}
\item{tFUN}{If \code{\link[base:logical]{TRUE}}, the NxK (KxM) \code{\link[base]{matrix}} passed to \code{FUN()}
is transposed first.}
}
\value{
Returns a \code{\link[base]{numeric}} JxN (MxJ) \code{\link[base]{matrix}},
where row names equal \code{rownames(X)} (\code{colnames(S)})
and column names \code{colnames(S)} (\code{colnames(X)}).
}
\details{
If argument \code{S} is a single column vector with indices
\code{1:N}, then \code{rowAvgsPerColSet(X, S=S, FUN=rowMeans)}
gives the same result as \code{rowMeans(X)}.
Analogously, for \code{rowAvgsPerColSet()}.
}
\examples{
X <- matrix(rnorm(20*6), nrow=20, ncol=6)
rownames(X) <- LETTERS[1:nrow(X)]
colnames(X) <- letters[1:ncol(X)]
print(X)
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply rowMeans() for 3 sets of 2 columns
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 3
S <- matrix(1:ncol(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s\%d", 1:nbrOfSets)
print(S)
Z <- rowAvgsPerColSet(X, S=S)
print(Z)
# Validation
Z0 <- cbind(s1=rowMeans(X[,1:2]), s2=rowMeans(X[,3:4]),
s3=rowMeans(X[,5:6]))
stopifnot(identical(drop(Z), Z0))
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# Apply colMeans() for 5 sets of 4 rows
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 5
S <- matrix(1:nrow(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s\%d", 1:nbrOfSets)
print(S)
Z <- colAvgsPerRowSet(X, S=S)
print(Z)
# Validation
Z0 <- rbind(s1=colMeans(X[1:4,]), s2=colMeans(X[5:8,]),
s3=colMeans(X[9:12,]), s4=colMeans(X[13:16,]),
s5=colMeans(X[17:20,]))
stopifnot(identical(drop(Z), Z0))
# - - - - - - - - - - - - - - - - - - - - - - - - - -
# When there is only one "complete" set
# - - - - - - - - - - - - - - - - - - - - - - - - - -
nbrOfSets <- 1
S <- matrix(1:ncol(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s\%d", 1:nbrOfSets)
print(S)
Z <- rowAvgsPerColSet(X, S=S, FUN=rowMeans)
print(Z)
Z0 <- rowMeans(X)
stopifnot(identical(drop(Z), Z0))
nbrOfSets <- 1
S <- matrix(1:nrow(X), ncol=nbrOfSets)
colnames(S) <- sprintf("s\%d", 1:nbrOfSets)
print(S)
Z <- colAvgsPerRowSet(X, S=S, FUN=colMeans)
print(Z)
Z0 <- colMeans(X)
stopifnot(identical(drop(Z), Z0))
}
\author{Henrik Bengtsson}
\keyword{internal}
\keyword{utilities}
matrixStats/man/rowCumsums.Rd 0000644 0001751 0000144 00000003544 12542546241 016054 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowCumsums.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowCumsums}
\alias{rowCumsums}
\alias{rowCumsums}
\alias{colCumsums}
\alias{rowCumprods}
\alias{colCumprods}
\alias{rowCummins}
\alias{colCummins}
\alias{rowCummaxs}
\alias{colCummaxs}
\title{Cumulative sums, products, minima and maxima for each row (column) in a matrix}
\description{
Cumulative sums, products, minima and maxima for each row (column) in a matrix.
}
\usage{
rowCumsums(x, dim.=dim(x), ...)
colCumsums(x, dim.=dim(x), ...)
rowCumprods(x, dim.=dim(x), ...)
colCumprods(x, dim.=dim(x), ...)
rowCummins(x, dim.=dim(x), ...)
colCummins(x, dim.=dim(x), ...)
rowCummaxs(x, dim.=dim(x), ...)
colCummaxs(x, dim.=dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}} of the same mode as \code{x}.
}
\examples{
x <- matrix(1:12, nrow=4, ncol=3)
print(x)
yr <- rowCumsums(x)
print(yr)
yc <- colCumsums(x)
print(yc)
yr <- rowCumprods(x)
print(yr)
yc <- colCumprods(x)
print(yc)
yr <- rowCummaxs(x)
print(yr)
yc <- colCummaxs(x)
print(yc)
yr <- rowCummins(x)
print(yr)
yc <- colCummins(x)
print(yc)
}
\author{Henrik Bengtsson}
\seealso{
See \code{\link[base]{cumsum}}(), \code{\link[base]{cumprod}}(),
\code{\link[base]{cummin}}(), and \code{\link[base]{cummax}}().
}
\keyword{array}
\keyword{iteration}
\keyword{univar}
matrixStats/man/rowRanges.Rd 0000644 0001751 0000144 00000003361 12542546241 015634 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowRanges.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowRanges}
\alias{rowRanges}
\alias{colRanges}
\alias{rowMins}
\alias{rowMaxs}
\alias{colMins}
\alias{colMaxs}
\title{Gets the range of values in each row (column) of a matrix}
\description{
Gets the range of values in each row (column) of a matrix.
}
\usage{
rowRanges(x, na.rm=FALSE, dim.=dim(x), ...)
colRanges(x, na.rm=FALSE, dim.=dim(x), ...)
rowMins(x, na.rm=FALSE, dim.=dim(x), ...)
colMins(x, na.rm=FALSE, dim.=dim(x), ...)
rowMaxs(x, na.rm=FALSE, dim.=dim(x), ...)
colMaxs(x, na.rm=FALSE, dim.=dim(x), ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
\code{rowRanges()} (\code{colRanges()}) returns a
\code{\link[base]{numeric}} Nx2 (Kx2) \code{\link[base]{matrix}}, where
N (K) is the number of rows (columns) for which the ranges are
calculated.
\code{rowMins()/rowMaxs()} (\code{colMins()/colMaxs()}) returns a
\code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\author{Henrik Bengtsson}
\seealso{
\code{\link{rowOrderStats}}() and \code{\link[base]{pmin.int}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/indexByRow.Rd 0000644 0001751 0000144 00000002257 12542546241 015762 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% indexByRow.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{indexByRow}
\alias{indexByRow}
\title{Translates matrix indices by rows into indices by columns}
\description{
Translates matrix indices by rows into indices by columns.
}
\usage{
indexByRow(dim, idxs=NULL, ...)
}
\arguments{
\item{dim}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length two specifying the length
of the "template" matrix.}
\item{idxs}{A \code{\link[base]{vector}} of indices. If \code{\link[base]{NULL}}, all indices are returned.}
\item{...}{Not use.}
}
\value{
Returns an \code{\link[base]{integer}} \code{\link[base]{vector}} of indices.
}
\examples{
dim <- c(5, 4)
X <- matrix(NA_integer_, nrow=dim[1], ncol=dim[2])
Y <- t(X)
idxs <- seq(along=X)
# Assign by columns
X[idxs] <- idxs
print(X)
# Assign by rows
Y[indexByRow(dim(Y), idxs)] <- idxs
print(Y)
stopifnot(X == t(Y))
}
\author{Henrik Bengtsson}
\keyword{iteration}
\keyword{logic}
matrixStats/man/rowCounts.Rd 0000644 0001751 0000144 00000005000 12542546241 015660 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowCounts.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowCounts}
\alias{rowCounts}
\alias{colCounts}
\alias{count}
\alias{allValue}
\alias{anyValue}
\alias{rowAnys}
\alias{colAnys}
\alias{rowAlls}
\alias{colAlls}
\title{Counts the number of TRUE values in each row (column) of a matrix}
\description{
Counts the number of TRUE values in each row (column) of a matrix.
}
\usage{
count(x, value=TRUE, na.rm=FALSE, ...)
rowCounts(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
colCounts(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
rowAlls(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
colAlls(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
rowAnys(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
colAnys(x, value=TRUE, na.rm=FALSE, dim.=dim(x), ...)
}
\arguments{
\item{x}{An NxK \code{\link[base]{matrix}} or an N*K \code{\link[base]{vector}}.}
\item{value}{A value to search for.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, \code{\link[base]{NA}}s are excluded first, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
\code{rowCounts()} (\code{colCounts()}) returns an \code{\link[base]{integer}} \code{\link[base]{vector}}
of length N (K).
The other methods returns a \code{\link[base]{logical}} \code{\link[base]{vector}} of length N (K).
}
\details{
These functions takes either a @matrix or a @vector as input.
If a @vector, then argument \code{dim} must be specified and
fulfill \code{prod(dim) == length(x)}. The result will be
identical to the results obtained when passing
\code{matrix(x, nrow=dim[1L], ncol=dim[2L])}, but avoids
having to temporarily create/allocate a @matrix, if only such
is needed only for these calculations.
}
\examples{
x <- matrix(FALSE, nrow=10, ncol=5)
x[3:7,c(2,4)] <- TRUE
x[2:4,] <- TRUE
x[,1] <- TRUE
x[5,] <- FALSE
x[,5] <- FALSE
print(x)
print(rowCounts(x)) # 1 4 4 4 0 3 3 1 1 1
print(colCounts(x)) # 9 5 3 5 0
print(rowAnys(x))
print(which(rowAnys(x))) # 1 2 3 4 6 7 8 9 10
print(colAnys(x))
print(which(colAnys(x))) # 1 2 3 4
}
\author{Henrik Bengtsson}
\keyword{array}
\keyword{logic}
\keyword{iteration}
\keyword{univar}
matrixStats/man/matrixStats-package.Rd 0000644 0001751 0000144 00000002502 12542546241 017575 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% 999.package.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{matrixStats-package}
\alias{matrixStats-package}
\alias{matrixStats}
\docType{package}
\title{Package matrixStats}
\description{
Methods operating on rows and columns of matrices, e.g. col / rowMedians(), col / rowRanks(), and col / rowSds(). There are also some vector-based methods, e.g. binMeans(), madDiff() and weightedMedians(). All methods have been optimized for speed and memory usage.
}
\section{Installation}{
To install this package, please do:
\preformatted{
install.packages("matrixStats")
}
}
\section{Vignettes}{
For an overview of the package, see the '\href{../doc/index.html}{vignettes}';
\enumerate{
\item Summary of functions.
}
}
\section{How to cite this package}{
Henrik Bengtsson (2015). matrixStats: Methods that Apply to Rows and Columns of Matrices (and to
Vectors). R package version 0.14.2. https://github.com/HenrikBengtsson/matrixStats
}
\author{Henrik Bengtsson, Hector Corrada Bravo, Robert Gentleman, Ola Hossjer, Harris Jaffee, Dongcan Jiang, Peter Langfelder}
\keyword{package}
matrixStats/man/sumOver.Rd 0000644 0001751 0000144 00000005740 12542546241 015330 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% sumOver.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{sumOver}
\alias{sumOver}
\title{Fast sum over subset of vector elements}
\usage{
sumOver(x, idxs=NULL, na.rm=FALSE, mode=typeof(x), ...)
}
\description{
Computes the sum of all or a subset of values.
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N.}
\item{idxs}{A \code{\link[base]{numeric}} index \code{\link[base]{vector}} in [1,N] of elements to sum over.
If \code{\link[base]{NULL}}, all elements are considered.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are skipped, otherwise not.}
\item{mode}{A \code{\link[base]{character}} string specifying the data type of the
return value. Default is to use the same mode as argument
\code{x}.}
\item{...}{Not used.}
}
\value{
Returns a scalar of the data type specified by argument \code{mode}.
If \code{mode="integer"}, then integer overflow occurs if the
\emph{sum} is outside the range of defined integer values.
}
\details{
\code{sumOver(x, idxs)} gives equivalent results as
\code{sum(x[idxs])}, but is faster and more memory efficient
since it avoids the actual subsetting which requires copying
of elements and garbage collection thereof.
Furthermore, \code{sumOver(x, mode="double")} is equivalent to
\code{sum(as.numeric(x))}, but is much more memory efficient when
\code{x} is an \code{\link[base]{integer}} vector.
}
\examples{
x <- 1:10
n <- length(x)
idxs <- seq(from=1, to=n, by=2)
s1 <- sum(x[idxs]) # 25
s2 <- sumOver(x, idxs=idxs) # 25
stopifnot(identical(s1, s2))
idxs <- seq(from=n, to=1, by=-2)
s1 <- sum(x[idxs]) # 25
s2 <- sumOver(x, idxs=idxs) # 25
stopifnot(identical(s1, s2))
s1 <- sum(x) # 55
s2 <- sumOver(x) # 55
stopifnot(identical(s1, s2))
# Total gives integer overflow
x <- c(.Machine$integer.max, 1L, -.Machine$integer.max)
s1 <- sum(x[1:2]) # NA_integer_
s2 <- sumOver(x[1:2]) # NA_integer_
stopifnot(identical(s1, s2))
# Total gives integer overflow (coerce to numeric)
s1 <- sum(as.numeric(x[1:2])) # 2147483648
s2 <- sumOver(as.numeric(x[1:2])) # 2147483648
s3 <- sumOver(x[1:2], mode="double") # 2147483648
stopifnot(identical(s1, s2))
stopifnot(identical(s1, s3))
# Cumulative sum would give integer overflow but not the total
s1 <- sum(x) # 1L
s2 <- sumOver(x) # 1L
stopifnot(identical(s1, s2))
}
\seealso{
\code{\link[base]{sum}}().
To efficiently average over a subset, see \code{\link{meanOver}}().
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{internal}
matrixStats/man/weightedMad.Rd 0000644 0001751 0000144 00000005026 12542546241 016107 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% weightedMad.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{weightedMad}
\alias{weightedMad}
\alias{rowWeightedMads}
\alias{colWeightedMads}
\title{Weighted Median Absolute Deviation (MAD)}
\usage{
weightedMad(x, w, na.rm=FALSE, constant=1.4826, center=NULL, ...)
colWeightedMads(x, w=NULL, na.rm=FALSE, ...)
rowWeightedMads(x, w=NULL, na.rm=FALSE, ...)
}
\description{
Computes a weighted MAD of a numeric vector.
}
\arguments{
\item{x}{a \code{\link[base]{numeric}} \code{\link[base]{vector}} containing the values whose weighted MAD is
to be computed.}
\item{w}{a vector of weights the same length as \code{x} giving the weights
to use for each element of \code{x}. Negative weights are treated
as zero weights. Default value is equal weight to all values.}
\item{na.rm}{a logical value indicating whether \code{\link[base]{NA}} values in
\code{x} should be stripped before the computation proceeds,
or not. If \code{\link[base]{NA}}, no check at all for \code{\link[base]{NA}}s is done.
Default value is \code{\link[base]{NA}} (for efficiency).}
\item{constant}{A \code{\link[base]{numeric}} scale factor, cf. \code{\link[stats]{mad}}.}
\item{center}{Optional \code{\link[base]{numeric}} scalar specifying the center
location of the data. If \code{\link[base]{NULL}}, it is estimated from data.}
\item{...}{Not used.}
}
\value{
Returns a \code{\link[base]{numeric}} scalar.
}
\section{Missing values}{
Missing values are dropped at the very beginning, if argument
\code{na.rm} is \code{\link[base:logical]{TRUE}}, otherwise not.
}
\examples{
x <- 1:10
n <- length(x)
m1 <- mad(x)
m2 <- weightedMad(x)
stopifnot(identical(m1, m2))
w <- rep(1, times=n)
m1 <- weightedMad(x, w)
stopifnot(identical(m1,m2))
# All weight on the first value
w[1] <- Inf
m <- weightedMad(x, w)
stopifnot(m == 0)
# All weight on the first two values
w[1:2] <- Inf
m1 <- mad(x[1:2])
m2 <- weightedMad(x, w)
stopifnot(identical(m1,m2))
# All weights set to zero
w <- rep(0, times=n)
m <- weightedMad(x, w)
stopifnot(is.na(m))
}
\seealso{
For the non-weighted MAD, see \code{\link[stats]{mad}}.
Internally \code{\link{weightedMedian}}() is used to
calculate the weighted median.
}
\author{Henrik Bengtsson}
\keyword{univar}
\keyword{robust}
matrixStats/man/rowRanks.Rd 0000644 0001751 0000144 00000007140 12542546241 015472 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowRanks.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowRanks}
\alias{rowRanks}
\alias{colRanks}
\title{Gets the rank of each row (column) of a matrix}
\description{
Gets the rank of each row (column) of a matrix.
}
\usage{
rowRanks(x, ties.method=c("max", "average", "min"), dim.=dim(x), ...)
colRanks(x, ties.method=c("max", "average", "min"), dim.=dim(x), preserveShape=FALSE,
...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} or \code{\link[base]{integer}} NxK \code{\link[base]{matrix}}.}
\item{ties.method}{A \code{\link[base]{character}} string specifying how ties are treated.
For details, see below.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{preserveShape}{A \code{\link[base]{logical}} specifying whether the \code{\link[base]{matrix}}
returned should preserve the input shape of \code{x}, or not.}
\item{...}{Not used.}
}
\value{
An \code{\link[base]{integer}} \code{\link[base]{matrix}} is returned.
The \code{rowRanks()} function always returns an NxK \code{\link[base]{matrix}},
where N (K) is the number of rows (columns) whose ranks are calculated.
The \code{colRanks()} function returns an NxK \code{\link[base]{matrix}},
if \code{preserveShape = TRUE}, otherwise a KxN \code{\link[base]{matrix}}.
%% The mode of the returned matrix is \code{\link[base]{integer}}, except for
%% \code{ties.method == "average"} when it is \code{\link[base]{double}}.
}
\details{
The row ranks of \code{x} are collected as \emph{rows}
of the result matrix.
The column ranks of \code{x} are collected as \emph{rows}
if \code{preserveShape = FALSE}, otherwise as \emph{columns}.
The implementation is optimized for both speed and memory.
To avoid coercing to \code{\link[base]{double}}s (and hence memory allocation), there
is a unique implementation for \code{\link[base]{integer}} matrices.
It is more memory efficient to do
\code{colRanks(x, preserveShape=TRUE)} than
\code{t(colRanks(x, preserveShape=FALSE))}.
Any \code{\link[base]{names}} of \code{x} are ignored and absent in the result.
}
\section{Missing and non- values}{
These are ranked as \code{NA}, as with \code{na.last="keep"}
in the \code{\link[base]{rank}}() function.
}
\section{Ties}{
When some values are equal ("ties"), argument \code{ties.method}
specifies what their ranks should be.
If \code{ties.method} is \code{"max"}, ties
are ranked as the maximum value.
If \code{ties.method} is \code{"average"}, ties are ranked
by their average.
If \code{ties.method} is \code{"max"} (\code{"min"}), ties
are ranked as the maximum (minimum) value.
If \code{ties.method} is \code{"average"}, ties are ranked
by their average.
For further details, see \code{\link[base]{rank}}().
}
\author{
Hector Corrada Bravo and Harris Jaffee.
Peter Langfelder for adding 'ties.method' support.
Henrik Bengtsson adapted the original native implementation
of \code{rowRanks()} from Robert Gentleman's \code{rowQ()}
in the \pkg{Biobase} package.
}
\seealso{
\code{\link[base]{rank}}().
For developers, see also Section 'Utility functions' in
'Writing R Extensions manual', particularly the native functions
\code{R_qsort_I()} and \code{R_qsort_int_I()}.
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/rowIQRs.Rd 0000644 0001751 0000144 00000003253 12542546241 015233 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowIQRs.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowIQRs}
\alias{rowIQRs}
\alias{colIQRs}
\alias{iqr}
\title{Estimates of the interquartile range for each row (column) in a matrix}
\description{
Estimates of the interquartile range for each row (column) in a matrix.
}
\usage{
rowIQRs(x, na.rm=FALSE, ...)
colIQRs(x, na.rm=FALSE, ...)
iqr(x, na.rm=FALSE, ...)
}
\arguments{
\item{x}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, missing values are dropped first, otherwise not.}
\item{...}{Additional arguments passed to \code{\link{rowQuantiles}}()
(\code{colQuantiles()}).}
}
\value{
Returns a \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\section{Missing values}{
Contrary to \code{\link[stats]{IQR}}, which gives an error if there are missing
values and \code{na.rm=FALSE}, \code{iqr()} and its corresponding row and
column-specific functions return \code{\link[base]{NA}}_real_.
}
\examples{
set.seed(1)
x <- matrix(rnorm(50*40), nrow=50, ncol=40)
str(x)
# Row IQRs
q <- rowIQRs(x)
print(q)
q0 <- apply(x, MARGIN=1, FUN=IQR)
stopifnot(all.equal(q0, q))
# Column IQRs
q <- colIQRs(x)
print(q)
q0 <- apply(x, MARGIN=2, FUN=IQR)
stopifnot(all.equal(q0, q))
}
\author{Henrik Bengtsson}
\seealso{
See \code{\link[stats]{IQR}}.
See \code{\link{rowSds}}().
}
\keyword{array}
\keyword{iteration}
\keyword{robust}
\keyword{univar}
matrixStats/man/rowLogSumExps.Rd 0000644 0001751 0000144 00000003060 12542546241 016457 0 ustar hornik users %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
%
% rowLogSumExps.R
%
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\name{rowLogSumExps}
\alias{rowLogSumExps}
\alias{colLogSumExps}
\alias{rowLogSumExps,matrix-method}
\alias{colLogSumExps,matrix-method}
\title{Accurately computes the logarithm of the sum of exponentials across rows or columns}
\description{
Accurately computes the logarithm of the sum of exponentials across rows or columns.
}
\usage{
rowLogSumExps(lx, na.rm=FALSE, dim.=dim(lx), ...)
colLogSumExps(lx, na.rm=FALSE, dim.=dim(lx), ...)
}
\arguments{
\item{lx}{A \code{\link[base]{numeric}} NxK \code{\link[base]{matrix}}.
Typically \code{lx} are \eqn{log(x)} values.}
\item{na.rm}{If \code{\link[base:logical]{TRUE}}, any missing values are ignored, otherwise not.}
\item{dim.}{An \code{\link[base]{integer}} \code{\link[base]{vector}} of length two specifying the
dimension of \code{x}, also when not a \code{\link[base]{matrix}}.}
\item{...}{Not used.}
}
\value{
A \code{\link[base]{numeric}} \code{\link[base]{vector}} of length N (K).
}
\section{Benchmarking}{
These methods are implemented in native code and have been optimized
for speed and memory.
}
\author{
Native implementation by Henrik Bengtsson.
Original R code by Nakayama ??? (Japan).
}
\seealso{
To calculate the same on vectors, \code{\link{logSumExp}}().
}
\keyword{array}