maxstat/0000755000176200001440000000000015005153362011733 5ustar liggesusersmaxstat/tests/0000755000176200001440000000000014616370013013076 5ustar liggesusersmaxstat/tests/maxstat-bugs.R0000644000176200001440000000176014616370013015644 0ustar liggesusers library("maxstat") library("exactRankTests") set.seed(290875) # by Achim Zeileis, 13.09.2002 y <- c(0.9, 1, 0.8, 0.8, 0.85, 0.3, 0.2, 0.2, 0.1, 0.2, 0.3) index <- 1:length(y) mydata <- data.frame(cbind(y, index)) maxstat.test(y ~ index, data=mydata, smethod = "Wilcoxon", pmethod = "HL") # this one failed: QUANT not known maxstat.test(y ~ index, data=mydata) # spotted and fixed 16.09.2002 y <- rnorm(20) x <- factor(c(rep(0,10), rep(1,10))) mydata <- data.frame(cbind(y,x)) a <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="HL") b <- wilcox.exact(y ~ x, data=mydata) stopifnot(all.equal(a$p.value, b$p.value)) # check new conditional Monte-Carlo p-values set.seed(290875) a <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="condMC", B = 9999)$p.value a set.seed(290875) b <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="condMC", B = 9999, alpha = 0.9)$p.value b stopifnot(all.equal(a, b)) maxstat/tests/maxstat-bugs.Rout.save0000644000176200001440000000442214172227730017333 0ustar liggesusers R Under development (unstable) (2013-09-01 r63796) -- "Unsuffered Consequences" Copyright (C) 2013 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > library("maxstat") > library("exactRankTests") Loading required package: survival Loading required package: splines Package 'exactRankTests' is no longer under development. Please consider using package 'coin' instead. > set.seed(290875) > > # by Achim Zeileis, 13.09.2002 > y <- c(0.9, 1, 0.8, 0.8, 0.85, 0.3, 0.2, 0.2, 0.1, 0.2, 0.3) > index <- 1:length(y) > mydata <- data.frame(cbind(y, index)) > maxstat.test(y ~ index, data=mydata, smethod = "Wilcoxon", pmethod = "HL") Maximally selected Wilcoxon statistics using HL data: y by index M = 2.7767, p-value < 2.2e-16 sample estimates: estimated cutpoint 5 > # this one failed: QUANT not known > maxstat.test(y ~ index, data=mydata) Maximally selected Wilcoxon statistics using none data: y by index M = 2.7767, p-value = NA sample estimates: estimated cutpoint 5 > > # spotted and fixed 16.09.2002 > y <- rnorm(20) > x <- factor(c(rep(0,10), rep(1,10))) > mydata <- data.frame(cbind(y,x)) > a <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="HL") > b <- wilcox.exact(y ~ x, data=mydata) > stopifnot(all.equal(a$p.value, b$p.value)) > > # check new conditional Monte-Carlo p-values > > set.seed(290875) > a <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="condMC", + B = 9999)$p.value > a [1] 0.9150915 > set.seed(290875) > b <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="condMC", + B = 9999, alpha = 0.9)$p.value > b [1] 0.9150915 > stopifnot(all.equal(a, b)) > > > proc.time() user system elapsed 0.192 0.036 0.223 maxstat/tests/Examples/0000755000176200001440000000000014172227730014660 5ustar liggesusersmaxstat/tests/Examples/maxstat-Ex.Rout.save0000644000176200001440000003774714172227730020545 0ustar liggesusers R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-unknown-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > pkgname <- "maxstat" > source(file.path(R.home("share"), "R", "examples-header.R")) > options(warn = 1) > library('maxstat') > > base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') > cleanEx() > nameEx("DLBCL") > ### * DLBCL > > flush(stderr()); flush(stdout()) > > ### Name: DLBCL > ### Title: Diffuse large B-cell lymphoma > ### Aliases: DLBCL > ### Keywords: datasets > > ### ** Examples > > > library("survival") > > set.seed(29) > > # compute the cutpoint and plot the empirical process > > mod <- maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, smethod="LogRank") > > print(mod) Maximally selected LogRank statistics using none data: Surv(time, cens) by MGE M = 3.1772, p-value = NA sample estimates: estimated cutpoint 0.1860526 > > ## Not run: > ##D # postscript("statDLBCL.ps", horizontal=F, width=8, height=8) > ##D pdf("statDLBCL.pdf", width=8, height=8) > ## End(Not run) > par(mai=c(1.0196235, 1.0196235, 0.8196973, 0.4198450)) > plot(mod, cex.lab=1.6, cex.axis=1.6, xlab="Mean gene expression",lwd=2) > ## Not run: > ##D dev.off() > ## End(Not run) > > # significance of the cutpoint > # limiting distribution > > maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, + smethod="LogRank", pmethod="Lau92", iscores=TRUE) Maximally selected LogRank statistics using Lau92 data: Surv(time, cens) by MGE M = 3.171, p-value = 0.03611 sample estimates: estimated cutpoint 0.1860526 > > # improved Bonferroni inequality, plot with significance bound > > maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, + smethod="LogRank", pmethod="Lau94", iscores=TRUE) Maximally selected LogRank statistics using Lau94 data: Surv(time, cens) by MGE M = 3.171, p-value = 0.02435 sample estimates: estimated cutpoint 0.1860526 > > mod <- maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, smethod="LogRank", + pmethod="Lau94", alpha=0.05) > plot(mod, xlab="Mean gene expression") > > ## Not run: > ##D # postscript(file="RNewsStat.ps",horizontal=F, width=8, height=8) > ##D pdf("RNewsStat.pdf", width=8, height=8) > ##D > ## End(Not run) > par(mai=c(1.0196235, 1.0196235, 0.8196973, 0.4198450)) > plot(mod, xlab="Mean gene expression", cex.lab=1.6, cex.axis=1.6) > ## Not run: > ##D dev.off() > ## End(Not run) > > # small sample solution Hothorn & Lausen > > maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, + smethod="LogRank", pmethod="HL") Maximally selected LogRank statistics using HL data: Surv(time, cens) by MGE M = 3.171, p-value = 0.02218 sample estimates: estimated cutpoint 0.1860526 > > # normal approximation > > maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, + smethod="LogRank", pmethod="exactGauss", iscores=TRUE, + abseps=0.01) Maximally selected LogRank statistics using exactGauss data: Surv(time, cens) by MGE M = 3.171, p-value = 0.01898 sample estimates: estimated cutpoint 0.1860526 > > # conditional Monte-Carlo > maxstat.test(Surv(time, cens) ~ MGE, data=DLBCL, + smethod="LogRank", pmethod="condMC", B = 9999) Maximally selected LogRank statistics using condMC data: Surv(time, cens) by MGE M = 3.1772, p-value = 0.0102 sample estimates: estimated cutpoint 0.1860526 > > # survival analysis and plotting like in Alizadeh et al. (2000) > > splitGEG <- rep(1, nrow(DLBCL)) > DLBCL <- cbind(DLBCL, splitGEG) > DLBCL$splitGEG[DLBCL$GEG == "Activated B-like"] <- 0 > > plot(survfit(Surv(time, cens) ~ splitGEG, data=DLBCL), + xlab="Survival time in month", ylab="Probability") > > text(90, 0.7, "GC B-like") > text(60, 0.3, "Activated B-like") > > splitIPI <- rep(1, nrow(DLBCL)) > DLBCL <- cbind(DLBCL, splitIPI) > DLBCL$splitIPI[DLBCL$IPI <= 2] <- 0 > > plot(survfit(Surv(time, cens) ~ splitIPI, data=DLBCL), + xlab="Survival time in month", ylab="Probability") > > text(90, 0.7, "Low clinical risk") > text(60, 0.25, "High clinical risk") > > # survival analysis using the cutpoint > > splitMGE <- rep(1, nrow(DLBCL)) > DLBCL <- cbind(DLBCL, splitMGE) > DLBCL$splitMGE[DLBCL$MGE <= mod$estimate] <- 0 > > ## Not run: > ##D # postscript("survDLBCL.ps",horizontal=F, width=8, height=8) > ##D pdf("survDLBCL.pdf", width=8, height=8) > ##D > ##D > ## End(Not run) > par(mai=c(1.0196235, 1.0196235, 0.8196973, 0.4198450)) > > plot(survfit(Surv(time, cens) ~ splitMGE, data=DLBCL), + xlab = "Survival time in month", + ylab="Probability", cex.lab=1.6, cex.axis=1.6, lwd=2) > > text(90, 0.9, expression("Mean gene expression" > 0.186), cex=1.6) > text(90, 0.45, expression("Mean gene expression" <= 0.186 ), cex=1.6) > > ## Not run: > ##D dev.off() > ##D > ## End(Not run) > > > > graphics::par(get("par.postscript", pos = 'CheckExEnv')) > cleanEx() detaching ‘package:survival’ > nameEx("corrmsrs") > ### * corrmsrs > > flush(stderr()); flush(stdout()) > > ### Name: corrmsrs > ### Title: Correlation Matrix > ### Aliases: corrmsrs > ### Keywords: misc > > ### ** Examples > > > set.seed(29) > > # matrix of hypothetical prognostic factors > > X <- matrix(rnorm(30), ncol=3) > > # this function > > a <- corrmsrs(X, minprop=0, maxprop=0.999) > > # coded by just typing the definition of the correlation > > testcorr <- function(X) { + wh <- function(cut, x) + which(x <= cut) + index <- function(x) { + ux <- unique(x) + ux <- ux[ux < max(ux)] + lapply(ux, wh, x = x) + } + a <- unlist(test <- apply(X, 2, index), recursive=FALSE) + cnull <- rep(0, nrow(X)) + mycorr <- diag(length(a)) + for (i in 1:(length(a)-1)) { + for (j in (i+1):length(a)) { + cone <- cnull + cone[a[[i]]] <- 1 + ctwo <- cnull + ctwo[a[[j]]] <- 1 + sone <- sqrt(sum((cone - mean(cone))^2)) + stwo <- sqrt(sum((ctwo - mean(ctwo))^2)) + tcorr <- sum((cone - mean(cone))*(ctwo - mean(ctwo))) + tcorr <- tcorr/(sone * stwo) + mycorr[i,j] <- tcorr + } + } + mycorr + } > > tc <- testcorr(X) > tc <- tc + t(tc) > diag(tc) <- 1 > stopifnot(all.equal(tc, a)) > > > > > cleanEx() > nameEx("hohnloser") > ### * hohnloser > > flush(stderr()); flush(stdout()) > > ### Name: hohnloser > ### Title: Left ventricular ejection fraction of patients with malignant > ### ventricular tachyarrhythmias. > ### Aliases: hohnloser > ### Keywords: datasets > > ### ** Examples > > > set.seed(29) > > library("survival") > > # limiting distribution > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="Lau92") Maximally selected LogRank statistics using Lau92 data: Surv(month, cens) by EF M = 3.5691, p-value = 0.01065 sample estimates: estimated cutpoint 39 > > # with integer valued scores for comparison > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="Lau92", iscores=TRUE) Maximally selected LogRank statistics using Lau92 data: Surv(month, cens) by EF M = 3.5639, p-value = 0.01083 sample estimates: estimated cutpoint 39 > > # improved Bonferroni inequality > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="Lau94") Maximally selected LogRank statistics using Lau94 data: Surv(month, cens) by EF M = 3.5691, p-value = 0.005453 sample estimates: estimated cutpoint 39 > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="Lau94", iscores=TRUE) Maximally selected LogRank statistics using Lau94 data: Surv(month, cens) by EF M = 3.5639, p-value = 0.005556 sample estimates: estimated cutpoint 39 > > > # small sample solution by Hothorn & Lausen > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="HL") Maximally selected LogRank statistics using HL data: Surv(month, cens) by EF M = 3.5639, p-value = 0.00667 sample estimates: estimated cutpoint 39 > > # normal approximation > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="exactGauss") Maximally selected LogRank statistics using exactGauss data: Surv(month, cens) by EF M = 3.5691, p-value = 0.004435 sample estimates: estimated cutpoint 39 > > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="exactGauss", iscores=TRUE) Maximally selected LogRank statistics using exactGauss data: Surv(month, cens) by EF M = 3.5639, p-value = 0.004338 sample estimates: estimated cutpoint 39 > > # conditional Monte-Carlo > maxstat.test(Surv(month, cens) ~ EF, data=hohnloser, + smethod="LogRank", pmethod="condMC", B = 9999) Maximally selected LogRank statistics using condMC data: Surv(month, cens) by EF M = 3.5691, p-value = 0.0045 sample estimates: estimated cutpoint 39 > > > > > cleanEx() detaching ‘package:survival’ > nameEx("maxstat.test") > ### * maxstat.test > > flush(stderr()); flush(stdout()) > > ### Name: maxstat.test > ### Title: Maximally Selected Rank and Statistics > ### Aliases: maxstat.test maxstat.test.data.frame maxstat.test.default > ### maxstat > ### Keywords: htest > > ### ** Examples > > > set.seed(29) > > x <- sort(runif(20)) > y <- c(rnorm(10), rnorm(10, 2)) > mydata <- data.frame(cbind(x,y)) > > mod <- maxstat.test(y ~ x, data=mydata, smethod="Wilcoxon", pmethod="HL", + minprop=0.25, maxprop=0.75, alpha=0.05) > print(mod) Maximally selected Wilcoxon statistics using HL data: y by x M = 3.4017, p-value = 0.0009476 sample estimates: estimated cutpoint 0.3621201 > plot(mod) > > # adjusted for more than one prognostic factor. > library("survival") > mstat <- maxstat.test(Surv(time, cens) ~ IPI + MGE, data=DLBCL, + smethod="LogRank", pmethod="exactGauss", + abseps=0.01) > plot(mstat) > > ### sphase > set.seed(29) > data("sphase", package = "TH.data") > > maxstat.test(Surv(RFS, event) ~ SPF, data=sphase, smethod="LogRank", + pmethod="Lau94") Maximally selected LogRank statistics using Lau94 data: Surv(RFS, event) by SPF M = 2.4033, p-value = 0.2727 sample estimates: estimated cutpoint 107 > > maxstat.test(Surv(RFS, event) ~ SPF, data=sphase, smethod="LogRank", + pmethod="Lau94", iscores=TRUE) Maximally selected LogRank statistics using Lau94 data: Surv(RFS, event) by SPF M = 2.4017, p-value = 0.2738 sample estimates: estimated cutpoint 107 > > maxstat.test(Surv(RFS, event) ~ SPF, data=sphase, smethod="LogRank", + pmethod="HL") Maximally selected LogRank statistics using HL data: Surv(RFS, event) by SPF M = 2.4017, p-value = 0.5494 sample estimates: estimated cutpoint 107 > > maxstat.test(Surv(RFS, event) ~ SPF, data=sphase, smethod="LogRank", + pmethod="condMC", B = 9999) Maximally selected LogRank statistics using condMC data: Surv(RFS, event) by SPF M = 2.4033, p-value = 0.1581 sample estimates: estimated cutpoint 107 > > plot(maxstat.test(Surv(RFS, event) ~ SPF, data=sphase, smethod="LogRank")) > > > > > > cleanEx() detaching ‘package:survival’ > nameEx("pLausen92") > ### * pLausen92 > > flush(stderr()); flush(stdout()) > > ### Name: pLausen92 > ### Title: Approximating Maximally Selected Statistics > ### Aliases: pLausen92 qLausen92 > ### Keywords: distribution > > ### ** Examples > > > # Compute quantiles. Should be equal to Table 2 in Lausen and Schumacher > > load(file.path(system.file(package = "maxstat"), "results", "LausenTab2.rda")) > > a <- rev(c(0.01, 0.025, 0.05, 0.1)) > prop <- rbind(c(0.25, 0.75), c(0.4, 0.6), c(0.1, 0.9), c(0.4, 0.9)) > Quant <- matrix(rep(0, length(a)*nrow(prop)), nrow=length(a)) > > for (i in 1:length(a)) { + for (j in 1:nrow(prop)) { + Quant[i,j] <- qLausen92(a[i], minprop=prop[j,1], maxprop=prop[j,2]) + } + } > > Quant <- round(Quant, 3) > rownames(Quant) <- a > colnames(Quant) <- c("A2575", "A46", "A19", "A49") > Quant <- as.data.frame(Quant) > rownames(LausenTab2) <- a > > Quant A2575 A46 A19 A49 0.1 2.539 2.263 2.784 2.597 0.05 2.829 2.560 3.054 2.883 0.025 3.088 2.828 3.297 3.138 0.01 3.395 3.149 3.588 3.442 > > LausenTab2 A2575 A46 A19 A49 0.1 2.539 2.263 2.784 2.597 0.05 2.829 2.560 3.054 2.883 0.025 3.088 2.828 3.297 3.138 0.01 3.395 3.149 3.588 3.442 > > if(!all.equal(LausenTab2, Quant)) stop("error checking pLausen92") > > > > > cleanEx() > nameEx("pLausen94") > ### * pLausen94 > > flush(stderr()); flush(stdout()) > > ### Name: pLausen94 > ### Title: Approximating Maximally Selected Statistics > ### Aliases: pLausen94 qLausen94 > ### Keywords: distribution > > ### ** Examples > > > p <- pLausen94(2.5, 20, 0.25, 0.75) > > # Lausen 94, page 489 > > if (round(p, 3) != 0.073) stop("error checking pLausen94") > > # the same > > p2 <- pLausen94(2.5, 200, 0.25, 0.75, m=seq(from=50, to=150, by=10)) > > stopifnot(all.equal(round(p,3), round(p2,3))) > > > > > cleanEx() > nameEx("pexactgauss") > ### * pexactgauss > > flush(stderr()); flush(stdout()) > > ### Name: pexactgauss > ### Title: Computing Maximally Selected Gauss Statistics > ### Aliases: pexactgauss qexactgauss > ### Keywords: distribution > > ### ** Examples > > > set.seed(29) > > x <- rnorm(20) > > pexact <- pexactgauss(2.5, x, abseps=0.01) > > > > > cleanEx() > nameEx("plot.maxtest") > ### * plot.maxtest > > flush(stderr()); flush(stdout()) > > ### Name: plot.maxtest > ### Title: Print and Plot Standardized Statistics > ### Aliases: plot.maxtest print.maxtest plot.mmaxtest print.mmaxtest > ### Keywords: htest > > ### ** Examples > > > set.seed(29) > > x <- sort(runif(20)) > y <- rbinom(20, 1, 0.5) > mydata <- data.frame(c(x,y)) > > mod <- maxstat.test(y ~ x, data=mydata, smethod="Median", + pmethod="HL", alpha=0.05) > print(mod) Maximally selected Median statistics using HL data: y by x M = 2.7822, p-value = 0.02024 sample estimates: estimated cutpoint 0.1761709 > plot(mod) > > > > > cleanEx() > nameEx("pmaxstat") > ### * pmaxstat > > flush(stderr()); flush(stdout()) > > ### Name: pmaxstat > ### Title: Approximating Maximally Selected Statistics > ### Aliases: pmaxstat qmaxstat > ### Keywords: distribution > > ### ** Examples > > > pmaxstat(2.5, 1:20, 5:15) [1] 0.09403538 > > > > > cleanEx() > nameEx("treepipit") > ### * treepipit > > flush(stderr()); flush(stdout()) > > ### Name: treepipit > ### Title: Tree Pipit Data > ### Aliases: treepipit > ### Keywords: datasets > > ### ** Examples > > > mod <- maxstat.test(counts ~ coverstorey, data = treepipit, + smethod = "Data", pmethod = "HL", minprop = 0.2, + maxprop = 0.8) > print(mod) Maximally selected Data statistics using HL data: counts by coverstorey M = 4.3139, p-value = 0.0001141 sample estimates: estimated cutpoint 40 > plot(mod) > > > > ### *