RcmdrMisc/0000755000176200001440000000000015127645235012147 5ustar liggesusersRcmdrMisc/MD50000644000176200001440000000665615127645235012474 0ustar liggesuserscb1f7563a036496a3a6299f104c7db69 *DESCRIPTION 094fe86e9604238d5913e80dc603ea79 *NAMESPACE c082bf659725945e8b32de0f82260aca *NEWS 5d16a33b0f53336afc736e2a2317ed85 *R/Barplot.R 7f43d46e0858287ac3155333e6539989 *R/DeltaMethod.R ccb5346b237f6ecfb06cae3f9840cab9 *R/Dotplot.R a51d191c7d9ad228ce521c1341bb36f9 *R/Percents.R 600821ec45b8d6e45560b144d5c9a390 *R/RcmdrMisc-package.R 18b42f1d1bcb30c2e1f0d41962147a14 *R/bin.var.R 2e10e85076d332b103a79ddae94b042d *R/binnedCounts.R 12e07692e23e2664836ad46bd0e9e572 *R/cluster.R fedd33bb47980485272b4be68cc99abb *R/cv.R 3d61d1f3d7662650e8a0975ad56cfc95 *R/discreteCounts.R c7f2aab6b8429458f28562bbf8cd94e6 *R/discretePlot.R 4c74fd1cf20da385d60231a7a1bbf0e6 *R/gumbel.R ddcf3704abf8880c72311cf3c83b045d *R/mergeRows.R e7c0cad7e0715c20fca4227414db6a90 *R/normalityTest.R b36e4cec5e2dae273272bc5ac5cf9d39 *R/numSummary.R ea039e39ebcdfd8cfe3599b7078d30bd *R/partial.cor.R eb4c1f86e662133eedf167b1fad469ca *R/piechart.R fe8f1f2def0cea50ea3e0d29eb16942b *R/plotBoot.R 75f0ce268c1a5dc948dfb13e16cfb230 *R/plots.R 6d8074309a65657c0fd5ff25f3d2ec46 *R/rcorr.adjust.R a219744c18917283e92d0cbd4a6e5b0a *R/readSAS.R adb0c475ec31109f613c31953ac7f236 *R/readSPSS.R c6b648d82b6b477d97fe0304ed91d751 *R/readStata.R 67cc0513537f4af1f0e6918d0a81d2f1 *R/readXL.R 6d8c2db179c7051a7b22ce9af4a50ef1 *R/reliability.R 22da11fe77058b002c05b0b3c54492d6 *R/repeatedMeasuresPlot.R 046a6b8cbd5456d9887ef948fe847028 *R/reshapeL2W.R bc9303bd0221ec47ccdb8d88d188c527 *R/reshapeW2L.R 17ff874c04d12c9c61cc90fc1dcd0bd4 *R/stepwise.R 1134904bdcae0582ec350d6a796aa356 *R/summarySandwich.R 799db69875d6cb88e2e9a935b256b32d *man/Barplot.Rd 219d6fa8e00d70cd0324d4661b6c95f0 *man/DeltaMethod.Rd 95f50dd88f8a778d1e7f5bf876b336a2 *man/Dotplot.Rd 31cfc0ac4de47f76b181139aea0eca6b *man/Gumbel.Rd 2501e04cf7b3fe0a1042bf72ba74dfcd *man/Hist.Rd 
a1955eb8110b1edac9d4652ee348aac6 *man/KMeans.Rd 6bec295cc26c95f026bcba8c613ce967 *man/RcmdrMisc-package.Rd 9cb2790d599f10ac9d04d6920eb04440 *man/assignCluster.Rd 33479065cc1b3d4ec1c85b3c8968e2db *man/bin.var.Rd f7baa48c6a5ef008e12bbc7e7113425d *man/binVariable.Rd 5ce9dc225d1322bb93549e74343e4f1b *man/binnedCounts.Rd c6c6fb7cf92c73917172bfb6acab5406 *man/colPercents.Rd 6bee0cc752e66ebfa2db4c2f9f223a28 *man/cv.Rd 3789a75ea67fe2fd0ccc4a131b00a197 *man/discreteCounts.Rd 65f0dded10beb91097685b8ec9a5b719 *man/discretePlot.Rd 60df3b04c78d32d0106d3e48d765e941 *man/indexplot.Rd 543bc48e66e0fe1caee4d1632dc56ea0 *man/lineplot.Rd 2b4fd061ab7133b52d7b4a9cf261afea *man/mergeRows.Rd a9e110742077a723a49c650ef8ea1abf *man/normalityTest.Rd 4784b1feec4f3d8539c6d3d0b96847b0 *man/numSummary.Rd a192abd397486ac47faf313a661fdbf2 *man/partial.cor.Rd 7a6b192bb332ad462f87f811a0bca74b *man/piechart.Rd 9980e63b407a282e135a1358e932694e *man/plotBoot.Rd 4e729c004fbbad03fcdc20fc767d78c8 *man/plotDistr.Rd cfa7726d09b1a5d16816f7afd090560a *man/plotMeans.Rd c79dab9c843514073c16107bae3ea301 *man/rcorr.adjust.Rd 916cb8a5134c35dc09aea87925f19431 *man/readSAS.Rd 122d516484304f55febb6032b7d354d3 *man/readSPSS.Rd bd5f5eb43a5841f059193be26efd29d6 *man/readStata.Rd 18037c1329f9dba9263a483c4ccb3cc9 *man/readXL.Rd fb497137e375633678868fd1e57b3c76 *man/reexports.Rd 34bc0af86832d8964785b6e611d1fc8e *man/reliability.Rd 78c68f807ba55c6f3c38f7cb18c54099 *man/repeatedMeasuresPlot.Rd 5281d7fbd6057d4c2c427436edde7dbe *man/reshapeL2W.Rd b868a08addf536682384b676223059c7 *man/reshapeW2L.Rd f5277a71b7c649517e2b66130887fcb6 *man/stepwise.Rd 656b172bd82a0614f5fda66ea078b161 *man/summarySandwich.Rd RcmdrMisc/R/0000755000176200001440000000000015126147233012342 5ustar liggesusersRcmdrMisc/R/summarySandwich.R0000644000176200001440000000420615124561103015637 0ustar liggesusers#' Linear Model Summary with Sandwich Standard Errors #' #' @name summarySandwich #' #' @aliases summarySandwich summarySandwich.lm #' #' @keywords misc 
#'
#' @details
#' \code{summarySandwich} creates a summary of a \code{"lm"} object similar to the standard one, with sandwich estimates of the coefficient standard errors in the place of the usual OLS standard errors, also modifying as a consequence the reported t-tests and p-values for the coefficients.
#' Standard errors may be computed from a heteroscedasticity-consistent ("HC") covariance matrix for the coefficients (of several varieties), or from a heteroscedasticity-and-autocorrelation-consistent ("HAC") covariance matrix.
#'
#' @param model a linear-model object.
#' @param type type of sandwich standard errors to be computed; see \code{\link[car]{hccm}} in the \pkg{car} package, and \code{\link[sandwich]{vcovHAC}} in the \pkg{sandwich} package, for details.
#' @param \dots arguments to be passed to \code{hccm} or \code{vcovHAC}.
#'
#' @return
#' an object of class \code{"summary.lm"}, with sandwich standard errors substituted for the usual OLS standard errors; the omnibus F-test is similarly adjusted.
#'
#' @author John Fox
#'
#' @seealso \code{\link[car]{hccm}}, \code{\link[sandwich]{vcovHAC}}.
#'
#' @examples
#' mod <- lm(prestige ~ income + education + type, data=Prestige)
#' summary(mod)
#' summarySandwich(mod)
#'
#' @export
summarySandwich <- function(model, ...){
  UseMethod("summarySandwich")
}

#' @rdname summarySandwich
#' @export
summarySandwich.lm <- function(model, type=c("hc3", "hc0", "hc1", "hc2", "hc4", "hac"), ...){
  s <- summary(model)
  # work on a copy of the coefficient table; named ctab rather than `c`
  # to avoid shadowing base::c()
  ctab <- coef(s)
  type <- match.arg(type)
  # sandwich coefficient covariance: "HC*" from car::hccm(), "hac" from sandwich::vcovHAC()
  v <- if (type != "hac") hccm(model, type=type, ...) else vcovHAC(model, ...)
  ctab[, 2] <- sqrt(diag(v))                # replace OLS std. errors
  ctab[, 3] <- ctab[, 1]/ctab[, 2]          # recompute t-values
  # two-sided p-values based on the residual df of the model
  ctab[, 4] <- 2*pt(abs(ctab[, 3]), df=s$df[2], lower.tail=FALSE)
  colnames(ctab)[2] <- paste0("Std.Err(", type, ")")
  s$coefficients <- ctab
  # recompute the omnibus F-test of all non-intercept coefficients
  # using the sandwich covariance matrix
  coefs <- names(coef(model))
  coefs <- coefs[coefs != "(Intercept)"]
  h <- linearHypothesis(model, coefs, vcov.=v)
  s$fstatistic <- c(value=h$F[2], numdf=length(coefs), dendf=s$df[2])
  s
}

#' Draw a Piechart With Percents or Counts in the Labels
#'
#' @name piechart
#'
#' @keywords hplot
#'
#' @details
#' \code{piechart} is a front-end to the standard R \code{\link[graphics]{pie}} function, with the capability of adding percents or counts to the pie-segment labels.
#'
#' @param x a factor or other discrete variable; the segments of the pie correspond to the unique values (levels) of \code{x} and are proportional to the frequency counts in the various levels.
#' @param scale parenthetical numbers to add to the pie-segment labels; the default is \code{"percent"}.
#' @param col colors for the segments; the default is provided by the \code{\link[colorspace]{rainbow_hcl}} function in the \pkg{colorspace} package.
#' @param \dots further arguments to be passed to \code{\link[graphics]{pie}}.
#'
#' @author John Fox
#'
#' @seealso \code{\link[graphics]{pie}}, \code{\link[colorspace]{rainbow_hcl}}
#'
#' @examples
#' with(Duncan, piechart(type))
#'
#' @export
piechart <- function(x, scale=c("percent", "frequency", "none"),
                     col=rainbow_hcl(nlevels(x)), ...){
  scale <- match.arg(scale)
  if (!is.factor(x)) x <- as.factor(x)
  labels <- levels(x)
  tab <- table(x)
  # augment the level labels with percents or counts, as requested
  labels <- if (scale == "percent") {
    tab <- 100*tab/sum(tab)
    paste0(labels, " (", round(tab), "%)")
  }
  else if (scale == "frequency") paste0(labels, " (", tab, ")")
  else labels
  pie(tab, labels=labels, col=col, ...)
}

#' Bar Plots
#'
#' @name Barplot
#'
#' @keywords hplot
#'
#' @details
#' Create bar plots for one or two factors scaled by frequency or percentages.
#' In the case of two factors, the bars can be divided (stacked) or plotted in parallel (side-by-side).
#' This function is a front end to \code{\link[graphics]{barplot}} in the \pkg{graphics} package.
#'
#' @param x a factor (or character or logical variable).
#' @param by optionally, a second factor (or character or logical variable).
#' @param scale either \code{"frequency"} (the default) or \code{"percent"}.
#' @param conditional if \code{TRUE} then percentages are computed separately for each value of \code{x} (i.e., conditional percentages of \code{by} within levels of \code{x}); if \code{FALSE} then total percentages are graphed; ignored if \code{scale="frequency"}.
#' @param style for two-factor plots, either \code{"divided"} (the default) or \code{"parallel"}.
#' @param col if \code{by} is missing, the color for the bars, defaulting to \code{"gray"}; otherwise colors for the levels of the \code{by} factor in two-factor plots, defaulting to colors provided by \code{\link[colorspace]{rainbow_hcl}} in the \pkg{colorspace} package.
#' @param xlab an optional character string providing a label for the horizontal axis.
#' @param legend.title an optional character string providing a title for the legend.
#' @param ylab an optional character string providing a label for the vertical axis.
#' @param main an optional main title for the plot.
#' @param legend.pos position of the legend, in a form acceptable to the \code{\link[graphics]{legend}} function; the default, \code{"above"}, puts the legend above the plot.
#' @param label.bars if \code{TRUE} (the default is \code{FALSE}) show values of frequencies or percents in the bars.
#' @param ... arguments to be passed to the \code{\link[graphics]{barplot}} function.
#'
#' @return Invisibly returns the horizontal coordinates of the centers of the bars.
#'
#' @author John Fox
#'
#' @seealso \code{\link[graphics]{barplot}}, \code{\link[graphics]{legend}}, \code{\link[colorspace]{rainbow_hcl}}
#'
#' @examples
#' with(Mroz, Barplot(wc))
#' with(Mroz, Barplot(wc, col="lightblue", label.bars=TRUE))
#' with(Mroz, Barplot(wc, by=hc))
#' with(Mroz, Barplot(wc, by=hc, scale="percent", label.bars=TRUE))
#' with(Mroz, Barplot(wc, by=hc, style="parallel", scale="percent", legend.pos="center"))
#'
#' @export
Barplot <- function(x, by, scale=c("frequency", "percent"), conditional=TRUE,
                    style=c("divided", "parallel"),
                    col=if (missing(by)) "gray" else rainbow_hcl(length(levels(by))),
                    xlab=deparse(substitute(x)), legend.title=deparse(substitute(by)),
                    ylab=scale, main=NULL, legend.pos="above", label.bars=FALSE, ...){
    # Choose the number of legend columns: start from min(4, n) and drop one
    # column when the last legend row would be less than half full.
    find.legend.columns <- function(n, target=min(4, n)){
        rem <- n %% target
        if (rem != 0 && rem < target/2) target <- target - 1
        target
    }
    # Evaluate the default axis/legend labels now, before x and by are
    # coerced to factors below (deparse(substitute(...)) is lazy).
    force(xlab)
    force(legend.title)
    if (!is.factor(x)) {
        if (!(is.character(x) || is.logical(x))) stop("x must be a factor, character, or logical")
        x <- as.factor(x)
    }
    if (!missing(by) && !is.factor(by)) {
        if (!(is.character(by) || is.logical(by))) stop("by must be a factor, character, or logical")
        by <- as.factor(by)
    }
    scale <- match.arg(scale)
    style <- match.arg(style)
    if (legend.pos == "above"){
        # Enlarge the top margin to make room for the legend above the plot;
        # the old margins are restored on exit.
        mar <- par("mar")
        mar[3] <- mar[3] + 2
        old.mar <- par(mar=mar)
        on.exit(par(old.mar))
    }
    if (missing(by)){
        # One-factor bar plot.
        y <- table(x)
        if (scale == "percent") y <- 100*y/sum(y)
        mids <- barplot(y, xlab=xlab, ylab=ylab, col=col, main=main, ...)
        if(label.bars){
            labels <- if (scale == "percent") paste0(round(y), "%") else y
            text(mids, y, labels, pos=1, offset=0.5)
        }
    }
    else{
        # Two-factor bar plot: bars for x, divided (stacked) or parallel by `by`.
        nlevels <- length(levels(by))
        col <- col[1:nlevels]
        y <- table(by, x)
        if (scale == "percent") {
            # Conditional percentages sum to 100 within each level of x;
            # otherwise percentages are of the grand total.
            y <- if (conditional) 100*apply(y, 2, function(x) x/sum(x))
            else 100*y/sum(y)
        }
        if (legend.pos == "above"){
            legend.columns <- find.legend.columns(nlevels)
            top <- 4 + ceiling(nlevels/legend.columns)
            # Allow drawing in the margin for the legend; restored on exit.
            xpd <- par(xpd=TRUE)
            on.exit(par(xpd=xpd), add=TRUE)
            mids <- barplot(y, xlab=xlab, ylab=ylab, col=col,
                            beside = style == "parallel", ...)
            # Position the legend just above the top of the plotting region,
            # in user coordinates.
            usr <- par("usr")
            legend.x <- usr[1]
            legend.y <- usr[4] + 1.2*top*strheight("x")
            legend.pos <- list(x=legend.x, y=legend.y)
            title(main=main, line=mar[3] - 1)
            legend(legend.pos, title=legend.title, legend=levels(by), fill=col,
                   ncol=legend.columns, inset=0.05)
        }
        else mids <- barplot(y, xlab=xlab, ylab=ylab, main=main, legend.text=levels(by), col=col,
                             args.legend=list(x=legend.pos, title=legend.title, inset=0.05, bg="white"),
                             beside = style == "parallel", ...)
        if (label.bars){
            # Label coordinates: for parallel (matrix mids) bars, each bar's
            # own height; for divided bars, the cumulative heights.
            yy <- if (is.matrix(mids)) as.vector(y) else as.vector(apply(y, 2, cumsum))
            labels <- if (scale == "percent") paste0(round(as.vector(y)), "%") else as.vector(y)
            xx <- if (is.vector(mids)) rep(mids, each=nrow(y)) else as.vector(mids)
            text(xx, yy, labels, pos=1, offset=0.5)
        }
    }
    return(invisible(mids))
}
RcmdrMisc/R/DeltaMethod.R0000644000176200001440000000356115124553763014673 0ustar liggesusers#' Confidence Intervals by the Delta Method
#'
#' @name DeltaMethod
#'
#' @aliases DeltaMethod print.DeltaMethod
#'
#' @keywords models
#'
#' @details \code{DeltaMethod} is a wrapper for the \code{\link[car]{deltaMethod}} function in the \pkg{car} package.
#' It computes the asymptotic standard error of an arbitrary, usually nonlinear, function of model coefficients, which are named \code{b0} (if there is an intercept in the model), \code{b1}, \code{b2}, etc., and based on the standard error, a confidence interval based on the normal distribution.
#'
#' @param model a regression model; see the \code{\link[car]{deltaMethod}} documentation.
#' @param g the expression --- that is, function of the coefficients --- to evaluate, as a character string.
#' @param level the confidence level, defaults to \code{0.95}.
#' @param x an object of class \code{"DeltaMethod"}.
#' @param ... optional arguments to pass to \code{print} to show the results.
#'
#' @return \code{DeltaMethod} returns an object of class \code{"DeltaMethod"}, for which a \code{print} method is provided.
#'
#' @author John Fox
#'
#' @seealso \code{\link[car]{deltaMethod}} function in the \pkg{car} package.
#'
#' @examples
#' DeltaMethod(lm(prestige ~ income + education, data=Duncan), "b1/b2")
#'
#' @export
DeltaMethod <- function(model, g, level=0.95){
  b <- coef(model)
  n.coef <- length(b)
  # rename the coefficients b0 (intercept, if present), b1, b2, ... so that
  # they can be referenced by those names in the expression g
  start <- if (names(b)[1] == "(Intercept)") 0 else 1
  b.names <- paste0("b", seq(from=start, length.out=n.coef))
  test <- car::deltaMethod(model, g, level=level, parameterNames=b.names)
  # keep the original-name/short-name correspondence for the print method
  structure(list(test=test, coef=rbind(names(b), b.names)),
            class="DeltaMethod")
}

#' @rdname DeltaMethod
#' @export
print.DeltaMethod <- function(x, ...){
  # show which short name (b0, b1, ...) corresponds to which coefficient
  name.table <- data.frame(t(x$coef))
  names(name.table) <- c("parameter", "name")
  print(name.table, row.names=FALSE)
  cat("\n")
  # then the delta-method test results from car::deltaMethod()
  print(x$test)
  invisible(x)
}

#' Reshape Repeated-Measures Data from Long to Wide Format
#'
#' @name reshapeL2W
#'
#' @keywords manip
#'
#' @details
#' A simple front-end to the standard R \code{\link[stats]{reshape}} function. The data are assumed to be in "long" format, with several rows for each subject.
#'
#' Between-subjects variables don't vary by occasions for each subject. Variables that aren't listed explicitly in the arguments to the function are assumed to be between-subjects variables, and a warning is printed if their values aren't invariant for each subject (see the \code{ignore} argument).
#'
#' Within-subjects factors vary by occasions for each subject, and it is assumed that the within-subjects design is regular, completely crossed, and balanced, so that the same combinations of within-subjects factors are observed for each subject.
#'
#' Occasion-varying variables, as their name implies, (potentially) vary by occasions for each subject, and include one or more "response" variables, possibly along with occasion-varying covariates; these variables can be factors as well as numeric variables.
#'
#' The data are reshaped so that there is one row per subject, with columns for the between-subjects variables, and each occasion-varying variable as multiple columns representing the combinations of levels of the within-subjects factors.
#' The names of the columns for the occasion-varying variables are composed from the combinations of levels of the within-subjects factors and from the names of the occasion-varying variables. If a subject in the long form of the data set lacks any combination of levels of within-subjects factors, he or she is excluded (with a warning) from the wide form of the data.
#' @param data a data frame in long format.
#' @param within a character vector of names of the within-subjects factors in the long form of the data; there must be at least one within-subjects factor.
#' @param id the (character) name of the variable representing the subject identifier in the long form of the data set; that is, rows with the same \code{id} belong to the same subject.
#' @param varying a character vector of names of the occasion-varying variables in the long form of the data; there must be at least one such variable, and typically there will be just one, an occasion-varying response variable.
#' @param ignore an optional character vector of names of variables in the long form of the data to exclude from the wide data set.
#'
#' @return a data frame in "wide" format, with one row for each subject, columns representing the between subjects factors, and columns for the occasion-varying variable(s) for each combination of within-subjects factors.
#'
#' @author John Fox
#'
#' @seealso \code{\link[stats]{reshape}}, \code{\link[carData]{OBrienKaiser}}, \code{\link[carData]{OBrienKaiserLong}}.
#'
#' @examples
#' OBW <- reshapeL2W(OBrienKaiserLong, within=c("phase", "hour"), id="id", varying="score")
#' brief(OBW)
#' # should be the same as OBrienKaiser in the carData package:
#' all.equal(OBrienKaiser, OBW, check.attributes=FALSE)
#'
#' @export
reshapeL2W <- function(data, within, id, varying, ignore){
  ## create wide data set
  if (missing(ignore)) ignore <- NULL
  names <- colnames(data)
  all <- c(within, id, varying, ignore)
  bad <- all[!all %in% names]
  # report all offending variables, separated by commas
  if (length(bad) > 0) stop("variables not in the data set: ",
                            paste(bad, collapse=", "))
  duplicated <- unique(all[duplicated(all)])
  if (length(duplicated) > 0) stop(paste0("the following variables appear more than once: ",
                                          paste(duplicated, collapse=", ")))
  if (!is.null(ignore)){
    remove <- which(names(data) %in% ignore)
    data <- data[, -remove]
  }
  # combine the within-subjects factors into a single occasion identifier
  # per row, e.g., "pre.1"; the combined column is named e.g. "phase.hour"
  within.factors <- data[, within, drop=FALSE]
  within.var <- apply(within.factors, 1,
                      function(x) paste(as.character(x), collapse="."))
  data <- cbind(data, within.var)
  occasions <- paste(within, collapse=".")
  names(data)[length(data)] <- occasions
  occasions.1 <- paste0(occasions, ".1")
  result <- reshape(data, timevar=occasions, idvar=id, v.names=varying,
                    direction="wide", drop=if (length(within) > 1) within)
  ## name the rows by subject id and drop the id (and any residual timevar) column
  rownames(result) <- result[, id]
  result <- result[, -which(colnames(result) %in% c(id, occasions.1))]
  all.repeated.cols <- NULL
  for (var in varying){
    # match columns literally named "<var>.<occasion>"; the dot is escaped
    # so that, e.g., "score" does not also match a column named "scoreX1"
    repeated.cols <- grep(paste0("^", var, "\\."), names(result))
    all.repeated.cols <- c(all.repeated.cols, repeated.cols)
  }
  ## remove cases with incomplete repeated measures;
  ## drop=FALSE keeps a data frame even for a single repeated-measures column
  bad <- apply(result[, all.repeated.cols, drop=FALSE], 1, anyNA)
  n.bad <- sum(bad)
  if (n.bad > 0){
    warning(n.bad, " ", if (n.bad == 1) "case" else "cases",
            " removed due to missing repeated measures")
    result <- result[!bad, ]
  }
  result
}

#' Function to Merge Rows of Two Data Frames
#'
#' @name mergeRows
#'
#' @keywords manip
#'
#' @details
#' This function merges two data frames by combining their rows.
#'
#' @param X First data frame.
#' @param Y Second data frame.
#' @param common.only If \code{TRUE}, only variables (columns) common to the two data frame are included in the merged data set; the default is \code{FALSE}.
#' @param \dots Not used.
#'
#' @return
#' A data frame containing the rows from both input data frames.
#'
#' @author John Fox
#'
#' @seealso For column merges and more complex merges, see \code{\link[base]{merge}}.
#'
#' @examples
#' data(Duncan)
#' D1 <- Duncan[1:20,]
#' D2 <- Duncan[21:45,]
#' D <- mergeRows(D1, D2)
#' print(D)
#' dim(D)
#'
#' @export
mergeRows <- function(X, Y, common.only=FALSE, ...){
  UseMethod("mergeRows")
}

#' @rdname mergeRows
#' @export
mergeRows.data.frame <- function(X, Y, common.only=FALSE, ...){
  cols1 <- names(X)
  cols2 <- names(Y)
  if (common.only){
    # keep only the columns present in both data frames;
    # drop=FALSE keeps the result a data frame even when there is only
    # one common column (previously rbind() of vectors produced a matrix)
    common <- intersect(cols1, cols2)
    rbind(X[, common, drop=FALSE], Y[, common, drop=FALSE])
  }
  else {
    # fill columns absent from one data frame with NAs before stacking;
    # the length guards avoid zero-length column assignment when one data
    # frame already has all the columns
    all <- union(cols1, cols2)
    miss1 <- setdiff(all, cols1)
    miss2 <- setdiff(all, cols2)
    if (length(miss1) > 0) X[, miss1] <- NA
    if (length(miss2) > 0) Y[, miss2] <- NA
    rbind(X, Y)
  }
}

#' Read an Excel File
#'
#' @name readXL
#'
#' @keywords manip
#'
#' @details
#' \code{readXL} reads an Excel file, either of type \code{.xls} or \code{.xlsx} into an R data frame; it provides a front end to the \code{\link[readxl]{read_excel}} function in the \pkg{readxl} package.
#' \code{\link[readxl]{excel_sheets}} is re-exported from the \pkg{readxl} package and reports the names of spreadsheets in an Excel file.
#'
#' @param file name of an Excel file including its path.
#' @param rownames if \code{TRUE} (the default is \code{FALSE}), the first column in the spreadsheet contains row names (which must be unique---i.e., no duplicates).
#' @param header if \code{TRUE} (the default), the first row in the spreadsheet contains column (variable) names.
#' @param na character string denoting missing data; the default is the empty string, \code{""}.
#' @param sheet number of the spreadsheet in the file containing the data to be read; the default is \code{1}.
#' @param stringsAsFactors if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors.
#'
#' @return a data frame.
#'
#' @author John Fox
#'
#' @seealso \code{\link[readxl]{read_excel}}, \code{\link[readxl]{excel_sheets}}.
#'
#' @export
readXL <- function(file, rownames=FALSE, header=TRUE, na="", sheet=1, stringsAsFactors=FALSE){
  Data <- readxl::read_excel(path=file, sheet=sheet, col_names=header, na=na)
  # convert the tibble returned by read_excel() to a plain data frame
  class(Data) <- "data.frame"
  if (rownames){
    # use the first column as row names, provided its values are unique
    check <- length(unique(col1 <- Data[[1]])) == nrow(Data)
    if (!check) warning("row names are not unique, ignored")
    else {
      rownames(Data) <- col1
      Data[[1]] <- NULL
    }
  }
  colnames(Data) <- make.names(colnames(Data), unique=TRUE)
  if (stringsAsFactors){
    # vapply(is.character) is robust to columns whose class() has length > 1
    # (e.g., POSIXct dates), unlike comparing sapply(Data, class) to "character"
    char <- vapply(Data, is.character, logical(1))
    for (var in which(char)){
      Data[[var]] <- factor(Data[[var]])
    }
  }
  Data
}

#' @export
readxl::excel_sheets

#' Reliability of a Composite Scale
#'
#' @name reliability
#'
#' @aliases reliability print.reliability
#'
#' @keywords misc
#'
#' @details
#' Calculates Cronbach's alpha and standardized alpha (lower bounds on reliability) for a composite (summated-rating) scale.
#' Standardized alpha is for the sum of the standardized items.
#' In addition, the function calculates alpha and standardized alpha for the scale with each item deleted in turn, and computes the correlation between each item and the sum of the other items.
#'
#' @param S the covariance matrix of the items; normally, there should be at least 3 items and certainly no fewer than 2.
#' @param x reliability object to be printed.
#' @param digits number of decimal places.
#' @param \dots not used: for compatibility with the print generic.
#'
#' @return
#' an object of class reliability, which normally would be printed.
#'
#' @author John Fox
#'
#' @examples
#' data(DavisThin)
#' reliability(cov(DavisThin))
#'
#' @references
#' N. Cliff (1986) Psychological testing theory. Pp. 343--349 in S. Kotz and N. Johnson, eds., \emph{Encyclopedia of Statistical Sciences, Vol. 7}. Wiley.
#'
#' @seealso \code{\link[stats]{cov}}
#'
#' @export
reliability <- function(S){
  # Cronbach's alpha and standardized alpha for the scale whose covariance
  # matrix is S and correlation matrix is R
  scale.alphas <- function(S, R){
    n.items <- dim(S)[1]
    unit <- rep(1, n.items)
    total.var <- as.vector(unit %*% S %*% unit)   # variance of the item sum
    alpha <- (n.items/(n.items - 1)) * (1 - (1/total.var)*sum(diag(S)))
    mean.r <- mean(R[lower.tri(R)])               # average inter-item correlation
    std.alpha <- n.items*mean.r/(1 + (n.items - 1)*mean.r)
    c(alpha=alpha, std.alpha=std.alpha)
  }
  result <- list()
  # S must be a square, symmetric (to rounding error), numeric matrix of order >= 2
  valid <- is.numeric(S) && is.matrix(S) && nrow(S) == ncol(S) &&
    !any(abs(S - t(S)) > max(abs(S))*1e-10) && nrow(S) >= 2
  if (!valid)
    stop("argument must be a square, symmetric, numeric covariance matrix")
  k <- dim(S)[1]
  s <- sqrt(diag(S))
  R <- S/(s %o% s)                # covariances -> correlations
  overall <- scale.alphas(S, R)
  result$alpha <- overall[1]
  result$st.alpha <- overall[2]
  if (k < 3) {
    warning("there are fewer than 3 items in the scale")
    return(invisible(NULL))
  }
  item.stats <- matrix(0, k, 3)
  for (item in seq_len(k)) {
    # alpha and standardized alpha with this item removed
    item.stats[item, c(1, 2)] <- scale.alphas(S[-item, -item], R[-item, -item])
    # correlation between the item and the sum of the remaining items
    this <- rep(0, k)
    this[item] <- 1
    others <- 1 - this
    cov.it <- this %*% S %*% others
    var.rest <- others %*% S %*% others
    item.stats[item, 3] <- cov.it/(sqrt(var.rest * S[item, item]))
  }
  rownames(item.stats) <- rownames(S)
  colnames(item.stats) <- c("Alpha", "Std.Alpha", "r(item, total)")
  result$rel.matrix <- item.stats
  class(result) <- "reliability"
  result
}

#' @name reliability
#' @export
print.reliability <- function(x, digits=4, ...){
  # overall alphas, then the item-deleted statistics
  cat(paste("Alpha reliability = ", round(x$alpha, digits), "\n"))
  cat(paste("Standardized alpha = ", round(x$st.alpha, digits), "\n"))
  cat("\nReliability deleting each item in turn:\n")
  print(round(x$rel.matrix, digits))
  invisible(x)
}

#' Append a Cluster Membership Variable to a Dataframe
#'
#' @name assignCluster
#'
#' @details
#' Correctly creates a cluster membership variable that can be attached to a dataframe when only a subset of the observations in that dataframe were used to create the clustering solution.
#' NAs are assigned to the observations of the original dataframe not used in creating the clustering solution.
#'
#' This code originally by Dan Putler, used with permission.
#'
#' @author Dan Putler
#'
#' @param clusterData The data matrix used in the clustering solution. The data matrix may have have only a subset of the observations contained in the original dataframe.
#' @param origData The original dataframe from which the data used in the clustering solution were taken.
#' @param clusterVec An integer variable containing the cluster membership assignments for the observations used in creating the clustering solution.
#' This vector can be created using \code{cutree} for clustering solutions generated by \code{hclust} or the \code{cluster} component of a list object created by \code{kmeans} or \code{KMeans}.
#'
#' @return A factor (with integer labels) that indicate the cluster assignment for each observation, with an NA value given to observations not used in the clustering solution.
#'
#' @seealso \code{\link[stats]{hclust}}, \code{\link[stats]{cutree}}, \code{\link[stats]{kmeans}}, \code{\link{KMeans}}.
#'
#' @examples
#' ## Load USArrests data set
#' data(USArrests)
#'
#' ## Create three cluster
#' USArrkm3 <- KMeans(USArrests[USArrests$UrbanPop<66, ], centers=3)
#'
#' ## Create a variable with cluster assignment
#' assignCluster(USArrests[USArrests$UrbanPop<66, ], USArrests, USArrkm3$cluster)
#'
#' @export
assignCluster <- function(clusterData, origData, clusterVec){
  # match rows of the original data to the rows that entered the clustering
  # by row name; unmatched rows get NA
  used.rows <- row.names(clusterData)
  orig.rows <- row.names(origData)
  assignment <- rep(NA, length(orig.rows))
  assignment[orig.rows %in% used.rows] <- clusterVec
  as.factor(assignment)
}

#' K-Means Clustering Using Multiple Random Seeds
#'
#' @name KMeans
#'
#' @keywords misc
#'
#' @details
#' Finds a number of k-means clustering solutions using R's \code{kmeans} function, and selects as the final solution the one that has the minimum total within-cluster sum of squared distances.
#' @param x A numeric matrix of data, or an object that can be coerced to such a matrix (such as a numeric vector or a dataframe with all numeric columns).
#' @param centers The number of clusters in the solution.
#' @param iter.max The maximum number of iterations allowed.
#' @param num.seeds The number of different starting random seeds to use. Each random seed results in a different k-means solution.
#'
#' @return
#' A list with components:
#' \describe{
#'   \item{cluster}{A vector of integers indicating the cluster to which each point is allocated.}
#'   \item{centers}{A matrix of cluster centres (centroids).}
#'   \item{withinss}{The within-cluster sum of squares for each cluster.}
#'   \item{tot.withinss}{The within-cluster sum of squares summed across clusters.}
#'   \item{betweenss}{The between-cluster sum of squared distances.}
#'   \item{size}{The number of points in each cluster.}
#' }
#'
#' @author Dan Putler
#'
#' @seealso \code{\link[stats]{kmeans}}
#'
#' @examples
#' data(USArrests)
#' KMeans(USArrests, centers=3, iter.max=5, num.seeds=5)
#'
#' @export
KMeans <- function (x, centers, iter.max=10, num.seeds=10) {
  ## fixed 15 Mar 05 by J. Fox
  if(mode(x)=="numeric") x<-data.frame(new.x=x)
  # best-of-num.seeds random restarts: keep the solution with the smallest
  # total within-cluster sum of squares
  KM <- kmeans(x=x, centers=centers, iter.max=iter.max)
  # seq_len(num.seeds - 1) performs exactly num.seeds - 1 additional runs;
  # the previous 2:num.seeds ran two extra iterations when num.seeds < 2
  for(i in seq_len(num.seeds - 1)) {
    newKM <- kmeans(x=x, centers=centers, iter.max=iter.max)
    if(sum(newKM$withinss) < sum(KM$withinss)) {
      KM <- newKM
    }
  }
  KM$tot.withinss <- sum(KM$withinss)
  # between-cluster SS: squared distances from each cluster center to the
  # grand centroid, weighted by cluster sizes
  xmean <- colMeans(x)
  centers <- rbind(KM$centers, xmean)
  bss1 <- as.matrix(dist(centers)^2)
  KM$betweenss <- sum(as.vector(bss1[nrow(bss1),])*c(KM$size,0))
  return(KM)
}

#' Binned Frequency Distributions of Numeric Variables
#'
#' @name binnedCounts
#'
#' @keywords univar
#'
#' @details
#' Bins a numeric variable, as for a histogram, and reports the count and percentage in each bin.
#' The computations are done by the \code{\link[graphics]{hist}} function, but no histogram is drawn.
#' If supplied a numeric matrix or data frame, the distribution of each column is printed.
#'
#' @param x a numeric vector, matrix, or data frame.
#' @param breaks specification of the breaks between bins, to be passed to the \code{\link[graphics]{hist}} function.
#' @param round.percents number of decimal places to round percentages; default is \code{2}.
#' @param name for the variable; only used for vector argument \code{x}.
#'
#' @return For a numeric vector, invisibly returns the vector of counts, named with the end-points of the corresponding bins. For a matrix or data frame, invisibly returns \code{NULL}
#'
#' @author John Fox
#'
#' @seealso \code{\link[graphics]{hist}}, \code{\link{discreteCounts}}
#'
#' @examples
#' with(Prestige, binnedCounts(income))
#' binnedCounts(Prestige[, 1:4])
#'
#' @export
binnedCounts <- function(x, breaks="Sturges", round.percents=2,
                         name=deparse(substitute(x))){
  if (is.data.frame(x)) x <- as.matrix(x)
  if (is.matrix(x)) {
    # tabulate each column in turn, labelled by its column name
    col.names <- colnames(x)
    for (j in seq_len(ncol(x))){
      binnedCounts(x[, j], breaks=breaks, name=col.names[j])
      cat("\n")
    }
    return(invisible(NULL))
  }
  # hist() performs the binning; no plot is drawn
  dist <- hist(x, breaks=breaks, plot=FALSE)
  Count <- dist$counts
  breaks <- dist$breaks
  n.bins <- length(breaks) - 1
  tot <- sum(Count)
  Percent <- round(100*Count/tot, round.percents)
  tot.percent <- round(sum(Percent), round.percents)
  # interval labels: the first bin is closed on the left, e.g. "[0, 5]",
  # the remaining bins are left-open, e.g. "(5, 10]"
  names(Count) <- paste0(c("[", rep("(", n.bins - 1)),
                         breaks[-length(breaks)], ", ", breaks[-1], "]")
  table <- rbind(cbind(Count, Percent), c(tot, tot.percent))
  rownames(table)[nrow(table)] <- "Total"
  cat("Binned distribution of", name, "\n")
  print(table)
  invisible(Count)
}

#' Plot Distribution of Discrete Numeric Variable
#'
#' @name discretePlot
#'
#' @keywords hplot
#'
#' @details
#' Plot the distribution of a discrete numeric variable, optionally classified by a factor.
#'
#' If the \code{by} argument is specified, then one plot is produced for each level of \code{by}; these are arranged vertically and all use the same scale for the horizontal and vertical axes.
#'
#' @param x a numeric variable.
#' @param by optionally a factor (or character or logical variable) by which to classify \code{x}.
#' @param scale either \code{"frequency"} (the default) or \code{"percent"}.
#' @param xlab optional character string to label the horizontal axis.
#' @param ylab optional character string to label the vertical axis.
#' @param main optional main label for the plot (ignored if the \code{by} argument is specified).
#' @param xlim two-element numeric vectors specifying the range of the x axes; if not specified, will be determined from the data.
#' @param ylim two-element numeric vectors specifying the range of the y axes; if not specified, will be determined from the data; the lower limit of the y-axis should normally be 0 and a warning will be printed if it isn't.
#' @param ... other arguments to be passed to \code{\link[graphics:plot.default]{plot}}.
#'
#' @return Returns \code{NULL} invisibly.
#'
#' @author John Fox
#'
#' @seealso \code{\link{Hist}}, \code{\link{Dotplot}}.
#'
#' @examples
#' data(mtcars)
#' mtcars$cyl <- factor(mtcars$cyl)
#' with(mtcars, discretePlot(carb))
#' with(mtcars, discretePlot(carb, scale="percent"))
#' with(mtcars, discretePlot(carb, by=cyl))
#'
#' @export
discretePlot <- function(x, by, scale=c("frequency", "percent"),
                         xlab=deparse(substitute(x)), ylab=scale, main="",
                         xlim=NULL, ylim=NULL, ...){
  force(xlab)  # evaluate the default label before x is subset below
  scale <- match.arg(scale)
  # draw a single spike plot; dp() now declares `...` so that extra graphics
  # arguments passed to discretePlot() are forwarded to plot() -- previously
  # dp() had no `...` formal and any such argument caused an
  # "unused argument" error
  dp <- function(x, scale, xlab, ylab, main, xlim, ylim, ...){
    y <- as.vector(table(x))
    if (scale == "percent") y <- 100*y/sum(y)
    x <- sort(unique(x))
    if (is.null(ylim)) ylim <- c(0, max(y, na.rm=TRUE))
    # spikes ("h") only when the y-axis starts at 0; otherwise draw points only
    plot(x, y, type=if (min(ylim) == 0) "h" else "n", xlab=xlab, ylab=ylab,
         main=main, xlim=xlim, ylim=ylim, axes=FALSE, frame.plot=TRUE, ...)
    axis(2)
    axis(1, at=x)
    points(x, y, pch=16)
    abline(h=0, col="gray")
  }
  if (is.null(xlim)) xlim <- range(x, na.rm=TRUE)
  if (!is.null(ylim) && min(ylim) != 0) warning("the lower end of the y-axis is not 0")
  if (missing(by)){
    dp(na.omit(x), scale, xlab, ylab, main, xlim, ylim, ...)
  }
  else{
    by.var <- deparse(substitute(by))
    if (!is.factor(by)){
      if (!(is.character(by) || is.logical(by))){
        stop("by must be a factor, character, or logical")
      }
      by <- as.factor(by)
    }
    complete <- complete.cases(x, by)
    x <- x[complete]
    by <- by[complete]
    if (is.null(ylim)){
      # common y-axis across panels: tallest frequency or percentage
      max.y <- if (scale == "frequency") max(table(x, by))
      else {
        tab <- colPercents(table(x, by))
        max(tab[1:(nrow(tab) - 2), ])  # exclude the Total and Count rows
      }
      ylim <- c(0, max.y)
    }
    # one panel per level of by, stacked vertically; layout restored on exit
    levels <- levels(by)
    save.par <- par(mfcol=c(length(levels), 1))
    on.exit(par(save.par))
    for (level in levels){
      dp(x[by == level], scale=scale, xlab=xlab, ylab=ylab,
         main = paste(by.var, "=", level), xlim=xlim, ylim=ylim, ...)
    }
  }
}

#' Coefficient of variation
#'
#' @name cv
#'
#' @keywords misc
#'
#' @details
#' \code{numSummary} creates neatly formatted tables of means, standard deviations, coefficients of variation, skewness, kurtosis, and quantiles of numeric variables. \code{CV} computes the coefficient of variation.
#'
#' @param x a numeric vector, matrix, or data frame.
#' @param na.rm if \code{TRUE} (the default) remove \code{NA}s before computing the coefficient of variation.
#'
#' @return \code{cv} returns the coefficient(s) of variation.
#'
#' @author John Fox
#'
#' @examples
#' data(Prestige)
#' print(cv(Prestige[,c("income", "education")]))
#'
#' @export
cv <- function(x, na.rm=TRUE){
    # Coerce once up front; a data frame with any non-numeric column becomes a
    # character matrix and is rejected by the guard below.
    x <- as.matrix(x)
    if (!is.numeric(x)) stop("x is not numeric")
    col.means <- colMeans(x, na.rm=na.rm)
    col.sds <- apply(x, 2, stats::sd, na.rm=na.rm)
    # The CV is only meaningful for positive data; warn but still compute.
    if (any(x <= 0, na.rm=na.rm)) warning("not all values are positive")
    result <- col.sds/col.means
    # A non-positive mean makes the ratio meaningless; report NA instead.
    result[col.means <= 0] <- NA
    result
}

#' @rdname cv
#' @keywords internal
#' @export
CV <- function(x, na.rm = TRUE){
    # Deprecated alias retained for backward compatibility; delegates to cv().
    warning("CV is deprecated in RcmdrMisc package. Use cv instead.")
    cv(x = x, na.rm = na.rm)
}
RcmdrMisc/R/normalityTest.R0000644000176200001440000001031315123526364015344 0ustar liggesusers#' Normality Tests
#'
#' @name normalityTest
#'
#' @aliases normalityTest normalityTest.default normalityTest.formula
#'
#' @keywords htest
#'
#' @details
#' Perform one of several tests of normality, either for a variable or for a variable by groups.
#' The \code{normalityTest} function uses the \code{\link[stats]{shapiro.test}} function or one of several functions in the \pkg{nortest} package.
#' If tests are done by groups, then adjusted p-values, computed by the Holm method, are also reported (see \code{\link[stats]{p.adjust}}).
#'
#' @param x numeric vector or formula.
#' @param formula one-sided formula of the form \code{~x} or two-sided formula of the form \code{x ~ groups}, where \code{x} is a numeric variable and \code{groups} is a factor.
#' @param data a data frame containing the data for the test.
#' @param test quoted name of the function to perform the test.
#' @param groups optional factor to divide the data into groups.
#' @param vname optional name for the variable; if absent, taken from \code{x}.
#' @param gname optional name for the grouping factor; if absent, taken from \code{groups}.
#' @param \dots any arguments to be passed down; the only useful such arguments are for the \code{\link[nortest]{pearson.test}} function in the \pkg{nortest} package.
#'
#' @return
#' If testing by groups, the function invisibly returns \code{NULL}; otherwise it returns an object of class \code{"htest"}, which normally would be printed.
#'
#' @author John Fox
#'
#' @seealso \code{\link[stats]{shapiro.test}}, \code{\link[nortest]{ad.test}}, \code{\link[nortest]{cvm.test}}, \code{\link[nortest]{lillie.test}}, \code{\link[nortest]{pearson.test}}, \code{\link[nortest]{sf.test}}.
#'
#' @examples
#' data(Prestige, package="car")
#' with(Prestige, normalityTest(income))
#' normalityTest(income ~ type, data=Prestige, test="ad.test")
#' normalityTest(~income, data=Prestige, test="pearson.test", n.classes=5)
#'
#' @export
normalityTest <- function(x, ...){
    # S3 generic: dispatches to the formula or default method.
    UseMethod("normalityTest")
}

#' @rdname normalityTest
#' @export
normalityTest.formula <- function(formula, test, data, ...){
    cl <- match.call()
    # Standard model-frame idiom: rebuild the call keeping only formula and
    # data, then evaluate stats::model.frame() in the caller's frame so that
    # variables are looked up where the user supplied them.
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval(mf, parent.frame())
    # test=NULL makes match.arg() in the default method pick the first choice
    # (shapiro.test).
    if (missing(test)) test <- NULL
    # One column: ~x form (no groups); two columns: x ~ groups form.
    if (ncol(mf) == 1) normalityTest(mf[, 1], test=test, vname=colnames(mf), ...)
    else if (ncol(mf) == 2) normalityTest(mf[, 1], test=test, groups=mf[, 2],
        vname=colnames(mf)[1], gname=colnames(mf)[2], ...)
    else stop("the formula must specify one or two variables")
}

#' @rdname normalityTest
#' @export
normalityTest.default <- function(x, test=c("shapiro.test", "ad.test", "cvm.test",
    "lillie.test", "pearson.test", "sf.test"), groups, vname, gname, ...){
    test <- match.arg(test)
    if (missing(vname)) vname <- deparse(substitute(x))
    if (missing(groups)){
        # Single test: call the chosen test function by name and relabel the
        # data name in the returned "htest" object.
        result <- do.call(test, list(x=x, ...))
        result$data.name <- vname
        result
    }
    else {
        if (!is.factor(groups)) stop("'groups' must be a factor.")
        # NOTE(review): the braces below form a redundant block (not attached
        # to any if); kept as-is since a doc-only update must not change code.
        {
            if (missing(gname)) gname <- deparse(substitute(groups))
            levels <- levels(groups)
            # Column 1: unadjusted p-values; column 2: Holm-adjusted (filled
            # in after the loop).
            pvalues <- matrix(0, length(levels), 2)
            rownames(pvalues) <- levels
            cat("\n --------")
            # Run the chosen test separately within each group, printing each
            # result as it is computed.
            for (level in levels){
                result <- do.call(test, list(x=x[groups == level], ...))
                result$data.name <- vname
                pvalues[level, 1] <- result$p.value
                cat("\n", gname, "=", level, "\n")
                print(result)
                cat(" --------")
            }
            # p.adjust() defaults to the Holm method.
            pvalues[, 2] <- p.adjust(pvalues[, 1])
            pvals <- matrix("", length(levels), 2)
            colnames(pvals) <- c("unadjusted", "adjusted")
            rownames(pvals) <- levels
            pvals[, 1] <- format.pval(pvalues[, 1])
            pvals[, 2] <- format.pval(pvalues[, 2])
            cat("\n\n p-values adjusted by the Holm method:\n")
            print(pvals, quote=FALSE)
            # By-groups output is printed, not returned.
            return(invisible(NULL))
        }
    }
}
RcmdrMisc/R/plotBoot.R0000644000176200001440000000400315123725367014273 0ustar liggesusers#' Plot Bootstrap Distributions
#'
#' @name plotBoot
#'
#' @aliases plotBoot plotBoot.boot
#'
#' @keywords hplot
#'
#' @details
#' The function takes an object of class \code{"boot"} and creates an array of density estimates for the bootstrap distributions of the parameters.
#'
#' Creates an array of adaptive kernel density plots, using \code{\link[car]{densityPlot}} in the \pkg{car} package, showing the bootstrap distribution, point estimate, and (optionally) confidence limits for each parameter.
#'
#' @param object an object of class \code{"boot"}.
#' @param confint an object of class \code{"confint.boot"} (or an ordinary 2-column matrix) containing confidence limits for the parameters in \code{object}; if \code{NULL} (the default), these are computed from the first argument, using the defaults for \code{"boot"} objects.
#' @param \dots not used
#'
#' @return
#' Invisibly returns the object produced by \code{densityPlot}.
#'
#' @author John Fox
#'
#' @seealso \code{\link[car]{densityPlot}}
#'
#' @examples
#' \dontrun{
#' plotBoot(Boot(lm(prestige ~ income + education + type, data=Duncan)))
#' }
#'
#' @export
plotBoot <- function(object, confint=NULL, ...){
    # S3 generic; currently only a method for "boot" objects is provided.
    UseMethod("plotBoot")
}

#' @rdname plotBoot
#' @export
plotBoot.boot <- function(object, confint=NULL, ...){
    # Choose a near-square panel layout for npars plots.
    mfrow <- function (n) {
        rows <- round(sqrt(n))
        cols <- ceiling(n/rows)
        c(rows, cols)
    }
    # Default confidence limits from confint() on the "boot" object.
    if (is.null(confint)) confint <- confint(object)
    t0 <- object$t0  # original-sample point estimates
    t <- object$t    # bootstrap replicates (one column per parameter)
    if (any(is.na(t))){
        # na.omit() drops whole rows, i.e. entire bootstrap samples with any
        # missing parameter value.
        t <- na.omit(t)
        warning("bootstrap samples with missing parameter values suppressed")
    }
    npars <- length(t0)
    pars <- names(t0)
    # Set the panel layout and margins; restore graphics state on exit.
    savepar <- par(mfrow=mfrow(npars), oma=c(0, 0, 2, 0), mar=c(5.1, 4.1, 2.1, 2.1))
    on.exit(par(savepar))
    for (i in 1:npars){
        # Adaptive-kernel density of the bootstrap distribution, with dashed
        # lines at the point estimate (blue) and confidence limits (magenta).
        car::densityPlot(t[, i], xlab=pars[i], method="adaptive")
        abline(v=t0[i], lty=2, col="blue")
        abline(v=confint[i, ], lty=2, col="magenta")
    }
    # NOTE(review): despite the @return text above, the last evaluated
    # expression is title(), so the densityPlot value is not actually
    # returned -- confirm intended behavior.
    title(main="Bootstrap Distributions", outer=TRUE, line=0.5)
}
RcmdrMisc/R/repeatedMeasuresPlot.R0000644000176200001440000002240415124567030016623 0ustar liggesusers#' Plot Means for Repeated-Measures ANOVA Designs
#'
#' @name repeatedMeasuresPlot
#'
#' @keywords hplot
#'
#' @details
#' Creates a means plot for a repeated-measures ANOVA design with one or two within-subjects factor and zero or more between-subjects factors, for data in "wide" format.
#'
#' @param data a data frame in wide format.
#' @param within a character vector with the names of the data columns containing the repeated measures.
#' @param within.names a character vector with one or two elements, of names of the within-subjects factor(s).
#' @param within.levels a named list whose elements are character vectors of level names for the within-subjects factors, with names corresponding to the names of the within-subjects factors; the product of the numbers of levels should be equal to the number of repeated-measures columns in \code{within}.
#' @param between.names a column vector of names of the between-subjects factors (if any).
#' @param response.name optional quoted name for the response variable, defaults to \code{"score"}.
#' @param trace optional quoted name of the (either within- or between-subjects) factor to define profiles of means in each panel of the graph; the default is the within-subjects factor with the smaller number of levels, if there are two, or not used if there is one.
#' @param xvar optional quoted name of the factor to define the horizontal axis of each panel; the default is the within-subjects factor with the larger number of levels.
#' @param pch vector of symbol numbers to use for the profiles of means (i.e., levels of the \code{trace} factor); for the meaning of the defaults, see \code{\link[graphics]{points}} and \code{\link[graphics]{par}}.
#' @param lty vector of line-type numbers to use for the profiles of means.
#' @param col vector of colors for the profiles of means; the default is given by \code{palette()}, starting at the second color.
#' @param plot.means if \code{TRUE} (the default), draw a plot of means by the factors.
#' @param print.tables if \code{TRUE} (the default is \code{FALSE}), print tables of means and standard deviations of the response by the factors.
#'
#' @return
#' A \code{"trellis"} object, which normally is just "printed" (i.e., plotted).
#'
#' @author John Fox
#'
#' @seealso \code{\link[car]{Anova}}, \code{\link[carData]{OBrienKaiser}}
#'
#' @examples
#' repeatedMeasuresPlot(
#'   data=OBrienKaiser,
#'   within=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5",
#'            "post.1", "post.2", "post.3", "post.4", "post.5",
#'            "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"),
#'   within.names=c("phase", "hour"),
#'   within.levels=list(phase=c("pre", "post", "fup"),
#'                      hour = c("1", "2", "3", "4", "5")),
#'   between.names=c("gender", "treatment"),
#'   response.name="improvement",
#'   print.tables=TRUE
#' )
#'
#' repeatedMeasuresPlot(data=OBrienKaiser,
#'   within=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5",
#'            "post.1", "post.2", "post.3", "post.4", "post.5",
#'            "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"),
#'   within.names=c("phase", "hour"),
#'   within.levels=list(phase=c("pre", "post", "fup"), hour = c("1", "2", "3", "4", "5")),
#'   between.names=c("gender", "treatment"),
#'   trace="gender") # note that gender is between subjects
#'
#' repeatedMeasuresPlot(
#'   data=OBrienKaiser,
#'   within=c("fup.1", "fup.2", "fup.3", "fup.4", "fup.5"),
#'   within.names="hour",
#'   within.levels=list(hour = c("1", "2", "3", "4", "5")),
#'   between.names=c("treatment", "gender"),
#'   response.name="improvement")
#'
#' @export
repeatedMeasuresPlot <- function(data, within, within.names, within.levels,
    between.names=NULL, response.name="score", trace, xvar, pch=15:25, lty=1:6,
    col=palette()[-1], plot.means=TRUE, print.tables=FALSE){
    if (!(plot.means || print.tables)) stop("nothing to do (neither print tables nor plot means)!")
    # NA is used as a sentinel for "choose a default later" (in rmPlot).
    if (missing(trace)) trace <- NA
    if (missing(xvar)) xvar <- NA
    # Helper: convert the wide-format data to long format and construct the
    # within-subjects factor(s) from the repeated-measures column index.
    reshapeW2L <- function(data){
        timevar <- paste(within.names, collapse=".")
        long <- reshape(data, varying=within, v.names=response.name,
            timevar=timevar, direction="long")
        n.levels <- sapply(within.levels, length)
        n.within <- length(within.names)
        if (n.within > 2 || n.within < 1) stop("there must be 1 or 2 within factors")
        # The repeated-measures columns must correspond to the full crossing
        # of the within factors.
        if (prod(n.levels) != length(within)){
            stop("the number of repeated measures, ", length(within),
                ", is not equal to the product of the numbers of levels of the within factors, ",
                prod(n.levels))
        }
        if (length(within.names) != length(within.levels)){
            stop("the number of within factors, ", length(within.names),
                ", is not equal to the number of sets of within-factor levels, ",
                length(within.levels))
        }
        if (n.within == 2){
            # The time index enumerates the crossed levels with the second
            # factor varying fastest: integer division recovers the first
            # factor, the remainder recovers the second.
            long[[within.names[1]]] <- factor(within.levels[[within.names[1]]][1 + ((long[[timevar]] - 1) %/% n.levels[2])],
                levels=within.levels[[within.names[1]]])
            long[[within.names[2]]] <- factor(within.levels[[within.names[2]]][1 + ((long[[timevar]] - 1) %% n.levels[2])],
                levels=within.levels[[within.names[2]]])
        }
        else{
            # One within factor: the time index maps directly onto its levels.
            long[[within.names]] <- factor(within.levels[[1]][long[[timevar]]],
                levels=within.levels[[1]])
        }
        long
    }
    # Helper: tables of cell means and SDs by all factors (car::Tapply), plus
    # the means flattened to a data frame for plotting.
    computeMeans <- function(data){
        formula <- paste(response.name, " ~", paste(c(within.names, between.names), collapse="+"))
        meanTable <- Tapply(formula, mean, data=data)
        sdTable <- Tapply(formula, sd, data=data)
        means <- meanTable
        if(length(dim(means)) > 1){
            # Multi-way table: flatten via ftable() to one row per cell.
            means <- as.data.frame(ftable(means))
            names(means)[ncol(means)] <- response.name
        }
        else {
            # One factor: the table is a named vector.
            means <- data.frame(factor(names(means), levels=levels(data[, within.names])), means)
            names(means) <- c(within.names, response.name)
        }
        list(means=means, meanTable=meanTable, sdTable=sdTable)
    }
    # Helper: build and return the lattice xyplot of cell means. `data` here
    # is the flattened means data frame (last column = mean response).
    rmPlot <- function(data) {
        n.levels <-sapply(data[,-ncol(data), drop = FALSE], function(x) length(levels(x)))
        n.factors <- length(n.levels)
        fnames <- names(data)[-ncol(data), drop = FALSE]
        # Default trace factor: the within factor (other than xvar) with the
        # fewest levels; NULL means no trace factor.
        if (is.na(trace)) {
            wnames <- if (!is.na(xvar)) within.names[!(within.names == xvar)] else within.names
            trace <- if (length(wnames) > 0) wnames[which.min(n.levels[wnames])] else NULL
        }
        # Default horizontal-axis factor: the remaining within factor with the
        # most levels.
        if (is.na(xvar)) {
            wnames <- if (!is.na(trace)) within.names[!(within.names == trace)] else within.names
            xvar <- wnames[which.max(n.levels[wnames])]
        }
        if (length(within.names) == 1 && length(xvar) == 0){
            xvar <- within.names
            trace <- NULL
        }
        # trace and xvar must differ; drop the trace factor if they coincide.
        if (!is.null(trace) && trace == xvar)
            trace <- NULL
        # Lattice formula: response ~ xvar, conditioned on any remaining
        # factors (those not used as trace or xvar).
        form <- paste(response.name, " ~", xvar,
            if (n.factors > 1 + !is.null(trace)) "|",
            paste(setdiff(fnames, c(trace, xvar)), collapse = "+"))
        tr.levels <- n.levels[trace]
        if (!is.null(trace)) {
            # With a trace factor: grouped profiles plus a key.
            xyplot(
                as.formula(form),
                groups = if (!is.null(trace)) data[[trace]] else 1,
                type = "b",
                lty = lty[1:tr.levels],
                pch = pch[1:tr.levels],
                col = col[1:tr.levels],
                cex = 1.25,
                strip = function(...) strip.default(strip.names = c(TRUE, TRUE), ...),
                data = data,
                ylab = paste("mean", response.name),
                key = list(
                    title = trace,
                    cex.title = 1,
                    text = list(levels(data[[trace]])),
                    lines = list(lty = lty[1:tr.levels], col = col[1:tr.levels]),
                    points = list(
                        pch = pch[1:tr.levels],
                        col = col[1:tr.levels],
                        cex = 1.25
                    )
                )
            )
        } else {
            # No trace factor: a single profile per panel.
            xyplot(
                as.formula(form),
                type = "b",
                lty = lty[1],
                pch = pch[1],
                col = col[1],
                cex = 1.25,
                strip = function(...) strip.default(strip.names = c(TRUE, TRUE), ...),
                data = data,
                ylab = paste("mean", response.name)
            )
        }
    }
    Long <- reshapeW2L(data)
    Means <- computeMeans(Long)
    if (print.tables){
        cat("\n Means of", response.name, "\n")
        if (length(dim(Means$meanTable)) > 1) print(ftable(Means$meanTable))
        else print(Means$meanTable)
        cat("\n\n Standard deviations of", response.name, "\n")
        if (length(dim(Means$sdTable)) > 1) print(ftable(Means$sdTable))
        else print(Means$sdTable)
    }
    # Return the trellis object (auto-printed at top level), or NULL when only
    # tables were requested.
    if (plot.means) rmPlot(Means$means) else invisible(NULL)
}
RcmdrMisc/R/numSummary.R0000644000176200001440000002705615124565537014655 0ustar liggesusers#' Summary Statistics for Numeric Variables
#'
#' @name numSummary
#'
#' @aliases numSummary print.numSummary
#'
#' @keywords misc
#'
#' @details
#' \code{numSummary} creates neatly formatted tables of means, standard deviations, coefficients of variation, skewness, kurtosis, and quantiles of numeric variables. \code{CV} computes the coefficient of variation.
#'
#' @param data a numeric vector, matrix, or data frame.
#' @param statistics any of \code{"mean"}, \code{"sd"}, \code{"se(mean)"}, \code{"var"}, \code{"cv"}, \code{"IQR"}, \code{"quantiles"}, \code{"skewness"}, or \code{"kurtosis"}, defaulting to \code{c("mean", "sd", "quantiles", "IQR")}.
#' @param type definition to use in computing skewness and kurtosis; see the \code{\link[e1071]{skewness}} and \code{\link[e1071]{kurtosis}} functions in the \pkg{e1071} package. The default is \code{"2"}.
#' @param quantiles quantiles to report; default is \code{c(0, 0.25, 0.5, 0.75, 1)}.
#' @param groups optional variable, typically a factor, to be used to partition the data.
#' @param x object of class \code{"numSummary"} to print, or for \code{CV}, a numeric vector or matrix.
#' @param \dots arguments to pass down from the print method.
#'
#' @return \code{numSummary} returns an object of class \code{"numSummary"} containing the table of statistics to be reported along with information on missing data, if there are any.
#'
#' @author John Fox
#'
#' @seealso \code{\link[base]{mean}}, \code{\link[stats]{sd}}, \code{\link{cv}}, \code{\link[stats]{quantile}}, \code{\link[e1071]{skewness}}, \code{\link[e1071]{kurtosis}}.
#'
#' @examples
#' data(Prestige)
#' Prestige[1, "income"] <- NA
#' print(numSummary(Prestige[,c("income", "education")],
#'   statistics=c("mean", "sd", "quantiles", "cv", "skewness", "kurtosis")))
#' print(numSummary(Prestige[,c("income", "education")], groups=Prestige$type))
#'
#' @export
numSummary <- function(data,
    statistics=c("mean", "sd", "se(mean)", "var", "cv", "IQR", "quantiles", "skewness", "kurtosis"),
    type=c("2", "1", "3"), quantiles=c(0, .25, .5, .75, 1), groups){
    # Local column-wise wrappers. These deliberately shadow stats::sd etc. so
    # the per-column versions are used throughout; each accepts (and ignores)
    # a `type` argument so all statistics can be called uniformly, and each
    # always removes NAs.
    sd <- function(x, type, ...){
        apply(as.matrix(x), 2, stats::sd, na.rm=TRUE)
    }
    IQR <- function(x, type, ...){
        apply(as.matrix(x), 2, stats::IQR, na.rm=TRUE)
    }
    # Standard error of the mean: sd / sqrt(n), with n the per-column count
    # of non-missing values.
    std.err.mean <- function(x, ...){
        x <- as.matrix(x)
        sd <- sd(x)
        n <- colSums(!is.na(x))
        sd/sqrt(n)
    }
    var <- function(x, type, ...){
        apply(as.matrix(x), 2, stats::var, na.rm=TRUE)
    }
    # Skewness/kurtosis delegate to e1071, recursing column-wise for matrices;
    # here `type` selects the e1071 definition (1, 2, or 3).
    skewness <- function(x, type, ...){
        if (is.vector(x)) return(e1071::skewness(x, type=type, na.rm=TRUE))
        apply(x, 2, skewness, type=type)
    }
    kurtosis <- function(x, type, ...){
        if (is.vector(x)) return(e1071::kurtosis(x, type=type, na.rm=TRUE))
        apply(x, 2, kurtosis, type=type)
    }
    data <- as.data.frame(data)
    if (!missing(groups)) {
        groups <- as.factor(groups)
        counts <- table(groups)
        # Drop empty factor levels so they don't produce all-NA rows.
        if (any(counts == 0)){
            levels <- levels(groups)
            warning("the following groups are empty: ",
                paste(levels[counts == 0], collapse=", "))
            groups <- factor(groups, levels=levels[counts != 0])
        }
    }
    variables <- names(data)
    if (missing(statistics)) statistics <- c("mean", "sd", "quantiles", "IQR")
    statistics <- match.arg(statistics,
        c("mean", "sd", "se(mean)", "var", "cv", "IQR", "quantiles", "skewness", "kurtosis"),
        several.ok=TRUE)
    type <- match.arg(type)
    type <- as.numeric(type)
    # Note: grps is assigned as a side effect of this expression.
    ngroups <- if(missing(groups)) 1 else length(grps <- levels(groups))
    quantiles <- if ("quantiles" %in% statistics) quantiles else NULL
    if (anyDuplicated(quantiles)){
        warning("there are duplicated quantiles, which are ignored")
        quantiles <- sort(unique(quantiles))
    }
    # Column labels for the quantile columns, e.g. "25%".
    quants <- if (length(quantiles) >= 1)
        paste(100*quantiles, "%", sep="")
    else NULL
    nquants <- length(quants)
    # Requested statistics in canonical order, followed by quantile labels.
    stats <- c(c("mean", "sd", "se(mean)", "var", "IQR", "cv", "skewness", "kurtosis")[c("mean", "sd", "se(mean)", "var", "IQR", "cv", "skewness", "kurtosis") %in% statistics], quants)
    nstats <- length(stats)
    nvars <- length(variables)
    result <- list()
    # Four result shapes ("type" 1-4), chosen by the combination of number of
    # groups, variables, and statistics; print.numSummary() switches on this.
    if ((ngroups == 1) && (nvars == 1) && (length(statistics) == 1)){
        # Type 1: one variable, no groups, one statistic -> named vector.
        if (statistics == "quantiles")
            table <- quantile(data[,variables], probs=quantiles, na.rm=TRUE)
        else {
            # Map the user-facing name "se(mean)" onto the local helper.
            stats <- statistics
            stats[stats == "se(mean)"] <- "std.err.mean"
            table <- do.call(stats, list(x=data[,variables], na.rm=TRUE, type=type))
            names(table) <- statistics
        }
        NAs <- sum(is.na(data[,variables]))
        n <- nrow(data) - NAs
        result$type <- 1
    }
    else if ((ngroups > 1) && (nvars == 1) && (length(statistics) == 1)){
        # Type 2: one variable, one statistic, by groups -> vector or
        # groups x quantiles matrix.
        if (statistics == "quantiles"){
            table <- matrix(unlist(tapply(data[, variables], groups, quantile,
                probs=quantiles, na.rm=TRUE)), ngroups, nquants, byrow=TRUE)
            rownames(table) <- grps
            colnames(table) <- quants
        }
        else table <- tapply(data[,variables], groups, statistics, na.rm=TRUE, type=type)
        NAs <- tapply(data[, variables], groups, function(x) sum(is.na(x)))
        n <- table(groups) - NAs
        result$type <- 2
    }
    else if ((ngroups == 1) ){
        # Type 3: no groups -> variables x statistics matrix.
        X <- as.matrix(data[, variables])
        table <- matrix(0, nvars, nstats)
        rownames(table) <- if (length(variables) > 1) variables else ""
        colnames(table) <- stats
        if ("mean" %in% stats) table[,"mean"] <- colMeans(X, na.rm=TRUE)
        if ("sd" %in% stats) table[,"sd"] <- sd(X)
        if ("se(mean)" %in% stats) table[, "se(mean)"] <- std.err.mean(X)
        if ("var" %in% stats) table[,"var"] <- var(X)
        if ("cv" %in% stats) table[,"cv"] <- cv(X)
        if ("IQR" %in% stats) table[, "IQR"] <- IQR(X)
        if ("skewness" %in% statistics) table[, "skewness"] <- skewness(X, type=type)
        if ("kurtosis" %in% statistics) table[, "kurtosis"] <- kurtosis(X, type=type)
        if ("quantiles" %in% statistics){
            table[,quants] <- t(apply(data[, variables, drop=FALSE], 2, quantile,
                probs=quantiles, na.rm=TRUE))
        }
        NAs <- colSums(is.na(data[, variables, drop=FALSE]))
        n <- nrow(data) - NAs
        result$type <- 3
    }
    else {
        # Type 4: multiple variables by groups -> 3-D array
        # (groups x statistics x variables), later dropped to a matrix when
        # only one statistic or one variable.
        table <- array(0, c(ngroups, nstats, nvars),
            dimnames=list(Group=grps, Statistic=stats, Variable=variables))
        NAs <- matrix(0, nvars, ngroups)
        rownames(NAs) <- variables
        colnames(NAs) <- grps
        for (variable in variables){
            if ("mean" %in% stats) table[, "mean", variable] <- tapply(data[, variable], groups, mean, na.rm=TRUE)
            if ("sd" %in% stats) table[, "sd", variable] <- tapply(data[, variable], groups, sd, na.rm=TRUE)
            if ("se(mean)" %in% stats) table[, "se(mean)", variable] <- tapply(data[, variable], groups, std.err.mean, na.rm=TRUE)
            if ("var" %in% stats) table[, "var", variable] <- tapply(data[, variable], groups, var, na.rm=TRUE)
            if ("IQR" %in% stats) table[, "IQR", variable] <- tapply(data[, variable], groups, IQR, na.rm=TRUE)
            # NOTE(review): this branch calls the deprecated CV() (emitting a
            # deprecation warning per group) rather than cv(); presumably
            # cv() is intended -- left unchanged in this doc-only update.
            if ("cv" %in% stats) table[, "cv", variable] <- tapply(data[, variable], groups, CV, na.rm=TRUE)
            if ("skewness" %in% stats) table[, "skewness", variable] <- tapply(data[, variable], groups, skewness, type=type)
            if ("kurtosis" %in% stats) table[, "kurtosis", variable] <- tapply(data[, variable], groups, kurtosis, type=type)
            if ("quantiles" %in% statistics) {
                res <- matrix(unlist(tapply(data[, variable], groups, quantile,
                    probs=quantiles, na.rm=TRUE)), ngroups, nquants, byrow=TRUE)
                table[, quants, variable] <- res
            }
            NAs[variable,] <- tapply(data[, variable], groups, function(x) sum(is.na(x)))
        }
        if (nstats == 1) table <- table[,1,]
        if (nvars == 1) table <- table[,,1]
        n <- table(groups)
        n <- matrix(n, nrow=nrow(NAs), ncol=ncol(NAs), byrow=TRUE)
        n <- n - NAs
        result$type <- 4
    }
    result$table <- table
    result$statistics <- statistics
    result$n <- n
    # NAs are reported only when some are present.
    if (any(NAs > 0)) result$NAs <- NAs
    class(result) <- "numSummary"
    result
}

#' @rdname numSummary
#' @export
print.numSummary <- function(x, ...){
    NAs <- x$NAs
    table <- x$table
    n <- x$n
    statistics <- x$statistics
    # x$type is numeric 1-4; switch() with a numeric selector picks by
    # position, so the "1"..."4" names below are documentation only.
    switch(x$type,
        "1" = {
            # Named vector: append n and NA counts when NAs are present.
            if (!is.null(NAs)) {
                table <- c(table, n, NAs)
                names(table)[length(table) - 1:0] <- c("n", "NA")
            }
            print(table)
        },
        "2" = {
            if (statistics == "quantiles") {
                # groups x quantiles matrix: add n (and NA) columns.
                table <- cbind(table, n)
                colnames(table)[ncol(table)] <- "n"
                if (!is.null(NAs)) {
                    table <- cbind(table, NAs)
                    colnames(table)[ncol(table)] <- "NA"
                }
            }
            else {
                # One statistic by groups: build rows then transpose so groups
                # are rows in the printed table.
                table <- rbind(table, n)
                rownames(table)[c(1, nrow(table))] <- c(statistics, "n")
                if (!is.null(NAs)) {
                    table <- rbind(table, NAs)
                    rownames(table)[nrow(table)] <- "NA"
                }
                table <- t(table)
            }
            print(table)
        },
        "3" = {
            # variables x statistics matrix: append n (and NA) columns.
            table <- cbind(table, n)
            colnames(table)[ncol(table)] <- "n"
            if (!is.null(NAs)) {
                table <- cbind(table, NAs)
                colnames(table)[ncol(table)] <- "NA"
            }
            print(table)
        },
        "4" = {
            if (length(dim(table)) == 2){
                # Array collapsed to a matrix (one statistic or one variable):
                # label the appended count columns "var:n" / "var:NA".
                n <- t(n)
                nms <- colnames(n)
                colnames(n) <- paste(nms, ":n", sep="")
                table <- cbind(table, n)
                if (!is.null(NAs)) {
                    NAs <- t(NAs)
                    nms <- colnames(NAs)
                    colnames(NAs) <- paste(nms, ":NA", sep="")
                    table <- cbind(table, NAs)
                }
                print(table)
            }
            else {
                # Full 3-D array: bind n/NA as extra statistics (abind from
                # the abind package) and print one 2-D slice per variable.
                table <- abind(table, t(n), along=2)
                dimnames(table)[[2]][dim(table)[2]] <- "n"
                if (!is.null(NAs)) {
                    table <- abind(table, t(NAs), along=2)
                    dimnames(table)[[2]][dim(table)[2]] <- "NA"
                }
                nms <- dimnames(table)[[3]]
                for (name in nms){
                    cat("\nVariable:", name, "\n")
                    print(table[,,name])
                }
            }
        }
    )
    invisible(x)
}
RcmdrMisc/R/Percents.R0000644000176200001440000000440515123011164014241 0ustar liggesusers#' Row, Column, and Total Percentage Tables
#'
#' @name colPercents
#'
#' @aliases colPercents rowPercents totPercents
#'
#' @keywords misc
#'
#' @details
#' Percentage a matrix or higher-dimensional array of frequency counts by rows, columns, or total frequency.
#'
#' @param tab a matrix or higher-dimensional array of frequency counts.
#' @param digits number of places to the right of the decimal place for percentages.
#'
#' @return Returns an array of the same size and shape as \code{tab} percentaged by rows or columns, plus rows or columns of totals and counts, or by the table total.
#'
#' @examples
#' data(Mroz) # from car package
#' cat("\n\n column percents:\n")
#' print(colPercents(xtabs(~ lfp + wc, data=Mroz)))
#' cat("\n\n row percents:\n")
#' print(rowPercents(xtabs(~ hc + lfp, data=Mroz)))
#' cat("\n\n total percents:\n")
#' print(totPercents(xtabs(~ hc + wc, data=Mroz)))
#' cat("\n\n three-way table, column percents:\n")
#' print(colPercents(xtabs(~ lfp + wc + hc, data=Mroz)))
#'
#' @author John Fox
#'
#' @export
colPercents <- function(tab, digits=1){
    # Number of dimensions of the input table (2 for a matrix); assumed >= 2.
    dim <- length(dim(tab))
    # Ensure dimnames exist so the result can carry labels.
    if (is.null(dimnames(tab))){
        dims <- dim(tab)
        dimnames(tab) <- lapply(1:dim, function(i) 1:dims[i])
    }
    # Column totals within each stratum (sums over the first dimension).
    sums <- apply(tab, 2:dim, sum)
    # Divide each "row slice" by the totals; apply() returns the result with
    # dimension 1 moved last, so restore the original axis order with aperm().
    per <- apply(tab, 1, function(x) x/sums)
    dim(per) <- dim(tab)[c(2:dim,1)]
    per <- aperm(per, c(dim, 1:(dim-1)))
    dimnames(per) <- dimnames(tab)
    per <- round(100*per, digits)
    # Append a Total (of rounded percentages) row and a Count row along the
    # first dimension (abind from the abind package).
    result <- abind(per, Total=apply(per, 2:dim, sum), Count=sums, along=1)
    names(dimnames(result)) <- names(dimnames(tab))
    result
}

#' @rdname colPercents
#' @export
rowPercents <- function(tab, digits=1){
    # Row percentages via colPercents() on the transposed table; for arrays,
    # swap the first two dimensions, percentage, and swap back.
    dim <- length(dim(tab))
    if (dim == 2) return(t(colPercents(t(tab), digits=digits)))
    tab <- aperm(tab, c(2,1,3:dim))
    aperm(colPercents(tab, digits=digits), c(2,1,3:dim))
}

#' @rdname colPercents
#' @export
totPercents <- function(tab, digits=1){
    dim <- length(dim(tab))
    if (is.null(dimnames(tab))){
        dims <- dim(tab)
        dimnames(tab) <- lapply(1:dim, function(i) 1:dims[i])
    }
    # Percentage by the grand total, then border with marginal Total row and
    # column. NOTE(review): cbind/rbind flatten a higher-dimensional array,
    # so despite the generic dimnames handling above this function is
    # effectively for 2-D tables -- confirm intended scope.
    tab <- 100*tab/sum(tab)
    tab <- cbind(tab, rowSums(tab))
    tab <- rbind(tab, colSums(tab))
    rownames(tab)[nrow(tab)] <- "Total"
    colnames(tab)[ncol(tab)] <- "Total"
    round(tab, digits=digits)
}
RcmdrMisc/R/stepwise.R0000644000176200001440000000577515124677107014344 0ustar liggesusers#' Stepwise Model Selection
#'
#' @name stepwise
#'
#' @keywords models
#'
#' @details
#' This function is a front end to the \code{\link[MASS]{stepAIC}} function in the \pkg{MASS} package.
#'
#' @param mod a model object of a class that can be handled by \code{stepAIC}.
#' @param direction if \code{"backward/forward"} (the default), selection starts with the full model and eliminates predictors one at a time, at each step considering whether the criterion will be improved by adding back in a variable removed at a previous step; if \code{"forward/backwards"}, selection starts with a model including only a constant, and adds predictors one at a time, at each step considering whether the criterion will be improved by removing a previously added variable; \code{"backwards"} and \code{"forward"} are similar without the reconsideration at each step.
#' @param criterion for selection. Either \code{"BIC"} (the default) or \code{"AIC"}. Note that \code{stepAIC} labels the criterion in the output as \code{"AIC"} regardless of which criterion is employed.
#' @param \dots arguments to be passed to \code{stepAIC}.
#'
#' @return The model selected by \code{stepAIC}.
#'
#' @author John Fox
#'
#' @seealso \code{\link[MASS]{stepAIC}}
#'
#' @references
#' W. N. Venables and B. D. Ripley \emph{Modern Applied Statistics Statistics with S, Fourth Edition} Springer, 2002.
#'
#' @examples
#' ## adapted from stepAIC in MASS
#' ## Assigning bwt to the global environment is required to run this example within
#' ## the browser-based help system. In other contexts, standard assignment can be used.
#' if (require(MASS)){
#'   data(birthwt)
#'   bwt <<- with(birthwt, {
#'     race <- factor(race, labels = c("white", "black", "other"))
#'     ptd <- factor(ptl > 0)
#'     ftv <- factor(ftv)
#'     levels(ftv)[-(1:2)] <- "2+"
#'     data.frame(low = factor(low), age, lwt, race, smoke = (smoke > 0), ptd,
#'                ht = (ht > 0), ui = (ui > 0), ftv)
#'   })
#'   birthwt.glm <- glm(low ~ ., family = binomial, data = bwt)
#'   print(stepwise(birthwt.glm, trace = FALSE))
#'   print(stepwise(birthwt.glm, direction="forward/backward"))
#' }
#'
#' ## wrapper for stepAIC in the MASS package
#' @export
stepwise <- function(mod,
    direction=c("backward/forward", "forward/backward", "backward", "forward"),
    criterion=c("BIC", "AIC"), ...){
    criterion <- match.arg(criterion)
    direction <- match.arg(direction)
    cat("\nDirection: ", direction)
    cat("\nCriterion: ", criterion, "\n\n")
    # BIC corresponds to penalty k = log(n); AIC to k = 2 (stepAIC's default).
    k <- if (criterion == "BIC") log(nrow(model.matrix(mod))) else 2
    # Upper scope: the full model's right-hand side as a one-sided formula.
    # Fix: previously this was built by deparse() -> gsub(" ", "") ->
    # eval(parse(text=...)), an eval(parse()) anti-pattern that corrupts
    # formulas containing meaningful spaces (e.g. backtick-quoted names or
    # string constants). as.formula() on the deparsed RHS is equivalent for
    # well-formed formulas and safe; collapsing with a space keeps long,
    # multi-line deparses intact. Computed before mod may be reset below.
    upper <- as.formula(paste("~", paste(deparse(formula(mod)[[3]]), collapse = " ")))
    # Forward searches start from the intercept-only model.
    if (direction == "forward" || direction == "forward/backward")
        mod <- update(mod, . ~ 1)
    # The "with reconsideration" variants map onto stepAIC's "both".
    if (direction == "backward/forward" || direction == "forward/backward")
        direction <- "both"
    lower <- ~ 1
    stepAIC(mod, scope=list(lower=lower, upper=upper), direction=direction, k=k, ...)
}
RcmdrMisc/R/rcorr.adjust.R0000644000176200001440000000603015124563546015113 0ustar liggesusers#' Compute Pearson or Spearman Correlations with p-Values
#'
#' @name rcorr.adjust
#'
#' @aliases rcorr.adjust print.rcorr.adjust
#'
#' @keywords htest
#'
#' @details
#' This function uses the \code{\link[Hmisc]{rcorr}} function in the \pkg{Hmisc} package to compute matrices of Pearson or Spearman correlations along with the pairwise p-values among the correlations.
#' The p-values are corrected for multiple inference using Holm's method (see \code{\link[stats]{p.adjust}}).
#' Observations are filtered for missing data, and only complete observations are used.
#'
#' @param x a numeric matrix or data frame, or an object of class \code{"rcorr.adjust"} to be printed.
#' @param type \code{"pearson"} or \code{"spearman"}, depending upon the type of correlations desired; the default is \code{"pearson"}.
#' @param use how to handle missing data: \code{"complete.obs"}, the default, use only complete cases; \code{"pairwise.complete.obs"}, use all cases with valid data for each pair.
#' @param \dots not used.
#'
#' @return
#' Returns an object of class \code{"rcorr.adjust"}, which is normally just printed.
#'
#' @author John Fox, adapting code from Robert A. Muenchen.
#'
#' @seealso \code{\link[Hmisc]{rcorr}}, \code{\link[stats]{p.adjust}}.
#'
#' @examples
#' data(Mroz)
#' print(rcorr.adjust(Mroz[,c("k5", "k618", "age", "lwg", "inc")]))
#' print(rcorr.adjust(Mroz[,c("k5", "k618", "age", "lwg", "inc")], type="spearman"))
#'
## the following function is adapted from a suggestion by Robert Muenchen
## uses rcorr in the Hmisc package
#' @export
rcorr.adjust <- function (x, type = c("pearson", "spearman"),
    use = c("complete.obs", "pairwise.complete.obs")) {
    # Suppress scientific notation while formatting; restored on exit.
    opt <- options(scipen = 5)
    on.exit(options(opt))
    type <- match.arg(type)
    use <- match.arg(use)
    # Listwise deletion up front for complete.obs; otherwise rcorr() handles
    # pairwise deletion itself.
    x <- if (use == "complete.obs")
        as.matrix(na.omit(x))
    else as.matrix(x)
    R <- rcorr(x, type = type)
    P <- P.unadj <- R$P
    # Holm-adjust the p-values from the lower triangle, then rebuild a
    # symmetric matrix of adjusted values.
    p <- P[lower.tri(P)]
    adj.p <- p.adjust(p, method = "holm")
    P[lower.tri(P)] <- adj.p
    P[upper.tri(P)] <- 0
    P <- P + t(P)
    # Format for display: very small p-values become "<.0001"/"<.001" via
    # pattern-matching on the formatted strings; the diagonal is blanked.
    # NOTE(review): this grep-on-formatted-text approach is fragile to
    # format() width changes -- confirm before touching.
    P <- ifelse(P < 1e-04, 0, P)
    P <- format(round(P, 4))
    diag(P) <- ""
    P[c(grep("0.0000", P), grep("^ 0$", P))] <- "<.0001"
    P[grep("0.000$", P)] <- "<.001"
    # Same formatting for the unadjusted p-values.
    P.unadj <- ifelse(P.unadj < 1e-04, 0, P.unadj)
    P.unadj <- format(round(P.unadj, 4))
    diag(P.unadj) <- ""
    P.unadj[c(grep("0.0000$", P.unadj), grep("^ 0$", P.unadj))] <- "<.0001"
    P.unadj[grep("0.000$", P.unadj)] <- "<.001"
    result <- list(R = R, P = P, P.unadj = P.unadj, type = type)
    class(result) <- "rcorr.adjust"
    result
}

#' @rdname rcorr.adjust
#' @export
print.rcorr.adjust <- function(x, ...){
    cat("\n", if (x$type == "pearson") "Pearson" else "Spearman", "correlations:\n")
    print(round(x$R$r, 4))
    cat("\n Number of observations: ")
    n <- x$R$n
    # A single n is printed when all pairwise counts agree (e.g. after
    # listwise deletion); otherwise the full matrix of pairwise ns.
    if (all(n[1] == n)) cat(n[1], "\n")
    else{
        cat("\n")
        print(n)
    }
    cat("\n Pairwise two-sided p-values:\n")
    print(x$P.unadj, quote=FALSE)
    cat("\n Adjusted p-values (Holm's method)\n")
    print(x$P, quote=FALSE)
}
RcmdrMisc/R/Dotplot.R0000644000176200001440000001121315123006024014103 0ustar liggesusers#' Dot Plots
#'
#' @name Dotplot
#'
#' @details
#' Dot plot of numeric variable, either using raw values or binned, optionally classified by a factor. Dot plots are useful for visualizing the distribution of a numeric variable in a small data set.
#'
#' If the \code{by} argument is specified, then one dot plot is produced for each level of \code{by}; these are arranged vertically and all use the same scale for \code{x}.
#' An attempt is made to adjust the size of the dots to the space available without making them too big.
#'
#' @keywords hplot
#'
#' @param x a numeric variable.
#' @param by optionally a factor (or character or logical variable) by which to classify \code{x}.
#' @param bin if \code{TRUE} (the default is \code{FALSE}), the values of \code{x} are binned, as in a histogram, prior to plotting.
#' @param breaks breaks for the bins, in a form acceptable to the \code{\link[graphics]{hist}} function; the default is \code{"Sturges"}.
#' @param xlim optional 2-element numeric vector giving limits of the horizontal axis.
#' @param xlab optional character string to label horizontal axis.
#'
#' @return Returns \code{NULL} invisibly.
#' #' @author John Fox #' #' @seealso \code{\link[graphics]{hist}} #' #' @examples #' data(Duncan) #' with(Duncan, Dotplot(education)) #' with(Duncan, Dotplot(education, bin=TRUE)) #' with(Duncan, Dotplot(education, by=type)) #' with(Duncan, Dotplot(education, by=type, bin=TRUE)) #' #' @export Dotplot <- function(x, by, bin=FALSE, breaks, xlim, xlab=deparse(substitute(x))){ dotplot <- function(x, by, bin=FALSE, breaks, xlim, xlab=deparse(substitute(x)), main="", correction=1/3, correction.char=1, y.max){ bylab <- if (!missing(by)) deparse(substitute(by)) if (bin) hist <- hist(x, breaks=breaks, plot=FALSE) if (missing(by)){ y <- if (bin) hist$counts else table(x) x <- if (bin) hist$mids else sort(unique(x)) plot(range(x), 0:1, type="n", xlab=xlab, ylab="", main=main, axes=FALSE, xlim=xlim) y.limits <- par("usr")[3:4] char.height <- correction.char*par("cxy")[2] axis(1, pos=0) if (missing(y.max)) y.max <- max(y) abline(h=0) cex <- min(((y.limits[2] - y.limits[1])/char.height)/ y.max, 2) for (i in 1:length(y)){ if (y[i] == 0) next points(rep(x[i], y[i]), cex*correction*char.height*seq(1, y[i]), pch=16, cex=cex, xpd=TRUE) } return(invisible(NULL)) } else{ if (missing(xlim)) xlim <- range(x) levels <- levels(by) n.groups <- length(levels) save.par <- par(mfrow=c(n.groups, 1)) on.exit(par(save.par)) if (bin){ for(level in levels){ # compute histograms by level to find maximum count max.count <- 0 hist.level <- hist(x[by == level], breaks=hist$breaks, plot=FALSE) max.count <- max(max.count, hist.level$counts) } for (level in levels){ mainlabel <- paste(bylab, "=", level) dotplot(x[by == level], xlab=xlab, main=mainlabel, bin=TRUE, breaks=hist$breaks, xlim=xlim, correction=1/2, correction.char=0.5, y.max=max.count) } } else { y <- table(x, by) for (level in levels){ mainlabel <- paste(bylab, "=", level) dotplot(x[by == level], xlab=xlab, main=mainlabel, xlim=xlim, correction=1/2, correction.char=0.5, y.max=max(y)) } } } } if (!is.numeric(x)) stop("x must be a numeric 
variable") if (!missing(by) && !is.factor(by)) { bylab <- deparse(substitute(by)) if (!(is.character(by) || is.logical(by))) stop("by must be a factor, character, or logical") by <- as.factor(by) } force(xlab) if (missing(by)){ x <- na.omit(x) } else{ keep <- complete.cases(x, by) x <- x[keep] by <- by[keep] } if (missing(xlim)) xlim <- range(x) force(xlab) if (missing(breaks)) breaks <- "Sturges" if (missing(by)) dotplot(x=x, bin=bin, breaks=breaks, xlim=xlim, xlab=xlab) else dotplot(x=x, by=by, bin=bin, breaks=breaks, xlim=xlim, xlab=xlab) } RcmdrMisc/R/reshapeW2L.R0000644000176200001440000001320315124567621014445 0ustar liggesusers#' Reshape Repeated-Measures Data from Wide to Long Format #' #' @name reshapeW2L #' #' @keywords manip #' #' @details #' The data are assumed to be in "wide" format, with a single row for each subject, and different columns for values of one or more repeated-measures variables classified by one or more within-subjects factors. #' #' Between-subjects variables don't vary by occasions for each subject. Variables that aren't listed explicitly in the arguments to the function are assumed to be between-subjects variables. #' The values of these variables are duplicated in each row pertaining to a given subject. #' #' Within-subjects factors vary by occasions for each subject, and it is assumed that the within-subjects design is regular, completely crossed, and balanced, so that the same combinations of within-subjects factors are observed for each subject. #' There are typically one or two within-subject factors. #' #' Occasion-varying variables, as their name implies, (potentially) vary by occasions for each subject, and include one or more "response" variables, possibly along with occasion-varying covariates; these variables can be factors as well as numeric variables. #' Each occasion-varying variable is encoded in multiple columns of the wide form of the data and in a single column in the long form. 
There is typically one occasion-varying variable, a response variable. #' #' There is one value of each occasion-varying variable for each combination of levels of the within-subjects factors. #' Thus, the number of variables in the wide data for each occasion-varying variable must be equal to the product of levels of the within-subjects factors, with the levels of the within-subjects factors varying most quickly from right to left in the \code{within} argument. #' #' @param data wide version of data set. #' @param within a character vector of names for the crossed within-subjects factors to be created in the long form of the data. #' @param levels a named list of character vectors, each element giving the names of the levels for a within-subjects factor; the names of the list elements are the names of the within-subjects factor, given in the \code{within} argument. #' @param varying a named list of the names of variables in the wide data set specifying the occasion-varying variables to be created in the long data set; each element in the list is named for an occasion-varying variable and is a character vector of column names in the wide data for that occasion-varying variable. #' @param ignore a character vector of names of variables in the wide data to be dropped in the long form of the data. #' @param id the (character) name of the subject ID variable to be created in the long form of the data, default \code{"id"}. #' #' @return a data frame in "long" format, with multiple rows for each subject (equal to the number of combinations of levels of the within-subject factors) and one column for each between-subjects and occasion-varying variable. #' #' @author John Fox #' #' @seealso \code{\link{reshapeL2W}}, \code{\link[stats]{reshape}}, \code{\link[carData]{OBrienKaiser}}, \code{\link[carData]{OBrienKaiserLong}}. 
#' #' @examples #' OBrienKaiserL <- reshapeW2L(OBrienKaiser, within=c("phase", "hour"), #' levels=list(phase=c("pre", "post", "fup"), hour=1:5), #' varying=list(score=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5", #' "post.1", "post.2", "post.3", "post.4", "post.5", #' "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"))) #' brief(OBrienKaiserL, c(15, 15)) #' m1 <- Tapply(score ~ phase + hour + treatment + gender, mean, data=OBrienKaiserL) #' m2 <- Tapply(score ~ phase + hour + treatment + gender, mean, data=OBrienKaiserLong) #' all.equal(m1, m2) # should be equal #' #' OBrienKaiserL2 <- reshapeW2L(OBrienKaiser, within="phase", #' levels=list(phase=c("pre", "post", "fup")), #' ignore=c("pre.2", "pre.3", "pre.4", "pre.5", #' "post.2", "post.3", "post.4", "post.5", #' "fup.2", "fup.3", "fup.4", "fup.5"), #' varying=list(score=c("pre.1", "post.1", "fup.1"))) #' brief(OBrienKaiserL2, c(6, 6)) #' m1 <- Tapply(score ~ phase + treatment + gender, mean, data=OBrienKaiserL2) #' m2 <- Tapply(score ~ phase + treatment + gender, mean, data=subset(OBrienKaiserLong, hour==1)) #' all.equal(m1, m2) # should be equal #' #' @export reshapeW2L <- function(data, within, levels, varying, ignore, id="id"){ ## process variable names if (missing(ignore)) ignore <- NULL all <- colnames(data) use <- setdiff(all, ignore) all.varying <- unlist(varying) between <- setdiff(use, all.varying) levs <- expand.grid(rev(levels)) ## create empty output long data set m <- nrow(levs) n <- nrow(data) * m out <- data.frame(id=character(0), stringsAsFactors=FALSE) names(out)[1] <- id for (bet in between){ b <- data[[bet]] out[[bet]] <- if (is.factor(b)) factor(NULL, levels=levels(b)) else vector(0, mode=mode(b)) } for (win in within){ out[[win]] <- factor(NULL, levels[[win]]) } for (var in names(varying)){ v <- data[[varying[[var]][1]]] out[[var]] <- if (is.factor(v)) factor(NULL, levels=levels(v)) else vector(0, mode=mode(v)) } out[1:n, ] <- NA ## fill output data set by cases in the wide data set for (i in 
1:nrow(data)){ j <- ((i - 1)*m + 1):(i*m) out[j, id] <- as.character(i) out[j, between] <- data[i, between] out[j, rev(within)] <- levs for (var in names(varying)){ out[j, var] <- unlist(data[i, varying[[var]]]) } } ## create row names rownames(out) <- paste0(out[[id]], ".", 1:m) out } RcmdrMisc/R/plots.R0000644000176200001440000005004515124566746013646 0ustar liggesusers#' Plot a Histogram #' #' @name Hist #' #' @keywords hplot #' #' @details #' This function is a wrapper for the \code{\link[graphics]{hist}} function in the \code{base} package, permitting percentage scaling of the vertical axis in addition to frequency and density scaling. #' #' @param x a vector of values for which a histogram is to be plotted. #' @param groups a factor (or character or logical variable) to create histograms by group with common horizontal and vertical scales. #' @param scale the scaling of the vertical axis: \code{"frequency"} (the default), \code{"percent"}, or \code{"density"}. #' @param xlab x-axis label, defaults to name of variable. #' @param ylab y-axis label, defaults to value of \code{scale}. #' @param main main title for graph, defaults to empty. #' @param breaks see the \code{breaks} argument for \code{\link[graphics]{hist}}. #' @param ... arguments to be passed to \code{hist}. #' #' @return This function is primarily called for its side effect --- plotting a histogram or histograms --- but it also invisibly returns an object of class \code{\link[graphics]{hist}} or a list of \code{hist} objects. 
#' #' @author John Fox #' #' @seealso \code{\link[graphics]{hist}} #' #' @examples #' data(Prestige, package="car") #' Hist(Prestige$income, scale="percent") #' with(Prestige, Hist(income, groups=type)) #' #' @export Hist <- function(x, groups, scale=c("frequency", "percent", "density"), xlab=deparse(substitute(x)), ylab=scale, main="", breaks="Sturges", ...){ xlab # evaluate scale <- match.arg(scale) ylab if (!missing(groups)){ groupsName <- deparse(substitute(groups)) if (!is.factor(groups)){ if (!(is.character(groups) || is.logical(groups))) warning("groups variable is not a factor, character, or logical") groups <- as.factor(groups) } counts <- table(groups) if (any(counts == 0)){ levels <- levels(groups) warning("the following groups are empty: ", paste(levels[counts == 0], collapse=", ")) } levels <- levels(groups) hists <- lapply(levels, function(level) if (counts[level] != 0) hist(x[groups == level], plot=FALSE, breaks=breaks) else list(breaks=NA)) range.x <- range(unlist(lapply(hists, function(hist) hist$breaks)), na.rm=TRUE) n.breaks <- max(sapply(hists, function(hist) length(hist$breaks))) breaks. <- seq(range.x[1], range.x[2], length=n.breaks) hists <- lapply(levels, function(level) if (counts[level] != 0) hist(x[groups == level], plot=FALSE, breaks=breaks.) 
else list(counts=0, density=0)) names(hists) <- levels ylim <- if (scale == "frequency"){ max(sapply(hists, function(hist) max(hist$counts))) } else if (scale == "density"){ max(sapply(hists, function(hist) max(hist$density))) } else { max.counts <- sapply(hists, function(hist) max(hist$counts)) tot.counts <- sapply(hists, function(hist) sum(hist$counts)) ylims <- tot.counts*(max(max.counts[tot.counts != 0]/tot.counts[tot.counts != 0])) names(ylims) <- levels ylims } save.par <- par(mfrow=n2mfrow(sum(counts != 0)), oma = c(0, 0, if (main != "") 1.5 else 0, 0)) on.exit(par(save.par)) for (level in levels){ if (counts[level] == 0) next if (scale != "percent") Hist(x[groups == level], scale=scale, xlab=xlab, ylab=ylab, main=paste(groupsName, "=", level), breaks=breaks., ylim=c(0, ylim), ...) else Hist(x[groups == level], scale=scale, xlab=xlab, ylab=ylab, main=paste(groupsName, "=", level), breaks=breaks., ylim=c(0, ylim[level]), ...) } if (main != "") mtext(side = 3, outer = TRUE, main, cex = 1.2) return(invisible(hists)) } x <- na.omit(x) if (scale == "frequency") { hist <- hist(x, xlab=xlab, ylab=ylab, main=main, breaks=breaks, ...) } else if (scale == "density") { hist <- hist(x, freq=FALSE, xlab=xlab, ylab=ylab, main=main, breaks=breaks, ...) } else { n <- length(x) hist <- hist(x, axes=FALSE, xlab=xlab, ylab=ylab, main=main, breaks=breaks, ...) axis(1) max <- ceiling(10*par("usr")[4]/n) at <- if (max <= 3) (0:(2*max))/20 else (0:max)/10 axis(2, at=at*n, labels=at*100) } box() abline(h=0) invisible(hist) } #' Index Plots #' #' @name indexplot #' #' @keywords hplot #' #' @details #' Index plots with point identification. #' #' @param x a numeric variable, a matrix whose columns are numeric variables, or a numeric data frame; if \code{x} is a matrix or data frame, plots vertically aligned index plots for the columns. #' @param labels point labels; if \code{x} is a data frame, defaults to the row names of \code{x}, otherwise to the case index. 
#' @param groups an optional grouping variable, typically a factor (or character or logical variable). #' @param id.method method for identifying points; see \code{\link[car]{showLabels}}. #' @param type to be passed to \code{\link{plot}}. #' @param id.n number of points to identify; see \code{\link[car]{showLabels}}. #' @param ylab label for vertical axis; if missing, will be constructed from \code{x}; for a data frame, defaults to the column names. #' @param legend see \code{\link[graphics]{legend}}) giving location of the legend if \code{groups} are specified; if \code{legend=FALSE}, the legend is suppressed. #' @param title title for the legend; may normally be omitted. #' @param col vector of colors for the \code{groups}. #' @param \dots to be passed to \code{plot}. #' #' @return Returns labelled indices of identified points or (invisibly) \code{NULL} if no points are identified or if there are multiple variables with some missing data. #' #' @author John Fox #' #' @seealso \code{\link[car]{showLabels}}, \code{\link[graphics]{plot.default}} #' #' @examples #' with(Prestige, indexplot(income, id.n=2, labels=rownames(Prestige))) #' with(Prestige, indexplot(Prestige[, c("income", "education", "prestige")], #' groups = Prestige$type, id.n=2)) #' #' @export indexplot <- function(x, groups, labels=seq_along(x), id.method="y", type="h", id.n=0, ylab, legend="topright", title, col=palette(), ...){ if (is.data.frame(x)) { if (missing(labels)) labels <- rownames(x) x <- as.matrix(x) } if (!missing(groups)){ if (missing(title)) title <- deparse(substitute(groups)) if (!is.factor(groups)) groups <- as.factor(groups) groups <- addNA(groups, ifany=TRUE) grps <- levels(groups) grps[is.na(grps)] <- "NA" levels(groups) <- grps if (length(grps) > length(col)) stop("too few colors to plot groups") } else { grps <- NULL legend <- FALSE } if (is.matrix(x)){ ids <- NULL mfrow <- par(mfrow=c(ncol(x), 1)) on.exit(par(mfrow)) if (missing(labels)) labels <- 1:nrow(x) if 
(is.null(colnames(x))) colnames(x) <- paste0("Var", 1:ncol(x)) for (i in 1:ncol(x)) { id <- indexplot(x[, i], groups=groups, labels=labels, id.method=id.method, type=type, id.n=id.n, ylab=if (missing(ylab)) colnames(x)[i] else ylab, legend=legend, title=title, ...) ids <- union(ids, id) legend <- FALSE } if (is.null(ids) || any(is.na(x))) return(invisible(NULL)) else { ids <- sort(ids) names(ids) <- labels[ids] if (any(is.na(names(ids))) || all(ids == names(ids))) names(ids) <- NULL return(ids) } } if (missing(ylab)) ylab <- deparse(substitute(x)) plot(x, type=type, col=if (is.null(grps)) col[1] else col[as.numeric(groups)], ylab=ylab, xlab="Observation Index", ...) if (!isFALSE(legend)){ legend(legend, title=title, bty="n", legend=grps, col=palette()[1:length(grps)], lty=1, horiz=TRUE, xpd=TRUE) } if (par("usr")[3] <= 0) abline(h=0, col='gray') ids <- showLabels(seq_along(x), x, labels=labels, method=id.method, n=id.n) if (is.null(ids)) return(invisible(NULL)) else return(ids) } #' Plot a one or more lines #' #' @name lineplot #' #' @keywords hplot #' #' @details #' This function plots lines for one or more variables against another variable, typically time series against time. #' #' @param x variable giving horizontal coordinates. #' @param \dots one or more variables giving vertical coordinates. #' @param legend plot legend? Default is \code{TRUE} if there is more than one variable to plot and \code{FALSE} is there is just one. #' #' @return #' Produces a plot; returns \code{NULL} invisibly. #' #' @author John Fox #' #' @examples #' data(Bfox) #' Bfox$time <- as.numeric(rownames(Bfox)) #' with(Bfox, lineplot(time, menwage, womwage)) #' #' @export lineplot <- function(x, ..., legend){ xlab <- deparse(substitute(x)) y <- cbind(...) 
m <- ncol(y) legend <- if (missing(legend)) m > 1 if (legend && m > 1) { mar <- par("mar") top <- 3.5 + m old.mar <- par(mar=c(mar[1:2], top, mar[4])) on.exit(par(old.mar)) } if (m > 1) matplot(x, y, type="b", lty=1, xlab=xlab, ylab="") else plot(x, y, type="b", pch=16, xlab=xlab, ylab=colnames(y)) if (legend && ncol(y) > 1){ xpd <- par(xpd=TRUE) on.exit(par(xpd), add=TRUE) ncols <- length(palette()) cols <- rep(1:ncols, 1 + m %/% ncols)[1:m] usr <- par("usr") legend(usr[1], usr[4] + 1.2*top*strheight("x"), legend=colnames(y), col=cols, lty=1, pch=as.character(1:m)) } return(invisible(NULL)) } #' Plot a probability density, mass, or distribution function. #' #' @name plotDistr #' #' @keywords hplot #' #' @details #' This function plots a probability density, mass, or distribution function, adapting the form of the plot as appropriate. #' #' @param x horizontal coordinates #' @param p vertical coordinates #' @param discrete is the random variable discrete? #' @param cdf is this a cumulative distribution (as opposed to mass) function? #' @param regions for continuous distributions only, if non-\code{NULL}, a list of regions to fill with color \code{col}; each element of the list is a pair of \code{x} values with the minimum and maximum horizontal coordinates of the corresponding region. #' @param col color for plot, \code{col} may be a single value or a vector. #' @param legend plot a legend of the regions (default \code{TRUE}). #' @param legend.pos position for the legend (see \code{\link[graphics]{legend}}, default \code{"topright"}). #' @param \dots arguments to be passed to \code{plot}. #' #' @return Produces a plot; returns \code{NULL} invisibly. 
#' #' @author John Fox #' #' @examples #' x <- seq(-4, 4, length=100) #' plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density") #' plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density", #' region=list(c(1.96, Inf), c(-Inf, -1.96)), col=c("red", "blue"), new = TRUE) #' plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density", #' region=list(c(qnorm(0), qnorm(.025)), c(qnorm(.975), qnorm(1)))) # same #' #' x <- 0:10 #' plotDistr(x, pbinom(x, 10, 0.5), xlab="successes", discrete=TRUE, cdf=TRUE, #' main="Binomial Distribution Function, p=0.5, n=10") #' #' @export plotDistr <- function(x, p, discrete=FALSE, cdf=FALSE, regions=NULL, col="gray", legend=TRUE, legend.pos="topright", ...){ if (discrete){ if (cdf){ plot(x, p, ..., type="n") abline(h=0:1, col="gray") lines(x, p, ..., type="s") } else { plot(x, p, ..., type="h") points(x, p, pch=16) abline(h=0, col="gray") } } else{ if (cdf){ plot(x, p, ..., type="n") abline(h=0:1, col="gray") lines(x, p, ..., type="l") } else{ plot(x, p, ..., type="n") abline(h=0, col="gray") lines(x, p, ..., type="l") } if (!is.null(regions)){ col <- rep(col, length=length(regions)) for (i in 1:length(regions)){ region <- regions[[i]] which.xs <- (x >= region[1] & x <= region[2]) xs <- x[which.xs] ps <- p[which.xs] xs <- c(xs[1], xs, xs[length(xs)]) ps <- c(0, ps, 0) polygon(xs, ps, col=col[i]) } if (legend){ if (length(unique(col)) > 1){ legend(legend.pos, title = if (length(regions) > 1) "Regions" else "Region", legend=sapply(regions, function(region){ paste(round(region[1], 2), "to", round(region[2], 2)) }), col=col, pch=15, pt.cex=2.5, inset=0.02) } else { legend(legend.pos, title = if (length(regions) > 1) "Regions" else "Region", legend=sapply(regions, function(region){ paste(round(region[1], 2), "to", round(region[2], 2)) }), inset=0.02) } } } } return(invisible(NULL)) } #' Plot Means for One or Two-Way Layout #' #' @name plotMeans #' #' @keywords hplot #' #' @details #' 
Plots cell means for a numeric variable in each category of a factor or in each combination of categories of two factors, optionally along with error bars based on cell standard errors or standard deviations. #' #' @param response Numeric variable for which means are to be computed. #' @param factor1 Factor defining horizontal axis of the plot. #' @param factor2 If present, factor defining profiles of means. #' @param error.bars If \code{"se"}, the default, error bars around means give plus or minus one standard error of the mean; if \code{"sd"}, error bars give plus or minus one standard deviation; if \code{"conf.int"}, error bars give a confidence interval around each mean; if \code{"none"}, error bars are suppressed. #' @param level level of confidence for confidence intervals; default is .95 #' @param xlab Label for horizontal axis. #' @param ylab Label for vertical axis. #' @param legend.lab Label for legend. #' @param legend.pos Position of legend; if \code{"farright"} (the default), extra space is left at the right of the plot. #' @param main Label for the graph. #' @param pch Plotting characters for profiles of means. #' @param lty Line types for profiles of means. #' @param col Colours for profiles of means. #' @param connect connect profiles of means, default \code{TRUE}. #' @param \ldots arguments to be passed to \code{plot}. #' #' @return The function invisibly returns \code{NULL}. 
#'
#' @examples
#' data(Moore)
#' with(Moore, plotMeans(conformity, fcategory, partner.status, ylim=c(0, 25)))
#'
#' @author John Fox
#'
#' @seealso \code{\link[stats]{interaction.plot}}
#'
#' @export
plotMeans <- function(response, factor1, factor2,
                      error.bars = c("se", "sd", "conf.int", "none"), level=0.95,
                      xlab=deparse(substitute(factor1)),
                      ylab=paste("mean of", deparse(substitute(response))),
                      legend.lab=deparse(substitute(factor2)),
                      legend.pos=c("farright", "bottomright", "bottom", "bottomleft", "left",
                                   "topleft", "top", "topright", "right", "center"),
                      main="Plot of Means",
                      # pch and lty default to one symbol/line type per level of factor2;
                      # n.levs.2 is computed in the body before these defaults are first
                      # used, which works because R evaluates default arguments lazily
                      pch=1:n.levs.2, lty=1:n.levs.2, col=palette(),
                      connect=TRUE, ...){
    if (!is.numeric(response)) stop("Argument response must be numeric.")
    xlab # force evaluation
    ylab
    legend.lab
    legend.pos <- match.arg(legend.pos)
    error.bars <- match.arg(error.bars)
    if (!is.factor(factor1)) {
        if (!(is.character(factor1) || is.logical(factor1)))
            stop("Argument factor1 must be a factor, character, or logical.")
        factor1 <- as.factor(factor1)
    }
    if (missing(factor2)){
        # one-way layout: means of response within levels of factor1
        valid <- complete.cases(factor1, response)
        factor1 <- factor1[valid]
        response <- response[valid]
        means <- tapply(response, factor1, mean)
        sds <- tapply(response, factor1, sd)
        ns <- tapply(response, factor1, length)
        if (error.bars == "se") sds <- sds/sqrt(ns)
        if (error.bars == "conf.int") sds <- qt((1 - level)/2, df=ns - 1, lower.tail=FALSE) *
            sds/sqrt(ns)
        # cells with a single observation have sd == NA; draw a zero-length bar there
        sds[is.na(sds)] <- 0
        yrange <-  if (error.bars != "none") c( min(means - sds, na.rm=TRUE), max(means + sds, na.rm=TRUE))
            else range(means, na.rm=TRUE)
        levs <- levels(factor1)
        n.levs <- length(levs)
        plot(c(1, n.levs), yrange, type="n", xlab=xlab, ylab=ylab, axes=FALSE, main=main, ...)
        points(1:n.levs, means, type=if (connect) "b" else "p", pch=16, cex=2)
        box()
        axis(2)
        axis(1, at=1:n.levs, labels=levs)
        if (error.bars != "none") arrows(1:n.levs, means - sds, 1:n.levs, means + sds,
                                         angle=90, lty=2, code=3, length=0.125)
    }
    else {
        # two-way layout: profiles of means of response for factor1 x factor2 cells
        if (!is.factor(factor2)) {
            if (!(is.character(factor2) || is.logical(factor2)))
                stop("Argument factor2 must be a factor, character, or logical.")
            factor2 <- as.factor(factor2)
        }
        valid <- complete.cases(factor1, factor2, response)
        factor1 <- factor1[valid]
        factor2 <- factor2[valid]
        response <- response[valid]
        means <- tapply(response, list(factor1, factor2), mean)
        sds <- tapply(response, list(factor1, factor2), sd)
        ns <- tapply(response, list(factor1, factor2), length)
        if (error.bars == "se") sds <- sds/sqrt(ns)
        if (error.bars == "conf.int") sds <- qt((1 - level)/2, df=ns - 1, lower.tail=FALSE) *
            sds/sqrt(ns)
        sds[is.na(sds)] <- 0
        yrange <-  if (error.bars != "none") c( min(means - sds, na.rm=TRUE), max(means + sds, na.rm=TRUE))
            else range(means, na.rm=TRUE)
        levs.1 <- levels(factor1)
        levs.2 <- levels(factor2)
        n.levs.1 <- length(levs.1)
        n.levs.2 <- length(levs.2)
        if (length(pch) == 1) pch <- rep(pch, n.levs.2)
        if (length(col) == 1) col <- rep(col, n.levs.2)
        if (length(lty) == 1) lty <- rep(lty, n.levs.2)
        # leave extra horizontal room for the legend when it goes at the far right
        expand.x.range <- if (legend.pos == "farright") 1.4 else 1
        # bug fix: the counts were previously passed to stop() rather than sprintf(),
        # producing a message with literal "%d" placeholders
        if (n.levs.2 > length(col))
            stop(sprintf("Number of groups for factor2, %d, exceeds number of distinct colours, %d.",
                         n.levs.2, length(col)))
        plot(c(1, n.levs.1 * expand.x.range), yrange, type="n", xlab=xlab, ylab=ylab,
             axes=FALSE, main=main, ...)
        box()
        axis(2)
        axis(1, at=1:n.levs.1, labels=levs.1)
        for (i in 1:n.levs.2){
            points(1:n.levs.1, means[, i], type=if (connect) "b" else "p",
                   pch=pch[i], cex=2, col=col[i], lty=lty[i])
            if (error.bars != "none") arrows(1:n.levs.1, means[, i] - sds[, i],
                                             1:n.levs.1, means[, i] + sds[, i],
                                             angle=90, code=3, col=col[i], lty=lty[i], length=0.125)
        }
        if (legend.pos == "farright"){
            x.posn <- n.levs.1 * 1.1
            y.posn <- sum(c(0.1, 0.9) * par("usr")[c(3,4)])
            # text(x.posn, y.posn, legend.lab, adj=c(0, -.5))
            legend(x.posn, y.posn, levs.2, pch=pch, col=col, lty=lty, title=legend.lab)
        }
        else legend(legend.pos, levs.2, pch=pch, col=col, lty=lty, title=legend.lab, inset=0.02)
    }
    invisible(NULL)
}

#' Bin a Numeric Variable
#'
#' @name binVariable
#'
#' @details Create a factor dissecting the range of a numeric variable into bins of equal width, (roughly) equal frequency, or at "natural" cut points.
#' The \code{\link[base]{cut}} function is used to create the factor.
#'
#' @keywords manip
#'
#' @param x numeric variable to be binned.
#' @param bins number of bins.
#' @param method one of \code{"intervals"} for equal-width bins; \code{"proportions"} for equal-count bins; \code{"natural"} for cut points between bins to be determined by a k-means clustering.
#' @param labels if \code{FALSE}, numeric labels will be used for the factor levels; if \code{NULL}, the cut points are used to define labels; otherwise a character vector of level names.
#'
#' @return A factor.
#'
#' @author Dan Putler, slightly modified by John Fox (5 Dec 04 & 5 Mar 13) with the original author's permission.
#'
#' @seealso \code{\link[base]{cut}}, \code{\link[stats]{kmeans}}.
#'
#' @examples
#' summary(binVariable(rnorm(100), method="prop", labels=letters[1:4]))
#'
#' @export
binVariable <- function (x, bins=4, method=c("intervals", "proportions", "natural"),
                         labels=FALSE) {
    method <- match.arg(method)
    if (length(x) < bins) {
        stop("The number of bins exceeds the number of data values")
    }
    # dispatch on the binning method; each branch produces the result of cut()
    binned <- switch(method,
        intervals = cut(x, bins, labels=labels),          # equal-width bins
        proportions = {                                   # (roughly) equal-count bins
            cutpoints <- quantile(x, probs=seq(0, 1, 1/bins), na.rm=TRUE)
            cut(x, cutpoints, include.lowest = TRUE, labels=labels)
        },
        natural = {                                       # k-means "natural" breaks
            observed <- na.omit(x)
            cluster.tops <- tapply(observed, KMeans(observed, bins)$cluster, max)
            cut(x, c(-Inf, cluster.tops), labels=labels)
        })
    # cut() returns integer codes when labels=FALSE; always return a factor
    as.factor(binned)
}

#' Bin a Numeric Variable
#'
#' @name bin.var
#'
#' @keywords internal
#'
#' @details \code{bin.var} is a synomym for \code{binVariable}, retained for backwards compatibility.
#'
#' @param ... arguments to be passed to \code{binVariable}.
#'
#' @export
bin.var <- function(...) binVariable(...)

#' Frequency Distributions of Numeric Variables
#'
#' @name discreteCounts
#'
#' @keywords univar
#'
#' @details
#' Computes the frequency and percentage distribution of a discrete numeric variable or the distributions of the variables in a numeric matrix or data frame.
#'
#' @param x a discrete numeric vector, matrix, or data frame.
#' @param round.percents number of decimal places to round percentages; default is \code{2}.
#' @param name name for the variable; only used for vector argument \code{x}.
#' @param max.values maximum number of unique values (default is the smallest of twice the square root of the number of elements in \code{x}, 10 times the log10 of the number of elements, and \code{100}); if exceeded, an error is reported.
#'
#' @return For a numeric vector, invisibly returns the table of counts.
#' For a matrix or data frame, invisibly returns \code{NULL}
#'
#' @author John Fox
#'
#' @seealso \code{\link{binnedCounts}}
#'
#' @examples
#' set.seed(12345) # for reproducibility
#' discreteCounts(data.frame(x=rpois(51, 2), y=rpois(51, 10)))
#'
#' @export
discreteCounts <- function(x, round.percents=2, name=deparse(substitute(x)),
                           max.values=min(round(2*sqrt(length(x))),
                                          round(10*log10(length(x))), 100)){
    if (is.data.frame(x)) x <- as.matrix(x)
    if (is.matrix(x)) {
        # print one distribution per column, separated by blank lines
        col.names <- colnames(x)
        for (j in seq_len(ncol(x))){
            discreteCounts(x[, j], round.percents=round.percents, name=col.names[j],
                           max.values=max.values)
            cat("\n")
        }
        return(invisible(NULL))
    }
    Count <- table(x)
    n.distinct <- length(Count)
    # guard against accidentally tabulating an essentially continuous variable
    if (n.distinct > max.values)
        stop("number of unique values of ", name, ", ", n.distinct,
             ", exceeds maximum, ", max.values)
    n.total <- sum(Count)
    Percent <- round(100*Count/n.total, round.percents)
    pct.total <- round(sum(Percent), round.percents)
    # column names "Count" and "Percent" are taken from the variable names
    tbl <- rbind(cbind(Count, Percent), c(n.total, pct.total))
    rownames(tbl) <- c(names(Count), "Total")
    cat("Distribution of", name, "\n")
    print(tbl)
    invisible(Count)
}

#' Read an SPSS Data Set
#'
#' @name readSPSS
#'
#' @keywords manip
#'
#' @details
#' \code{readSPSS} reads an SPSS data set, stored in a file of type \code{.sav} or \code{.por}, into an R data frame; it provides a front end to the \code{\link[haven]{read_spss}} function in the \pkg{haven} package and the \code{\link[foreign]{read.spss}} function in the \pkg{foreign} package.
#'
#' @param file path to an SPSS \code{.sav} or \code{.por} file.
#' @param rownames if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names, which should be unique.
#' @param stringsAsFactors if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors and factors are created from SPSS value labels.
#' @param tolower change variable names to lowercase, default \code{TRUE}.
#' @param use.value.labels if \code{TRUE}, the default, variables with value labels in the SPSS data set will become either factors or character variables (depending on the \code{stringsAsFactors} argument) with the value labels as their levels or values. As for \code{\link[foreign]{read.spss}}, this is only done if there are at least as many labels as values of the variable (and values without a matching label are returned as \code{NA}).
#' @param use.haven use \code{\link[haven]{read_spss}} from the \pkg{haven} package to read the file, in preference to \code{\link[foreign]{read.spss}} from the \pkg{foreign} package; the default is \code{TRUE} for a \code{.sav} file and \code{FALSE} for a \code{.por} file.
#'
#' @return a data frame.
#'
#' @author John Fox
#'
#' @seealso \code{\link[haven]{read_spss}}, \code{\link[foreign]{read.spss}}
#'
#' @export
readSPSS <- function(file, rownames=FALSE, stringsAsFactors=FALSE, tolower=TRUE,
                     use.value.labels=TRUE, use.haven=!por){
    # NB: the default for use.haven refers to `por`, which is computed just below;
    # this works because R evaluates default arguments lazily, at first use.
    filename <- rev(strsplit(file, "\\.")[[1]])
    por <- "por" == if (length(filename) > 1) filename[1] else ""
    Data <- if (use.haven) as.data.frame(haven::read_spss(file))
    else foreign::read.spss(file, to.data.frame=TRUE, use.value.labels=use.value.labels)
    if (rownames){
        # trim leading/trailing blanks from the first column before using it as row names
        col1 <- gsub("^ *", "", gsub(" *$", "", Data[[1]]))
        check <- length(unique(col1)) == nrow(Data)
        if (!check) warning("row names are not unique, ignored")
        else {
            rownames(Data) <- col1
            Data[[1]] <- NULL
        }
    }
    if (use.haven && use.value.labels){
        # haven returns labelled vectors; translate values to their labels when there
        # are at least as many labels as distinct values (mirroring read.spss behavior)
        na <- as.character(NA)
        n <- nrow(Data)
        for (col in names(Data)){
            var <- Data[, col]
            if (!is.null(labs <- attr(var, "labels"))){
                if (length(labs) < length(unique(var))) next
                nms <- names(labs)
                var2 <- rep(na, n)
                for (i in seq_along(labs)){
                    var2[var == labs[i]] <- nms[i]
                }
                Data[, col] <- var2
            }
        }
    }
    if (stringsAsFactors){
        # bug fix: was sapply(Data, class) == "character", which misbehaves when a
        # column's class() has length > 1 (e.g., POSIXct); test the predicate directly
        char.cols <- vapply(Data, is.character, logical(1L))
        if (any(char.cols)){
            for (col in names(Data)[char.cols]){
                fac <- Data[, col]
                fac[fac == ""] <- NA
                Data[, col] <- as.factor(fac)
            }
        }
    }
    num.cols <- vapply(Data, is.numeric, logical(1L))
    if (use.haven && any(num.cols)){
        for (col in names(Data)[num.cols]) {
            # strip haven attributes and convert non-finite codes (sysmis) to NA
            Data[, col] <- as.numeric(Data[, col])
            Data[!is.finite(Data[, col]), col] <- NA
        }
    }
    if (tolower){
        names(Data) <- tolower(names(Data))
    }
    Data
}

#' Read a Stata Data Set
#'
#' @name readStata
#'
#' @details
#' \code{readStata} reads a Stata data set, stored in a file of type \code{.dta}, into an R data frame; it provides a front end to the \code{\link[readstata13]{read.dta13}} function in the \pkg{readstata13} package.
#'
#' @keywords manip
#'
#' @param file path to a Stata \code{.dta} file.
#' @param rownames if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names, which should be unique.
#' @param stringsAsFactors if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors and factors are created from Stata value labels.
#' @param convert.dates if \code{TRUE} (the default) then Stata dates are converted to R dates.
#'
#' @return
#' a data frame.
#'
#' @author John Fox
#'
#' @seealso
#' \code{\link[readstata13]{read.dta13}}
#'
#' @export
readStata <- function(file, rownames=FALSE, stringsAsFactors=FALSE, convert.dates=TRUE){
    # read.dta13() does the real work; conversion of Stata value labels to
    # factors is delegated to it via convert.factors.
    Data <- readstata13::read.dta13(file, convert.factors=stringsAsFactors, convert.dates=convert.dates)
    if (rownames){
        # Use the first column as row names, but only if its values are unique.
        check <- length(unique(col1 <- Data[[1]])) == nrow(Data)
        if (!check) warning ("row names are not unique, ignored")
        else {
            rownames(Data) <- col1
            Data[[1]] <- NULL  # drop the column promoted to row names
        }
    }
    if (stringsAsFactors){
        # vapply(is.character) is type-stable and robust to columns with more
        # than one class, for which sapply(Data, class) would return a list
        # and the == comparison would misbehave.
        char.cols <- vapply(Data, is.character, logical(1))
        if (any(char.cols)){
            for (col in names(Data)[char.cols]){
                fac <- Data[, col]
                fac[fac == ""] <- NA  # treat empty strings as missing, not as a factor level
                Data[, col] <- as.factor(fac)
            }
        }
    }
    Data
}
RcmdrMisc/R/readSAS.R0000644000176200001440000000263715124436002013750 0ustar liggesusers#' Read a SAS b7dat Data Set
#'
#' @name readSAS
#'
#' @keywords manip
#'
#' @details
#' \code{readSAS} reads a SAS ``b7dat'' data set, stored in a file of type \code{.sas7bdat}, into an R data frame; it provides a front end to the \code{\link[haven]{read_sas}} function in the \pkg{haven} package.
#'
#' @param file path to a SAS b7dat file.
#' @param rownames if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names (which must be unique---i.e., no duplicates).
#' @param stringsAsFactors if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors.
#'
#' @return
#' a data frame.
#'
#' @author John Fox
#'
#' @seealso \code{\link[haven]{read_sas}}
#'
#' @export
readSAS <- function(file, rownames=FALSE, stringsAsFactors=FALSE){
    # Convert the result of read_sas() to a plain data frame.
    Data <- as.data.frame(haven::read_sas(file))
    if (rownames){
        # Use the first column as row names, but only if its values are unique.
        check <- length(unique(col1 <- Data[[1]])) == nrow(Data)
        if (!check) warning ("row names are not unique, ignored")
        else {
            rownames(Data) <- col1
            Data[[1]] <- NULL  # drop the column promoted to row names
        }
    }
    if (stringsAsFactors){
        # vapply(is.character) is type-stable and robust to columns with more
        # than one class (haven can return multi-class labelled columns), for
        # which sapply(Data, class) would return a list and the == comparison
        # would misbehave.
        char.cols <- vapply(Data, is.character, logical(1))
        if (any(char.cols)){
            for (col in names(Data)[char.cols]){
                fac <- Data[, col]
                fac[fac == ""] <- NA  # treat empty strings as missing, not as a factor level
                Data[, col] <- as.factor(fac)
            }
        }
    }
    Data
}
RcmdrMisc/R/RcmdrMisc-package.R0000644000176200001440000000256315126146104015743 0ustar liggesusers#' @title R Commander Miscellaneous Functions
#'
#' @name RcmdrMisc-package
#'
#' @details
#' \preformatted{
#'
#' Package: RcmdrMisc
#'
#' Type: Package
#'
#' Version: 2.10.1
#'
#' Date: 2026-01-03
#'
#' License: GPL (>=3)
#' }
#'
#' Various statistical, graphics, and data-management functions used by the Rcmdr package in the R Commander GUI for R.
#' #' @author John Fox #' #' Mantainer: Manuel Munoz-Marquez #' #' @seealso \url{https://www.r-project.org}, \url{https://facsocsci.mcmaster.ca/jfox/} #' #' @importFrom abind abind #' @importFrom car showLabels hccm linearHypothesis deltaMethod Tapply #' @importFrom colorspace rainbow_hcl #' @importFrom e1071 skewness kurtosis #' @importFrom foreign read.spss #' @importFrom graphics abline arrows axis barplot box hist legend lines matplot mtext par pie plot points polygon strheight text title #' @importFrom grDevices n2mfrow palette #' @importFrom haven read_sas read_spss #' @importFrom Hmisc rcorr #' @importFrom lattice xyplot strip.default #' @importFrom MASS stepAIC #' @importFrom nortest ad.test cvm.test lillie.test pearson.test sf.test #' @importFrom readxl read_excel excel_sheets #' @importFrom sandwich vcovHAC #' @importFrom stats coef complete.cases cor dist formula kmeans model.matrix na.omit shapiro.test p.adjust pf pt qt quantile runif sd update as.formula ftable reshape NULL RcmdrMisc/R/gumbel.R0000644000176200001440000000337315123010360013731 0ustar liggesusers#' The Gumbel Distribution #' #' @name Gumbel #' #' @aliases Gumbel dgumbel pgumbel qgumbel rgumbel #' #' @keywords distribution #' @details #' Density, distribution function, quantile function and random generation for the Gumbel distribution with specified \code{location} and \code{scale} parameters. #' #' @param x vector of values of the variable. #' @param q vector of quantiles. #' @param p vector of probabilities. #' @param n number of observations. If \code{length(n)} > 1, the length is taken to be the number required. #' @param location location parameter (default \code{0}); potentially a vector. #' @param scale scale parameter (default \code{1}); potentially a vector. #' @param lower.tail logical; if \code{TRUE} (the default) probabilities and quantiles correspond to \eqn{P(X \le x)}, if \code{FALSE} to \eqn{P(X > x)}. 
#'
#' @references
#' See \url{https://en.wikipedia.org/wiki/Gumbel_distribution} for details of the Gumbel distribution.
#'
#' @author John Fox
#'
#' @examples
#' x <- 100 + 5*c(-Inf, -1, 0, 1, 2, 3, Inf, NA)
#' dgumbel(x, 100, 5)
#' pgumbel(x, 100, 5)
#' p <- c(0, .25, .5, .75, 1, NA)
#' qgumbel(p, 100, 5)
#' summary(rgumbel(1e5, 100, 5))
#'
#' @export
dgumbel <- function(x, location=0, scale=1){
    # Density f(z) = exp(-exp(-z)) * exp(-z) / scale for the standardized
    # value z = (x - location)/scale.
    z <- (x - location)/scale
    d <- exp(-exp(-z))*exp(-z)/scale
    # At z = -Inf the formula evaluates to 0 * Inf = NaN; the density there is 0.
    d[z == -Inf] <- 0
    d
}

#' @rdname Gumbel
#' @export
pgumbel <- function(q, location=0, scale=1, lower.tail=TRUE){
    # CDF F(z) = exp(-exp(-z)); the upper tail is 1 - F(z).
    p <- exp(-exp(- (q - location)/scale))
    if (lower.tail) p else 1 - p
}

#' @rdname Gumbel
#' @export
qgumbel <- function(p, location=0, scale=1, lower.tail=TRUE){
    if (!lower.tail) p <- 1 - p
    # Quantile function: the inverse of the CDF in pgumbel().
    location - scale*log(-log(p))
}

#' @rdname Gumbel
#' @export
rgumbel <- function(n, location=0, scale=1){
    # Random generation by inversion: apply the quantile transform to
    # uniform draws.
    location - scale*log(-log(runif(n)))
}
RcmdrMisc/R/partial.cor.R0000644000176200001440000000536715124555207014710 0ustar liggesusers#' Partial Correlations
#'
#' @name partial.cor
#'
#' @keywords misc
#'
#' @details
#' Computes a matrix of partial correlations between each pair of variables controlling for the others.
#'
#' @param X data matrix.
#' @param tests show two-sided p-value and p-value adjusted for multiple testing by Holm's method for each partial correlation.
#' @param use observations to use to compute partial correlations, default is \code{"complete.obs"}.
#'
#' @return
#' Returns the matrix of partial correlations, optionally with adjusted and unadjusted p-values.
#'
#' @author John Fox
#'
#' @seealso \code{\link[stats]{cor}}
#'
#' @examples
#' data(DavisThin, package="car")
#' partial.cor(DavisThin)
#' partial.cor(DavisThin, tests=TRUE)
#'
#' @export
partial.cor <- function(X, tests=FALSE, use=c("complete.obs", "pairwise.complete.obs")){
    # Pairwise counts of jointly non-missing observations for each pair of
    # columns (used for use="pairwise.complete.obs").
    countValid <- function(X){
        X <- !is.na(X)
        t(X) %*% X
    }
    use <- match.arg(use)
    if (use == "complete.obs"){
        X <- na.omit(X)
        n <- nrow(X)  # a single n common to all pairs
    }
    else n <- countValid(X)  # a matrix of pairwise ns
    # Partial correlations via the inverse of the correlation matrix:
    # scale -R^{-1} by the reciprocal square roots of its diagonal.
    R <- cor(X, use=use)
    RI <- solve(R)
    D <- 1/sqrt(diag(RI))
    R <- - RI * (D %o% D)
    diag(R) <- 0  # the (trivial) diagonal is zeroed rather than reported as 1
    rownames(R) <- colnames(R) <- colnames(X)
    result <- list(R=R, n=n, P=NULL, P.unadj=NULL)
    if (tests){
        opt <- options(scipen=5)
        on.exit(options(opt))  # restore the user's scientific-notation penalty
        # F-statistic with 1 and n - ncol(X) df for each partial correlation.
        df <- n - ncol(X)
        f <- (R^2)*df/(1 - R^2)
        P <- P.unadj <- pf(f, 1, df, lower.tail=FALSE)
        # Holm-adjust the lower-triangle p-values, then mirror them so the
        # reported matrix is symmetric.
        p <- P[lower.tri(P)]
        adj.p <- p.adjust(p, method="holm")
        P[lower.tri(P)] <- adj.p
        P[upper.tri(P)] <- 0
        P <- P + t(P)
        # Format p-values to 4 decimals, displaying very small ones as "<.0001".
        P <- ifelse(P < 1e-04, 0, P)
        P <- format(round(P, 4))
        diag(P) <- ""
        P[c(grep("0.0000", P), grep("^ 0$", P))] <- "<.0001"
        P.unadj <- ifelse(P.unadj < 1e-04, 0, P.unadj)
        P.unadj <- format(round(P.unadj, 4))
        diag(P.unadj) <- ""
        P.unadj[c(grep("0.0000", P.unadj), grep("^ 0$", P.unadj))] <- "<.0001"
        result$P <- P
        result$P.unadj <- P.unadj
    }
    class(result) <- "partial.cor"
    result
}

#' @rdname partial.cor
#' @param x data matrix.
#' @param digits minimal number of _significant_ digits, see \code{\link[base]{print.default}}.
#' @param \dots arguments to pass down from the print method.
#' @export
print.partial.cor <- function(x, digits=max(3, getOption("digits") - 2), ...){
    cat("\n Partial correlations:\n")
    # Forward ... to print(), not to round(): round() takes no further
    # arguments and would error if any were supplied.
    print(round(x$R, digits), ...)
    cat("\n Number of observations: ")
    n <- x$n
    if (all(n[1] == n)) cat(n[1], "\n")  # a single common n
    else{
        cat("\n")
        print(n)  # pairwise ns: print the whole matrix
    }
    if (!is.null(x$P)){
        cat("\n Pairwise two-sided p-values:\n")
        print(x$P.unadj, quote=FALSE)
        cat("\n Adjusted p-values (Holm's method)\n")
        print(x$P, quote=FALSE)
    }
    # Print methods conventionally return their argument invisibly.
    invisible(x)
}
RcmdrMisc/NEWS0000644000176200001440000001154315126146056012646 0ustar liggesusersChanges to Version 2.10.1

  o Move all documentation into R files for use with roxygen

  o Fixed some minor bugs in examples that prevented them from running in a browser environment

  o Changed maintainer

Changes to Version 2.9-2

  o Change "CV" to "cv" to select coefficient of variation in numSummary() (after bug in Rcmdr reported by Michael Kemp).

  o Small fixes to docs.

Changes to Version 2.9-1

  o Fixed bug in numSummaries() introduced along with CV() (reported by Kenneth Knoblauch).

Changes to Version 2.9-0

  o Added CV() function.

Changes to Version 2.7-2

  o Added xlim, ylim, and ... args to discretePlot() (suggestion of Beatriz Lacruz Casaucau).

  o Fixed a bug in reshapeL2W() that could cause the columns produced from the varying variable(s) in the long data set to be misnamed (reported by Tamas Ferenci).

  o Duplicated quantiles in numSummary() now produce a warning and only unique quantiles are used (after report by Beatriz Lacruz Casaucau).

  o Fixed bug in Barplot() that produced misplaced text for counts/percents in many cases (also reported by Beatriz Lacruz Casaucau).

Changes to Version 2.7-1

  o Added repeatedMeasuresPlot().

  o Added reshapeL2W() and reshapeW2L().

  o The stringsAsFactors argument to readSAS(), readSPSS(), readStata(), and readXL() now defaults to FALSE.

  o Barplot(), discretePlot(), Dotplot(), Hist(), plotMeans() accommodate logical and character variables as grouping factor(s).
Changes to Version 2.7-0 o Added discreteCounts() and piechart() (after suggestions of Ethan Harris). o Added counts/percents to bars in Barplot() (suggestion of Ethan Harris). o Fixed DeltaMethod() so that it doesn't ignore the level argument (reported by on de Haan). o Small improvements. Changes to Version 2.5-1 o Depend on R >= version 3.5.0 because of use of isFALSE() (at request of CRAN). Changes to Version 2.5-0 o Synchronize version numbers with minor versions of the Rcmdr package. o Show axis ticks at observed values in discretePlot() (suggestion of Beatriz Lacruz Casaucau). o binnedCounts() reports percentage as well as frequency distribution (suggestion of Beatriz Lacruz Casaucau). o indexplot() accepts groups argument. Changes to Version 1.0-10 o Make indexplot() compatible with car 3.0-0. Changes to Version 1.0-9 o Fixed bug in discretePlot() (reported by Felipe Rafael Ribeiro Melo). Changes to Version 1.0-8 o readSPSS() now can process value labels when haven::read_SPSS() is called; new use.value.labels argument, defaults to TRUE (after report by Manuel Munoz Marquez). Changes to Version 1.0-7 o No longer import print.rcorr from Hmisc. Changes to Version 1.0-6 o Added readSAS() for SAS b7dat files. o Added readStata() for improved input of Stata .dta files. o Added readSPSS() for reading SPSS .sav and .por files. o Improvements to plotMeans() legends. o Barplot() can now compute conditional percentages (suggestion of Beatriz Lacruz); other improvements. o Hist() now invisibly returns a "hist" object or list of "hist" objects (suggestion of Beatriz Lacruz). o renamed bin.var() as binVariable(), retaining bin.var() as a synomym. o Added discretePlot() for plotting distributions of discrete numeric variables (after a suggestion of Beatriz Lacruz). o Added plotBoot() for plotting bootstrap distributions. o indexplot() can now plot multiple variables (suggestion of Manuel Munoz). 
o Added binnedCounts() for binned frequency distribution of a numeric variable (suggestion of Manuel Munoz). o Added normalityTests() for various tests of normality. o Small bug fixes. Changes to Version 1.0-5 o Added connect argument to plotMeans() (suggestion of Graham Smith). o Added capability to plot regions under density functions to plotDistr(). o Added *gumbel() functions for the Gumbel distribution, parametrized by location and scale. Changes to Version 1.0-4 o Added ... argument to Barplot (after Rcmdr bug reported by Fritz Leisch). o Added DeltaMethod(). Changes to Version 1.0-3 o Fixed bug in rcorr.adjust() that didn't properly convert .000 to <.001 for pairwise-complete correlations (reported by Bob Muenchen). o Added Barplot() and Dotplot(). o Added readXL(), export excel_sheets(), both from readxl package. o Conform to new CRAN package import requirements. Changes to Version 1.0-2 o Updated the following inadvertently reverted functions (and docs): partial.cor(), numSummary(), Hist(), rcorr.adjust() (following bug report by Mark Dunning). o Hist() reports a warning but doesn't fail for empty groups. Changes to Version 1.0-1 o Added "se(mean)" to numSummary(). Changes to Version 1.0-0 o First version of the package, with functions moved from the Rcmdr package to make them more conveniently available to other CRAN packages (at the suggestion of Liviu Andronic). 
RcmdrMisc/NAMESPACE0000644000176200001440000000543115126145573013370 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(mergeRows,data.frame) S3method(normalityTest,default) S3method(normalityTest,formula) S3method(plotBoot,boot) S3method(print,DeltaMethod) S3method(print,numSummary) S3method(print,partial.cor) S3method(print,rcorr.adjust) S3method(print,reliability) S3method(summarySandwich,lm) export(Barplot) export(CV) export(DeltaMethod) export(Dotplot) export(Hist) export(KMeans) export(assignCluster) export(bin.var) export(binVariable) export(binnedCounts) export(colPercents) export(cv) export(dgumbel) export(discreteCounts) export(discretePlot) export(excel_sheets) export(indexplot) export(lineplot) export(mergeRows) export(normalityTest) export(numSummary) export(partial.cor) export(pgumbel) export(piechart) export(plotBoot) export(plotDistr) export(plotMeans) export(qgumbel) export(rcorr.adjust) export(readSAS) export(readSPSS) export(readStata) export(readXL) export(reliability) export(repeatedMeasuresPlot) export(reshapeL2W) export(reshapeW2L) export(rgumbel) export(rowPercents) export(stepwise) export(summarySandwich) export(totPercents) importFrom(Hmisc,rcorr) importFrom(MASS,stepAIC) importFrom(abind,abind) importFrom(car,Tapply) importFrom(car,deltaMethod) importFrom(car,hccm) importFrom(car,linearHypothesis) importFrom(car,showLabels) importFrom(colorspace,rainbow_hcl) importFrom(e1071,kurtosis) importFrom(e1071,skewness) importFrom(foreign,read.spss) importFrom(grDevices,n2mfrow) importFrom(grDevices,palette) importFrom(graphics,abline) importFrom(graphics,arrows) importFrom(graphics,axis) importFrom(graphics,barplot) importFrom(graphics,box) importFrom(graphics,hist) importFrom(graphics,legend) importFrom(graphics,lines) importFrom(graphics,matplot) importFrom(graphics,mtext) importFrom(graphics,par) importFrom(graphics,pie) importFrom(graphics,plot) importFrom(graphics,points) importFrom(graphics,polygon) 
importFrom(graphics,strheight) importFrom(graphics,text) importFrom(graphics,title) importFrom(haven,read_sas) importFrom(haven,read_spss) importFrom(lattice,strip.default) importFrom(lattice,xyplot) importFrom(nortest,ad.test) importFrom(nortest,cvm.test) importFrom(nortest,lillie.test) importFrom(nortest,pearson.test) importFrom(nortest,sf.test) importFrom(readxl,excel_sheets) importFrom(readxl,read_excel) importFrom(sandwich,vcovHAC) importFrom(stats,as.formula) importFrom(stats,coef) importFrom(stats,complete.cases) importFrom(stats,cor) importFrom(stats,dist) importFrom(stats,formula) importFrom(stats,ftable) importFrom(stats,kmeans) importFrom(stats,model.matrix) importFrom(stats,na.omit) importFrom(stats,p.adjust) importFrom(stats,pf) importFrom(stats,pt) importFrom(stats,qt) importFrom(stats,quantile) importFrom(stats,reshape) importFrom(stats,runif) importFrom(stats,sd) importFrom(stats,shapiro.test) importFrom(stats,update) RcmdrMisc/man/0000755000176200001440000000000015126146177012722 5ustar liggesusersRcmdrMisc/man/assignCluster.Rd0000644000176200001440000000356115124673133016037 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster.R \name{assignCluster} \alias{assignCluster} \title{Append a Cluster Membership Variable to a Dataframe} \usage{ assignCluster(clusterData, origData, clusterVec) } \arguments{ \item{clusterData}{The data matrix used in the clustering solution. The data matrix may have have only a subset of the observations contained in the original dataframe.} \item{origData}{The original dataframe from which the data used in the clustering solution were taken.} \item{clusterVec}{An integer variable containing the cluster membership assignments for the observations used in creating the clustering solution. 
This vector can be created using \code{cutree} for clustering solutions generated by \code{hclust} or the \code{cluster} component of a list object created by \code{kmeans} or \code{KMeans}.} } \value{ A factor (with integer labels) that indicate the cluster assignment for each observation, with an NA value given to observations not used in the clustering solution. } \description{ Append a Cluster Membership Variable to a Dataframe } \details{ Correctly creates a cluster membership variable that can be attached to a dataframe when only a subset of the observations in that dataframe were used to create the clustering solution. NAs are assigned to the observations of the original dataframe not used in creating the clustering solution. This code originally by Dan Putler, used with permission. } \examples{ ## Load USArrests data set data(USArrests) ## Create three cluster USArrkm3 <- KMeans(USArrests[USArrests$UrbanPop<66, ], centers=3) ## Create a variable with cluster assignment assignCluster(USArrests[USArrests$UrbanPop<66, ], USArrests, USArrkm3$cluster) } \seealso{ \code{\link[stats]{hclust}}, \code{\link[stats]{cutree}}, \code{\link[stats]{kmeans}}, \code{\link{KMeans}}. } \author{ Dan Putler } RcmdrMisc/man/bin.var.Rd0000644000176200001440000000062515124673133014546 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bin.var.R \name{bin.var} \alias{bin.var} \title{Bin a Numeric Variable} \usage{ bin.var(...) } \arguments{ \item{...}{arguments to be passed to \code{binVariable}.} } \description{ Bin a Numeric Variable } \details{ \code{bin.var} is a synomym for \code{binVariable}, retained for backwards compatibility. 
} \keyword{internal} RcmdrMisc/man/readStata.Rd0000644000176200001440000000212715124673134015117 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readStata.R \name{readStata} \alias{readStata} \title{Read a Stata Data Set} \usage{ readStata( file, rownames = FALSE, stringsAsFactors = FALSE, convert.dates = TRUE ) } \arguments{ \item{file}{path to a Stata \code{.dta} file.} \item{rownames}{if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names, which should be unique.} \item{stringsAsFactors}{if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors and factors are created from Stata value labels.} \item{convert.dates}{if \code{TRUE} (the default) then Stata dates are converted to R dates.} } \value{ a data frame. } \description{ Read a Stata Data Set } \details{ \code{readStata} reads a Stata data set, stored in a file of type \code{.dta}, into an R data frame; it provides a front end to the \code{\link[readstata13]{read.dta13}} function in the \pkg{readstata13} package. } \seealso{ \code{\link[readstata13]{read.dta13}} } \author{ John Fox } \keyword{manip} RcmdrMisc/man/Hist.Rd0000644000176200001440000000312415124673134014114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{Hist} \alias{Hist} \title{Plot a Histogram} \usage{ Hist( x, groups, scale = c("frequency", "percent", "density"), xlab = deparse(substitute(x)), ylab = scale, main = "", breaks = "Sturges", ... 
) } \arguments{ \item{x}{a vector of values for which a histogram is to be plotted.} \item{groups}{a factor (or character or logical variable) to create histograms by group with common horizontal and vertical scales.} \item{scale}{the scaling of the vertical axis: \code{"frequency"} (the default), \code{"percent"}, or \code{"density"}.} \item{xlab}{x-axis label, defaults to name of variable.} \item{ylab}{y-axis label, defaults to value of \code{scale}.} \item{main}{main title for graph, defaults to empty.} \item{breaks}{see the \code{breaks} argument for \code{\link[graphics]{hist}}.} \item{...}{arguments to be passed to \code{hist}.} } \value{ This function is primarily called for its side effect --- plotting a histogram or histograms --- but it also invisibly returns an object of class \code{\link[graphics]{hist}} or a list of \code{hist} objects. } \description{ Plot a Histogram } \details{ This function is a wrapper for the \code{\link[graphics]{hist}} function in the \code{base} package, permitting percentage scaling of the vertical axis in addition to frequency and density scaling. 
} \examples{ data(Prestige, package="car") Hist(Prestige$income, scale="percent") with(Prestige, Hist(income, groups=type)) } \seealso{ \code{\link[graphics]{hist}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/discreteCounts.Rd0000644000176200001440000000255615124673133016212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discreteCounts.R \name{discreteCounts} \alias{discreteCounts} \title{Frequency Distributions of Numeric Variables} \usage{ discreteCounts( x, round.percents = 2, name = deparse(substitute(x)), max.values = min(round(2 * sqrt(length(x))), round(10 * log10(length(x))), 100) ) } \arguments{ \item{x}{a discrete numeric vector, matrix, or data frame.} \item{round.percents}{number of decimal places to round percentages; default is \code{2}.} \item{name}{name for the variable; only used for vector argument \code{x}.} \item{max.values}{maximum number of unique values (default is the smallest of twice the square root of the number of elements in \code{x}, 10 times the log10 of the number of elements, and \code{100}); if exceeded, an error is reported.} } \value{ For a numeric vector, invisibly returns the table of counts. For a matrix or data frame, invisibly returns \code{NULL} } \description{ Frequency Distributions of Numeric Variables } \details{ Computes the frequency and percentage distribution of a descrete numeric variable or the distributions of the variables in a numeric matrix or data frame. 
} \examples{ set.seed(12345) # for reproducibility discreteCounts(data.frame(x=rpois(51, 2), y=rpois(51, 10))) } \seealso{ \code{\link{binnedCounts}} } \author{ John Fox } \keyword{univar} RcmdrMisc/man/reliability.Rd0000644000176200001440000000253015124673134015516 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reliability.R \name{reliability} \alias{reliability} \alias{print.reliability} \title{Reliability of a Composite Scale} \usage{ reliability(S) \method{print}{reliability}(x, digits = 4, ...) } \arguments{ \item{S}{the covariance matrix of the items; normally, there should be at least 3 items and certainly no fewer than 2.} \item{x}{reliability object to be printed.} \item{digits}{number of decimal places.} \item{\dots}{not used: for compatibility with the print generic."} } \value{ an object of class reliability, which normally would be printed. } \description{ Reliability of a Composite Scale } \details{ Calculates Cronbach's alpha and standardized alpha (lower bounds on reliability) for a composite (summated-rating) scale. Standardized alpha is for the sum of the standardized items. In addition, the function calculates alpha and standardized alpha for the scale with each item deleted in turn, and computes the correlation between each item and the sum of the other items. } \examples{ data(DavisThin) reliability(cov(DavisThin)) } \references{ N. Cliff (1986) Psychological testing theory. Pp. 343--349 in S. Kotz and N. Johnson, eds., \emph{Encyclopedia of Statistical Sciences, Vol. 7}. Wiley. 
} \seealso{ \code{\link[stats]{cov}} } \author{ John Fox } \keyword{misc} RcmdrMisc/man/reshapeL2W.Rd0000644000176200001440000000627015124673134015166 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reshapeL2W.R \name{reshapeL2W} \alias{reshapeL2W} \title{Reshape Repeated-Measures Data from Long to Wide Format} \usage{ reshapeL2W(data, within, id, varying, ignore) } \arguments{ \item{data}{a data frame in long format.} \item{within}{a character vector of names of the within-subjects factors in the long form of the data; there must be at least one within-subjects factor.} \item{id}{the (character) name of the variable representing the subject identifier in the long form of the data set; that is, rows with the same \code{id} belong to the same subject.} \item{varying}{a character vector of names of the occasion-varying variables in the long form of the data; there must be at least one such variable, and typically there will be just one, an occasion-varying response variable.} \item{ignore}{an optional character vector of names of variables in the long form of the data to exclude from the wide data set.} } \value{ a data frame in "wide" format, with one row for each subject, columns representing the between subjects factors, and columns for the occasion-varying variable(s) for each combination of within-subjects factors. } \description{ Reshape Repeated-Measures Data from Long to Wide Format } \details{ A simple front-end to the standard R \code{\link[stats]{reshape}} function. The data are assumed to be in "long" format, with several rows for each subject. Between-subjects variables don't vary by occasions for each subject. Variables that aren't listed explicitly in the arguments to the function are assumed to be between-subjects variables, and a warning is printed if their values aren't invariant for each subject (see the \code{ignore} argument). 
Within-subjects factors vary by occasions for each subject, and it is assumed that the within-subjects design is regular, completely crossed, and balanced, so that the same combinations of within-subjects factors are observed for each subject. Occasion-varying variables, as their name implies, (potentially) vary by occasions for each subject, and include one or more "response" variables, possibly along with occasion-varying covariates; these variables can be factors as well as numeric variables. The data are reshaped so that there is one row per subject, with columns for the between-subjects variables, and each occasion-varying variable as multiple columns representing the combinations of levels of the within-subjects factors. The names of the columns for the occasion-varying variables are composed from the combinations of levels of the within-subjects factors and from the names of the occasion-varying variables. If a subject in the long form of the data set lacks any combination of levels of within-subjects factors, he or she is excluded (with a warning) from the wide form of the data. } \examples{ OBW <- reshapeL2W(OBrienKaiserLong, within=c("phase", "hour"), id="id", varying="score") brief(OBW) # should be the same as OBrienKaiser in the carData package: all.equal(OBrienKaiser, OBW, check.attributes=FALSE) } \seealso{ \code{\link[stats]{reshape}}, \code{\link[carData]{OBrienKaiser}}, \code{\link[carData]{OBrienKaiserLong}}. } \author{ John Fox } \keyword{manip} RcmdrMisc/man/plotBoot.Rd0000644000176200001440000000244515124673134015014 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotBoot.R \name{plotBoot} \alias{plotBoot} \alias{plotBoot.boot} \title{Plot Bootstrap Distributions} \usage{ plotBoot(object, confint = NULL, ...) \method{plotBoot}{boot}(object, confint = NULL, ...) 
} \arguments{ \item{object}{an object of class \code{"boot"}.} \item{confint}{an object of class \code{"confint.boot"} (or an ordinary 2-column matrix) containing confidence limits for the parameters in \code{object}; if \code{NULL} (the default), these are computed from the first argument, using the defaults for \code{"boot"} objects.} \item{\dots}{not used} } \value{ Invisibly returns the object produced by \code{densityPlot}. } \description{ Plot Bootstrap Distributions } \details{ The function takes an object of class \code{"boot"} and creates an array of density estimates for the bootstrap distributions of the parameters. Creates an array of adaptive kernal density plots, using \code{\link[car]{densityPlot}} in the \pkg{car} package, showing the bootstrap distribution, point estimate ,and (optionally) confidence limits for each parameter. } \examples{ \dontrun{ plotBoot(Boot(lm(prestige ~ income + education + type, data=Duncan))) } } \seealso{ \code{\link[car]{densityPlot}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/readSPSS.Rd0000644000176200001440000000365415124673134014641 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readSPSS.R \name{readSPSS} \alias{readSPSS} \title{Read an SPSS Data Set} \usage{ readSPSS( file, rownames = FALSE, stringsAsFactors = FALSE, tolower = TRUE, use.value.labels = TRUE, use.haven = !por ) } \arguments{ \item{file}{path to an SPSS \code{.sav} or \code{.por} file.} \item{rownames}{if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names, which should be unique.} \item{stringsAsFactors}{if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors and factors are created from SPSS value labels.} \item{tolower}{change variable names to lowercase, default \code{TRUE}.} \item{use.value.labels}{if \code{TRUE}, the default, variables with value labels in the SPSS data set will become either 
factors or character variables (depending on the \code{stringsAsFactors} argument) with the value labels as their levels or values. As for \code{\link[foreign]{read.spss}}, this is only done if there are at least as many labels as values of the variable (and values without a matching label are returned as \code{NA}).} \item{use.haven}{use \code{\link[haven]{read_spss}} from the \pkg{haven} package to read the file, in preference to \code{\link[foreign]{read.spss}} from the \pkg{foreign} package; the default is \code{TRUE} for a \code{.sav} file and \code{FALSE} for a \code{.por} file.} } \value{ a data frame. } \description{ Read an SPSS Data Set } \details{ \code{readSPSS} reads an SPSS data set, stored in a file of type \code{.sav} or \code{.por}, into an R data frame; it provides a front end to the \code{\link[haven]{read_spss}} function in the \pkg{haven} package and the \code{\link[foreign]{read.spss}} function in the \pkg{foreign} package. } \seealso{ \code{\link[haven]{read_spss}}, \code{\link[foreign]{read.spss}} } \author{ John Fox } \keyword{manip} RcmdrMisc/man/summarySandwich.Rd0000644000176200001440000000317415124673134016370 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/summarySandwich.R \name{summarySandwich} \alias{summarySandwich} \alias{summarySandwich.lm} \title{Linear Model Summary with Sandwich Standard Errors} \usage{ summarySandwich(model, ...) \method{summarySandwich}{lm}(model, type = c("hc3", "hc0", "hc1", "hc2", "hc4", "hac"), ...) 
} \arguments{ \item{model}{a linear-model object.} \item{\dots}{arguments to be passed to \code{hccm} or \code{vcovHAC}.} \item{type}{type of sandwich standard errors to be computed; see \code{\link[car]{hccm}} in the \pkg{car} package, and \code{\link[sandwich]{vcovHAC}} in the \pkg{sandwich} package, for details.} } \value{ an object of class \code{"summary.lm"}, with sandwich standard errors substituted for the usual OLS standard errors; the omnibus F-test is similarly adjusted. } \description{ Linear Model Summary with Sandwich Standard Errors } \details{ \code{summarySandwich} creates a summary of a \code{"lm"} object similar to the standard one, with sandwich estimates of the coefficient standard errors in the place of the usual OLS standard errors, also modifying as a consequence the reported t-tests and p-values for the coefficients. Standard errors may be computed from a heteroscedasticity-consistent ("HC") covariance matrix for the coefficients (of several varieties), or from a heteroscedasticity-and-autocorrelation-consistent ("HAC") covariance matrix. } \examples{ mod <- lm(prestige ~ income + education + type, data=Prestige) summary(mod) summarySandwich(mod) } \seealso{ \code{\link[car]{hccm}}, \code{\link[sandwich]{vcovHAC}}. } \author{ John Fox } \keyword{misc} RcmdrMisc/man/KMeans.Rd0000644000176200001440000000302015124673133014355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster.R \name{KMeans} \alias{KMeans} \title{K-Means Clustering Using Multiple Random Seeds} \usage{ KMeans(x, centers, iter.max = 10, num.seeds = 10) } \arguments{ \item{x}{A numeric matrix of data, or an object that can be coerced to such a matrix (such as a numeric vector or a dataframe with all numeric columns).} \item{centers}{The number of clusters in the solution.} \item{iter.max}{The maximum number of iterations allowed.} \item{num.seeds}{The number of different starting random seeds to use. 
Each random seed results in a different k-means solution.} } \value{ A list with components: \describe{ \item{cluster}{A vector of integers indicating the cluster to which each point is allocated.} \item{centers}{A matrix of cluster centres (centroids).} \item{withinss}{The within-cluster sum of squares for each cluster.} \item{tot.withinss}{The within-cluster sum of squares summed across clusters.} \item{betweenss}{The between-cluster sum of squared distances.} \item{size}{The number of points in each cluster.} } } \description{ K-Means Clustering Using Multiple Random Seeds } \details{ Finds a number of k-means clustering solutions using R's \code{kmeans} function, and selects as the final solution the one that has the minimum total within-cluster sum of squared distances. } \examples{ data(USArrests) KMeans(USArrests, centers=3, iter.max=5, num.seeds=5) } \seealso{ \code{\link[stats]{kmeans}} } \author{ Dan Putler } \keyword{misc} RcmdrMisc/man/lineplot.Rd0000644000176200001440000000146615124673134015042 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{lineplot} \alias{lineplot} \title{Plot one or more lines} \usage{ lineplot(x, ..., legend) } \arguments{ \item{x}{variable giving horizontal coordinates.} \item{\dots}{one or more variables giving vertical coordinates.} \item{legend}{plot legend? Default is \code{TRUE} if there is more than one variable to plot and \code{FALSE} if there is just one.} } \value{ Produces a plot; returns \code{NULL} invisibly. } \description{ Plot one or more lines } \details{ This function plots lines for one or more variables against another variable, typically time series against time. 
} \examples{ data(Bfox) Bfox$time <- as.numeric(rownames(Bfox)) with(Bfox, lineplot(time, menwage, womwage)) } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/normalityTest.Rd0000644000176200001440000000435715124673133016073 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/normalityTest.R \name{normalityTest} \alias{normalityTest} \alias{normalityTest.default} \alias{normalityTest.formula} \title{Normality Tests} \usage{ normalityTest(x, ...) \method{normalityTest}{formula}(formula, test, data, ...) \method{normalityTest}{default}( x, test = c("shapiro.test", "ad.test", "cvm.test", "lillie.test", "pearson.test", "sf.test"), groups, vname, gname, ... ) } \arguments{ \item{x}{numeric vector or formula.} \item{\dots}{any arguments to be passed down; the only useful such arguments are for the \code{\link[nortest]{pearson.test}} function in the \pkg{nortest} package.} \item{formula}{one-sided formula of the form \code{~x} or two-sided formula of the form \code{x ~ groups}, where \code{x} is a numeric variable and \code{groups} is a factor.} \item{test}{quoted name of the function to perform the test.} \item{data}{a data frame containing the data for the test.} \item{groups}{optional factor to divide the data into groups.} \item{vname}{optional name for the variable; if absent, taken from \code{x}.} \item{gname}{optional name for the grouping factor; if absent, taken from \code{groups}.} } \value{ If testing by groups, the function invisibly returns \code{NULL}; otherwise it returns an object of class \code{"htest"}, which normally would be printed. } \description{ Normality Tests } \details{ Perform one of several tests of normality, either for a variable or for a variable by groups. The \code{normalityTest} function uses the \code{\link[stats]{shapiro.test}} function or one of several functions in the \pkg{nortest} package. 
If tests are done by groups, then adjusted p-values, computed by the Holm method, are also reported (see \code{\link[stats]{p.adjust}}). } \examples{ data(Prestige, package="car") with(Prestige, normalityTest(income)) normalityTest(income ~ type, data=Prestige, test="ad.test") normalityTest(~income, data=Prestige, test="pearson.test", n.classes=5) } \seealso{ \code{\link[stats]{shapiro.test}}, \code{\link[nortest]{ad.test}}, \code{\link[nortest]{cvm.test}}, \code{\link[nortest]{lillie.test}}, \code{\link[nortest]{pearson.test}}, \code{\link[nortest]{sf.test}}. } \author{ John Fox } \keyword{htest} RcmdrMisc/man/piechart.Rd0000644000176200001440000000232315124673134015004 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/piechart.R \name{piechart} \alias{piechart} \title{Draw a Piechart With Percents or Counts in the Labels} \usage{ piechart( x, scale = c("percent", "frequency", "none"), col = rainbow_hcl(nlevels(x)), ... ) } \arguments{ \item{x}{a factor or other discrete variable; the segments of the pie correspond to the unique values (levels) of \code{x} and are proportional to the frequency counts in the various levels.} \item{scale}{parenthetical numbers to add to the pie-segment labels; the default is \code{"percent"}.} \item{col}{colors for the segments; the default is provided by the \code{\link[colorspace]{rainbow_hcl}} function in the \pkg{colorspace} package.} \item{\dots}{further arguments to be passed to \code{\link[graphics]{pie}}.} } \description{ Draw a Piechart With Percents or Counts in the Labels } \details{ \code{piechart} is a front-end to the standard R \code{\link[graphics]{pie}} function, with the capability of adding percents or counts to the pie-segment labels. 
} \examples{ with(Duncan, piechart(type)) } \seealso{ \code{\link[graphics]{pie}}, \code{\link[colorspace]{rainbow_hcl}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/stepwise.Rd0000644000176200001440000000457515124677117015060 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/stepwise.R \name{stepwise} \alias{stepwise} \title{Stepwise Model Selection} \usage{ stepwise( mod, direction = c("backward/forward", "forward/backward", "backward", "forward"), criterion = c("BIC", "AIC"), ... ) } \arguments{ \item{mod}{a model object of a class that can be handled by \code{stepAIC}.} \item{direction}{if \code{"backward/forward"} (the default), selection starts with the full model and eliminates predictors one at a time, at each step considering whether the criterion will be improved by adding back in a variable removed at a previous step; if \code{"forward/backward"}, selection starts with a model including only a constant, and adds predictors one at a time, at each step considering whether the criterion will be improved by removing a previously added variable; \code{"backward"} and \code{"forward"} are similar without the reconsideration at each step.} \item{criterion}{for selection. Either \code{"BIC"} (the default) or \code{"AIC"}. Note that \code{stepAIC} labels the criterion in the output as \code{"AIC"} regardless of which criterion is employed.} \item{\dots}{arguments to be passed to \code{stepAIC}.} } \value{ The model selected by \code{stepAIC}. } \description{ Stepwise Model Selection } \details{ This function is a front end to the \code{\link[MASS]{stepAIC}} function in the \pkg{MASS} package. } \examples{ ## adapted from stepAIC in MASS ## Assigning bwt to the global environment is required to run this example within ## the browser-based help system. In other contexts, standard assignment can be used. 
if (require(MASS)){ data(birthwt) bwt <<- with(birthwt, { race <- factor(race, labels = c("white", "black", "other")) ptd <- factor(ptl > 0) ftv <- factor(ftv) levels(ftv)[-(1:2)] <- "2+" data.frame(low = factor(low), age, lwt, race, smoke = (smoke > 0), ptd, ht = (ht > 0), ui = (ui > 0), ftv) }) birthwt.glm <- glm(low ~ ., family = binomial, data = bwt) print(stepwise(birthwt.glm, trace = FALSE)) print(stepwise(birthwt.glm, direction="forward/backward")) } ## wrapper for stepAIC in the MASS package } \references{ W. N. Venables and B. D. Ripley \emph{Modern Applied Statistics with S, Fourth Edition} Springer, 2002. } \seealso{ \code{\link[MASS]{stepAIC}} } \author{ John Fox } \keyword{models} RcmdrMisc/man/mergeRows.Rd0000644000176200001440000000175715124673133015170 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mergeRows.R \name{mergeRows} \alias{mergeRows} \alias{mergeRows.data.frame} \title{Function to Merge Rows of Two Data Frames} \usage{ mergeRows(X, Y, common.only = FALSE, ...) \method{mergeRows}{data.frame}(X, Y, common.only = FALSE, ...) } \arguments{ \item{X}{First data frame.} \item{Y}{Second data frame.} \item{common.only}{If \code{TRUE}, only variables (columns) common to the two data frames are included in the merged data set; the default is \code{FALSE}.} \item{\dots}{Not used.} } \value{ A data frame containing the rows from both input data frames. } \description{ Function to Merge Rows of Two Data Frames } \details{ This function merges two data frames by combining their rows. } \examples{ data(Duncan) D1 <- Duncan[1:20,] D2 <- Duncan[21:45,] D <- mergeRows(D1, D2) print(D) dim(D) } \seealso{ For column merges and more complex merges, see \code{\link[base]{merge}}. 
} \author{ John Fox } \keyword{manip} RcmdrMisc/man/Gumbel.Rd0000644000176200001440000000272615124673133014426 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/gumbel.R \name{Gumbel} \alias{Gumbel} \alias{dgumbel} \alias{pgumbel} \alias{qgumbel} \alias{rgumbel} \title{The Gumbel Distribution} \usage{ dgumbel(x, location = 0, scale = 1) pgumbel(q, location = 0, scale = 1, lower.tail = TRUE) qgumbel(p, location = 0, scale = 1, lower.tail = TRUE) rgumbel(n, location = 0, scale = 1) } \arguments{ \item{x}{vector of values of the variable.} \item{location}{location parameter (default \code{0}); potentially a vector.} \item{scale}{scale parameter (default \code{1}); potentially a vector.} \item{q}{vector of quantiles.} \item{lower.tail}{logical; if \code{TRUE} (the default) probabilities and quantiles correspond to \eqn{P(X \le x)}, if \code{FALSE} to \eqn{P(X > x)}.} \item{p}{vector of probabilities.} \item{n}{number of observations. If \code{length(n)} > 1, the length is taken to be the number required.} } \description{ The Gumbel Distribution } \details{ Density, distribution function, quantile function and random generation for the Gumbel distribution with specified \code{location} and \code{scale} parameters. } \examples{ x <- 100 + 5*c(-Inf, -1, 0, 1, 2, 3, Inf, NA) dgumbel(x, 100, 5) pgumbel(x, 100, 5) p <- c(0, .25, .5, .75, 1, NA) qgumbel(p, 100, 5) summary(rgumbel(1e5, 100, 5)) } \references{ See \url{https://en.wikipedia.org/wiki/Gumbel_distribution} for details of the Gumbel distribution. 
} \author{ John Fox } \keyword{distribution} RcmdrMisc/man/partial.cor.Rd0000644000176200001440000000231415124673134015423 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/partial.cor.R \name{partial.cor} \alias{partial.cor} \alias{print.partial.cor} \title{Partial Correlations} \usage{ partial.cor(X, tests = FALSE, use = c("complete.obs", "pairwise.complete.obs")) \method{print}{partial.cor}(x, digits = max(3, getOption("digits") - 2), ...) } \arguments{ \item{X}{data matrix.} \item{tests}{show two-sided p-value and p-value adjusted for multiple testing by Holm's method for each partial correlation.} \item{use}{observations to use to compute partial correlations, default is \code{"complete.obs"}.} \item{x}{data matrix.} \item{digits}{minimal number of _significant_ digits, see \code{\link[base]{print.default}}.} \item{\dots}{arguments to pass down from the print method.} } \value{ Returns the matrix of partial correlations, optionally with adjusted and unadjusted p-values. } \description{ Partial Correlations } \details{ Computes a matrix of partial correlations between each pair of variables controlling for the others. } \examples{ data(DavisThin, package="car") partial.cor(DavisThin) partial.cor(DavisThin, tests=TRUE) } \seealso{ \code{\link[stats]{cor}} } \author{ John Fox } \keyword{misc} RcmdrMisc/man/reexports.Rd0000644000176200001440000000062515126145573015246 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readXL.R \docType{import} \name{reexports} \alias{reexports} \alias{excel_sheets} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ \item{readxl}{\code{\link[readxl]{excel_sheets}}} }} RcmdrMisc/man/colPercents.Rd0000644000176200001440000000233115124673133015464 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Percents.R \name{colPercents} \alias{colPercents} \alias{rowPercents} \alias{totPercents} \title{Row, Column, and Total Percentage Tables} \usage{ colPercents(tab, digits = 1) rowPercents(tab, digits = 1) totPercents(tab, digits = 1) } \arguments{ \item{tab}{a matrix or higher-dimensional array of frequency counts.} \item{digits}{number of places to the right of the decimal place for percentages.} } \value{ Returns an array of the same size and shape as \code{tab} percentaged by rows or columns, plus rows or columns of totals and counts, or by the table total. } \description{ Row, Column, and Total Percentage Tables } \details{ Percentage a matrix or higher-dimensional array of frequency counts by rows, columns, or total frequency. } \examples{ data(Mroz) # from car package cat("\n\n column percents:\n") print(colPercents(xtabs(~ lfp + wc, data=Mroz))) cat("\n\n row percents:\n") print(rowPercents(xtabs(~ hc + lfp, data=Mroz))) cat("\n\n total percents:\n") print(totPercents(xtabs(~ hc + wc, data=Mroz))) cat("\n\n three-way table, column percents:\n") print(colPercents(xtabs(~ lfp + wc + hc, data=Mroz))) } \author{ John Fox } \keyword{misc} RcmdrMisc/man/readXL.Rd0000644000176200001440000000270315126147232014363 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readXL.R \name{readXL} \alias{readXL} \title{Read an Excel File} \usage{ readXL( file, rownames = FALSE, header = TRUE, na = "", sheet = 1, stringsAsFactors = FALSE ) } \arguments{ \item{file}{name of an Excel file including its path.} \item{rownames}{if \code{TRUE} (the default is \code{FALSE}), the first column in the spreadsheet contains row names (which must be unique---i.e., no duplicates).} \item{header}{if \code{TRUE} (the 
default), the first row in the spreadsheet contains column (variable) names.} \item{na}{character string denoting missing data; the default is the empty string, \code{""}.} \item{sheet}{number of the spreadsheet in the file containing the data to be read; the default is \code{1}.} \item{stringsAsFactors}{if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors.} } \value{ a data frame. } \description{ Read an Excel File } \details{ \code{readXL} reads an Excel file, either of type \code{.xls} or \code{.xlsx} into an R data frame; it provides a front end to the \code{\link[readxl]{read_excel}} function in the \pkg{readxl} package. \code{\link[readxl]{excel_sheets}} is re-exported from the \pkg{readxl} package and reports the names of spreadsheets in an Excel file. } \seealso{ \code{\link[readxl]{read_excel}}, \code{\link[readxl]{excel_sheets}}. } \author{ John Fox } \keyword{manip} RcmdrMisc/man/RcmdrMisc-package.Rd0000644000176200001440000000122415126146147016461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcmdrMisc-package.R \name{RcmdrMisc-package} \alias{RcmdrMisc-package} \title{R Commander Miscellaneous Functions} \description{ R Commander Miscellaneous Functions } \details{ \preformatted{ Package: RcmdrMisc Type: Package Version: 2.10.1 Date: 2026-01-03 License: GPL (>=3) } Various statistical, graphics, and data-management functions used by the Rcmdr package in the R Commander GUI for R. 
} \seealso{ \url{https://www.r-project.org}, \url{https://facsocsci.mcmaster.ca/jfox/} } \author{ John Fox Maintainer: Manuel Munoz-Marquez } RcmdrMisc/man/plotDistr.Rd0000644000176200001440000000373015124673134015174 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{plotDistr} \alias{plotDistr} \title{Plot a probability density, mass, or distribution function.} \usage{ plotDistr( x, p, discrete = FALSE, cdf = FALSE, regions = NULL, col = "gray", legend = TRUE, legend.pos = "topright", ... ) } \arguments{ \item{x}{horizontal coordinates} \item{p}{vertical coordinates} \item{discrete}{is the random variable discrete?} \item{cdf}{is this a cumulative distribution (as opposed to mass) function?} \item{regions}{for continuous distributions only, if non-\code{NULL}, a list of regions to fill with color \code{col}; each element of the list is a pair of \code{x} values with the minimum and maximum horizontal coordinates of the corresponding region.} \item{col}{color for plot, \code{col} may be a single value or a vector.} \item{legend}{plot a legend of the regions (default \code{TRUE}).} \item{legend.pos}{position for the legend (see \code{\link[graphics]{legend}}, default \code{"topright"}).} \item{\dots}{arguments to be passed to \code{plot}.} } \value{ Produces a plot; returns \code{NULL} invisibly. } \description{ Plot a probability density, mass, or distribution function. } \details{ This function plots a probability density, mass, or distribution function, adapting the form of the plot as appropriate. 
} \examples{ x <- seq(-4, 4, length=100) plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density") plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density", regions=list(c(1.96, Inf), c(-Inf, -1.96)), col=c("red", "blue"), new = TRUE) plotDistr(x, dnorm(x), xlab="Z", ylab="p(z)", main="Standard Normal Density", regions=list(c(qnorm(0), qnorm(.025)), c(qnorm(.975), qnorm(1)))) # same x <- 0:10 plotDistr(x, pbinom(x, 10, 0.5), xlab="successes", discrete=TRUE, cdf=TRUE, main="Binomial Distribution Function, p=0.5, n=10") } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/Dotplot.Rd0000644000176200001440000000310615124673133014631 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Dotplot.R \name{Dotplot} \alias{Dotplot} \title{Dot Plots} \usage{ Dotplot(x, by, bin = FALSE, breaks, xlim, xlab = deparse(substitute(x))) } \arguments{ \item{x}{a numeric variable.} \item{by}{optionally a factor (or character or logical variable) by which to classify \code{x}.} \item{bin}{if \code{TRUE} (the default is \code{FALSE}), the values of \code{x} are binned, as in a histogram, prior to plotting.} \item{breaks}{breaks for the bins, in a form acceptable to the \code{\link[graphics]{hist}} function; the default is \code{"Sturges"}.} \item{xlim}{optional 2-element numeric vector giving limits of the horizontal axis.} \item{xlab}{optional character string to label horizontal axis.} } \value{ Returns \code{NULL} invisibly. } \description{ Dot Plots } \details{ Dot plot of numeric variable, either using raw values or binned, optionally classified by a factor. Dot plots are useful for visualizing the distribution of a numeric variable in a small data set. If the \code{by} argument is specified, then one dot plot is produced for each level of \code{by}; these are arranged vertically and all use the same scale for \code{x}. 
An attempt is made to adjust the size of the dots to the space available without making them too big. } \examples{ data(Duncan) with(Duncan, Dotplot(education)) with(Duncan, Dotplot(education, bin=TRUE)) with(Duncan, Dotplot(education, by=type)) with(Duncan, Dotplot(education, by=type, bin=TRUE)) } \seealso{ \code{\link[graphics]{hist}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/readSAS.Rd0000644000176200001440000000165215124673134014473 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/readSAS.R \name{readSAS} \alias{readSAS} \title{Read a SAS b7dat Data Set} \usage{ readSAS(file, rownames = FALSE, stringsAsFactors = FALSE) } \arguments{ \item{file}{path to a SAS b7dat file.} \item{rownames}{if \code{TRUE} (the default is \code{FALSE}), the first column in the data set contains row names (which must be unique---i.e., no duplicates).} \item{stringsAsFactors}{if \code{TRUE} (the default is \code{FALSE}) then columns containing character data are converted to factors.} } \value{ a data frame. } \description{ Read a SAS b7dat Data Set } \details{ \code{readSAS} reads a SAS ``b7dat'' data set, stored in a file of type \code{.sas7bdat}, into an R data frame; it provides a front end to the \code{\link[haven]{read_sas}} function in the \pkg{haven} package. } \seealso{ \code{\link[haven]{read_sas}} } \author{ John Fox } \keyword{manip} RcmdrMisc/man/indexplot.Rd0000644000176200001440000000371115124673134015215 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{indexplot} \alias{indexplot} \title{Index Plots} \usage{ indexplot( x, groups, labels = seq_along(x), id.method = "y", type = "h", id.n = 0, ylab, legend = "topright", title, col = palette(), ... 
) } \arguments{ \item{x}{a numeric variable, a matrix whose columns are numeric variables, or a numeric data frame; if \code{x} is a matrix or data frame, plots vertically aligned index plots for the columns.} \item{groups}{an optional grouping variable, typically a factor (or character or logical variable).} \item{labels}{point labels; if \code{x} is a data frame, defaults to the row names of \code{x}, otherwise to the case index.} \item{id.method}{method for identifying points; see \code{\link[car]{showLabels}}.} \item{type}{to be passed to \code{\link{plot}}.} \item{id.n}{number of points to identify; see \code{\link[car]{showLabels}}.} \item{ylab}{label for vertical axis; if missing, will be constructed from \code{x}; for a data frame, defaults to the column names.} \item{legend}{see \code{\link[graphics]{legend}}) giving location of the legend if \code{groups} are specified; if \code{legend=FALSE}, the legend is suppressed.} \item{title}{title for the legend; may normally be omitted.} \item{col}{vector of colors for the \code{groups}.} \item{\dots}{to be passed to \code{plot}.} } \value{ Returns labelled indices of identified points or (invisibly) \code{NULL} if no points are identified or if there are multiple variables with some missing data. } \description{ Index Plots } \details{ Index plots with point identification. 
} \examples{ with(Prestige, indexplot(income, id.n=2, labels=rownames(Prestige))) with(Prestige, indexplot(Prestige[, c("income", "education", "prestige")], groups = Prestige$type, id.n=2)) } \seealso{ \code{\link[car]{showLabels}}, \code{\link[graphics]{plot.default}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/numSummary.Rd0000644000176200001440000000430615124673133015364 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/numSummary.R \name{numSummary} \alias{numSummary} \alias{print.numSummary} \title{Summary Statistics for Numeric Variables} \usage{ numSummary( data, statistics = c("mean", "sd", "se(mean)", "var", "cv", "IQR", "quantiles", "skewness", "kurtosis"), type = c("2", "1", "3"), quantiles = c(0, 0.25, 0.5, 0.75, 1), groups ) \method{print}{numSummary}(x, ...) } \arguments{ \item{data}{a numeric vector, matrix, or data frame.} \item{statistics}{any of \code{"mean"}, \code{"sd"}, \code{"se(mean)"}, \code{"var"}, \code{"cv"}, \code{"IQR"}, \code{"quantiles"}, \code{"skewness"}, or \code{"kurtosis"}, defaulting to \code{c("mean", "sd", "quantiles", "IQR")}.} \item{type}{definition to use in computing skewness and kurtosis; see the \code{\link[e1071]{skewness}} and \code{\link[e1071]{kurtosis}} functions in the \pkg{e1071} package. The default is \code{"2"}.} \item{quantiles}{quantiles to report; default is \code{c(0, 0.25, 0.5, 0.75, 1)}.} \item{groups}{optional variable, typically a factor, to be used to partition the data.} \item{x}{object of class \code{"numSummary"} to print, or for \code{CV}, a numeric vector or matrix.} \item{\dots}{arguments to pass down from the print method.} } \value{ \code{numSummary} returns an object of class \code{"numSummary"} containing the table of statistics to be reported along with information on missing data, if there are any. 
} \description{ Summary Statistics for Numeric Variables } \details{ \code{numSummary} creates neatly formatted tables of means, standard deviations, coefficients of variation, skewness, kurtosis, and quantiles of numeric variables. \code{CV} computes the coefficient of variation. } \examples{ data(Prestige) Prestige[1, "income"] <- NA print(numSummary(Prestige[,c("income", "education")], statistics=c("mean", "sd", "quantiles", "cv", "skewness", "kurtosis"))) print(numSummary(Prestige[,c("income", "education")], groups=Prestige$type)) } \seealso{ \code{\link[base]{mean}}, \code{\link[stats]{sd}}, \code{\link{cv}}, \code{\link[stats]{quantile}}, \code{\link[e1071]{skewness}}, \code{\link[e1071]{kurtosis}}. } \author{ John Fox } \keyword{misc} RcmdrMisc/man/reshapeW2L.Rd0000644000176200001440000001063215124673134015163 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/reshapeW2L.R \name{reshapeW2L} \alias{reshapeW2L} \title{Reshape Repeated-Measures Data from Wide to Long Format} \usage{ reshapeW2L(data, within, levels, varying, ignore, id = "id") } \arguments{ \item{data}{wide version of data set.} \item{within}{a character vector of names for the crossed within-subjects factors to be created in the long form of the data.} \item{levels}{a named list of character vectors, each element giving the names of the levels for a within-subjects factor; the names of the list elements are the names of the within-subjects factor, given in the \code{within} argument.} \item{varying}{a named list of the names of variables in the wide data set specifying the occasion-varying variables to be created in the long data set; each element in the list is named for an occasion-varying variable and is a character vector of column names in the wide data for that occasion-varying variable.} \item{ignore}{a character vector of names of variables in the wide data to be dropped in the long form of the data.} \item{id}{the (character) name of the 
subject ID variable to be created in the long form of the data, default \code{"id"}.} } \value{ a data frame in "long" format, with multiple rows for each subject (equal to the number of combinations of levels of the within-subject factors) and one column for each between-subjects and occasion-varying variable. } \description{ Reshape Repeated-Measures Data from Wide to Long Format } \details{ The data are assumed to be in "wide" format, with a single row for each subject, and different columns for values of one or more repeated-measures variables classified by one or more within-subjects factors. Between-subjects variables don't vary by occasions for each subject. Variables that aren't listed explicitly in the arguments to the function are assumed to be between-subjects variables. The values of these variables are duplicated in each row pertaining to a given subject. Within-subjects factors vary by occasions for each subject, and it is assumed that the within-subjects design is regular, completely crossed, and balanced, so that the same combinations of within-subjects factors are observed for each subject. There are typically one or two within-subject factors. Occasion-varying variables, as their name implies, (potentially) vary by occasions for each subject, and include one or more "response" variables, possibly along with occasion-varying covariates; these variables can be factors as well as numeric variables. Each occasion-varying variable is encoded in multiple columns of the wide form of the data and in a single column in the long form. There is typically one occasion-varying variable, a response variable. There is one value of each occasion-varying variable for each combination of levels of the within-subjects factors. 
Thus, the number of variables in the wide data for each occasion-varying variable must be equal to the product of levels of the within-subjects factors, with the levels of the within-subjects factors varying most quickly from right to left in the \code{within} argument. } \examples{ OBrienKaiserL <- reshapeW2L(OBrienKaiser, within=c("phase", "hour"), levels=list(phase=c("pre", "post", "fup"), hour=1:5), varying=list(score=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5", "post.1", "post.2", "post.3", "post.4", "post.5", "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"))) brief(OBrienKaiserL, c(15, 15)) m1 <- Tapply(score ~ phase + hour + treatment + gender, mean, data=OBrienKaiserL) m2 <- Tapply(score ~ phase + hour + treatment + gender, mean, data=OBrienKaiserLong) all.equal(m1, m2) # should be equal OBrienKaiserL2 <- reshapeW2L(OBrienKaiser, within="phase", levels=list(phase=c("pre", "post", "fup")), ignore=c("pre.2", "pre.3", "pre.4", "pre.5", "post.2", "post.3", "post.4", "post.5", "fup.2", "fup.3", "fup.4", "fup.5"), varying=list(score=c("pre.1", "post.1", "fup.1"))) brief(OBrienKaiserL2, c(6, 6)) m1 <- Tapply(score ~ phase + treatment + gender, mean, data=OBrienKaiserL2) m2 <- Tapply(score ~ phase + treatment + gender, mean, data=subset(OBrienKaiserLong, hour==1)) all.equal(m1, m2) # should be equal } \seealso{ \code{\link{reshapeL2W}}, \code{\link[stats]{reshape}}, \code{\link[carData]{OBrienKaiser}}, \code{\link[carData]{OBrienKaiserLong}}. } \author{ John Fox } \keyword{manip} RcmdrMisc/man/discretePlot.Rd0000644000176200001440000000360715124673133015653 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/discretePlot.R \name{discretePlot} \alias{discretePlot} \title{Plot Distribution of Discrete Numeric Variable} \usage{ discretePlot( x, by, scale = c("frequency", "percent"), xlab = deparse(substitute(x)), ylab = scale, main = "", xlim = NULL, ylim = NULL, ... 
) } \arguments{ \item{x}{a numeric variable.} \item{by}{optionally a factor (or character or logical variable) by which to classify \code{x}.} \item{scale}{either \code{"frequency"} (the default) or \code{"percent"}.} \item{xlab}{optional character string to label the horizontal axis.} \item{ylab}{optional character string to label the vertical axis.} \item{main}{optional main label for the plot (ignored if the \code{by} argument is specified).} \item{xlim}{two-element numeric vectors specifying the range of the x axes; if not specified, will be determined from the data.} \item{ylim}{two-element numeric vectors specifying the range of the y axes; if not specified, will be determined from the data; the lower limit of the y-axis should normally be 0 and a warning will be printed if it isn't.} \item{...}{other arguments to be passed to \code{\link[graphics:plot.default]{plot}}.} } \value{ Returns \code{NULL} invisibly. } \description{ Plot Distribution of Discrete Numeric Variable } \details{ Plot the distribution of a discrete numeric variable, optionally classified by a factor. If the \code{by} argument is specified, then one plot is produced for each level of \code{by}; these are arranged vertically and all use the same scale for the horizontal and vertical axes. } \examples{ data(mtcars) mtcars$cyl <- factor(mtcars$cyl) with(mtcars, discretePlot(carb)) with(mtcars, discretePlot(carb, scale="percent")) with(mtcars, discretePlot(carb, by=cyl)) } \seealso{ \code{\link{Hist}}, \code{\link{Dotplot}}. 
} \author{ John Fox } \keyword{hplot} RcmdrMisc/man/Barplot.Rd0000644000176200001440000000544415124673133014616 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Barplot.R \name{Barplot} \alias{Barplot} \title{Bar Plots} \usage{ Barplot( x, by, scale = c("frequency", "percent"), conditional = TRUE, style = c("divided", "parallel"), col = if (missing(by)) "gray" else rainbow_hcl(length(levels(by))), xlab = deparse(substitute(x)), legend.title = deparse(substitute(by)), ylab = scale, main = NULL, legend.pos = "above", label.bars = FALSE, ... ) } \arguments{ \item{x}{a factor (or character or logical variable).} \item{by}{optionally, a second factor (or character or logical variable).} \item{scale}{either \code{"frequency"} (the default) or \code{"percent"}.} \item{conditional}{if \code{TRUE} then percentages are computed separately for each value of \code{x} (i.e., conditional percentages of \code{by} within levels of \code{x}); if \code{FALSE} then total percentages are graphed; ignored if \code{scale="frequency"}.} \item{style}{for two-factor plots, either \code{"divided"} (the default) or \code{"parallel"}.} \item{col}{if \code{by} is missing, the color for the bars, defaulting to \code{"gray"}; otherwise colors for the levels of the \code{by} factor in two-factor plots, defaulting to colors provided by \code{\link[colorspace]{rainbow_hcl}} in the \pkg{colorspace} package.} \item{xlab}{an optional character string providing a label for the horizontal axis.} \item{legend.title}{an optional character string providing a title for the legend.} \item{ylab}{an optional character string providing a label for the vertical axis.} \item{main}{an optional main title for the plot.} \item{legend.pos}{position of the legend, in a form acceptable to the \code{\link[graphics]{legend}} function; the default, \code{"above"}, puts the legend above the plot.} \item{label.bars}{if \code{TRUE} (the default is \code{FALSE}) show values of 
frequencies or percents in the bars.} \item{...}{arguments to be passed to the \code{\link[graphics]{barplot}} function.} } \value{ Invisibly returns the horizontal coordinates of the centers of the bars. } \description{ Bar Plots } \details{ Create bar plots for one or two factors scaled by frequency or percentages. In the case of two factors, the bars can be divided (stacked) or plotted in parallel (side-by-side). This function is a front end to \code{\link[graphics]{barplot}} in the \pkg{graphics} package. } \examples{ with(Mroz, Barplot(wc)) with(Mroz, Barplot(wc, col="lightblue", label.bars=TRUE)) with(Mroz, Barplot(wc, by=hc)) with(Mroz, Barplot(wc, by=hc, scale="percent", label.bars=TRUE)) with(Mroz, Barplot(wc, by=hc, style="parallel", scale="percent", legend.pos="center")) } \seealso{ \code{\link[graphics]{barplot}}, \code{\link[graphics]{legend}}, \code{\link[colorspace]{rainbow_hcl}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/rcorr.adjust.Rd0000644000176200001440000000326615124673134015634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rcorr.adjust.R \name{rcorr.adjust} \alias{rcorr.adjust} \alias{print.rcorr.adjust} \title{Compute Pearson or Spearman Correlations with p-Values} \usage{ rcorr.adjust( x, type = c("pearson", "spearman"), use = c("complete.obs", "pairwise.complete.obs") ) \method{print}{rcorr.adjust}(x, ...) } \arguments{ \item{x}{a numeric matrix or data frame, or an object of class \code{"rcorr.adjust"} to be printed.} \item{type}{\code{"pearson"} or \code{"spearman"}, depending upon the type of correlations desired; the default is \code{"pearson"}.} \item{use}{how to handle missing data: \code{"complete.obs"}, the default, use only complete cases; \code{"pairwise.complete.obs"}, use all cases with valid data for each pair.} \item{\dots}{not used.} } \value{ Returns an object of class \code{"rcorr.adjust"}, which is normally just printed.
} \description{ Compute Pearson or Spearman Correlations with p-Values } \details{ This function uses the \code{\link[Hmisc]{rcorr}} function in the \pkg{Hmisc} package to compute matrices of Pearson or Spearman correlations along with the pairwise p-values among the correlations. The p-values are corrected for multiple inference using Holm's method (see \code{\link[stats]{p.adjust}}). Observations are filtered for missing data, and only complete observations are used. } \examples{ data(Mroz) print(rcorr.adjust(Mroz[,c("k5", "k618", "age", "lwg", "inc")])) print(rcorr.adjust(Mroz[,c("k5", "k618", "age", "lwg", "inc")], type="spearman")) } \seealso{ \code{\link[Hmisc]{rcorr}}, \code{\link[stats]{p.adjust}}. } \author{ John Fox, adapting code from Robert A. Muenchen. } \keyword{htest} RcmdrMisc/man/repeatedMeasuresPlot.Rd0000644000176200001440000000751715124673134017354 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/repeatedMeasuresPlot.R \name{repeatedMeasuresPlot} \alias{repeatedMeasuresPlot} \title{Plot Means for Repeated-Measures ANOVA Designs} \usage{ repeatedMeasuresPlot( data, within, within.names, within.levels, between.names = NULL, response.name = "score", trace, xvar, pch = 15:25, lty = 1:6, col = palette()[-1], plot.means = TRUE, print.tables = FALSE ) } \arguments{ \item{data}{a data frame in wide format.} \item{within}{a character vector with the names of the data columns containing the repeated measures.} \item{within.names}{a character vector with one or two elements, of names of the within-subjects factor(s).} \item{within.levels}{a named list whose elements are character vectors of level names for the within-subjects factors, with names corresponding to the names of the within-subjects factors; the product of the numbers of levels should be equal to the number of repeated-measures columns in \code{within}.} \item{between.names}{a character vector of names of the between-subjects factors (if any).}
\item{response.name}{optional quoted name for the response variable, defaults to \code{"score"}.} \item{trace}{optional quoted name of the (either within- or between-subjects) factor to define profiles of means in each panel of the graph; the default is the within-subjects factor with the smaller number of levels, if there are two, or not used if there is one.} \item{xvar}{optional quoted name of the factor to define the horizontal axis of each panel; the default is the within-subjects factor with the larger number of levels.} \item{pch}{vector of symbol numbers to use for the profiles of means (i.e., levels of the \code{trace} factor); for the meaning of the defaults, see \code{\link[graphics]{points}} and \code{\link[graphics]{par}}.} \item{lty}{vector of line-type numbers to use for the profiles of means.} \item{col}{vector of colors for the profiles of means; the default is given by \code{palette()}, starting at the second color.} \item{plot.means}{if \code{TRUE} (the default), draw a plot of means by the factors.} \item{print.tables}{if \code{TRUE} (the default is \code{FALSE}), print tables of means and standard deviations of the response by the factors.} } \value{ A \code{"trellis"} object, which normally is just "printed" (i.e., plotted). } \description{ Plot Means for Repeated-Measures ANOVA Designs } \details{ Creates a means plot for a repeated-measures ANOVA design with one or two within-subjects factors and zero or more between-subjects factors, for data in "wide" format.
} \examples{ repeatedMeasuresPlot( data=OBrienKaiser, within=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5", "post.1", "post.2", "post.3", "post.4", "post.5", "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"), within.names=c("phase", "hour"), within.levels=list(phase=c("pre", "post", "fup"), hour = c("1", "2", "3", "4", "5")), between.names=c("gender", "treatment"), response.name="improvement", print.tables=TRUE ) repeatedMeasuresPlot(data=OBrienKaiser, within=c("pre.1", "pre.2", "pre.3", "pre.4", "pre.5", "post.1", "post.2", "post.3", "post.4", "post.5", "fup.1", "fup.2", "fup.3", "fup.4", "fup.5"), within.names=c("phase", "hour"), within.levels=list(phase=c("pre", "post", "fup"), hour = c("1", "2", "3", "4", "5")), between.names=c("gender", "treatment"), trace="gender") # note that gender is between subjects repeatedMeasuresPlot( data=OBrienKaiser, within=c("fup.1", "fup.2", "fup.3", "fup.4", "fup.5"), within.names="hour", within.levels=list(hour = c("1", "2", "3", "4", "5")), between.names=c("treatment", "gender"), response.name="improvement") } \seealso{ \code{\link[car]{Anova}}, \code{\link[carData]{OBrienKaiser}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/plotMeans.Rd0000644000176200001440000000443415124673134015154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plots.R \name{plotMeans} \alias{plotMeans} \title{Plot Means for One or Two-Way Layout} \usage{ plotMeans( response, factor1, factor2, error.bars = c("se", "sd", "conf.int", "none"), level = 0.95, xlab = deparse(substitute(factor1)), ylab = paste("mean of", deparse(substitute(response))), legend.lab = deparse(substitute(factor2)), legend.pos = c("farright", "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right", "center"), main = "Plot of Means", pch = 1:n.levs.2, lty = 1:n.levs.2, col = palette(), connect = TRUE, ... 
) } \arguments{ \item{response}{Numeric variable for which means are to be computed.} \item{factor1}{Factor defining horizontal axis of the plot.} \item{factor2}{If present, factor defining profiles of means.} \item{error.bars}{If \code{"se"}, the default, error bars around means give plus or minus one standard error of the mean; if \code{"sd"}, error bars give plus or minus one standard deviation; if \code{"conf.int"}, error bars give a confidence interval around each mean; if \code{"none"}, error bars are suppressed.} \item{level}{level of confidence for confidence intervals; default is .95} \item{xlab}{Label for horizontal axis.} \item{ylab}{Label for vertical axis.} \item{legend.lab}{Label for legend.} \item{legend.pos}{Position of legend; if \code{"farright"} (the default), extra space is left at the right of the plot.} \item{main}{Label for the graph.} \item{pch}{Plotting characters for profiles of means.} \item{lty}{Line types for profiles of means.} \item{col}{Colours for profiles of means.} \item{connect}{connect profiles of means, default \code{TRUE}.} \item{\ldots}{arguments to be passed to \code{plot}.} } \value{ The function invisibly returns \code{NULL}. } \description{ Plot Means for One or Two-Way Layout } \details{ Plots cell means for a numeric variable in each category of a factor or in each combination of categories of two factors, optionally along with error bars based on cell standard errors or standard deviations. 
} \examples{ data(Moore) with(Moore, plotMeans(conformity, fcategory, partner.status, ylim=c(0, 25))) } \seealso{ \code{\link[stats]{interaction.plot}} } \author{ John Fox } \keyword{hplot} RcmdrMisc/man/cv.Rd0000644000176200001440000000150315124673133013613 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cv.R \name{cv} \alias{cv} \alias{CV} \title{Coefficient of variation} \usage{ cv(x, na.rm = TRUE) CV(x, na.rm = TRUE) } \arguments{ \item{x}{data a numeric vector, matrix, or data frame.} \item{na.rm}{if \code{TRUE} (the default) remove \code{NA}s before computing the coefficient of variation.} } \value{ \code{cv} returns the coefficient(s) of variation. } \description{ Coefficient of variation } \details{ \code{numSummary} creates neatly formatted tables of means, standard deviations, coefficients of variation, skewness, kurtosis, and quantiles of numeric variables. \code{CV} computes the coefficient of variation. } \examples{ data(Prestige) print(cv(Prestige[,c("income", "education")])) } \author{ John Fox } \keyword{internal} \keyword{misc} RcmdrMisc/man/binVariable.Rd0000644000176200001440000000241515124673133015424 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bin.var.R \name{binVariable} \alias{binVariable} \title{Bin a Numeric Variable} \usage{ binVariable( x, bins = 4, method = c("intervals", "proportions", "natural"), labels = FALSE ) } \arguments{ \item{x}{numeric variable to be binned.} \item{bins}{number of bins.} \item{method}{one of \code{"intervals"} for equal-width bins; \code{"proportions"} for equal-count bins; \code{"natural"} for cut points between bins to be determined by a k-means clustering.} \item{labels}{if \code{FALSE}, numeric labels will be used for the factor levels; if \code{NULL}, the cut points are used to define labels; otherwise a character vector of level names.} } \value{ A factor. 
} \description{ Bin a Numeric Variable } \details{ Create a factor dissecting the range of a numeric variable into bins of equal width, (roughly) equal frequency, or at "natural" cut points. The \code{\link[base]{cut}} function is used to create the factor. } \examples{ summary(binVariable(rnorm(100), method="prop", labels=letters[1:4])) } \seealso{ \code{\link[base]{cut}}, \code{\link[stats]{kmeans}}. } \author{ Dan Putler, slightly modified by John Fox (5 Dec 04 & 5 Mar 13) with the original author's permission. } \keyword{manip} RcmdrMisc/man/DeltaMethod.Rd0000644000176200001440000000271115124673133015377 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/DeltaMethod.R \name{DeltaMethod} \alias{DeltaMethod} \alias{print.DeltaMethod} \title{Confidence Intervals by the Delta Method} \usage{ DeltaMethod(model, g, level = 0.95) \method{print}{DeltaMethod}(x, ...) } \arguments{ \item{model}{a regression model; see the \code{\link[car]{deltaMethod}} documentation.} \item{g}{the expression --- that is, function of the coefficients --- to evaluate, as a character string.} \item{level}{the confidence level, defaults to \code{0.95}.} \item{x}{an object of class \code{"DeltaMethod"}.} \item{...}{optional arguments to pass to \code{print} to show the results.} } \value{ \code{DeltaMethod} returns an objects of class \code{"DeltaMethod"}, for which a \code{print} method is provided. } \description{ Confidence Intervals by the Delta Method } \details{ \code{DeltaMethod} is a wrapper for the \code{\link[car]{deltaMethod}} function in the \pkg{car} package. It computes the asymptotic standard error of an arbitrary, usually nonlinear, function of model coefficients, which are named \code{b0} (if there is an intercept in the model), \code{b1}, \code{b2}, etc., and based on the standard error, a confidence interval based on the normal distribution. 
} \examples{ DeltaMethod(lm(prestige ~ income + education, data=Duncan), "b1/b2") } \seealso{ \code{\link[car]{deltaMethod}} function in the \pkg{car} package. } \author{ John Fox } \keyword{models} RcmdrMisc/man/binnedCounts.Rd0000644000176200001440000000254515124673133015645 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/binnedCounts.R \name{binnedCounts} \alias{binnedCounts} \title{Binned Frequency Distributions of Numeric Variables} \usage{ binnedCounts( x, breaks = "Sturges", round.percents = 2, name = deparse(substitute(x)) ) } \arguments{ \item{x}{a numeric vector, matrix, or data frame.} \item{breaks}{specification of the breaks between bins, to be passed to the \code{\link[graphics]{hist}} function.} \item{round.percents}{number of decimal places to round percentages; default is \code{2}.} \item{name}{for the variable; only used for vector argument \code{x}.} } \value{ For a numeric vector, invisibly returns the vector of counts, named with the end-points of the corresponding bins. For a matrix or data frame, invisibly returns \code{NULL} } \description{ Binned Frequency Distributions of Numeric Variables } \details{ Bins a numeric variable, as for a histogram, and reports the count and percentage in each bin. The computations are done by the \code{\link[graphics]{hist}} function, but no histogram is drawn. If supplied a numeric matrix or data frame, the distribution of each column is printed. 
} \examples{ with(Prestige, binnedCounts(income)) binnedCounts(Prestige[, 1:4]) } \seealso{ \code{\link[graphics]{hist}}, \code{\link{discreteCounts}} } \author{ John Fox } \keyword{univar} RcmdrMisc/DESCRIPTION0000644000176200001440000000226215127645235013657 0ustar liggesusersPackage: RcmdrMisc Version: 2.10.1 Date: 2026-01-03 Encoding: UTF-8 Title: R Commander Miscellaneous Functions Authors@R: c(person("John", "Fox", role = c("aut"), email = "jfox@mcmaster.ca"), person("Manuel", "Munoz-Marquez", role = c("aut", "cre"), email = "manuel.munoz@uca.es"), person("Robert", "Muenchen", role = "ctb"), person("Dan", "Putler", role = "ctb") ) Depends: R (>= 3.5.0), utils, car (>= 3.0-0), sandwich Imports: abind, colorspace, Hmisc (>= 4.1-0), MASS, e1071, foreign, haven, readstata13, readxl, graphics, grDevices, stats, nortest, lattice Suggests: boot, datasets, carData ByteCompile: yes Description: Various statistical, graphics, and data-management functions used by the Rcmdr package in the R Commander GUI for R. License: GPL (>= 3) URL: https://cran.r-project.org/package=RcmdrMisc, https://github.com/RCmdr-Project/rcmdrmisc RoxygenNote: 7.3.2 NeedsCompilation: no Packaged: 2026-01-03 08:08:59 UTC; mmarquez Author: John Fox [aut], Manuel Munoz-Marquez [aut, cre], Robert Muenchen [ctb], Dan Putler [ctb] Maintainer: Manuel Munoz-Marquez Repository: CRAN Date/Publication: 2026-01-08 06:11:41 UTC