rsample/inst/doc/Working_with_rsets.R
## ----ex_setup, include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
message = FALSE,
digits = 3,
collapse = TRUE,
comment = "#>"
)
options(digits = 3, width = 90)
library(ggplot2)
theme_set(theme_bw())
## ----attrition, message=FALSE-----------------------------------------------------------
library(rsample)
data("attrition")
names(attrition)
table(attrition$Attrition)
## ----form, message=FALSE----------------------------------------------------------------
mod_form <- as.formula(Attrition ~ JobSatisfaction + Gender + MonthlyIncome)
## ----model_vfold, message=FALSE---------------------------------------------------------
library(rsample)
set.seed(4622)
rs_obj <- vfold_cv(attrition, v = 10, repeats = 10)
rs_obj
## ----lm_func----------------------------------------------------------------------------
## splits will be the `rsplit` object with the 90/10 partition
holdout_results <- function(splits, ...) {
# Fit the model to the 90%
mod <- glm(..., data = analysis(splits), family = binomial)
# Save the 10%
holdout <- assessment(splits)
# `augment` will save the predictions with the holdout data set
res <- broom::augment(mod, newdata = holdout)
# Class predictions on the assessment set from class probs
lvls <- levels(holdout$Attrition)
predictions <- factor(ifelse(res$.fitted > 0, lvls[2], lvls[1]),
levels = lvls)
# Calculate whether the prediction was correct
res$correct <- predictions == holdout$Attrition
# Return the assessment data set with the additional columns
res
}
## ----onefold, warning = FALSE-----------------------------------------------------------
example <- holdout_results(rs_obj$splits[[1]], mod_form)
dim(example)
dim(assessment(rs_obj$splits[[1]]))
## newly added columns:
example[1:10, setdiff(names(example), names(attrition))]
## ----model_purrr, warning=FALSE---------------------------------------------------------
library(purrr)
rs_obj$results <- map(rs_obj$splits,
holdout_results,
mod_form)
rs_obj
## ----model_acc--------------------------------------------------------------------------
rs_obj$accuracy <- map_dbl(rs_obj$results, function(x) mean(x$correct))
summary(rs_obj$accuracy)
## ----type_plot--------------------------------------------------------------------------
ggplot(attrition, aes(x = Gender, y = MonthlyIncome)) +
geom_boxplot() +
scale_y_log10()
## ----mean_diff--------------------------------------------------------------------------
median_diff <- function(splits) {
x <- analysis(splits)
median(x$MonthlyIncome[x$Gender == "Female"]) -
median(x$MonthlyIncome[x$Gender == "Male"])
}
## ----boot_mean_diff---------------------------------------------------------------------
set.seed(353)
bt_resamples <- bootstraps(attrition, times = 500)
## ----stats------------------------------------------------------------------------------
bt_resamples$wage_diff <- map_dbl(bt_resamples$splits, median_diff)
## ----stats_plot-------------------------------------------------------------------------
ggplot(bt_resamples, aes(x = wage_diff)) +
geom_line(stat = "density", adjust = 1.25) +
xlab("Difference in Median Monthly Income (Female - Male)")
## ----ci---------------------------------------------------------------------------------
quantile(bt_resamples$wage_diff,
probs = c(0.025, 0.500, 0.975))
## ----coefs------------------------------------------------------------------------------
glm_coefs <- function(splits, ...) {
## use `analysis` or `as.data.frame` to get the analysis data
mod <- glm(..., data = analysis(splits), family = binomial)
as.data.frame(t(coef(mod)))
}
bt_resamples$betas <- map(.x = bt_resamples$splits,
.f = glm_coefs,
mod_form)
bt_resamples
bt_resamples$betas[[1]]
## ----tidy_rsplit------------------------------------------------------------------------
first_resample <- bt_resamples$splits[[1]]
class(first_resample)
tidy(first_resample)
## ----tidy_rset--------------------------------------------------------------------------
class(bt_resamples)
tidy(bt_resamples)
rsample/inst/doc/Basics.R
## ----ex_setup, include=FALSE---------------------------------------------
knitr::opts_chunk$set(
message = FALSE,
digits = 3,
collapse = TRUE,
comment = "#>"
)
options(digits = 3)
## ----mtcars_bt, message=FALSE--------------------------------------------
library(rsample)
set.seed(8584)
bt_resamples <- bootstraps(mtcars, times = 3)
bt_resamples
## ----rsplit--------------------------------------------------------------
first_resample <- bt_resamples$splits[[1]]
first_resample
## ----rsplit_df-----------------------------------------------------------
head(as.data.frame(first_resample))
as.data.frame(first_resample, data = "assessment")
rsample/inst/doc/Working_with_rsets.Rmd
---
title: "Working with rsets"
vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{Working with rsets}
output:
knitr:::html_vignette:
toc: yes
---
```{r ex_setup, include=FALSE}
knitr::opts_chunk$set(
message = FALSE,
digits = 3,
collapse = TRUE,
comment = "#>"
)
options(digits = 3, width = 90)
library(ggplot2)
theme_set(theme_bw())
```
## Introduction
`rsample` can be used to create objects containing resamples of the original data. This page contains examples of how those objects can be used for data analysis.
For illustration, the `attrition` data is used. From the help file:
> These data are from the IBM Watson Analytics Lab. The website describes the data with "Uncover the factors that lead to employee attrition and explore important questions such as ‘show me a breakdown of distance from home by job role and attrition’ or 'compare average monthly income by education and attrition'. This is a fictional data set created by IBM data scientists." There are 1470 rows.
The data can be accessed using
```{r attrition, message=FALSE}
library(rsample)
data("attrition")
names(attrition)
table(attrition$Attrition)
```
## Model Assessment
Let's fit a logistic regression model to the data with model terms for job satisfaction, gender, and monthly income.
If we were fitting the model to the entire data set, we might model attrition using
```r
glm(Attrition ~ JobSatisfaction + Gender + MonthlyIncome, data = attrition, family = binomial)
```
For convenience, we'll create a formula object that will be used later:
```{r form, message=FALSE}
mod_form <- as.formula(Attrition ~ JobSatisfaction + Gender + MonthlyIncome)
```
To evaluate this model, we will use 10 repeats of 10-fold cross-validation, and the resulting 100 holdout samples will be used to evaluate the overall accuracy of the model.
First, let's make the splits of the data:
```{r model_vfold, message=FALSE}
library(rsample)
set.seed(4622)
rs_obj <- vfold_cv(attrition, v = 10, repeats = 10)
rs_obj
```
Now let's write a function that will, for each resample:
1. obtain the analysis data set (i.e. the 90% used for modeling)
1. fit a logistic regression model
1. predict the assessment data (the other 10% not used for the model) using the `broom` package
1. determine if each sample was predicted correctly.
Here is our function:
```{r lm_func}
## splits will be the `rsplit` object with the 90/10 partition
holdout_results <- function(splits, ...) {
# Fit the model to the 90%
mod <- glm(..., data = analysis(splits), family = binomial)
# Save the 10%
holdout <- assessment(splits)
# `augment` will save the predictions with the holdout data set
res <- broom::augment(mod, newdata = holdout)
# Class predictions on the assessment set from class probs
lvls <- levels(holdout$Attrition)
predictions <- factor(ifelse(res$.fitted > 0, lvls[2], lvls[1]),
levels = lvls)
# Calculate whether the prediction was correct
res$correct <- predictions == holdout$Attrition
# Return the assessment data set with the additional columns
res
}
```
For example:
```{r onefold, warning = FALSE}
example <- holdout_results(rs_obj$splits[[1]], mod_form)
dim(example)
dim(assessment(rs_obj$splits[[1]]))
## newly added columns:
example[1:10, setdiff(names(example), names(attrition))]
```
For this model, the `.fitted` value is the linear predictor in log-odds units.
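If class probabilities are needed instead, the log-odds can be converted with `plogis` from base R. A quick sketch (the rows shown are just the first few from `example` above):
```r
# `plogis` maps log-odds to probabilities; a `.fitted` value of 0
# corresponds to a 50% chance of attrition
plogis(0)
plogis(example$.fitted[1:3])
```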
To compute this data set for each of the 100 resamples, we'll use the `map` function from the `purrr` package:
```{r model_purrr, warning=FALSE}
library(purrr)
rs_obj$results <- map(rs_obj$splits,
holdout_results,
mod_form)
rs_obj
```
Now we can compute the accuracy values for all of the assessment data sets:
```{r model_acc}
rs_obj$accuracy <- map_dbl(rs_obj$results, function(x) mean(x$correct))
summary(rs_obj$accuracy)
```
Keep in mind that the baseline accuracy to beat is the rate of non-attrition, which is `r round(mean(attrition$Attrition == "No"), 3)`. Not a great model so far.
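As a rough check, we can also look at how often the resampled accuracy beats that baseline (a minimal sketch using the objects created above):
```r
baseline <- mean(attrition$Attrition == "No")
mean(rs_obj$accuracy > baseline)
```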
## Using the Bootstrap to Make Comparisons
Traditionally, the bootstrap has been primarily used to empirically determine the sampling distribution of a test statistic. Given a set of bootstrap samples (each drawn with replacement), a statistic can be calculated on each analysis set, and the results can be used to make inferences (such as confidence intervals).
For example, are there differences in the median monthly income between genders?
```{r type_plot}
ggplot(attrition, aes(x = Gender, y = MonthlyIncome)) +
geom_boxplot() +
scale_y_log10()
```
If we wanted to compare the genders, we could conduct a _t_-test or rank-based test; a sketch of those conventional tests appears after the next chunk. Instead, let's use the bootstrap to see if there is a difference in the median incomes for the two groups. We need a simple function to compute this statistic on each resample:
```{r mean_diff}
median_diff <- function(splits) {
x <- analysis(splits)
median(x$MonthlyIncome[x$Gender == "Female"]) -
median(x$MonthlyIncome[x$Gender == "Male"])
}
```
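As an aside, the conventional tests mentioned above are one-liners in base R; a sketch (not run here):
```r
t.test(MonthlyIncome ~ Gender, data = attrition)
# rank-based analogue:
wilcox.test(MonthlyIncome ~ Gender, data = attrition)
```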
Now we would create a large number of bootstrap samples (say 2000+). For illustration, we'll only do 500 in this document.
```{r boot_mean_diff}
set.seed(353)
bt_resamples <- bootstraps(attrition, times = 500)
```
This function is then computed across each resample:
```{r stats}
bt_resamples$wage_diff <- map_dbl(bt_resamples$splits, median_diff)
```
The bootstrap distribution of this statistic is slightly bimodal and skewed:
```{r stats_plot}
ggplot(bt_resamples, aes(x = wage_diff)) +
geom_line(stat = "density", adjust = 1.25) +
xlab("Difference in Median Monthly Income (Female - Male)")
```
The variation in this statistic is considerable. One method of computing a confidence interval is to take the percentiles of the bootstrap distribution. A 95% confidence interval for the difference in the medians would be:
```{r ci}
quantile(bt_resamples$wage_diff,
probs = c(0.025, 0.500, 0.975))
```
_On average_, there is no evidence for a difference in median monthly income between the genders; the 95% interval computed above spans zero.
## Bootstrap Estimates of Model Coefficients
Unless there is already a column in the resample object that contains the fitted model, a function can be used to fit the model and save all of the model coefficients. The [`broom` package](https://cran.r-project.org/package=broom) has a `tidy` function that will save the coefficients in a data frame. Instead of returning a data frame with a row for each model term, we will save a data frame with a single row and columns for each model term. As before, `purrr::map` can be used to estimate and save these values for each split.
```{r coefs}
glm_coefs <- function(splits, ...) {
## use `analysis` or `as.data.frame` to get the analysis data
mod <- glm(..., data = analysis(splits), family = binomial)
as.data.frame(t(coef(mod)))
}
bt_resamples$betas <- map(.x = bt_resamples$splits,
.f = glm_coefs,
mod_form)
bt_resamples
bt_resamples$betas[[1]]
```
## Keeping Tidy
As previously mentioned, the [`broom` package](https://cran.r-project.org/package=broom) contains a generic function called `tidy` that creates representations of objects that can be easily used for analysis, plotting, etc. `rsample` contains `tidy` methods for `rset` and `rsplit` objects. For example:
```{r tidy_rsplit}
first_resample <- bt_resamples$splits[[1]]
class(first_resample)
tidy(first_resample)
```
and
```{r tidy_rset}
class(bt_resamples)
tidy(bt_resamples)
```
rsample/inst/doc/Basics.Rmd
---
title: "Basics"
vignette: >
%\VignetteEngine{knitr::rmarkdown}
%\VignetteIndexEntry{Basics}
output:
knitr:::html_vignette:
toc: yes
---
```{r ex_setup, include=FALSE}
knitr::opts_chunk$set(
message = FALSE,
digits = 3,
collapse = TRUE,
comment = "#>"
)
options(digits = 3)
```
## Terminology
We define a _resample_ as the result of a two-way split of a data set. For example, when bootstrapping, one part of the resample is a sample with replacement of the original data. The other part of the split contains the instances that were not contained in the bootstrap sample. Cross-validation is another type of resampling.
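To make the idea concrete, here is that two-way split done by hand with base R for a ten-row toy data set (illustrative only; the package does this bookkeeping for you):
```r
set.seed(1)
n <- 10
in_id  <- sample.int(n, n, replace = TRUE)  # bootstrap sample: one part of the resample
out_id <- setdiff(seq_len(n), in_id)        # rows never drawn: the other part
in_id
out_id
```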
## `rset` Objects Contain Many Resamples
The main class in the package (`rset`) is for a _set_ or _collection_ of resamples. In 10-fold cross-validation, the set would consist of the 10 different resamples of the original data.
Like [`modelr`](https://cran.r-project.org/package=modelr), the resamples are stored in a data-frame-like `tibble` object. As a simple example, here is a small set of bootstraps of the `mtcars` data:
```{r mtcars_bt, message=FALSE}
library(rsample)
set.seed(8584)
bt_resamples <- bootstraps(mtcars, times = 3)
bt_resamples
```
## Individual Resamples are `rsplit` Objects
The individual resamples are stored in the `splits` column; each element of that column is an object of class `rsplit`.
In this package we use the following terminology for the two partitions that comprise a resample:
* The _analysis_ data are those that we selected in the resample. For a bootstrap, this is the sample with replacement. For 10-fold cross-validation, this is the 90% of the data. These data are often used to fit a model or calculate a statistic in traditional bootstrapping.
* The _assessment_ data are usually the section of the original data not covered by the analysis set. Again, in 10-fold CV, this is the 10% held out. These data are often used to evaluate the performance of a model that was fit to the analysis data.
(Aside: While some might use the terms "training" and "testing" for these data sets, we avoid them since those labels often conflict with the data sets that result from an initial partition of the data, which is typically done _before_ resampling. That training/test split can be conducted using the `initial_split` function in this package; a sketch follows.)
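A minimal sketch of that initial split (the object names here are illustrative):
```r
set.seed(123)
car_split <- initial_split(mtcars, prop = 3/4)
train_dat <- training(car_split)
test_dat  <- testing(car_split)
```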
Let's look at one of the `rsplit` objects:
```{r rsplit}
first_resample <- bt_resamples$splits[[1]]
first_resample
```
This indicates that there were `r dim(bt_resamples$splits[[1]])["analysis"]` data points in the analysis set, `r dim(bt_resamples$splits[[1]])["assessment"]` instances in the assessment set, and `r dim(bt_resamples$splits[[1]])["n"]` data points in the original data. These results can also be determined using the `dim` function on an `rsplit` object, as shown below.
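For example:
```r
# returns the analysis, assessment, and original ("n") sizes
dim(first_resample)
```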
To obtain either of these data sets from an `rsplit`, the `as.data.frame` function can be used. By default, the analysis set is returned but the `data` option can be used to return the assessment data:
```{r rsplit_df}
head(as.data.frame(first_resample))
as.data.frame(first_resample, data = "assessment")
```
Alternatively, you can use the shortcuts `analysis(first_resample)` and `assessment(first_resample)`.
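For instance:
```r
head(analysis(first_resample))
head(assessment(first_resample))
```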
rsample/tests/testthat.R
library(testthat)
library(rsample)
test_check(package = "rsample")
q("no")
rsample/tests/testthat/ 0000755 0001762 0000144 00000000000 13512203633 014715 5 ustar ligges users rsample/tests/testthat/test_vfold.R 0000644 0001762 0000144 00000004533 13512132573 017222 0 ustar ligges users context("V-fold CV")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('default param', {
set.seed(11)
rs1 <- vfold_cv(dat1)
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$analysis == 18))
expect_true(all(sizes1$assessment == 2))
same_data <-
map_lgl(rs1$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
good_holdout <- map_lgl(rs1$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('repeated', {
set.seed(11)
rs2 <- vfold_cv(dat1, repeats = 4)
sizes2 <- dim_rset(rs2)
expect_true(all(sizes2$analysis == 18))
expect_true(all(sizes2$assessment == 2))
same_data <-
map_lgl(rs2$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
good_holdout <- map_lgl(rs2$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('strata', {
iris2 <- iris[1:130, ]
set.seed(11)
rs3 <- vfold_cv(iris2, repeats = 2, strata = "Species")
sizes3 <- dim_rset(rs3)
expect_true(all(sizes3$analysis == 117))
expect_true(all(sizes3$assessment == 13))
rate <- map_dbl(rs3$splits,
function(x) {
dat <- as.data.frame(x)$Species
mean(dat == "virginica")
})
expect_true(length(unique(rate)) == 1)
good_holdout <- map_lgl(rs3$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('bad args', {
expect_error(vfold_cv(iris, strata = iris$Species))
expect_error(vfold_cv(iris, strata = c("Species", "Sepal.Width")))
})
test_that('printing', {
expect_output(print(vfold_cv(mtcars)))
})
test_that('rsplit labels', {
rs <- vfold_cv(mtcars)
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
rs2 <- vfold_cv(mtcars, repeats = 4)
all_labs2 <- map_df(rs2$splits, labels)
original_id2 <- rs2[, grepl("^id", names(rs2))]
expect_equal(all_labs2, original_id2)
})
rsample/tests/testthat/test_rsplit.R
context("Rsplit constructor")
library(testthat)
library(rsample)
dat1 <- data.frame(a = 1:100, b = 101:200)
size1 <- object.size(dat1)
dat2 <- as.matrix(dat1)
test_that('simple rsplit', {
rs1 <- rsplit(dat1, 1:2, 4:5)
expect_equal(rs1$data, dat1)
expect_equal(rs1$in_id, 1:2)
expect_equal(rs1$out_id, 4:5)
})
test_that('simple rsplit with matrices', {
rs2 <- rsplit(dat2, 1:2, 4:5)
expect_equal(rs2$data, dat2)
expect_equal(rs2$in_id, 1:2)
expect_equal(rs2$out_id, 4:5)
})
test_that('bad inputs', {
expect_error(rsplit(as.list(dat1), 1:2, 4:5))
expect_error(rsplit(dat1, letters[1:2], 4:5))
expect_error(rsplit(dat1, 1:2, letters[4:5]))
expect_error(rsplit(dat1, -1:2, 4:5))
expect_error(rsplit(dat1, 1:2, -4:5))
expect_error(rsplit(dat1, integer(0), 4:5))
})
test_that('as.data.frame', {
rs3 <- rsplit(dat1, 1:2, 4:5)
expect_equal(as.data.frame(rs3), dat1[1:2,])
expect_equal(as.data.frame(rs3, data = "assessment"), dat1[4:5,])
rs4 <- rsplit(dat1, rep(1:2, each = 3), rep(4:5, c(2, 1)))
expect_equal(as.data.frame(rs4), dat1[c(1, 1, 1, 2, 2, 2),])
expect_equal(as.data.frame(rs4, data = "assessment"), dat1[c(4, 4, 5),])
})
rsample/tests/testthat/test_loo.R
context("Leave-one-out CV")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:10, b = letters[1:10])
test_that('Loooooo', {
loo1 <- loo_cv(dat1)
expect_equal(nrow(loo1), nrow(dat1))
same_data <-
map_lgl(loo1$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
holdouts <-
map_lgl(loo1$splits, function(x)
length(x$out_id) == 1)
expect_true(all(holdouts))
retained <-
map_lgl(loo1$splits, function(x)
length(x$in_id) == (nrow(dat1) - 1))
expect_true(all(retained))
})
test_that('printing', {
expect_output(print(loo_cv(dat1)))
})
test_that('rsplit labels', {
rs <- loo_cv(mtcars)
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_dplyr.R
context("Compatibility with dplyr")
library(rsample)
library(testthat)
library(dplyr)
###################################################################
obj_1 <- vfold_cv(mtcars)
obj_2 <- bootstraps(mtcars)
obj_3 <- rolling_origin(mtcars)
obj_4 <- nested_cv(mtcars, obj_1, inside = bootstraps(times = 5))
obj_5 <- mc_cv(mtcars)
obj_6 <- loo_cv(mtcars)
obj_7 <- group_vfold_cv(mtcars, group = "am")
###################################################################
# Check that the dplyr operation did not drop any attributes
check_att <- function(x, y)
length(setdiff(names(attributes(x)), names(attributes(y)))) == 0
###################################################################
test_that('object types', {
expect_true(is_rset(obj_1))
expect_false(is_rset(obj_1[, -1]))
})
###################################################################
test_that('dplyr ops', {
expect_true(
is_rset(
obj_2 %>% filter(id == "Bootstrap02")
)
)
expect_true(
is_rset(
obj_3 %>% mutate(blah = substr(id, 1, 3))
)
)
expect_true(
is_rset(
obj_4 %>% select(splits, id)
)
)
expect_true(
is_rset(
obj_1 %>% arrange(id)
)
)
expect_true(
is_rset(
obj_1 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_1 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_1
)
)
expect_true(
is_rset(
obj_2 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_2 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_2
)
)
expect_true(
is_rset(
obj_3 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_3 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_3
)
)
expect_true(
is_rset(
obj_4 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_4 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_4
)
)
expect_true(
is_rset(
obj_5 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_5 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_5
)
)
expect_true(
is_rset(
obj_6 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_6 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_6
)
)
expect_true(
is_rset(
obj_7 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah)
)
)
expect_true(
check_att(
obj_7 %>% mutate(blah = substr(id, 1, 3)) %>% rename(newer = blah),
obj_7
)
)
expect_true(
is_rset(
obj_3 %>% slice(1L)
)
)
})
rsample/tests/testthat/test_mc.R
context("Monte Carlo CV")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('default param', {
set.seed(11)
rs1 <- mc_cv(dat1)
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$analysis == 15))
expect_true(all(sizes1$assessment == 5))
same_data <-
map_lgl(rs1$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
good_holdout <- map_lgl(rs1$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('different percent', {
set.seed(11)
rs2 <- mc_cv(dat1, prop = .5)
sizes2 <- dim_rset(rs2)
expect_true(all(sizes2$analysis == 10))
expect_true(all(sizes2$assessment == 10))
same_data <-
map_lgl(rs2$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
good_holdout <- map_lgl(rs2$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('strata', {
iris2 <- iris[1:130, ]
set.seed(11)
rs3 <- mc_cv(iris2, strata = "Species")
sizes3 <- dim_rset(rs3)
expect_true(all(sizes3$analysis == 99))
expect_true(all(sizes3$assessment == 31))
rate <- map_dbl(rs3$splits,
function(x) {
dat <- as.data.frame(x)$Species
mean(dat == "virginica")
})
expect_true(length(unique(rate)) == 1)
good_holdout <- map_lgl(rs3$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('bad args', {
expect_error(mc_cv(iris, strata = iris$Species))
expect_error(mc_cv(iris, strata = c("Species", "Sepal.Length")))
})
test_that('printing', {
expect_output(print(mc_cv(iris)))
})
test_that('rsplit labels', {
rs <- mc_cv(mtcars)
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_initial.R
context("Initial splitting")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('default param', {
set.seed(11)
rs1 <- initial_split(dat1)
expect_equal(class(rs1), c("rsplit", "mc_split"))
tr1 <- training(rs1)
ts1 <- testing(rs1)
expect_equal(nrow(tr1), nrow(dat1)*3/4)
expect_equal(nrow(ts1), nrow(dat1)/4)
})
test_that('default time param', {
rs1 <- initial_time_split(dat1)
expect_equal(class(rs1), "rsplit")
tr1 <- training(rs1)
ts1 <- testing(rs1)
expect_equal(nrow(tr1), floor(nrow(dat1) * 3/4))
expect_equal(nrow(ts1), ceiling(nrow(dat1) / 4))
expect_equal(tr1, dplyr::slice(dat1, 1:floor(nrow(dat1) * 3/4)))
})
rsample/tests/testthat/test_rolling.R
context("Rolling window resampling")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('default param', {
rs1 <- rolling_origin(dat1)
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$assessment == 1))
expect_true(all(sizes1$analysis == 5:19))
same_data <-
map_lgl(rs1$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
for (i in 1:nrow(rs1)) {
expect_equal(rs1$splits[[i]]$in_id,
1:(i + attr(rs1, "initial") - 1))
expect_equal(rs1$splits[[i]]$out_id,
i + attr(rs1, "initial"))
}
})
test_that('larger holdout', {
rs2 <- rolling_origin(dat1, assess = 3)
sizes2 <- dim_rset(rs2)
expect_true(all(sizes2$assessment == 3))
expect_true(all(sizes2$analysis == 5:17))
for (i in 1:nrow(rs2)) {
expect_equal(rs2$splits[[i]]$in_id,
1:(i + attr(rs2, "initial") - 1))
expect_equal(rs2$splits[[i]]$out_id,
(i + attr(rs2, "initial")):
(i + attr(rs2, "initial") + attr(rs2, "assess") - 1))
}
})
test_that('fixed analysis size', {
rs3 <- rolling_origin(dat1, cumulative = FALSE)
sizes3 <- dim_rset(rs3)
expect_true(all(sizes3$assessment == 1))
expect_true(all(sizes3$analysis == 5))
for (i in 1:nrow(rs3)) {
expect_equal(rs3$splits[[i]]$in_id,
i:(i + attr(rs3, "initial") - 1))
expect_equal(rs3$splits[[i]]$out_id,
i + attr(rs3, "initial"))
}
})
test_that('skipping', {
rs4 <- rolling_origin(dat1, cumulative = FALSE, skip = 2)
sizes4 <- dim_rset(rs4)
expect_true(all(sizes4$assessment == 1))
expect_true(all(sizes4$analysis == 5))
for (i in 1:nrow(rs4)) {
expect_equal(rs4$splits[[i]]$in_id,
(i + attr(rs4, "skip")*(i-1)):
(i + attr(rs4, "skip")*(i-1) + attr(rs4, "initial") -1))
expect_equal(rs4$splits[[i]]$out_id,
i + attr(rs4, "skip")*(i-1) + attr(rs4, "initial"))
}
})
test_that('printing', {
expect_output(print(rolling_origin(dat1)))
})
test_that('rsplit labels', {
rs <- rolling_origin(dat1)
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_boot.R
context("Bootstrapping")
library(testthat)
library(rsample)
library(purrr)
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('default param', {
set.seed(11)
rs1 <- bootstraps(dat1)
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$analysis == nrow(dat1)))
same_data <-
map_lgl(rs1$splits, function(x)
all.equal(x$data, dat1))
expect_true(all(same_data))
good_holdout <- map_lgl(rs1$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
})
test_that('apparent', {
rs2 <- bootstraps(dat1, apparent = TRUE)
sizes2 <- dim_rset(rs2)
expect_true(all(sizes2$analysis == nrow(dat1)))
expect_true(all(sizes2$assessment[nrow(sizes2)] == nrow(dat1)))
expect_equal(sizes2$assessment[sizes2$id == "Apparent"], nrow(dat1))
res2 <-
as.data.frame(rs2$splits[[nrow(sizes2)]], data = "assessment")
expect_equal(res2, dat1)
})
test_that('strata', {
iris2 <- iris[1:130, ]
set.seed(11)
rs4 <- bootstraps(iris2, strata = "Species")
sizes4 <- dim_rset(rs4)
expect_true(all(sizes4$analysis == nrow(iris2)))
rate <- map_dbl(rs4$splits,
function(x) {
dat <- as.data.frame(x)$Species
mean(dat == "virginica")
})
expect_true(length(unique(rate)) == 1)
good_holdout <- map_lgl(rs4$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
rs5 <- bootstraps(iris2, apparent = TRUE, strata = "Species")
sizes5 <- dim_rset(rs5)
expect_true(all(sizes5$analysis == nrow(iris2)))
expect_true(all(sizes5$assessment[nrow(sizes5)] == nrow(iris2)))
expect_equal(sizes5$assessment[sizes5$id == "Apparent"], nrow(iris2))
res5 <-
as.data.frame(rs5$splits[[nrow(sizes5)]], data = "assessment")
expect_equal(res5, iris2)
})
test_that('bad args', {
expect_error(bootstraps(iris, strata = iris$Species))
expect_error(bootstraps(iris, strata = c("Species", "Sepal.Length")))
})
test_that('printing', {
expect_output(print(bootstraps(iris)))
})
test_that('rsplit labels', {
rs <- bootstraps(iris)
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_bootci.R
library(rsample)
library(testthat)
library(purrr)
library(tibble)
library(dplyr)
library(broom)
data("attrition")
context("Bootstrap intervals")
# ------------------------------------------------------------------------------
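# Helper: compute the sample mean and its standard error on the analysis set
# of a split, in the tidy (term/estimate/std.error) format the interval
# functions expect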
get_stats <- function(split, ...) {
dat <- analysis(split)
x <- dat[[1]]
tibble(
term = "mean",
estimate = mean(x, na.rm = TRUE),
std.error = sqrt(var(x, na.rm = TRUE)/sum(!is.na(x)))
)
}
# ------------------------------------------------------------------------------
n <- 1000
mu <- 10
sigma <- 1
set.seed(888)
rand_nums <- rnorm(n, mu, sigma)
ttest <- tidy(t.test(rand_nums))
dat <- data.frame(x = rand_nums)
set.seed(456765)
bt_norm <-
bootstraps(dat, times = 1000, apparent = TRUE) %>%
dplyr::mutate(
stats = map(splits, ~ get_stats(.x))
)
test_that('Bootstrap estimate of mean is close to estimate of mean from normal distribution',{
single_pct_res <- int_pctl(bt_norm, stats)
single_t_res <- int_t(bt_norm, stats)
single_bca_res <- int_bca(bt_norm, stats, .fn = get_stats)
expect_equal(ttest$conf.low,
single_pct_res$.lower,
tolerance = 0.01)
expect_equal(ttest$estimate,
single_pct_res$.estimate,
tolerance = 0.01)
expect_equal(ttest$conf.high,
single_pct_res$.upper,
tolerance = 0.01)
expect_equal(ttest$conf.low,
single_t_res$.lower,
tolerance = 0.01)
expect_equal(ttest$estimate,
single_t_res$.estimate,
tolerance = 0.01)
expect_equal(ttest$conf.high,
single_t_res$.upper,
tolerance = 0.01)
expect_equal(ttest$conf.low,
single_bca_res$.lower,
tolerance = 0.01)
expect_equal(ttest$estimate,
single_bca_res$.estimate,
tolerance = 0.01)
expect_equal(ttest$conf.high,
single_bca_res$.upper,
tolerance = 0.01)
})
# ------------------------------------------------------------------------------
context("Wrapper Functions")
test_that("Wrappers -- selection of multiple variables works", {
func <- function(split, ...) {
lm(Age ~ HourlyRate + DistanceFromHome, data = analysis(split)) %>% tidy()
}
# generate boostrap resamples
set.seed(888)
bt_resamples <- bootstraps(attrition, times = 1000, apparent = TRUE) %>%
mutate(res = map(splits, func))
attrit_tidy <-
lm(Age ~ HourlyRate + DistanceFromHome, data = attrition) %>%
tidy(conf.int = TRUE) %>%
dplyr::arrange(term)
pct_res <-
int_pctl(bt_resamples, res) %>%
inner_join(attrit_tidy, by = "term")
expect_equal(pct_res$conf.low, pct_res$.lower, tolerance = .01)
expect_equal(pct_res$conf.high, pct_res$.upper, tolerance = .01)
t_res <-
int_t(bt_resamples, res) %>%
inner_join(attrit_tidy, by = "term")
expect_equal(t_res$conf.low, t_res$.lower, tolerance = .01)
expect_equal(t_res$conf.high, t_res$.upper, tolerance = .01)
bca_res <-
int_bca(bt_resamples, res, .fn = func) %>%
inner_join(attrit_tidy, by = "term")
expect_equal(bca_res$conf.low, bca_res$.lower, tolerance = .01)
expect_equal(bca_res$conf.high, bca_res$.upper, tolerance = .01)
})
# ------------------------------------------------------------------------------
context("boot_ci() Prompt Errors: Too Many NAs")
test_that('Upper & lower confidence interval does not contain NA', {
bad_stats <- function(split, ...) {
tibble(
term = "mean",
estimate = NA_real_,
std.error = runif(1)
)
}
set.seed(888)
bt_resamples <- bootstraps(data.frame(x = 1:100), times = 1000, apparent = TRUE) %>%
mutate(res = map(splits, bad_stats))
expect_error(
int_pctl(bt_resamples, res),
"missing values"
)
expect_error(
int_t(bt_resamples, res),
"missing values"
)
expect_error(
int_bca(bt_resamples, res, .fn = bad_stats),
"missing values"
)
})
# ------------------------------------------------------------------------------
context("boot_ci() Insufficient Number of Bootstrap Resamples")
set.seed(456765)
bt_small <-
bootstraps(dat, times = 10, apparent = TRUE) %>%
dplyr::mutate(
stats = map(splits, ~ get_stats(.x)),
junk = 1:11
)
test_that(
"Sufficient replications needed to sufficiently reduce Monte Carlo sampling Error for BCa method", {
expect_warning(int_pctl(bt_small, stats))
expect_warning(int_t(bt_small, stats))
expect_warning(int_bca(bt_small, stats, .fn = get_stats))
}
)
context("boot_ci() Input Validation")
test_that("bad input", {
expect_error(int_pctl(bt_small, id))
expect_error(int_pctl(bt_small, junk))
bad_bt_norm <-
bt_norm %>%
mutate(stats = map(stats, ~ .x[, 1:2]))
expect_error(int_t(bad_bt_norm, stats))
expect_error(int_bca(bad_bt_norm, stats))
no_dots <- function(split) {
dat <- analysis(split)
x <- dat[[1]]
tibble(
term = "mean",
estimate = mean(x, na.rm = TRUE),
std.error = sqrt(var(x, na.rm = TRUE)/sum(!is.na(x)))
)
}
expect_error(
int_bca(bt_norm, stats, .fn = no_dots),
"must have an argument"
)
expect_error(int_pctl(as.data.frame(bt_norm), stats))
expect_error(int_t(as.data.frame(bt_norm), stats))
expect_error(int_bca(as.data.frame(bt_norm), stats, .fn = get_stats))
expect_error(
int_t(bt_norm %>% dplyr::filter(id != "Apparent"), stats)
)
expect_error(
int_bca(bt_norm %>% dplyr::filter(id != "Apparent"), stats, .fn = get_stats)
)
poo <- function(x) {
x$estimate <- "a"
x
}
badder_bt_norm <-
bt_norm %>%
mutate(
bad_term = map(stats, ~ .x %>% setNames(c("a", "estimate", "std.err"))),
bad_est = map(stats, ~ .x %>% setNames(c("term", "b", "std.err"))),
bad_err = map(stats, ~ .x %>% setNames(c("term", "estimate", "c"))),
bad_num = map(stats, ~ poo(.x))
)
expect_error(int_pctl(badder_bt_norm, bad_term))
expect_error(int_t(badder_bt_norm, bad_err))
expect_error(int_bca(badder_bt_norm, bad_est, .fn = get_stats))
expect_error(int_pctl(badder_bt_norm, bad_num))
})
rsample/tests/testthat/test_for_pred.R
context("Predictor extraction")
library(testthat)
test_that('no dots', {
expect_equal(form_pred(y ~ x + z), c("x", "z"))
expect_equal(form_pred(terms(y ~ x + z)), c("x", "z"))
expect_equal(form_pred(y ~ x + log(z)), c("x", "z"))
expect_equal(form_pred(terms(y ~ x + log(z))), c("x", "z"))
expect_equal(form_pred(log(y) ~ x + z), c("x", "z"))
expect_equal(form_pred(terms(log(y) ~ x + z)), c("x", "z"))
expect_equal(form_pred(y1 + y2 ~ x + z), c("x", "z"))
expect_equal(form_pred(terms(y1 + y2 ~ x + z)), c("x", "z"))
expect_equal(form_pred(log(y1) + y2 ~ x + z), c("x", "z"))
expect_equal(form_pred(terms(log(y1) + y2 ~ x + z)), c("x", "z"))
expect_equal(form_pred(~ x + z), c("x", "z"))
expect_equal(form_pred(terms(~ x + z)), c("x", "z"))
expect_equal(form_pred(~ x), "x")
expect_equal(form_pred(terms(~ x)), "x")
expect_equal(form_pred(y ~ x), "x")
expect_equal(form_pred(terms(y ~ x)), "x")
})
test_that('dots', {
expect_error(form_pred(y ~ .))
expect_error(form_pred(terms(y ~ .)))
expect_error(form_pred(y ~ (.)^2))
expect_error(form_pred(terms(y ~ (.)^2)))
expect_equal(form_pred(terms(Species ~ (.)^2, data = iris)),
names(iris)[1:4])
expect_equal(form_pred(terms(~ (.)^2, data = iris)),
names(iris))
})
rsample/tests/testthat/test_rset.R
context("Rset constructor")
library(testthat)
library(rsample)
cars_10fold <- vfold_cv(mtcars)
test_that('bad args', {
expect_error(
new_rset(cars_10fold$splits[1:2], cars_10fold$id)
)
expect_error(
new_rset(cars_10fold$splits, cars_10fold[ "splits"])
)
expect_error(
new_rset(cars_10fold$splits, cars_10fold$splits)
)
args <- list(a = 1, b = 2, 3)
expect_error(
new_rset(
cars_10fold$splits,
cars_10fold$id,
attrib = args
)
)
})
test_that('simple rset', {
res1 <- new_rset(
cars_10fold$splits,
cars_10fold$id
)
expect_equal(names(res1), c("splits", "id"))
expect_equal(class(res1), c("tbl_df", "tbl", "data.frame"))
expect_equal(sort(names(attributes(res1))),
c("class", "names", "row.names"))
res2 <- new_rset(
cars_10fold[, "splits"],
cars_10fold[, "id"]
)
expect_equal(names(res2), c("splits", "id"))
expect_equal(class(res2), c("rset", "tbl_df", "tbl", "data.frame"))
expect_equal(sort(names(attributes(res2))),
sort(names(attributes(cars_10fold))))
})
test_that('rset with attributes', {
args <- list(value = "potato")
res3 <- new_rset(
cars_10fold$splits,
cars_10fold$id,
attrib = args
)
expect_equal(sort(names(attributes(res3))),
c("class", "names", "row.names", "value"))
expect_equal(attr(res3, "value"), "potato")
})
test_that('rset with additional classes', {
res4 <- new_rset(
cars_10fold$splits,
cars_10fold$id,
subclass = "potato"
)
expect_equal(class(res4),
c("potato", "tbl_df", "tbl", "data.frame"))
})
test_that('not an rsplit', {
folds <- vfold_cv(mtcars)
expect_error(analysis(folds$splits[1]))
expect_error(assessment(folds$splits[1]))
})
rsample/tests/testthat/test_labels.R
library(testthat)
library(rsample)
context("Labels")
test_that('basic cv', {
cv_obj <- vfold_cv(mtcars)
expect_equal(cv_obj$id, labels(cv_obj))
expect_is(labels(cv_obj), "character")
expect_s3_class(labels(cv_obj, TRUE), "factor")
})
test_that('repeated cv', {
rcv_obj <- vfold_cv(mtcars, repeats = 3)
expect_equal(paste(rcv_obj$id, rcv_obj$id2, sep = "."),
labels(rcv_obj))
expect_is(labels(rcv_obj), "character")
expect_s3_class(labels(rcv_obj, TRUE), "factor")
})
test_that('nested cv', {
expect_error(
labels(
nested_cv(mtcars,
outside = vfold_cv(v = 3),
inside = bootstraps(times = 5)
)
)
)
})
test_that('adding labels', {
set.seed(363)
car_folds <- vfold_cv(mtcars, repeats = 3)
res <-
analysis(car_folds$splits[[1]]) %>%
add_resample_id(car_folds$splits[[1]])
expect_equal(colnames(res), c(colnames(mtcars), "id", "id2"))
car_bt <- bootstraps(mtcars)
res <- analysis(car_bt$splits[[1]]) %>%
add_resample_id(car_bt$splits[[1]])
expect_equal(colnames(res), c(colnames(mtcars), "id"))
res <- analysis(car_bt$splits[[1]]) %>%
add_resample_id(car_bt$splits[[1]], TRUE)
expect_equal(colnames(res), c(colnames(mtcars), ".id"))
expect_error(
analysis(car_folds$splits[[1]]) %>%
add_resample_id(car_folds$splits[[1]], 7)
)
expect_error(
analysis(car_folds$splits[[1]]) %>%
add_resample_id(car_folds$splits[[1]], c(TRUE, TRUE))
)
expect_error(
analysis(car_folds$splits[[1]]) %>%
add_resample_id(car_folds$splits)
)
expect_error(
analysis(car_folds$splits[[1]]) %>%
as.matrix() %>%
add_resample_id(car_folds$splits[[1]])
)
})
rsample/tests/testthat/test_names.R
context("Naming functions")
library(testthat)
library(rsample)
test_that('basic naming sequences', {
expect_equal(names0(2), c("x1", "x2"))
expect_equal(names0(2, "y"), c("y1", "y2"))
expect_equal(names0(10),
c(paste0("x0", 1:9), "x10"))
})
rsample/tests/testthat/test_group.R
context("Group resampling")
library(testthat)
library(rsample)
library(purrr)
library(tibble)
iris2 <- as_tibble(iris)
get_id_left_out <- function(x)
unique(as.character(assessment(x)$Species))
test_that('bad args', {
expect_error(group_vfold_cv(iris, group = iris$Species))
expect_error(group_vfold_cv(iris, group = c("Species", "Sepal.Width")))
expect_error(group_vfold_cv(iris, group = "Specie"))
expect_error(group_vfold_cv(iris))
expect_error(group_vfold_cv(iris, group = "Species", v = 10))
})
test_that('default param', {
set.seed(11)
rs1 <- group_vfold_cv(iris, "Species")
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$analysis == 100))
expect_true(all(sizes1$assessment == 50))
same_data <-
map_lgl(rs1$splits, function(x)
all.equal(x$data, iris))
expect_true(all(same_data))
good_holdout <- map_lgl(rs1$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
sp_out <- map_chr(rs1$splits, get_id_left_out)
expect_true(all(table(sp_out) == 1))
})
test_that('v < max v', {
set.seed(11)
rs2 <- group_vfold_cv(iris, "Species", v = 2)
sizes2 <- dim_rset(rs2)
expect_true(!all(sizes2$analysis == 100))
expect_true(!all(sizes2$assessment == 50))
same_data <-
map_lgl(rs2$splits, function(x)
all.equal(x$data, iris))
expect_true(all(same_data))
good_holdout <- map_lgl(rs2$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
sp_out <- map(rs2$splits, get_id_left_out)
expect_true(all(table(unlist(sp_out)) == 1))
})
test_that('tibble input', {
set.seed(11)
rs3 <- group_vfold_cv(iris2, "Species")
sizes3 <- dim_rset(rs3)
expect_true(all(sizes3$analysis == 100))
expect_true(all(sizes3$assessment == 50))
same_data <-
map_lgl(rs3$splits, function(x)
all.equal(x$data, iris2))
expect_true(all(same_data))
good_holdout <- map_lgl(rs3$splits,
function(x) {
length(intersect(x$in_id, x$out_id)) == 0
})
expect_true(all(good_holdout))
sp_out <- map_chr(rs3$splits, get_id_left_out)
expect_true(all(table(sp_out) == 1))
})
test_that('printing', {
expect_output(print(group_vfold_cv(iris, "Species")))
})
test_that('rsplit labels', {
rs <- group_vfold_cv(iris, "Species")
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_strata.R
context("Strata constructor")
library(testthat)
library(rsample)
library(purrr)
test_that('simple numerics', {
set.seed(13333)
x1 <- rnorm(1000)
str1a <- make_strata(x1)
tab1a <- table(str1a)
expect_equal(as.vector(tab1a), rep(250, 4))
str1b <- make_strata(x1, depth = 500)
tab1b <- table(str1b)
expect_equal(as.vector(tab1b), rep(500, 2))
})
test_that('simple character', {
x2 <- factor(rep(LETTERS[1:5], each = 50))
str2a <- make_strata(x2)
expect_equal(table(str2a, dnn = ""), table(x2, dnn = ""))
})
test_that('bad data', {
x3 <- factor(rep(LETTERS[1:10], each = 50))
expect_warning(make_strata(x3))
expect_warning(make_strata(mtcars$mpg))
})
rsample/tests/testthat/test_caret.R
context("Conversions for caret")
library(testthat)
library(rsample)
###################################################################
## Test cases for caret -> rsample that mimic `trainControl`
dat <- data.frame(y = 1:15, x = 15:1)
lgo1 <-
structure(
list(
method = "LGOCV",
index = structure(
list(
Resample1 = c(1L, 4L, 5L, 6L, 7L, 9L, 10L, 14L),
Resample2 = c(2L, 4L, 5L, 6L, 9L, 10L, 14L, 15L),
Resample3 = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L)
),
.Names = c("Resample1", "Resample2", "Resample3")
),
indexOut = structure(
list(
Resample1 = c(2L, 3L, 8L, 11L, 12L, 13L, 15L),
Resample2 = c(1L, 3L, 7L, 8L, 11L, 12L, 13L),
Resample3 = c(4L, 10L, 11L, 12L, 13L, 14L, 15L)
),
.Names = c("Resample1", "Resample2", "Resample3")
),
number = 3,
p = 0.5
),
.Names = c("method", "index", "indexOut", "number", "p")
)
cv_1 <- structure(
list(
method = "cv",
index = structure(
list(
Fold1 = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 13L),
Fold2 = c(1L, 4L, 6L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold3 = c(1L, 2L, 3L, 5L, 7L, 9L, 12L, 14L, 15L)
),
.Names = c("Fold1", "Fold2", "Fold3")
),
indexOut = structure(
list(
Resample1 = c(1L, 9L, 12L, 14L, 15L),
Resample2 = c(2L, 3L, 5L, 7L),
Resample3 = c(4L, 6L, 8L, 10L, 11L, 13L)
),
.Names = c("Resample1", "Resample2", "Resample3")
),
number = 3,
repeats = NA
),
.Names = c("method", "index", "indexOut", "number", "repeats")
)
cv_2 <-
structure(
list(
method = "repeatedcv",
index = structure(
list(
Fold1.Rep1 = c(1L, 3L, 4L, 6L, 9L, 10L, 12L, 13L, 14L, 15L),
Fold2.Rep1 = c(2L, 5L, 7L, 8L, 10L, 11L, 13L, 14L, 15L),
Fold3.Rep1 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L),
Fold1.Rep2 = c(1L, 2L, 3L, 5L, 6L, 7L, 10L, 11L, 12L, 14L),
Fold2.Rep2 = c(2L, 4L, 6L, 8L, 9L, 11L, 13L, 14L, 15L),
Fold3.Rep2 = c(1L, 3L, 4L, 5L, 7L, 8L, 9L, 10L, 12L, 13L, 15L)
),
.Names = c(
"Fold1.Rep1", "Fold2.Rep1", "Fold3.Rep1",
"Fold1.Rep2", "Fold2.Rep2", "Fold3.Rep2"
)
),
indexOut = structure(
list(
Resample1 = c(2L, 5L, 7L, 8L, 11L),
Resample2 = c(1L, 3L, 4L, 6L, 9L, 12L),
Resample3 = c(10L, 13L, 14L, 15L),
Resample4 = c(4L, 8L, 9L, 13L, 15L),
Resample5 = c(1L, 3L, 5L, 7L, 10L, 12L),
Resample6 = c(2L, 6L, 11L, 14L)
),
.Names = c(
"Resample1", "Resample2", "Resample3",
"Resample4", "Resample5", "Resample6"
)
),
number = 3,
repeats = 2
),
.Names = c("method", "index", "indexOut", "number", "repeats")
)
cv_3 <- cv_2
cv_3$method <- "adaptive_cv"
bt_1 <-
structure(
list(
method = "boot",
index = structure(
list(
Resample1 = c(1L, 1L, 4L, 4L, 5L, 7L, 8L, 10L, 11L, 11L, 12L, 13L, 15L, 15L, 15L),
Resample2 = c(1L, 2L, 3L, 5L, 5L, 5L, 6L, 7L, 8L, 9L, 9L, 9L, 10L, 10L, 12L)
),
.Names = c("Resample1", "Resample2")
),
indexOut = structure(
list(
Resample1 = c(2L, 3L, 6L, 9L, 14L),
Resample2 = c(4L, 11L, 13L, 14L, 15L)
),
.Names = c("Resample1", "Resample2")
),
number = 2
),
.Names = c("method",
"index", "indexOut", "number")
)
bt_2 <- bt_1
bt_2$method <- "boot632"
bt_3 <- bt_1
bt_3$method <- "optimism_boot"
bt_4 <- bt_1
bt_4$method <- "boot_all"
bt_5 <- bt_1
bt_5$method <- "adaptive_boot"
loo_1 <-
structure(
list(
method = "LOOCV",
index = structure(
list(
Fold01 = 2:15,
Fold02 = c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold03 = c(1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold04 = c(1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold05 = c(1L, 2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold06 = c(1L, 2L, 3L, 4L, 5L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold07 = c(1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold08 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 9L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold09 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 13L, 14L, 15L),
Fold10 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L),
Fold11 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 15L),
Fold12 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 15L),
Fold13 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 14L, 15L),
Fold14 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 15L),
Fold15 = 1:14
),
.Names = c(
"Fold01", "Fold02", "Fold03", "Fold04", "Fold05", "Fold06",
"Fold07", "Fold08", "Fold09", "Fold10", "Fold11", "Fold12",
"Fold13", "Fold14", "Fold15"
)
),
indexOut = structure(
list(
Resample01 = 1L, Resample02 = 2L,
Resample03 = 3L, Resample04 = 4L,
Resample05 = 5L, Resample06 = 6L,
Resample07 = 7L, Resample08 = 8L,
Resample09 = 9L, Resample10 = 10L,
Resample11 = 11L, Resample12 = 12L,
Resample13 = 13L, Resample14 = 14L,
Resample15 = 15L
),
.Names = c(
"Resample01", "Resample02", "Resample03", "Resample04",
"Resample05", "Resample06", "Resample07", "Resample08",
"Resample09", "Resample10", "Resample11", "Resample12",
"Resample13", "Resample14", "Resample15"
)
)
),
.Names = c("method", "index", "indexOut")
)
rof_1 <-
structure(
list(
method = "timeSlice",
index = structure(
list(
Training04 = 1:4, Training05 = 2:5, Training06 = 3:6,
Training07 = 4:7, Training08 = 5:8, Training09 = 6:9,
Training10 = 7:10
),
.Names = c(
"Training04", "Training05", "Training06", "Training07",
"Training08", "Training09", "Training10"
)
),
indexOut = structure(
list(
Testing04 = 5:9, Testing05 = 6:10, Testing06 = 7:11,
Testing07 = 8:12, Testing08 = 9:13, Testing09 = 10:14,
Testing10 = 11:15
),
.Names = c(
"Testing04", "Testing05", "Testing06", "Testing07",
"Testing08", "Testing09", "Testing10"
)
),
initialWindow = 4, horizon = 5, fixedWindow = TRUE, skip = 0
),
.Names = c(
"method", "index", "indexOut", "initialWindow",
"horizon", "fixedWindow", "skip"
)
)
###################################################################
##
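## Helper: compare each converted split's analysis/assessment row
## indices to the original caret `index`/`indexOut` lists.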
check_indices <- function(newer, orig) {
for (i in seq_along(newer$splits)) {
expect_equal(as.integer(newer$splits[[i]]),
orig$index[[i]])
expect_equal(as.integer(newer$splits[[i]], "assessment"),
orig$indexOut[[i]])
}
invisible(NULL)
}
###################################################################
## Tests
test_that('basic v-fold', {
vfold_obj_1 <- caret2rsample(cv_1, data = dat)
check_indices(vfold_obj_1, cv_1)
for (i in seq_along(vfold_obj_1$splits))
expect_equal(vfold_obj_1$id[[i]], names(cv_1$index)[i])
})
test_that('repeated v-fold', {
vfold_obj_2 <- caret2rsample(cv_2, data = dat)
check_indices(vfold_obj_2, cv_2)
for (i in seq_along(vfold_obj_2$splits))
expect_equal(paste(vfold_obj_2$id2[[i]], vfold_obj_2$id[[i]],
sep = "."),
names(cv_2$index)[i])
})
test_that('basic boot', {
bt_obj_1 <- caret2rsample(bt_1, data = dat)
check_indices(bt_obj_1, bt_1)
for (i in seq_along(bt_obj_1$splits))
expect_equal(bt_obj_1$id[[i]], names(bt_1$index)[i])
})
test_that('boot 632', {
bt_obj_2 <- caret2rsample(bt_2, data = dat)
check_indices(bt_obj_2, bt_2)
for (i in seq_along(bt_obj_2$splits))
expect_equal(bt_obj_2$id[[i]], names(bt_2$index)[i])
})
test_that('boot optim', {
bt_obj_3 <- caret2rsample(bt_3, data = dat)
check_indices(bt_obj_3, bt_3)
for (i in seq_along(bt_obj_3$splits))
expect_equal(bt_obj_3$id[[i]], names(bt_3$index)[i])
})
test_that('boot all', {
bt_obj_4 <- caret2rsample(bt_4, data = dat)
check_indices(bt_obj_4, bt_4)
for (i in seq_along(bt_obj_4$splits))
expect_equal(bt_obj_4$id[[i]], names(bt_4$index)[i])
})
test_that('adaptive boot', {
bt_obj_5 <- caret2rsample(bt_5, data = dat)
check_indices(bt_obj_5, bt_5)
for (i in seq_along(bt_obj_5$splits))
expect_equal(bt_obj_5$id[[i]], names(bt_5$index)[i])
})
test_that('loo', {
loo_obj <- caret2rsample(loo_1, data = dat)
check_indices(loo_obj, loo_1)
for (i in seq_along(loo_obj$splits))
expect_equal(loo_obj$id[[i]], names(loo_1$index)[i])
})
test_that('mcv', {
mcv_obj <- caret2rsample(lgo1, data = dat)
check_indices(mcv_obj, lgo1)
for (i in seq_along(mcv_obj$splits))
expect_equal(mcv_obj$id[[i]], names(lgo1$index)[i])
})
test_that('rolling origin', {
rof_obj <- caret2rsample(rof_1, data = dat)
check_indices(rof_obj, rof_1)
for (i in seq_along(rof_obj$splits))
expect_equal(rof_obj$id[[i]], names(rof_1$index)[i])
})
rsample/tests/testthat/test_gather.R
context("Gather from tidyr")
library(testthat)
library(rsample)
library(tidyr)
cvs <- vfold_cv(mtcars)
cvs$one <- 1
cvs$two <- 2
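# expected result: the two added columns stacked into key/value pairs, keyed by `id`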
expt <- cvs %>% gather(model, statistic, -id)
test_that('basics', {
res_1 <- gather(cvs)
expect_equal(res_1, expt)
})
test_that('extra args ignored', {
res_2 <- gather(cvs, contains("o"))
expect_equal(res_2, expt)
res_2 <- gather(cvs, key = ignored)
expect_equal(res_2, expt)
res_3 <- gather(cvs, ignored)
expect_equal(res_3, expt)
})
test_that('no extra cols', {
expect_error(gather(vfold_cv(mtcars)))
})
test_that('no splits', {
cvs2 <- cvs
cvs2$splits <- NULL
res_4 <- gather(cvs2)
expect_equal(res_4, expt)
})
rsample/tests/testthat/test_nesting.R
context("Nested CV")
library(testthat)
library(rsample)
library(purrr)
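# `dim_rset()` (internal to rsample) tabulates the analysis and
# assessment set sizes for each split in an rset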
test_that('default param', {
set.seed(11)
rs1 <- nested_cv(mtcars[1:30,],
outside = vfold_cv(v = 10),
inside = vfold_cv(v = 3))
sizes1 <- dim_rset(rs1)
expect_true(all(sizes1$analysis == 27))
expect_true(all(sizes1$assessment == 3))
subsizes1 <- map(rs1$inner_resamples, dim_rset)
subsizes1 <- do.call("rbind", subsizes1)
expect_true(all(subsizes1$analysis == 18))
expect_true(all(subsizes1$assessment == 9))
set.seed(11)
rs2 <- nested_cv(mtcars[1:30,],
outside = vfold_cv(v = 10),
inside = bootstraps(times = 3))
sizes2 <- dim_rset(rs2)
expect_true(all(sizes2$analysis == 27))
expect_true(all(sizes2$assessment == 3))
subsizes2 <- map(rs2$inner_resamples, dim_rset)
subsizes2 <- do.call("rbind", subsizes2)
expect_true(all(subsizes2$analysis == 27))
set.seed(11)
rs3 <- nested_cv(mtcars[1:30,],
outside = vfold_cv(v = 10),
inside = mc_cv(prop = 2/3, times = 3))
sizes3 <- dim_rset(rs3)
expect_true(all(sizes3$analysis == 27))
expect_true(all(sizes3$assessment == 3))
subsizes3 <- map(rs3$inner_resamples, dim_rset)
subsizes3 <- do.call("rbind", subsizes3)
expect_true(all(subsizes3$analysis == 18))
expect_true(all(subsizes3$assessment == 9))
})
test_that('bad args', {
expect_warning(
nested_cv(mtcars,
outside = bootstraps(times = 5),
inside = vfold_cv(v = 3))
)
folds <- vfold_cv(mtcars)
expect_error(
nested_cv(mtcars,
outside = vfold_cv(),
inside = folds)
)
})
test_that('printing', {
rs1 <- nested_cv(mtcars[1:30,],
outside = vfold_cv(v = 10),
inside = vfold_cv(v = 3))
expect_output(print(rs1))
})
test_that('rsplit labels', {
rs <- nested_cv(mtcars[1:30,],
outside = vfold_cv(v = 10),
inside = vfold_cv(v = 3))
all_labs <- map_df(rs$splits, labels)
original_id <- rs[, grepl("^id", names(rs))]
expect_equal(all_labs, original_id)
})
rsample/tests/testthat/test_tidy.R
context("Tidy methods")
library(testthat)
library(rsample)
library(purrr)
check_ind <- function(x, tdat) {
  # rows tagged "Analysis"/"Assessment" in the tidy output should match
  # the split's analysis and assessment indices, respectively
  in_dat <- subset(tdat, Data == "Analysis")
  in_check <- all(sort(in_dat$Row) == sort(as.integer(x)))
  out_dat <- subset(tdat, Data == "Assessment")
  out_check <- all(sort(out_dat$Row) == sort(as.integer(x, "assessment")))
  in_check & out_check
}
dat1 <- data.frame(a = 1:20, b = letters[1:20])
test_that('simple boot', {
set.seed(11)
rs1 <- bootstraps(dat1)
td1 <- tidy(rs1, unique_ind = FALSE)
name_vals <- names0(nrow(rs1), "Bootstrap")
for(i in 1:nrow(rs1)) {
expect_true(
check_ind(rs1$splits[[i]],
subset(td1, Resample == name_vals[i])
)
)
}
})
test_that('vfold', {
set.seed(11)
rs2 <- vfold_cv(dat1)
td2 <- tidy(rs2, unique_ind = FALSE)
for(i in 1:nrow(rs2)) {
expect_true(
check_ind(rs2$splits[[i]],
subset(td2, Fold == rs2$id[i])
)
)
}
})
test_that('vfold with repeats', {
set.seed(11)
rs3 <- vfold_cv(dat1, repeats = 2)
td3 <- tidy(rs3, unique_ind = FALSE)
for(i in 1:nrow(rs3)) {
expect_true(
check_ind(rs3$splits[[i]],
subset(td3, Fold == rs3$id2[i] & Repeat == rs3$id[i])
)
)
}
})
rsample/NAMESPACE
# Generated by roxygen2: do not edit by hand
S3method("[",rset)
S3method(as.data.frame,rsplit)
S3method(as.integer,rsplit)
S3method(complement,apparent_split)
S3method(complement,boot_split)
S3method(complement,group_vfold_split)
S3method(complement,loo_split)
S3method(complement,mc_split)
S3method(complement,rof_split)
S3method(complement,vfold_split)
S3method(dim,rsplit)
S3method(gather,rset)
S3method(labels,rset)
S3method(labels,rsplit)
S3method(labels,vfold_cv)
S3method(obj_sum,rsplit)
S3method(populate,rset)
S3method(populate,rsplit)
S3method(pretty,apparent)
S3method(pretty,bootstraps)
S3method(pretty,group_vfold_cv)
S3method(pretty,loo_cv)
S3method(pretty,mc_cv)
S3method(pretty,nested_cv)
S3method(pretty,rolling_origin)
S3method(pretty,vfold_cv)
S3method(print,apparent)
S3method(print,bootstraps)
S3method(print,group_vfold_cv)
S3method(print,loo_cv)
S3method(print,mc_cv)
S3method(print,nested_cv)
S3method(print,rolling_origin)
S3method(print,rsplit)
S3method(print,vfold_cv)
S3method(tidy,nested_cv)
S3method(tidy,rset)
S3method(tidy,rsplit)
S3method(tidy,vfold_cv)
S3method(type_sum,rsplit)
export(add_resample_id)
export(analysis)
export(apparent)
export(assessment)
export(bootstraps)
export(caret2rsample)
export(complement)
export(form_pred)
export(gather.rset)
export(group_vfold_cv)
export(initial_split)
export(initial_time_split)
export(int_bca)
export(int_pctl)
export(int_t)
export(loo_cv)
export(make_strata)
export(mc_cv)
export(nested_cv)
export(populate)
export(pretty.apparent)
export(pretty.bootstraps)
export(pretty.group_vfold_cv)
export(pretty.loo_cv)
export(pretty.mc_cv)
export(pretty.nested_cv)
export(pretty.rolling_origin)
export(pretty.vfold_cv)
export(rolling_origin)
export(rsample2caret)
export(testing)
export(tidy)
export(training)
export(vfold_cv)
exportMethods(gather.rset)
exportMethods(pretty.apparent)
exportMethods(pretty.bootstraps)
exportMethods(pretty.group_vfold_cv)
exportMethods(pretty.loo_cv)
exportMethods(pretty.mc_cv)
exportMethods(pretty.nested_cv)
exportMethods(pretty.rolling_origin)
exportMethods(pretty.vfold_cv)
importFrom(dplyr,"%>%")
importFrom(dplyr,arrange)
importFrom(dplyr,arrange_)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,do)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,last)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(furrr,future_map_dfr)
importFrom(generics,tidy)
importFrom(methods,formalArgs)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_dbl)
importFrom(purrr,map_df)
importFrom(purrr,map_dfr)
importFrom(purrr,pluck)
importFrom(rlang,"!!")
importFrom(rlang,enquo)
importFrom(rlang,exec)
importFrom(rlang,is_call)
importFrom(rlang,is_string)
importFrom(rlang,quos)
importFrom(stats,pnorm)
importFrom(stats,qnorm)
importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(stats,terms)
importFrom(tibble,as_tibble)
importFrom(tibble,is_tibble)
importFrom(tibble,obj_sum)
importFrom(tibble,tibble)
importFrom(tibble,type_sum)
importFrom(tidyr,gather)
importFrom(tidyr,unnest)
importFrom(tidyselect,one_of)
importFrom(tidyselect,vars_select)
importFrom(utils,globalVariables)
rsample/NEWS.md
# `rsample` 0.0.5
* Added three functions (`int_pctl`, `int_t`, and `int_bca`) to compute different types of bootstrap confidence intervals; a usage sketch follows this list.
* A new function (`add_resample_id`) augments a data frame with columns for the resampling identifier.
* Updated `initial_split`, `mc_cv`, `vfold_cv`, `bootstraps`, and `group_vfold_cv` to use tidyselect on the stratification variable.
* Updated `initial_split`, `mc_cv`, `vfold_cv`, and `bootstraps` with a new `breaks` parameter that specifies the number of bins used when stratifying on a numeric variable.
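A minimal sketch of how these pieces fit together, assuming the broom-style statistics format (`term`, `estimate`, `std.error`) that the interval functions expect; `mean_stat` is a hypothetical helper, not part of the package:

```r
library(rsample)
library(purrr)
library(tibble)

# per-resample statistic in tidy format
mean_stat <- function(split, ...) {
  x <- analysis(split)$mpg
  tibble(term = "mean_mpg",
         estimate = mean(x),
         std.error = sd(x) / sqrt(length(x)))
}

set.seed(123)
# `strata`/`breaks`: stratify on a numeric column using 4 bins;
# `apparent = TRUE` adds the whole-sample resample that `int_t()`
# and `int_bca()` require
bt <- bootstraps(mtcars, times = 1000, strata = mpg, breaks = 4,
                 apparent = TRUE)
bt$stats <- map(bt$splits, mean_stat)

int_pctl(bt, stats)                  # percentile interval
int_t(bt, stats)                     # bootstrap-t interval
int_bca(bt, stats, .fn = mean_stat)  # BCa interval

# tag an analysis set with its resample identifier
head(add_resample_id(analysis(bt$splits[[1]]), bt$splits[[1]]))
```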
# `rsample` 0.0.4
Small maintenance release.
## Minor improvements and fixes
* `fill()` was removed per the deprecation warning.
* Small changes were made for the new version of `tibble`.
# `rsample` 0.0.3
## New features
* Added the `initial_time_split` function for ordered initial sampling appropriate for time series data (see the sketch below).
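A small sketch of the ordered split (the `dat` data frame here is made up for illustration):

```r
library(rsample)

# rows are assumed to already be in time order; the first `prop`
# fraction becomes the training set, the remainder the test set
dat <- data.frame(day = 1:100, y = cumsum(rnorm(100)))
ts_split <- initial_time_split(dat, prop = 0.8)

range(training(ts_split)$day)  # 1 80
range(testing(ts_split)$day)   # 81 100
```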
## Minor improvements and fixes
* `fill()` has been renamed `populate()` to avoid a conflict with `tidyr::fill()`.
* Lowered the minimum R version requirement from R >= 3.3.3 to R >= 3.1.
* The `recipes`-related `prepper` function was [moved to the `recipes` package](https://github.com/tidymodels/rsample/issues/48). This makes the `rsample` install footprint much smaller.
* `rsplit` objects are shown differently inside of a tibble.
* Moved from the `broom` package to the `generics` package.
# `rsample` 0.0.2
* `initial_split`, `training`, and `testing` were added to perform training/testing splits prior to resampling (see the example after this list).
* Another resampling method, `group_vfold_cv`, was added.
* `caret2rsample` and `rsample2caret` can convert `rset` objects to those used by `caret::trainControl` and vice versa.
* A function called `form_pred` can be used to determine the original names of the predictors in a formula or `terms` object.
* A vignette and a function (`prepper`) were included to facilitate using the `recipes` package with `rsample`.
* A `gather` method was added for `rset` objects.
* A `labels` method was added for `rsplit` objects. This can help identify which resample is being used even when the whole `rset` object is not available.
* A variety of `dplyr` methods were added (e.g., `filter`, `mutate`, etc.) that work without dropping the classes or attributes of `rsample` objects.
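A sketch tying several of these together (object and variable names are illustrative):

```r
library(rsample)

set.seed(100)
# training/testing split prior to resampling
car_split <- initial_split(mtcars, prop = 3/4)
car_train <- training(car_split)
car_test  <- testing(car_split)

# resample the training set; `labels()` identifies a single split
folds <- vfold_cv(car_train, v = 5)
labels(folds$splits[[1]])

# round-trip between rsample and caret's trainControl format
ctrl <- rsample2caret(folds)
folds2 <- caret2rsample(ctrl, data = car_train)

# recover the predictor names from a formula
form_pred(mpg ~ wt + disp + hp)
#> [1] "wt"   "disp" "hp"
```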
# `rsample` 0.0.1 (2017-07-08)
Initial public version on CRAN
rsample/data/
rsample/data/attrition.RData
[binary RData file containing the `attrition` data set; contents not representable as text]