lazyeval/0000755000176200001440000000000013175456530012112 5ustar liggesuserslazyeval/inst/0000755000176200001440000000000013171753670013070 5ustar liggesuserslazyeval/inst/doc/0000755000176200001440000000000013171753670013635 5ustar liggesuserslazyeval/inst/doc/lazyeval-old.R0000644000176200001440000000501413171753666016370 0ustar liggesusers## ---- echo = FALSE------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") rownames(mtcars) <- NULL ## ------------------------------------------------------------------------ library(lazyeval) f <- function(x = a - b) { lazy(x) } f() f(a + b) ## ------------------------------------------------------------------------ a <- 10 b <- 1 lazy_eval(f()) lazy_eval(f(a + b)) ## ------------------------------------------------------------------------ lazy_eval(f(), list(a = 1)) ## ------------------------------------------------------------------------ lazy_eval(~ a + b) h <- function(i) { ~ 10 + i } lazy_eval(h(1)) ## ------------------------------------------------------------------------ subset2_ <- function(df, condition) { r <- lazy_eval(condition, df) r <- r & !is.na(r) df[r, , drop = FALSE] } subset2_(mtcars, lazy(mpg > 31)) ## ------------------------------------------------------------------------ subset2_(mtcars, ~mpg > 31) subset2_(mtcars, quote(mpg > 31)) subset2_(mtcars, "mpg > 31") ## ------------------------------------------------------------------------ subset2 <- function(df, condition) { subset2_(df, lazy(condition)) } subset2(mtcars, mpg > 31) ## ------------------------------------------------------------------------ above_threshold <- function(df, var, threshold) { cond <- interp(~ var > x, var = lazy(var), x = threshold) subset2_(df, cond) } above_threshold(mtcars, mpg, 31) ## ------------------------------------------------------------------------ x <- 31 f1 <- function(...) { x <- 30 subset(mtcars, ...) 
} # Uses 30 instead of 31 f1(mpg > x) f2 <- function(...) { x <- 30 subset2(mtcars, ...) } # Correctly uses 31 f2(mpg > x) ## ---- eval = FALSE------------------------------------------------------- # x <- 31 # g1 <- function(comp) { # x <- 30 # subset(mtcars, comp) # } # g1(mpg > x) # #> Error: object 'mpg' not found ## ------------------------------------------------------------------------ g2 <- function(comp) { x <- 30 subset2(mtcars, comp) } g2(mpg > x) ## ------------------------------------------------------------------------ library(lazyeval) f1 <- function(x) lazy(x) g1 <- function(y) f1(y) g1(a + b) ## ------------------------------------------------------------------------ f2 <- function(x) lazy(x, .follow_symbols = FALSE) g2 <- function(y) f2(y) g2(a + b) ## ------------------------------------------------------------------------ a <- 10 b <- 1 lazy_eval(g1(a + b)) lazy_eval(g2(a + b)) lazyeval/inst/doc/lazyeval.R0000644000176200001440000002047113171753667015621 0ustar liggesusers## ---- include = FALSE---------------------------------------------------- library(lazyeval) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ---- fig.width = 4, fig.height = 2.5------------------------------------ par(mar = c(4.5, 4.5, 1, 0.5)) grid <- seq(0, 2 * pi, length = 100) plot(grid, sin(grid), type = "l") ## ------------------------------------------------------------------------ df <- data.frame(x = c(1, 5, 4, 2, 3), y = c(2, 1, 5, 4, 3)) with(df, mean(x)) subset(df, x == y) transform(df, z = x + y) ## ------------------------------------------------------------------------ my_label <- function(x) deparse(substitute(x)) my_label(x + y) ## ------------------------------------------------------------------------ my_label({ a + b c + d }) ## ------------------------------------------------------------------------ my_label2 <- function(x) my_label(x) my_label2(a + b) ## ------------------------------------------------------------------------ my_label <- 
function(x) expr_text(x) my_label2 <- function(x) my_label(x) my_label({ a + b c + d }) my_label2(a + b) ## ------------------------------------------------------------------------ expr_label(x) expr_label(a + b + c) expr_label(foo({ x + y })) ## ---- eval = FALSE------------------------------------------------------- # x <- c("a", "b", "c") # my_mean(x) # #> Error: `x` is a not a numeric vector. # my_mean(x == "a") # #> Error: `x == "a"` is not a numeric vector. # my_mean("a") # #> Error: "a" is not a numeric vector. ## ------------------------------------------------------------------------ f <- ~ x + y + z typeof(f) attributes(f) ## ------------------------------------------------------------------------ length(f) # The 1st element is always ~ f[[1]] # The 2nd element is the RHS f[[2]] ## ------------------------------------------------------------------------ g <- y ~ x + z length(g) # The 1st element is still ~ g[[1]] # But now the 2nd element is the LHS g[[2]] # And the 3rd element is the RHS g[[3]] ## ------------------------------------------------------------------------ f_rhs(f) f_lhs(f) f_env(f) f_rhs(g) f_lhs(g) f_env(g) ## ------------------------------------------------------------------------ f <- ~ 1 + 2 + 3 f f_eval(f) ## ------------------------------------------------------------------------ x <- 1 add_1000 <- function(x) { ~ 1000 + x } add_1000(3) f_eval(add_1000(3)) ## ------------------------------------------------------------------------ f_unwrap(add_1000(3)) ## ------------------------------------------------------------------------ y <- 100 f_eval(~ y) f_eval(~ y, data = list(y = 10)) # Can mix variables in environment and data argument f_eval(~ x + y, data = list(x = 10)) # Can even supply functions f_eval(~ f(y), data = list(f = function(x) x * 3)) ## ------------------------------------------------------------------------ f_eval(~ mean(cyl), data = mtcars) ## ---- eval = FALSE------------------------------------------------------- # 
f_eval(~ x, data = mydata) ## ------------------------------------------------------------------------ mydata <- data.frame(x = 100, y = 1) x <- 10 f_eval(~ .env$x, data = mydata) f_eval(~ .data$x, data = mydata) ## ---- error = TRUE------------------------------------------------------- f_eval(~ .env$z, data = mydata) f_eval(~ .data$z, data = mydata) ## ------------------------------------------------------------------------ df_mean <- function(df, variable) { f_eval(~ mean(uq(variable)), data = df) } df_mean(mtcars, ~ cyl) df_mean(mtcars, ~ disp * 0.01638) df_mean(mtcars, ~ sqrt(mpg)) ## ------------------------------------------------------------------------ variable <- ~cyl f_interp(~ mean(uq(variable))) variable <- ~ disp * 0.01638 f_interp(~ mean(uq(variable))) ## ------------------------------------------------------------------------ f <- ~ mean f_interp(~ uq(f)(uq(variable))) ## ------------------------------------------------------------------------ formula <- y ~ x f_interp(~ lm(uq(formula), data = df)) ## ------------------------------------------------------------------------ f_interp(~ lm(uqf(formula), data = df)) ## ------------------------------------------------------------------------ variable <- ~ x extra_args <- list(na.rm = TRUE, trim = 0.9) f_interp(~ mean(uq(variable), uqs(extra_args))) ## ------------------------------------------------------------------------ f <- function(x) x + 1 f_eval(~ f(10), list(f = "a")) ## ------------------------------------------------------------------------ sieve <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. 
= FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } df <- data.frame(x = 1:5, y = 5:1) sieve(df, ~ x <= 2) sieve(df, ~ x == y) ## ---- eval = FALSE------------------------------------------------------- # sieve(march, ~ x > 100) # sieve(april, ~ x > 50) # sieve(june, ~ x > 45) # sieve(july, ~ x > 17) ## ------------------------------------------------------------------------ threshold_x <- function(df, threshold) { sieve(df, ~ x > threshold) } threshold_x(df, 3) ## ---- error = TRUE------------------------------------------------------- rm(x) df2 <- data.frame(y = 5:1) # Throws an error threshold_x(df2, 3) # Silently gives the incorrect result! x <- 5 threshold_x(df2, 3) ## ------------------------------------------------------------------------ df3 <- data.frame(x = 1:5, y = 5:1, threshold = 4) threshold_x(df3, 3) ## ---- error = TRUE------------------------------------------------------- threshold_x <- function(df, threshold) { sieve(df, ~ .data$x > .env$threshold) } threshold_x(df2, 3) threshold_x(df3, 3) ## ------------------------------------------------------------------------ threshold <- function(df, variable, threshold) { stopifnot(is.character(variable), length(variable) == 1) sieve(df, ~ .data[[.env$variable]] > .env$threshold) } threshold(df, "x", 4) ## ------------------------------------------------------------------------ threshold <- function(df, variable = ~x, threshold = 0) { sieve(df, ~ uq(variable) > .env$threshold) } threshold(df, ~ x, 4) threshold(df, ~ abs(x - y), 2) ## ------------------------------------------------------------------------ x <- 3 threshold(df, ~ .data$x - .env$x, 0) ## ------------------------------------------------------------------------ mogrify <- function(`_df`, ...) { args <- list(...) 
for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ## ------------------------------------------------------------------------ df <- data.frame(x = 1:5, y = sample(5)) mogrify(df, z = ~ x + y, z2 = ~ z * 2) ## ------------------------------------------------------------------------ add_variable <- function(df, name, expr) { do.call("mogrify", c(list(df), setNames(list(expr), name))) } add_variable(df, "z", ~ x + y) ## ------------------------------------------------------------------------ f_list("x" ~ y, z = ~z) ## ------------------------------------------------------------------------ mogrify <- function(`_df`, ...) { args <- f_list(...) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ## ------------------------------------------------------------------------ add_variable <- function(df, name, expr) { mogrify(df, name ~ uq(expr)) } add_variable(df, "z", ~ x + y) ## ------------------------------------------------------------------------ sieve_ <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. = FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } ## ------------------------------------------------------------------------ sieve <- function(df, expr) { sieve_(df, f_capture(expr)) } sieve(df, x == 1) ## ------------------------------------------------------------------------ scramble <- function(df) { df[sample(nrow(df)), , drop = FALSE] } subscramble <- function(df, expr) { scramble(sieve(df, expr)) } subscramble(df, x < 4) ## ------------------------------------------------------------------------ mogrify_ <- function(`_df`, args) { args <- as_f_list(args) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } mogrify <- function(`_df`, ...) 
{ mogrify_(`_df`, dots_capture(...)) } lazyeval/inst/doc/lazyeval-old.Rmd0000644000176200001440000001337713171350764016714 0ustar liggesusers--- title: "Lazyeval: a new approach to NSE" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Lazyeval: a new approach to NSE} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") rownames(mtcars) <- NULL ``` This document outlines my previous approach to non-standard evaluation (NSE). You should avoid it unless you are working with an older version of dplyr or tidyr. There are three key ideas: * Instead of using `substitute()`, use `lazyeval::lazy()` to capture both expression and environment. (Or use `lazyeval::lazy_dots(...)` to capture promises in `...`) * Every function that uses NSE should have a standard evaluation (SE) escape hatch that does the actual computation. The SE-function name should end with `_`. * The SE-function has a flexible input specification to make it easy for people to program with. 
## `lazy()` The key tool that makes this approach possible is `lazy()`, an equivalent to `substitute()` that captures both expression and environment associated with a function argument: ```{r} library(lazyeval) f <- function(x = a - b) { lazy(x) } f() f(a + b) ``` As a complement to `eval()`, the lazyeval package provides `lazy_eval()` that uses the environment associated with the lazy object: ```{r} a <- 10 b <- 1 lazy_eval(f()) lazy_eval(f(a + b)) ``` The second argument to `lazy_eval()` is a list or data frame where names should be looked up first: ```{r} lazy_eval(f(), list(a = 1)) ``` `lazy_eval()` also works with formulas, since they contain the same information as a lazy object: an expression (only the RHS is used by convention) and an environment: ```{r} lazy_eval(~ a + b) h <- function(i) { ~ 10 + i } lazy_eval(h(1)) ``` ## Standard evaluation Whenever we need a function that does non-standard evaluation, always write the standard evaluation version first. For example, let's implement our own version of `subset()`: ```{r} subset2_ <- function(df, condition) { r <- lazy_eval(condition, df) r <- r & !is.na(r) df[r, , drop = FALSE] } subset2_(mtcars, lazy(mpg > 31)) ``` `lazy_eval()` will always coerce its first argument into a lazy object, so a variety of specifications will work: ```{r} subset2_(mtcars, ~mpg > 31) subset2_(mtcars, quote(mpg > 31)) subset2_(mtcars, "mpg > 31") ``` Note that quoted calls and strings don't have environments associated with them, so `as.lazy()` defaults to using `baseenv()`. This will work if the expression is self-contained (i.e. doesn't contain any references to variables in the local environment), and will otherwise fail quickly and robustly. ## Non-standard evaluation With the SE version in hand, writing the NSE version is easy. 
We just use `lazy()` to capture the unevaluated expression and corresponding environment: ```{r} subset2 <- function(df, condition) { subset2_(df, lazy(condition)) } subset2(mtcars, mpg > 31) ``` This standard evaluation escape hatch is very important because it allows us to implement different NSE approaches. For example, we could create a subsetting function that finds all rows where a variable is above a threshold: ```{r} above_threshold <- function(df, var, threshold) { cond <- interp(~ var > x, var = lazy(var), x = threshold) subset2_(df, cond) } above_threshold(mtcars, mpg, 31) ``` Here we're using `interp()` to modify a formula. We use the value of `threshold` and the expression captured in `var`. ## Scoping Because `lazy()` captures the environment associated with the function argument, we automatically avoid a subtle scoping bug present in `subset()`: ```{r} x <- 31 f1 <- function(...) { x <- 30 subset(mtcars, ...) } # Uses 30 instead of 31 f1(mpg > x) f2 <- function(...) { x <- 30 subset2(mtcars, ...) } # Correctly uses 31 f2(mpg > x) ``` `lazy()` has another advantage over `substitute()` - by default, it follows promises across function invocations. This simplifies the casual use of NSE. ```{r, eval = FALSE} x <- 31 g1 <- function(comp) { x <- 30 subset(mtcars, comp) } g1(mpg > x) #> Error: object 'mpg' not found ``` ```{r} g2 <- function(comp) { x <- 30 subset2(mtcars, comp) } g2(mpg > x) ``` Note that `g2()` doesn't have a standard-evaluation escape hatch, so it's not suitable for programming with in the same way that `subset2_()` is. ## Chained promises Take the following example: ```{r} library(lazyeval) f1 <- function(x) lazy(x) g1 <- function(y) f1(y) g1(a + b) ``` `lazy()` returns `a + b` because it always tries to find the top-level promise. In this case the process looks like this: 1. Find the object that `x` is bound to. 2. 
It's a promise, so find the expr it's bound to (`y`, a symbol) and the environment in which it should be evaluated (the environment of `g1()`). 3. Since `x` is bound to a symbol, look up its value: it's bound to a promise. 4. That promise has expression `a + b` and should be evaluated in the global environment. 5. The expression is not a symbol, so stop. Occasionally, you want to avoid this recursive behaviour, so you can use `.follow_symbols = FALSE`: ```{r} f2 <- function(x) lazy(x, .follow_symbols = FALSE) g2 <- function(y) f2(y) g2(a + b) ``` Either way, if you evaluate the lazy expression you'll get the same result: ```{r} a <- 10 b <- 1 lazy_eval(g1(a + b)) lazy_eval(g2(a + b)) ``` Note that the resolution of chained promises only works with unevaluated objects. This is because R deletes the information about the environment associated with a promise when it has been forced, so that the garbage collector is allowed to remove the environment from memory in case it is no longer used. `lazy()` will fail with an error in such situations. ```{r, error = TRUE, purl = FALSE} var <- 0 f3 <- function(x) { force(x) lazy(x) } f3(var) ``` lazyeval/inst/doc/lazyeval.Rmd0000644000176200001440000005105613171350764016134 0ustar liggesusers--- title: "Non-standard evaluation" author: "Hadley Wickham" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Non-standard evaluation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} library(lazyeval) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` This document describes lazyeval, a package that provides principled tools to perform non-standard evaluation (NSE) in R. You should read this vignette if you want to program with packages like dplyr and ggplot2[^1], or you want a principled way of working with delayed expressions in your own package. 
As the name suggests, non-standard evaluation breaks away from the standard evaluation (SE) rules in order to do something special. There are three common uses of NSE: 1. __Labelling__ enhances plots and tables by using the expressions supplied to a function, rather than their values. For example, note the axis labels in this plot: ```{r, fig.width = 4, fig.height = 2.5} par(mar = c(4.5, 4.5, 1, 0.5)) grid <- seq(0, 2 * pi, length = 100) plot(grid, sin(grid), type = "l") ``` 1. __Non-standard scoping__ looks for objects in places other than the current environment. For example, base R has `with()`, `subset()`, and `transform()` that look for objects in a data frame (or list) before the current environment: ```{r} df <- data.frame(x = c(1, 5, 4, 2, 3), y = c(2, 1, 5, 4, 3)) with(df, mean(x)) subset(df, x == y) transform(df, z = x + y) ``` 1. __Metaprogramming__ is a catch-all term that covers all other uses of NSE (such as in `bquote()` and `library()`). Metaprogramming is so called because it involves computing on the unevaluated code in some way. This document is broadly organised according to the three types of non-standard evaluation described above. The main difference is that after [labelling], we'll take a detour to learn more about [formulas]. You're probably familiar with formulas from linear models (e.g. `lm(mpg ~ displ, data = mtcars)`) but formulas are more than just a tool for modelling: they are a general way of capturing an unevaluated expression. The approaches recommended here are quite different to my previous generation of recommendations. I am fairly confident these new approaches are correct, and will not have to change substantially again. The current tools make it easy to solve a number of practical problems that were previously challenging and are rooted in [long-standing theory](http://repository.readscheme.org/ftp/papers/pepm99/bawden.pdf). 
[^1]: Currently neither ggplot2 nor dplyr actually use these tools since I've only just figured it out. But I'll be working hard to make sure all my packages are consistent in the near future. ## Labelling In base R, the classic way to turn an argument into a label is to use `deparse(substitute(x))`: ```{r} my_label <- function(x) deparse(substitute(x)) my_label(x + y) ``` There are two potential problems with this approach: 1. For some long expressions, `deparse()` generates a character vector with length > 1: ```{r} my_label({ a + b c + d }) ``` 1. `substitute()` only looks one level up, so you lose the original label if the function isn't called directly: ```{r} my_label2 <- function(x) my_label(x) my_label2(a + b) ``` Both of these problems are resolved by `lazyeval::expr_text()`: ```{r} my_label <- function(x) expr_text(x) my_label2 <- function(x) my_label(x) my_label({ a + b c + d }) my_label2(a + b) ``` There are two variations on the theme of `expr_text()`: * `expr_find()` finds the underlying expression. It works similarly to `substitute()` but will follow a chain of promises back up to the original expression. This is often useful for [metaprogramming]. * `expr_label()` is a customised version of `expr_text()` that produces labels designed to be used in messages to the user: ```{r} expr_label(x) expr_label(a + b + c) expr_label(foo({ x + y })) ``` ### Exercises 1. `plot()` uses `deparse(substitute(x))` to generate labels for the x and y axes. Can you generate input that causes it to display bad labels? Write your own wrapper around `plot()` that uses `expr_label()` to compute `xlab` and `ylab`. 1. Create a simple implementation of `mean()` that stops with an informative error message if the argument is not numeric: ```{r, eval = FALSE} x <- c("a", "b", "c") my_mean(x) #> Error: `x` is not a numeric vector. my_mean(x == "a") #> Error: `x == "a"` is not a numeric vector. my_mean("a") #> Error: "a" is not a numeric vector. ``` 1. 
Read the source code for `expr_text()`. How does it work? What additional arguments to `deparse()` does it use? ## Formulas Non-standard scoping is probably the most useful NSE tool, but before we can talk about a solid approach, we need to take a detour to talk about formulas. Formulas are a familiar tool from linear models, but their utility is not limited to models. In fact, formulas are a powerful, general purpose tool, because a formula captures two things: 1. An unevaluated expression. 1. The context (environment) in which the expression was created. `~` is a single character that allows you to say: "I want to capture the meaning of this code, without evaluating it right away". For that reason, the formula can be thought of as a "quoting" operator. ### Definition of a formula Technically, a formula is a "language" object (i.e. an unevaluated expression) with a class of "formula" and an attribute that stores the environment: ```{r} f <- ~ x + y + z typeof(f) attributes(f) ``` The structure of the underlying object is slightly different depending on whether you have a one-sided or two-sided formula: * One-sided formulas have length two: ```{r} length(f) # The 1st element is always ~ f[[1]] # The 2nd element is the RHS f[[2]] ``` * Two-sided formulas have length three: ```{r} g <- y ~ x + z length(g) # The 1st element is still ~ g[[1]] # But now the 2nd element is the LHS g[[2]] # And the 3rd element is the RHS g[[3]] ``` To abstract away these differences, lazyeval provides `f_rhs()` and `f_lhs()` to access either side of the formula, and `f_env()` to access its environment: ```{r} f_rhs(f) f_lhs(f) f_env(f) f_rhs(g) f_lhs(g) f_env(g) ``` ### Evaluating a formula A formula delays the evaluation of an expression so you can later evaluate it with `f_eval()`: ```{r} f <- ~ 1 + 2 + 3 f f_eval(f) ``` This allows you to use a formula as a robust way of delaying evaluation, cleanly separating the creation of the formula from its evaluation. 
Because formulas capture the code and context, you get the correct result even when a formula is created and evaluated in different places. In the following example, note that the value of `x` inside `add_1000()` is used: ```{r} x <- 1 add_1000 <- function(x) { ~ 1000 + x } add_1000(3) f_eval(add_1000(3)) ``` It can be hard to see what's going on when looking at a formula because important values are stored in the environment, which is largely opaque. You can use `f_unwrap()` to replace names with their corresponding values: ```{r} f_unwrap(add_1000(3)) ``` ### Non-standard scoping `f_eval()` has an optional second argument: a named list (or data frame) that overrides values found in the formula's environment. ```{r} y <- 100 f_eval(~ y) f_eval(~ y, data = list(y = 10)) # Can mix variables in environment and data argument f_eval(~ x + y, data = list(x = 10)) # Can even supply functions f_eval(~ f(y), data = list(f = function(x) x * 3)) ``` This makes it very easy to implement non-standard scoping: ```{r} f_eval(~ mean(cyl), data = mtcars) ``` One challenge with non-standard scoping is that we've introduced some ambiguity. For example, in the code below does `x` come from `mydata` or the environment? ```{r, eval = FALSE} f_eval(~ x, data = mydata) ``` You can't tell without knowing whether or not `mydata` has a variable called `x`. To overcome this problem, `f_eval()` provides two pronouns: * `.data` is bound to the data frame. * `.env` is bound to the formula environment. They both start with `.` to minimise the chances of clashing with existing variables. 
With these pronouns we can rewrite the previous formula to remove the ambiguity: ```{r} mydata <- data.frame(x = 100, y = 1) x <- 10 f_eval(~ .env$x, data = mydata) f_eval(~ .data$x, data = mydata) ``` If the variable or object doesn't exist, you'll get an informative error: ```{r, error = TRUE} f_eval(~ .env$z, data = mydata) f_eval(~ .data$z, data = mydata) ``` ### Unquoting `f_eval()` has one more useful trick up its sleeve: unquoting. Unquoting allows you to write functions where the user supplies part of the formula. For example, the following function allows you to compute the mean of any column (or any function of a column): ```{r} df_mean <- function(df, variable) { f_eval(~ mean(uq(variable)), data = df) } df_mean(mtcars, ~ cyl) df_mean(mtcars, ~ disp * 0.01638) df_mean(mtcars, ~ sqrt(mpg)) ``` To see how this works, we can use `f_interp()` which `f_eval()` calls internally (you shouldn't call it in your own code, but it's useful for debugging). The key is `uq()`: `uq()` evaluates its first (and only) argument and inserts the value into the formula: ```{r} variable <- ~cyl f_interp(~ mean(uq(variable))) variable <- ~ disp * 0.01638 f_interp(~ mean(uq(variable))) ``` Unquoting allows you to create code "templates", where you write most of the expression, while still allowing the user to control important components. You can even use `uq()` to change the function being called: ```{r} f <- ~ mean f_interp(~ uq(f)(uq(variable))) ``` Note that `uq()` only takes the RHS of a formula, which makes it difficult to insert literal formulas into a call: ```{r} formula <- y ~ x f_interp(~ lm(uq(formula), data = df)) ``` You can instead use `uqf()` which uses the whole formula, not just the RHS: ```{r} f_interp(~ lm(uqf(formula), data = df)) ``` Unquoting is powerful, but it only allows you to modify a single argument: it doesn't allow you to add an arbitrary number of arguments. To do that, you'll need "unquote-splice", or `uqs()`. 
The first (and only) argument to `uqs()` should be a list of arguments to be spliced into the call: ```{r} variable <- ~ x extra_args <- list(na.rm = TRUE, trim = 0.9) f_interp(~ mean(uq(variable), uqs(extra_args))) ``` ### Exercises 1. Create a wrapper around `lm()` that allows the user to supply the response and predictors as two separate formulas. 1. Compare and contrast `f_eval()` with `with()`. 1. Why does this code work even though `f` is defined in two places? (And one of them is not a function). ```{r} f <- function(x) x + 1 f_eval(~ f(10), list(f = "a")) ``` ## Non-standard scoping Non-standard scoping (NSS) is an important part of R because it makes it easy to write functions tailored for interactive data exploration. These functions require less typing, at the cost of some ambiguity and "magic". This is a good trade-off for interactive data exploration because you want to get ideas out of your head and into the computer as quickly as possible. If a function does make a bad guess, you'll spot it quickly because you're working interactively. There are three challenges to implementing non-standard scoping: 1. You must correctly delay the evaluation of a function argument, capturing both the computation (the expression), and the context (the environment). I recommend making this explicit by requiring the user to "quote" any NSS arguments with `~`, and then evaluating explicitly with `f_eval()`. 1. When writing functions that use NSS-functions, you need some way to avoid the automatic lookup and be explicit about where objects should be found. `f_eval()` solves this problem with the `.data` and `.env` pronouns. 1. You need some way to allow the user to supply parts of a formula. `f_eval()` solves this with unquoting. To illustrate these challenges, I will implement a `sieve()` function that works similarly to `base::subset()` or `dplyr::filter()`. 
The goal of `sieve()` is to make it easy to select observations that match criteria defined by a logical expression. `sieve()` has three advantages over `[`: 1. It is much more compact when the condition uses many variables, because you don't need to repeat the name of the data frame many times. 1. It drops rows where the condition evaluates to `NA`, rather than filling them with `NA`s. 1. It always returns a data frame. The implementation of `sieve()` is straightforward. First we use `f_eval()` to perform NSS. Then we check that we have a logical vector, replace `NA`s with `FALSE`, and subset with `[`. ```{R} sieve <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. = FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } df <- data.frame(x = 1:5, y = 5:1) sieve(df, ~ x <= 2) sieve(df, ~ x == y) ``` ### Programming with `sieve()` Imagine that you've written some code that looks like this: ```{r, eval = FALSE} sieve(march, ~ x > 100) sieve(april, ~ x > 50) sieve(june, ~ x > 45) sieve(july, ~ x > 17) ``` (This is a contrived example, but it illustrates all of the important issues you'll need to consider when writing more useful functions.) Instead of continuing to copy-and-paste your code, you decide to wrap up the common behaviour in a function: ```{r} threshold_x <- function(df, threshold) { sieve(df, ~ x > threshold) } threshold_x(df, 3) ``` There are two ways that this function might fail: 1. The data frame might not have a variable called `x`. This will fail unless there's a variable called `x` hanging around in the global environment: ```{r, error = TRUE} rm(x) df2 <- data.frame(y = 5:1) # Throws an error threshold_x(df2, 3) # Silently gives the incorrect result! x <- 5 threshold_x(df2, 3) ``` 1. 
The data frame might have a variable called `threshold`: ```{r} df3 <- data.frame(x = 1:5, y = 5:1, threshold = 4) threshold_x(df3, 3) ``` These failures are particularly pernicious because instead of throwing an error they silently produce the wrong answer. Both failures arise because `f_eval()` introduces ambiguity by looking in two places for each name: the supplied data and formula environment. To make `threshold_x()` more reliable, we need to be more explicit by using the `.data` and `.env` pronouns: ```{r, error = TRUE} threshold_x <- function(df, threshold) { sieve(df, ~ .data$x > .env$threshold) } threshold_x(df2, 3) threshold_x(df3, 3) ``` Here `.env` is bound to the environment where `~` is evaluated, namely the inside of `threshold_x()`. ### Adding arguments The `threshold_x()` function is not very useful because it's bound to a specific variable. It would be more powerful if we could vary both the threshold and the variable it applies to. We can do that by taking an additional argument to specify which variable to use. One simple approach is to use a string and `[[`: ```{r} threshold <- function(df, variable, threshold) { stopifnot(is.character(variable), length(variable) == 1) sieve(df, ~ .data[[.env$variable]] > .env$threshold) } threshold(df, "x", 4) ``` This is a simple and robust solution, but only allows us to use an existing variable, not an arbitrary expression like `sqrt(x)`. A more general solution is to allow the user to supply a formula, and use unquoting: ```{r} threshold <- function(df, variable = ~x, threshold = 0) { sieve(df, ~ uq(variable) > .env$threshold) } threshold(df, ~ x, 4) threshold(df, ~ abs(x - y), 2) ``` In this case, it's the responsibility of the user to ensure the `variable` is specified unambiguously. 
`f_eval()` is designed so that `.data` and `.env` work even when evaluated inside of `uq()`: ```{r} x <- 3 threshold(df, ~ .data$x - .env$x, 0) ``` ### Dot-dot-dot There is one more tool that you might find useful for functions that take `...`. For example, the code below implements a function similar to `dplyr::mutate()` or `base::transform()`. ```{r} mogrify <- function(`_df`, ...) { args <- list(...) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ``` (NB: the first argument is a non-syntactic name (i.e. it requires quoting with `` ` ``) so it doesn't accidentally match one of the names of the new variables.) `transmogrifty()` makes it easy to add new variables to a data frame: ```{r} df <- data.frame(x = 1:5, y = sample(5)) mogrify(df, z = ~ x + y, z2 = ~ z * 2) ``` One problem with this implementation is that it's hard to specify the names of the generated variables. Imagine you want a function where the name and expression are in separate variables. This is awkward because the variable name is supplied as an argument name to `mogrify()`: ```{r} add_variable <- function(df, name, expr) { do.call("mogrify", c(list(df), setNames(list(expr), name))) } add_variable(df, "z", ~ x + y) ``` Lazyeval provides the `f_list()` function to make writing this sort of function a little easier. It takes a list of formulas and evaluates the LHS of each formula (if present) to rename the elements: ```{r} f_list("x" ~ y, z = ~z) ``` If we tweak `mogrify()` to use `f_list()` instead of `list()`: ```{r} mogrify <- function(`_df`, ...) { args <- f_list(...) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ``` `add_new()` becomes much simpler: ```{r} add_variable <- function(df, name, expr) { mogrify(df, name ~ uq(expr)) } add_variable(df, "z", ~ x + y) ``` ### Exercises 1. Write a function that selects all rows of `df` where `variable` is greater than its mean. 
Make the function more general by allowing the user to specify a function to use instead of `mean()` (e.g. `median()`). 1. Create a version of `mogrify()` where the first argument is `x`? What happens if you try to create a new variable called `x`? ## Non-standard evaluation In some situations you might want to eliminate the formula altogether, and allow the user to type expressions directly. I was once much enamoured with this approach (witness ggplot2, dplyr, ...). However, I now think that it should be used sparingly because explict quoting with `~` leads to simpler code, and makes it more clear to the user that something special is going on. That said, lazyeval does allow you to eliminate the `~` if you really want to. In this case, I recommend having both a NSE and SE version of the function. The SE version, which takes formuals, should have suffix `_`: ```{r} sieve_ <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. = FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } ``` Then create the NSE version which doesn't need the explicit formula. The key is the use of `f_capture()` which takes an unevaluated argument (a promise) and captures it as a formula: ```{r} sieve <- function(df, expr) { sieve_(df, f_capture(expr)) } sieve(df, x == 1) ``` If you're familiar with `substitute()` you might expect the same drawbacks to apply. However, `f_capture()` is smart enough to follow a chain of promises back to the original value, so, for example, this code works fine: ```{r} scramble <- function(df) { df[sample(nrow(df)), , drop = FALSE] } subscramble <- function(df, expr) { scramble(sieve(df, expr)) } subscramble(df, x < 4) ``` ### Dot-dot-dot If you want a `...` function that doesn't require formulas, I recommend that the SE version take a list of arguments, and the NSE version uses `dots_capture()` to capture multiple arguments as a list of formulas. 
```{r} mogrify_ <- function(`_df`, args) { args <- as_f_list(args) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } mogrify <- function(`_df`, ...) { mogrify_(`_df`, dots_capture(...)) } ``` ### Exercises 1. Recreate `subscramble()` using `base::subset()` instead of `sieve()`. Why does it fail? ## Metaprogramming The final use of non-standard evaluation is to do metaprogramming. This is a catch-all term that encompasses any function that does computation on an unevaluated expression. You can learn about metaprogrgramming in , particularly . Over time, the goal is to move all useful metaprogramming helper functions into this package, and discuss metaprogramming more here. lazyeval/inst/doc/lazyeval-old.html0000644000176200001440000005362013171753666017141 0ustar liggesusers Lazyeval: a new approach to NSE

Lazyeval: a new approach to NSE

2017-10-19

This document outlines my previous approach to non-standard evaluation (NSE). You should avoid it unless you are working with an older version of dplyr or tidyr.

There are three key ideas:

lazy()

The key tool that makes this approach possible is lazy(), an equivalent to substitute() that captures both expression and environment associated with a function argument:

library(lazyeval)
# Capture the expression supplied for `x` (or its default, `a - b`) together
# with its environment, instead of evaluating it.
f <- function(x = a - b) {
  lazy(x)
}
f()
#> <lazy>
#>   expr: a - b
#>   env:  <environment: 0x7fe58944e208>
f(a + b)
#> <lazy>
#>   expr: a + b
#>   env:  <environment: R_GlobalEnv>

As a complement to eval(), the lazyeval package provides lazy_eval(), which uses the environment associated with the lazy object:

a <- 10
b <- 1
lazy_eval(f())
#> [1] 9
lazy_eval(f(a + b))
#> [1] 11

The second argument to lazy_eval() is a list or data frame in which names are looked up first:

lazy_eval(f(), list(a = 1))
#> [1] 0

lazy_eval() also works with formulas, since they contain the same information as a lazy object: an expression (only the RHS is used by convention) and an environment:

lazy_eval(~ a + b)
#> [1] 11
# Return the formula `~ 10 + i`; the formula keeps the environment of this
# call, so the value of `i` is still available when it is evaluated later.
h <- function(i) ~ 10 + i
lazy_eval(h(1))
#> [1] 11

Standard evaluation

Whenever we need a function that does non-standard evaluation, always write the standard evaluation version first. For example, let’s implement our own version of subset():

# Standard-evaluation subset: `condition` is anything lazy_eval() accepts.
subset2_ <- function(df, condition) {
  # Evaluate the condition, looking names up in `df` first
  keep <- lazy_eval(condition, df)
  # An NA result counts as "does not match"
  keep <- keep & !is.na(keep)
  df[keep, , drop = FALSE]
}

subset2_(mtcars, lazy(mpg > 31))
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

lazy_eval() will always coerce its first argument into a lazy object, so a variety of specifications will work:

subset2_(mtcars, ~mpg > 31)
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1
subset2_(mtcars, quote(mpg > 31))
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1
subset2_(mtcars, "mpg > 31")
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

Note that quoted calls and strings don’t have environments associated with them, so as.lazy() defaults to using baseenv(). This will work if the expression is self-contained (i.e. doesn’t contain any references to variables in the local environment), and will otherwise fail quickly and robustly.

Non-standard evaluation

With the SE version in hand, writing the NSE version is easy. We just use lazy() to capture the unevaluated expression and corresponding environment:

# NSE front end: capture `condition` unevaluated (expression and environment)
# with lazy() and hand it to the standard-evaluation version.
subset2 <- function(df, condition) {
  subset2_(df, lazy(condition))
}
subset2(mtcars, mpg > 31)
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

This standard evaluation escape hatch is very important because it allows us to implement different NSE approaches. For example, we could create a subsetting function that finds all rows where a variable is above a threshold:

# Keep the rows of `df` where the expression given as `var` exceeds
# `threshold`.
above_threshold <- function(df, var, threshold) {
  # Fill in the template `~ var > x`: `var` becomes the captured expression,
  # `x` the value of `threshold`
  cond <- interp(~ var > x, var = lazy(var), x = threshold)
  subset2_(df, cond)
}
above_threshold(mtcars, mpg, 31)
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

Here we’re using interp() to modify a formula. We insert the value of threshold and the expression captured by var.

Scoping

Because lazy() captures the environment associated with the function argument, we automatically avoid a subtle scoping bug present in subset():

x <- 31
f1 <- function(...) {
  x <- 30  # local `x`; the global `x` is 31
  subset(mtcars, ...)
}
# Uses 30 instead of 31
f1(mpg > x)
#>     mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1
#> 19 30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2
#> 20 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1
#> 28 30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2

f2 <- function(...) {
  x <- 30  # local `x`; the global `x` is 31
  subset2(mtcars, ...)
}
# Correctly uses 31
f2(mpg > x)
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

lazy() has another advantage over substitute() - by default, it follows promises across function invocations. This simplifies the casual use of NSE.

x <- 31
g1 <- function(comp) {
  x <- 30  # local `x`; the global `x` is 31
  # Calling g1(mpg > x) fails here with "object 'mpg' not found"
  subset(mtcars, comp)
}
g1(mpg > x)
#> Error: object 'mpg' not found
g2 <- function(comp) {
  x <- 30  # local `x`; the global `x` is 31
  # subset2() captures its argument with lazy(), which follows promises
  # across function invocations, so the caller's expression is recovered
  subset2(mtcars, comp)
}
g2(mpg > x)
#>     mpg cyl disp hp drat    wt  qsec vs am gear carb
#> 18 32.4   4 78.7 66 4.08 2.200 19.47  1  1    4    1
#> 20 33.9   4 71.1 65 4.22 1.835 19.90  1  1    4    1

Note that g2() doesn’t have a standard-evaluation escape hatch, so it’s not suitable for programming with in the same way that subset2_() is.

Chained promises

Take the following example:

library(lazyeval)
# f1() captures its argument with lazy(); g1() only forwards its own
# argument to f1()
f1 <- function(x) lazy(x)
g1 <- function(y) f1(y)

g1(a + b)
#> <lazy>
#>   expr: a + b
#>   env:  <environment: R_GlobalEnv>

lazy() returns a + b because it always tries to find the top-level promise.

In this case the process looks like this:

  1. Find the object that x is bound to.
  2. It’s a promise, so find the expr it’s bound to (y, a symbol) and the environment in which it should be evaluated (the environment of g1()).
  3. Since x is bound to a symbol, look up its value: it’s bound to a promise.
  4. That promise has expression a + b and should be evaluated in the global environment.
  5. The expression is not a symbol, so stop.

Occasionally, you want to avoid this recursive behaviour, so you can use .follow_symbols = FALSE:

# Same pair as f1()/g1(), but lazy() is called with `.follow_symbols = FALSE`
f2 <- function(x) lazy(x, .follow_symbols = FALSE)
g2 <- function(y) f2(y)

g2(a + b)
#> <lazy>
#>   expr: x
#>   env:  <environment: 0x7fe589034c00>

Either way, if you evaluate the lazy expression you’ll get the same result:

a <- 10
b <- 1

lazy_eval(g1(a + b))
#> [1] 11
lazy_eval(g2(a + b))
#> [1] 11

Note that the resolution of chained promises only works with unevaluated objects. This is because R deletes the information about the environment associated with a promise when it has been forced, so that the garbage collector is allowed to remove the environment from memory in case it is no longer used. lazy() will fail with an error in such situations.

var <- 0

f3 <- function(x) {
  # Forcing the promise first means lazy() can no longer capture it;
  # lazy() then fails with "Promise has already been forced"
  force(x)
  lazy(x)
}

f3(var)
#> Error in lazy(x): Promise has already been forced
lazyeval/inst/doc/lazyeval.html0000644000176200001440000021265213171753670016362 0ustar liggesusers Non-standard evaluation

Non-standard evaluation

Hadley Wickham

2017-10-19

This document describes lazyeval, a package that provides principled tools to perform non-standard evaluation (NSE) in R. You should read this vignette if you want to program with packages like dplyr and ggplot2 (see footnote 1), or you want a principled way of working with delayed expressions in your own package. As the name suggests, non-standard evaluation breaks away from the standard evaluation (SE) rules in order to do something special. There are three common uses of NSE:

  1. Labelling enhances plots and tables by using the expressions supplied to a function, rather than their values. For example, note the axis labels in this plot:

    par(mar = c(4.5, 4.5, 1, 0.5))
    grid <- seq(0, 2 * pi, length = 100)
    plot(grid, sin(grid), type = "l")

  2. Non-standard scoping looks for objects in places other than the current environment. For example, base R has with(), subset(), and transform() that look for objects in a data frame (or list) before the current environment:

    df <- data.frame(x = c(1, 5, 4, 2, 3), y = c(2, 1, 5, 4, 3))
    
    with(df, mean(x))
    #> [1] 3
    subset(df, x == y)
    #>   x y
    #> 5 3 3
    transform(df, z = x + y)
    #>   x y z
    #> 1 1 2 3
    #> 2 5 1 6
    #> 3 4 5 9
    #> 4 2 4 6
    #> 5 3 3 6
  3. Metaprogramming is a catch-all term that covers all other uses of NSE (such as in bquote() and library()). Metaprogramming is so called because it involves computing on the unevaluated code in some way.

This document is broadly organised according to the three types of non-standard evaluation described above. The main difference is that after [labelling], we’ll take a detour to learn more about [formulas]. You’re probably familiar with formulas from linear models (e.g. lm(mpg ~ displ, data = mtcars)) but formulas are more than just a tool for modelling: they are a general way of capturing an unevaluated expression.

The approaches recommended here are quite different to my previous generation of recommendations. I am fairly confident these new approaches are correct, and will not have to change substantially again. The current tools make it easy to solve a number of practical problems that were previously challenging and are rooted in long-standing theory.

Labelling

In base R, the classic way to turn an argument into a label is to use deparse(substitute(x)):

# Turn the expression supplied as `x` into its text, without evaluating it.
my_label <- function(x) {
  captured <- substitute(x)
  deparse(captured)
}
my_label(x + y)
#> [1] "x + y"

There are two potential problems with this approach:

  1. For some long expressions, deparse() generates a character vector with length > 1:

    my_label({
      a + b
      c + d
    })
    #> [1] "{"         "    a + b" "    c + d" "}"
  2. substitute() only looks one level up, so you lose the original label if the function isn’t called directly:

    # Forwards `x` to my_label(); the resulting label is "x", not the
    # expression the caller typed
    my_label2 <- function(x) my_label(x)
    my_label2(a + b)
    #> [1] "x"

Both of these problems are resolved by lazyeval::expr_text():

# Same pair as before, now built on expr_text(): my_label2(a + b) yields
# "a + b", and a multi-line expression yields a single string
my_label <- function(x) expr_text(x)
my_label2 <- function(x) my_label(x)
   
my_label({
  a + b
  c + d
})
#> [1] "{\n    a + b\n    c + d\n}"
my_label2(a + b)
#> [1] "a + b"

There are two variations on the theme of expr_text():

  • expr_find() finds the underlying expression. It works similarly to substitute() but will follow a chain of promises back up to the original expression. This is often useful for [metaprogramming].

  • expr_label() is a customised version of expr_text() that produces labels designed to be used in messages to the user:

    expr_label(x)
    #> [1] "`x`"
    expr_label(a + b + c)
    #> [1] "`a + b + c`"
    expr_label(foo({
      x + y
    }))
    #> [1] "`foo(...)`"

Exercises

  1. plot() uses deparse(substitute(x)) to generate labels for the x and y axes. Can you generate input that causes it to display bad labels? Write your own wrapper around plot() that uses expr_label() to compute xlab and ylab.

  2. Create a simple implementation of mean() that stops with an informative error message if the argument is not numeric:

    x <- c("a", "b", "c")
    my_mean(x)
    #> Error: `x` is not a numeric vector.
    my_mean(x == "a")
    #> Error: `x == "a"` is not a numeric vector.
    my_mean("a")
    #> Error: "a" is not a numeric vector.
  3. Read the source code for expr_text(). How does it work? What additional arguments to deparse() does it use?

Formulas

Non-standard scoping is probably the most useful NSE tool, but before we can talk about a solid approach, we need to take a detour to talk about formulas. Formulas are a familiar tool from linear models, but their utility is not limited to models. In fact, formulas are a powerful, general purpose tool, because a formula captures two things:

  1. An unevaluated expression.
  2. The context (environment) in which the expression was created.

~ is a single character that allows you to say: “I want to capture the meaning of this code, without evaluating it right away”. For that reason, the formula can be thought of as a “quoting” operator.

Definition of a formula

Technically, a formula is a “language” object (i.e. an unevaluated expression) with a class of “formula” and an attribute that stores the environment:

f <- ~ x + y + z
typeof(f)
#> [1] "language"
attributes(f)
#> $class
#> [1] "formula"
#> 
#> $.Environment
#> <environment: R_GlobalEnv>

The structure of the underlying object is slightly different depending on whether you have a one-sided or two-sided formula:

  • One-sided formulas have length two:

    length(f)
    #> [1] 2
    # The 1st element is always ~
    f[[1]]
    #> `~`
    # The 2nd element is the RHS
    f[[2]]
    #> x + y + z
  • Two-sided formulas have length three:

    g <- y ~ x + z
    length(g)
    #> [1] 3
    # The 1st element is still ~
    g[[1]]
    #> `~`
    # But now the 2nd element is the LHS
    g[[2]]
    #> y
    # And the 3rd element is the RHS
    g[[3]]
    #> x + z

To abstract away these differences, lazyeval provides f_rhs() and f_lhs() to access either side of the formula, and f_env() to access its environment:

f_rhs(f)
#> x + y + z
f_lhs(f)
#> NULL
f_env(f)
#> <environment: R_GlobalEnv>

f_rhs(g)
#> x + z
f_lhs(g)
#> y
f_env(g)
#> <environment: R_GlobalEnv>

Evaluating a formula

A formula captures an expression and delays its evaluation, so you can later evaluate it with f_eval():

f <- ~ 1 + 2 + 3
f
#> ~1 + 2 + 3
f_eval(f)
#> [1] 6

This allows you to use a formula as a robust way of delaying evaluation, cleanly separating the creation of the formula from its evaluation. Because formulas capture the code and context, you get the correct result even when a formula is created and evaluated in different places. In the following example, note that the value of x inside add_1000() is used:

x <- 1
# Return the formula `~ 1000 + x`; its environment is this call's
# environment, so `x` refers to the argument, not to any global `x`.
add_1000 <- function(x) ~ 1000 + x

add_1000(3)
#> ~1000 + x
#> <environment: 0x7fe5890d4190>
f_eval(add_1000(3))
#> [1] 1003

It can be hard to see what’s going on when looking at a formula because important values are stored in the environment, which is largely opaque. You can use f_unwrap() to replace names with their corresponding values:

f_unwrap(add_1000(3))
#> ~1000 + 3

Non-standard scoping

f_eval() has an optional second argument: a named list (or data frame) that overrides values found in the formula’s environment.

y <- 100
f_eval(~ y)
#> [1] 100
f_eval(~ y, data = list(y = 10))
#> [1] 10

# Can mix variables in environment and data argument
f_eval(~ x + y, data = list(x = 10))
#> [1] 110
# Can even supply functions
f_eval(~ f(y), data = list(f = function(x) x * 3))
#> [1] 300

This makes it very easy to implement non-standard scoping:

f_eval(~ mean(cyl), data = mtcars)
#> [1] 6.1875

One challenge with non-standard scoping is that we’ve introduced some ambiguity. For example, in the code below does x come from mydata or the environment?

f_eval(~ x, data = mydata)

You can’t tell without knowing whether or not mydata has a variable called x. To overcome this problem, f_eval() provides two pronouns:

  • .data is bound to the data frame.
  • .env is bound to the formula environment.

They both start with . to minimise the chances of clashing with existing variables.

With these pronouns we can rewrite the previous formula to remove the ambiguity:

mydata <- data.frame(x = 100, y = 1)
x <- 10

f_eval(~ .env$x, data = mydata)
#> [1] 10
f_eval(~ .data$x, data = mydata)
#> [1] 100

If the variable or object doesn’t exist, you’ll get an informative error:

f_eval(~ .env$z, data = mydata)
#> Error: Object 'z' not found in environment
f_eval(~ .data$z, data = mydata)
#> Error: Variable 'z' not found in data

Unquoting

f_eval() has one more useful trick up its sleeve: unquoting. Unquoting allows you to write functions where the user supplies part of the formula. For example, the following function allows you to compute the mean of any column (or any function of a column):

# Mean of a column (or of any expression of columns) of `df`.
# `variable` is a one-sided formula; uq() inserts its right-hand side into
# the template, which is then evaluated with `df` as the data.
df_mean <- function(df, variable) {
  f_eval(~ mean(uq(variable)), data = df)
}

df_mean(mtcars, ~ cyl)
#> [1] 6.1875
df_mean(mtcars, ~ disp * 0.01638)
#> [1] 3.779224
df_mean(mtcars, ~ sqrt(mpg))
#> [1] 4.43477

To see how this works, we can use f_interp() which f_eval() calls internally (you shouldn’t call it in your own code, but it’s useful for debugging). The key is uq(): uq() evaluates its first (and only) argument and inserts the value into the formula:

variable <- ~cyl
f_interp(~ mean(uq(variable)))
#> ~mean(cyl)

variable <- ~ disp * 0.01638
f_interp(~ mean(uq(variable)))
#> ~mean(disp * 0.01638)

Unquoting allows you to create code “templates”, where you write most of the expression, while still allowing the user to control important components. You can even use uq() to change the function being called:

f <- ~ mean
f_interp(~ uq(f)(uq(variable)))
#> ~mean(disp * 0.01638)

Note that uq() only takes the RHS of a formula, which makes it difficult to insert literal formulas into a call:

formula <- y ~ x
f_interp(~ lm(uq(formula), data = df))
#> ~lm(x, data = df)

You can instead use uqf() which uses the whole formula, not just the RHS:

f_interp(~ lm(uqf(formula), data = df))
#> ~lm(y ~ x, data = df)

Unquoting is powerful, but it only allows you to modify a single argument: it doesn’t allow you to add an arbitrary number of arguments. To do that, you’ll need “unquote-splice”, or uqs(). The first (and only) argument to uqs() should be a list of arguments to be spliced into the call:

variable <- ~ x
extra_args <- list(na.rm = TRUE, trim = 0.9)
f_interp(~ mean(uq(variable), uqs(extra_args)))
#> ~mean(x, na.rm = TRUE, trim = 0.9)

Exercises

  1. Create a wrapper around lm() that allows the user to supply the response and predictors as two separate formulas.

  2. Compare and contrast f_eval() with with().

  3. Why does this code work even though f is defined in two places? (And one of them is not a function).

    f <- function(x) x + 1
    f_eval(~ f(10), list(f = "a"))
    #> [1] 11

Non-standard scoping

Non-standard scoping (NSS) is an important part of R because it makes it easy to write functions tailored for interactive data exploration. These functions require less typing, at the cost of some ambiguity and “magic”. This is a good trade-off for interactive data exploration because you want to get ideas out of your head and into the computer as quickly as possible. If a function does make a bad guess, you’ll spot it quickly because you’re working interactively.

There are three challenges to implementing non-standard scoping:

  1. You must correctly delay the evaluation of a function argument, capturing both the computation (the expression), and the context (the environment). I recommend making this explicit by requiring the user to “quote” any NSS arguments with ~, and then evaluating explicitly with f_eval().

  2. When writing functions that use NSS-functions, you need some way to avoid the automatic lookup and be explicit about where objects should be found. f_eval() solves this problem with the .data and .env pronouns.

  3. You need some way to allow the user to supply parts of a formula. f_eval() solves this with unquoting.

To illustrate these challenges, I will implement a sieve() function that works similarly to base::subset() or dplyr::filter(). The goal of sieve() is to make it easy to select observations that match criteria defined by a logical expression. sieve() has three advantages over [:

  1. It is much more compact when the condition uses many variables, because you don’t need to repeat the name of the data frame many times.

  2. It drops rows where the condition evaluates to NA, rather than filling them with NAs.

  3. It always returns a data frame.

The implementation of sieve() is straightforward. First we use f_eval() to perform NSS. Then we check that we have a logical vector, replace NAs with FALSE, and subset with [.

# Keep the rows of `df` for which `condition` (a formula) is TRUE.
# Rows where the condition is NA are dropped; the result is always a
# data frame.
sieve <- function(df, condition) {
  # Evaluate the formula with `df` supplied as the data
  keep <- f_eval(condition, df)
  if (!is.logical(keep)) {
    stop("`condition` must be logical.", call. = FALSE)
  }

  # Treat NA as FALSE before subsetting
  df[replace(keep, is.na(keep), FALSE), , drop = FALSE]
}

df <- data.frame(x = 1:5, y = 5:1)
sieve(df, ~ x <= 2)
#>   x y
#> 1 1 5
#> 2 2 4
sieve(df, ~ x == y)
#>   x y
#> 3 3 3

Programming with sieve()

Imagine that you’ve written some code that looks like this:

sieve(march, ~ x > 100)
sieve(april, ~ x > 50)
sieve(june, ~ x > 45)
sieve(july, ~ x > 17)

(This is a contrived example, but it illustrates all of the important issues you’ll need to consider when writing more useful functions.)

Instead of continuing to copy-and-paste your code, you decide to wrap up the common behaviour in a function:

# Keep rows of `df` where `x` exceeds `threshold`.
# Neither name is qualified here, so f_eval() looks each one up in two
# places: the supplied data and the formula's environment.
threshold_x <- function(df, threshold) {
  sieve(df, ~ x > threshold)
}
threshold_x(df, 3)
#>   x y
#> 4 4 2
#> 5 5 1

There are two ways that this function might fail:

  1. The data frame might not have a variable called x. This will fail unless there’s a variable called x hanging around in the global environment:

    rm(x)
    df2 <- data.frame(y = 5:1)
    
    # Throws an error
    threshold_x(df2, 3)
    #> Error in eval(expr, data, expr_env): object 'x' not found
    
    # Silently gives the incorrect result!
    x <- 5
    threshold_x(df2, 3)
    #>   y
    #> 1 5
    #> 2 4
    #> 3 3
    #> 4 2
    #> 5 1
  2. The data frame might have a variable called threshold:

    df3 <- data.frame(x = 1:5, y = 5:1, threshold = 4)
    threshold_x(df3, 3)
    #>   x y threshold
    #> 5 5 1         4

These failures are particularly pernicious because instead of throwing an error they silently produce the wrong answer. Both failures arise because f_eval() introduces ambiguity by looking in two places for each name: the supplied data and formula environment.

To make threshold_x() more reliable, we need to be more explicit by using the .data and .env pronouns:

# Unambiguous version: `.data$x` must be a column of `df`, and
# `.env$threshold` must come from the formula's environment (this function).
threshold_x <- function(df, threshold) {
  sieve(df, ~ .data$x > .env$threshold)
}

threshold_x(df2, 3)
#> Error: Variable 'x' not found in data
threshold_x(df3, 3)
#>   x y threshold
#> 4 4 2         4
#> 5 5 1         4

Here .env is bound to the environment where ~ is evaluated, namely the inside of threshold_x().

Adding arguments

The threshold_x() function is not very useful because it’s bound to a specific variable. It would be more powerful if we could vary both the threshold and the variable it applies to. We can do that by taking an additional argument to specify which variable to use.

One simple approach is to use a string and [[:

# Keep rows of `df` where the column named by the string `variable` exceeds
# `threshold`.
threshold <- function(df, variable, threshold) {
  # `variable` must be a single string
  stopifnot(is.character(variable), length(variable) == 1)
  
  # The column is taken from the data; the column name and the threshold are
  # taken from the formula's environment (this function)
  sieve(df, ~ .data[[.env$variable]] > .env$threshold)
}
threshold(df, "x", 4)
#>   x y
#> 5 5 1

This is a simple and robust solution, but only allows us to use an existing variable, not an arbitrary expression like sqrt(x).

A more general solution is to allow the user to supply a formula, and use unquoting:

# `variable` is a one-sided formula; uq() inserts its right-hand side into
# the comparison, so any expression of the columns can be used.
threshold <- function(df, variable = ~x, threshold = 0) {
  sieve(df, ~ uq(variable) > .env$threshold)
}

threshold(df, ~ x, 4)
#>   x y
#> 5 5 1
threshold(df, ~ abs(x - y), 2)
#>   x y
#> 1 1 5
#> 5 5 1

In this case, it’s the responsibility of the user to ensure the variable is specified unambiguously. f_eval() is designed so that .data and .env work even when evaluated inside of uq():

x <- 3
threshold(df, ~ .data$x - .env$x, 0)
#>   x y
#> 4 4 2
#> 5 5 1

Dot-dot-dot

There is one more tool that you might find useful for functions that take .... For example, the code below implements a function similar to dplyr::mutate() or base::transform().

# Add or modify columns of a data frame.
#
# `_df` is the data frame; every argument in `...` must be a named formula.
# Formulas are evaluated in order with f_eval(), each against the data frame
# as modified so far, so later formulas can use columns created by earlier
# ones. Returns the modified data frame.
mogrify <- function(`_df`, ...) {
  args <- list(...)
  nms <- names(args)

  # Unnamed arguments have no column to be assigned to; fail loudly instead
  # of silently ignoring them
  if (length(args) > 0 && (is.null(nms) || any(!nzchar(nms)))) {
    stop("All arguments in `...` must be named.", call. = FALSE)
  }

  # Iterate by position, not by name: `args[[nm]]` always returns the first
  # match, so a repeated name would evaluate the first formula twice
  for (i in seq_along(args)) {
    `_df`[[nms[i]]] <- f_eval(args[[i]], `_df`)
  }

  `_df`
}

(NB: the first argument is a non-syntactic name (i.e. it requires quoting with `) so it doesn’t accidentally match one of the names of the new variables.)

mogrify() makes it easy to add new variables to a data frame:

df <- data.frame(x = 1:5, y = sample(5))
mogrify(df, z = ~ x + y, z2 = ~ z * 2)
#>   x y z z2
#> 1 1 3 4  8
#> 2 2 4 6 12
#> 3 3 1 4  8
#> 4 4 5 9 18
#> 5 5 2 7 14

One problem with this implementation is that it’s hard to specify the names of the generated variables. Imagine you want a function where the name and expression are in separate variables. This is awkward because the variable name is supplied as an argument name to mogrify():

# Add one column to `df`: `name` is the column name (a string) and `expr`
# the formula that computes it.
add_variable <- function(df, name, expr) {
  # mogrify() takes the column name as an argument name, so build the
  # argument list by hand and call it via do.call()
  new_column <- setNames(list(expr), name)
  do.call("mogrify", c(list(df), new_column))
}
add_variable(df, "z", ~ x + y)
#>   x y z
#> 1 1 3 4
#> 2 2 4 6
#> 3 3 1 4
#> 4 4 5 9
#> 5 5 2 7

Lazyeval provides the f_list() function to make writing this sort of function a little easier. It takes a list of formulas and evaluates the LHS of each formula (if present) to rename the elements:

f_list("x" ~ y, z = ~z)
#> $x
#> ~y
#> 
#> $z
#> ~z

If we tweak mogrify() to use f_list() instead of list():

# Add or modify columns of a data frame.
#
# `_df` is the data frame; `...` holds formulas, named either by argument
# name or by the formula's LHS (resolved by f_list()). Formulas are
# evaluated in order with f_eval(), each against the data frame as modified
# so far. Returns the modified data frame.
mogrify <- function(`_df`, ...) {
  args <- f_list(...)
  nms <- names(args)

  # Every formula needs a name to be assigned to
  if (length(args) > 0 && (is.null(nms) || any(!nzchar(nms)))) {
    stop("All arguments in `...` must be named.", call. = FALSE)
  }

  # Iterate by position, not by name: `args[[nm]]` always returns the first
  # match, so a repeated name would evaluate the first formula twice
  for (i in seq_along(args)) {
    `_df`[[nms[i]]] <- f_eval(args[[i]], `_df`)
  }

  `_df`
}

add_variable() becomes much simpler:

# Add one column to `df`. The LHS `name` is evaluated to give the column
# name, and uq() inserts the right-hand side of the formula `expr`.
add_variable <- function(df, name, expr) {
  mogrify(df, name ~ uq(expr))
}
add_variable(df, "z", ~ x + y)
#>   x y z
#> 1 1 3 4
#> 2 2 4 6
#> 3 3 1 4
#> 4 4 5 9
#> 5 5 2 7

Exercises

  1. Write a function that selects all rows of df where variable is greater than its mean. Make the function more general by allowing the user to specify a function to use instead of mean() (e.g. median()).

  2. Create a version of mogrify() where the first argument is x. What happens if you try to create a new variable called x?

Non-standard evaluation

In some situations you might want to eliminate the formula altogether, and allow the user to type expressions directly. I was once much enamoured with this approach (witness ggplot2, dplyr, …). However, I now think that it should be used sparingly because explicit quoting with ~ leads to simpler code, and makes it more clear to the user that something special is going on.

That said, lazyeval does allow you to eliminate the ~ if you really want to. In this case, I recommend having both an NSE and an SE version of the function. The SE version, which takes formulas, should have suffix _:

# Standard-evaluation version: `condition` is a formula.
# Keeps the rows of `df` for which the condition is TRUE; rows where it is
# NA are dropped, and the result is always a data frame.
sieve_ <- function(df, condition) {
  # Evaluate the formula with `df` supplied as the data
  keep <- f_eval(condition, df)
  if (!is.logical(keep)) {
    stop("`condition` must be logical.", call. = FALSE)
  }

  # Treat NA as FALSE before subsetting
  df[replace(keep, is.na(keep), FALSE), , drop = FALSE]
}

Then create the NSE version which doesn’t need the explicit formula. The key is the use of f_capture() which takes an unevaluated argument (a promise) and captures it as a formula:

# NSE front end: f_capture() turns the unevaluated `expr` argument into a
# formula, which is passed on to the SE version.
sieve <- function(df, expr) {
  sieve_(df, f_capture(expr))
}
sieve(df, x == 1)
#>   x y
#> 1 1 3

If you’re familiar with substitute() you might expect the same drawbacks to apply. However, f_capture() is smart enough to follow a chain of promises back to the original value, so, for example, this code works fine:

# Return the rows of `df` in random order (always as a data frame).
scramble <- function(df) {
  shuffled <- sample(nrow(df))
  df[shuffled, , drop = FALSE]
}
# Filter, then shuffle. `expr` is passed to sieve() untouched; f_capture()
# there follows the chain of promises back to what the user typed.
subscramble <- function(df, expr) {
  scramble(sieve(df, expr))
}
subscramble(df, x < 4)
#>   x y
#> 3 3 1
#> 2 2 4
#> 1 1 3

Dot-dot-dot

If you want a ... function that doesn’t require formulas, I recommend that the SE version take a list of arguments, and the NSE version uses dots_capture() to capture multiple arguments as a list of formulas.

# Standard-evaluation version of mogrify(): add or replace columns of `_df`.
#
# `args` is a list of formulas (passed through as_f_list()). Each element's
# name is the column that receives the result of evaluating that formula
# with f_eval(), using the data frame built so far as the data. Elements
# are processed in order, so a later formula can use a column created by an
# earlier one. Every element must be named.
#
# Returns the modified data frame.
mogrify_ <- function(`_df`, args) {
  args <- as_f_list(args)

  nms <- names(args)
  if (is.null(nms)) {
    nms <- rep("", length(args))
  }
  if (any(is.na(nms) | nms == "")) {
    stop("All elements of `args` must be named.", call. = FALSE)
  }

  # Iterate by position rather than by name: `args[[nm]]` always returns the
  # first element called `nm`, so with duplicated names the first formula
  # would be evaluated repeatedly instead of each one in turn.
  for (i in seq_along(args)) {
    `_df`[[nms[[i]]]] <- f_eval(args[[i]], `_df`)
  }

  `_df`
}

# Non-standard-evaluation front end for mogrify_(): the `...` arguments are
# captured as a list of formulas with dots_capture() and passed on.
mogrify <- function(`_df`, ...) {
  formulas <- dots_capture(...)
  mogrify_(`_df`, formulas)
}

Exercises

  1. Recreate subscramble() using base::subset() instead of sieve(). Why does it fail?

Metaprogramming

The final use of non-standard evaluation is to do metaprogramming. This is a catch-all term that encompasses any function that does computation on an unevaluated expression. You can learn about metaprogramming in http://adv-r.had.co.nz/Expressions.html, particularly http://adv-r.had.co.nz/Expressions.html#ast-funs. Over time, the goal is to move all useful metaprogramming helper functions into this package, and discuss metaprogramming more here.


  1. Currently neither ggplot2 nor dplyr actually use these tools since I’ve only just figured it out. But I’ll be working hard to make sure all my packages are consistent in the near future.

lazyeval/tests/0000755000176200001440000000000012472613077013254 5ustar liggesuserslazyeval/tests/testthat.R0000644000176200001440000000007412467622500015233 0ustar liggesuserslibrary(testthat) library(lazyeval) test_check("lazyeval") lazyeval/tests/testthat/0000755000176200001440000000000013175456530015114 5ustar liggesuserslazyeval/tests/testthat/test-complain.R0000644000176200001440000000166312726271770020026 0ustar liggesuserscontext("complain") test_that("NULL return unchanged", { expect_identical(complain(NULL), NULL) }) test_that("can't access non-existent list members", { x1 <- list(y = 1) x2 <- complain(x1) expect_equal(x2$y, 1) expect_error(x2$z, "object 'z' not found") expect_error(x2[["z"]], "object 'z' not found") }) test_that("can't access non-existent environment components", { x1 <- list2env(list(y = 1)) x2 <- complain(x1) expect_equal(x2$y, 1) expect_error(x2$z, "object 'z' not found") expect_error(x2[["z"]], "object 'z' not found") }) test_that("can't use non-character vectors", { x <- complain(list(y = 1)) expect_error(x[[1]], "subset with a string") expect_error(x[[c("a", "b")]], "subset with a string") }) test_that("complain doesn't taint env class", { x1 <- list2env(list(y = 1)) x2 <- complain(x1) expect_equal(class(x1), "environment") expect_equal(class(x2), c("complain", "environment")) }) lazyeval/tests/testthat/test-f-eval.R0000644000176200001440000000341013004065022017343 0ustar liggesuserscontext("f_eval") test_that("first argument must be a function", { expect_error(f_eval(10), "`f` is not a formula") }) test_that("f_eval uses formula's environment", { x <- 10 f <- local({ y <- 100 ~ x + y }) expect_equal(f_eval(f), 110) }) test_that("data needs to be a list", { expect_error(f_eval(~ x, 10), "Do not know how to find data") }) test_that("looks first in `data`", { x <- 10 data <- list(x = 100) expect_equal(f_eval(~ x, data), 100) }) test_that("pronouns resolve ambiguity looks first in `data`", { x <- 10 data <- list(x = 100) 
expect_equal(f_eval(~ .data$x, data), 100) expect_equal(f_eval(~ .env$x, data), 10) }) test_that("pronouns complain about missing values", { expect_error(f_eval(~ .data$x, list()), "Variable 'x' not found in data") expect_error(f_eval(~ .env$`__`, list()), "Object '__' not found in environment") }) test_that("f_eval does quasiquoting", { x <- 10 expect_equal(f_eval(~ uq(quote(x))), 10) }) test_that("unquoted formulas look in their own env", { f <- function() { n <- 100 ~ n } n <- 10 expect_equal(f_eval(~ uq(f())), 10) }) test_that("unquoted formulas can use data", { f1 <- function() { z <- 100 ~ x + z } f2 <- function() { z <- 100 ~ .data$x + .env$z } z <- 10 expect_equal(f_eval(~ uq(f1()), data = list(x = 1)), 101) expect_equal(f_eval(~ uq(f2()), data = list(x = 1)), 101) }) test_that("f_eval_lhs uses lhs", { f <- 1 ~ 2 expect_equal(f_eval_lhs(f), 1) }) # find_data --------------------------------------------------------------- test_that("find data works for NULL, lists, and data frames", { expect_equal(find_data(NULL), list()) expect_equal(find_data(list(x = 1)), list(x = 1)) expect_equal(find_data(mtcars), mtcars) }) lazyeval/tests/testthat/test-names.R0000644000176200001440000000035612467622500017316 0ustar liggesuserscontext("names") test_that("auto_name does not truncate symbols (#19)", { long_name <- quote(AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA) dots <- as.lazy_dots(long_name) expect_equal(auto_names(dots), as.character(long_name)) }) lazyeval/tests/testthat/test-dots.R0000644000176200001440000000073013013622611017147 0ustar liggesusers context("lazy_dots") test_that("lazy_dots works with no args", { l1 <- lazy_dots() l2 <- lazy_dots(.follow_symbols = TRUE) expect_equal(l1, structure(list(), class = "lazy_dots")) expect_equal(l2, structure(list(), class = "lazy_dots")) }) test_that(".ignore_empty drops empty arguments", { l1 <- lazy_dots(, 1,) l2 <- lazy_dots(, 1, , .ignore_empty = TRUE) expect_equal(length(l1), 3) expect_equal(length(l2), 1) 
expect_equal(l2[[1]]$expr, 1) }) lazyeval/tests/testthat/test-formula.R0000644000176200001440000000313313004065022017640 0ustar liggesuserscontext("formula") # Creation ---------------------------------------------------------------- test_that("env must be an environment", { expect_error(f_new(quote(a), env = list()), "must be an environment") }) test_that("equivalent to ~", { f1 <- ~abc f2 <- f_new(quote(abc)) expect_identical(f1, f2) }) test_that("is_formula works", { expect_true(is_formula(~10)) expect_false(is_formula(10)) }) # Getters ----------------------------------------------------------------- test_that("throws errors for bad inputs", { expect_error(f_rhs(1), "not a formula") expect_error(f_rhs(`~`()), "Invalid formula") expect_error(f_rhs(`~`(1, 2, 3)), "Invalid formula") expect_error(f_lhs(1), "not a formula") expect_error(f_lhs(`~`()), "Invalid formula") expect_error(f_lhs(`~`(1, 2, 3)), "Invalid formula") expect_error(f_env(1), "not a formula") }) test_that("extracts call, name, or scalar", { expect_identical(f_rhs(~ x), quote(x)) expect_identical(f_rhs(~ f()), quote(f())) expect_identical(f_rhs(~ 1L), 1L) }) # Setters ----------------------------------------------------------------- test_that("can replace RHS of one-sided formula", { f <- ~ x1 f_rhs(f) <- quote(x2) expect_equal(f, ~ x2) }) test_that("can replace both sides of two-sided formula", { f <- x1 ~ y1 f_lhs(f) <- quote(x2) f_rhs(f) <- quote(y2) expect_equal(f, x2 ~ y2) }) test_that("can remove lhs of two-sided formula", { f <- x ~ y f_lhs(f) <- NULL expect_equal(f, ~ y) }) test_that("can modify environment", { f <- x ~ y env <- new.env() f_env(f) <- env expect_equal(f_env(f), env) }) lazyeval/tests/testthat/test-f-capture.R0000644000176200001440000000103212726271770020100 0ustar liggesuserscontext("f_capture") test_that("explicit promise makes a formula", { f1 <- f_capture(1 + 2 + 3) f2 <- ~ 1 + 2 + 3 expect_equal(f1, f2) }) test_that("explicit promise works several levels deep", { f <- 
function(x) g(x) g <- function(y) h(y) h <- function(z) f_capture(z) f1 <- f(1 + 2 + 3) f2 <- ~ 1 + 2 + 3 expect_equal(f1, f2) }) test_that("explicit dots makes a list of formulas", { fs <- dots_capture(x = 1 + 2, y = 2 + 3) f1 <- ~ 1 + 2 f2 <- ~ 2 + 3 expect_equal(fs$x, f1) expect_equal(fs$y, f2) }) lazyeval/tests/testthat/test-lazy.R0000644000176200001440000000240513046425567017177 0ustar liggesuserscontext("lazy") lazy_caller <- function(arg) { lazy(arg) } outer_fun <- function(arg) { lazy_caller(arg) } test_that("basic lazy() functionality works", { expect_equal(lazy_caller(0)$expr, 0) expect_equal(lazy_caller("char")$expr, "char") expect_equal(lazy_caller(sym)$expr, as.name("sym")) expect_equal(lazy_caller(call("name"))$expr, quote(call("name"))) }) test_that("lazy() works with nested promises", { expect_equal(outer_fun(0)$expr, 0) expect_equal(outer_fun("char")$expr, "char") expect_equal(outer_fun(sym)$expr, as.name("sym")) expect_equal(outer_fun(call("name"))$expr, quote(call("name"))) }) test_that("lazy() does not unpack lazily loaded objects", { lazy <- lazy_caller(mean) expect_equal(deparse(lazy$expr), "mean") nested_lazy <- outer_fun(mean) expect_equal(deparse(lazy$expr), "mean") outer_fun2 <- function() { list( lazy = lazy_caller(mean), env = environment() ) } embedded_lazy <- outer_fun2() expect_identical(embedded_lazy$lazy$expr, as.name("mean")) expect_identical(embedded_lazy$lazy$env, embedded_lazy$env) }) test_that("lazy() works for double-colon operator", { expect_error(lazy <- lazy_caller(stats::runif(10)), NA) expect_error(nested_lazy <- outer_fun(stats::runif(10)), NA) }) lazyeval/tests/testthat/ast-irregular.txt0000644000176200001440000000005513171350433020425 0ustar liggesusers┗ () ┗ `foo ┗ lazyeval/tests/testthat/test-ast.R0000644000176200001440000000106412726271770017006 0ustar liggesuserscontext("ast") test_that("common cases are as expected", { skip_on_cran() # because of unicode comparison problems. 
x <- list( 1, quote(x), quote(a + b), quote(function(x = 1, y = a + b, z) { c + d }) ) expect_output_file(ast_(x), "ast-sample.txt", update = TRUE) }) test_that("can print trees that can't be generated from text source", { skip_on_cran() # because of unicode comparison problems. x <- quote(foo()) x[[2]] <- mtcars x[[3]] <- 1:10 expect_output_file(ast_(x), "ast-irregular.txt", update = TRUE) }) lazyeval/tests/testthat/test-function.R0000644000176200001440000000036412726271770020046 0ustar liggesuserscontext("function") test_that("function_new equivalent to regular function", { f1 <- function(x = a + b, y) { x + y } attr(f1, "srcref") <- NULL f2 <- function_new(alist(x = a + b, y =), quote({x + y})) expect_equal(f1, f2) }) lazyeval/tests/testthat/test-expr.R0000644000176200001440000000323213046425567017175 0ustar liggesuserscontext("expr") # expr_find --------------------------------------------------------------- test_that("doesn't go pass lazy loaded objects", { expect_identical(expr_find(mtcars), quote(mtcars)) }) test_that("follows multiple promises", { f <- function(x) g(x) g <- function(y) h(y) h <- function(z) expr_find(z) expect_identical(f(x + y), quote(x + y)) }) # expr_env ---------------------------------------------------------------- test_that("follows multiple promises", { f <- function(x) g(x) g <- function(y) h(y) h <- function(z) expr_env(z) expect_identical(h(x + y), environment()) }) test_that("throws error if promise forced", { f <- function(x) { force(x) expr_env(x) } expect_error(f(10), "already been forced") }) test_that("or can return default env", { env <- new.env(parent = emptyenv()) f <- function(x) { force(x) expr_env(x, env) } expect_identical(f(10), env) }) # expr_text --------------------------------------------------------------- test_that("always returns single string", { out <- expr_text({ a + b }) expect_length(out, 1) }) test_that("can truncate lines", { out <- expr_text({ a + b }, nlines = 2) expect_equal(out, "{\n...") }) # 
expr_label -------------------------------------------------------------- test_that("quotes strings", { expect_equal(expr_label("a"), '"a"') expect_equal(expr_label("\n"), '"\\n"') }) test_that("backquotes names", { expect_equal(expr_label(x), "`x`") }) test_that("converts atomics to strings", { expect_equal(expr_label(0.5), "0.5") }) test_that("truncates long calls", { expect_equal(expr_label({ a + b }), "`{\n ...\n}`") }) lazyeval/tests/testthat/test-f-unwrap.R0000644000176200001440000000073212726271770017757 0ustar liggesuserscontext("f_unwrap") test_that("f_unwrap substitutes values", { n <- 100 f1 <- f_unwrap(~ x + n) f2 <- f_new(quote(x + 100), env = parent.env(environment())) expect_identical(f1, f2) }) test_that("f_unwrap substitutes even in globalenv", { .GlobalEnv$`__1` <- 1 expect_equal(f_rhs(f_unwrap(f_new(quote(`__1`), env = globalenv()))), 1) }) test_that("doesn't go past empty env", { f <- f_new(quote(x == y), env = emptyenv()) expect_equal(f_unwrap(f), f) }) lazyeval/tests/testthat/ast-sample.txt0000644000176200001440000000033513171350433017713 0ustar liggesusers┗ 1 ┗ `x ┗ () ┗ `+ ┗ `a ┗ `b ┗ () ┗ `function ┗ [] ┗ x = 1 ┗ y =() ┗ `+ ┗ `a ┗ `b ┗ z =`MISSING ┗ () ┗ `{ ┗ () ┗ `+ ┗ `c ┗ `d ┗ lazyeval/tests/testthat/test-f-list.R0000644000176200001440000000242312726271770017415 0ustar liggesuserscontext("f_list") test_that("input must be a list", { expect_error(as_f_list(1), "must be a list") }) test_that("LHS must evaluate to a string", { expect_error(f_list(1 ~ x), "must evaluate to a string or name") expect_error(f_list(letters ~ x), "must evaluate to a single string") expect_error(f_list(x ~ x ~ z), "must be a single-sided formula") }) test_that("regular elements are left as is", { expect_equal(f_list(x = 1:10), list(x = 1:10)) expect_equal(f_list(x = ~x), list(x = ~x)) }) test_that("output is actually a formula", { out <- f_list(x = ~x)[[1]] expect_s3_class(out, "formula") expect_identical(attr(out, ".Environment"), environment()) }) 
test_that("output always has names", { out <- f_list(1, 2, 3) expect_equal(names(out), c("", "", "")) }) test_that("names taken from LHS of formula", { out1 <- f_list("x" ~ y) out2 <- f_list(quote(x) ~ y) var <- ~x out3 <- f_list(var ~ y); out3 expect_equal(out1, list(x = ~y)) expect_equal(out2, list(x = ~y)) expect_equal(out3, list(x = ~y)) }) test_that("null LHS leaves names unchanged", { expect_equal(f_list(x = NULL ~ y), list(x = ~y)) }) test_that("LHS evaluated in formula environment", { f <- function(x) { paste0(x, 1) ~ y } expect_equal(f_list(f("y")), list(y1 = ~ y)) }) lazyeval/tests/testthat/test-f-interp.R0000644000176200001440000000323312726271770017743 0ustar liggesuserscontext("f_interp") test_that("protected against bad inputs", { f <- ~ x + 1 attr(f, ".Environment") <- 10 expect_error(f_interp(f), "must be an environment") }) test_that("interp produces single string for character inputs", { x <- interp("aaaaaaaaaaaaaa + bbbbbbbbbbbbbbb + ccccccccccccccccc + dddddddddddddddd + eeeeeeeeeeeeeee") expect_is(x, "character") expect_equal(length(x), 1) }) test_that("can interpolate from environment", { env <- new.env(parent = emptyenv()) env$a <- 10 out <- interp(~ f(a), .values = env) expect_identical(out, ~f(10)) }) # uq ---------------------------------------------------------------------- test_that("evaluates contents of uq()", { expect_equal(f_interp(~ uq(1 + 2)), ~ 3) }) test_that("unquoted formulas are interpolated first", { f <- function(n) { ~ x + uq(n) } n <- 100 expect_equal(f_interp(~ uq(f(10))), ~ x + 10) }) # uqs --------------------------------------------------------------------- test_that("contents of uqs() must be a vector", { expr <- ~ 1 + uqs(environment()) expect_error(f_interp(expr), "`x` must be a vector") }) test_that("values of uqs() spliced into expression", { expr <- ~ f(a, uqs(list(quote(b), quote(c))), d) expect_identical(f_interp(expr), ~ f(a, b, c, d)) }) test_that("names within uqs() are preseved", { expr <- ~ f(uqs(list(a = 
quote(b)))) expect_identical(f_interp(expr), ~ f(a = b)) }) # uqf --------------------------------------------------------------------- test_that("requires formula", { expect_error(f_interp(~ uqf(10)), "must be a formula") }) test_that("interpolates formula", { expect_equal(f_interp(~ uqf(x ~ y)), ~ (x ~ y)) }) lazyeval/tests/testthat/test-call.R0000644000176200001440000000245113011153647017121 0ustar liggesuserscontext("call") # Creation ---------------------------------------------------------------- test_that("character vector must be length 1", { expect_error(call_new(letters), "must be length 1") }) test_that("args can be specified individually or as list", { out <- call_new("f", a = 1, .args = list(b = 2)) expect_equal(out, quote(f(a = 1, b = 2))) }) # Standardisation --------------------------------------------------------- test_that("can standardise base function", { out <- call_standardise(quote(matrix(nro = 3, 1:9))) expect_equal(out, quote(matrix(data = 1:9, nrow = 3))) }) test_that("can standardise local function", { foo <- function(bar, baz) {} out <- call_standardise(quote(foo(baz = 1, 4))) expect_equal(out, quote(foo(bar = 4, baz = 1))) }) # Modification ------------------------------------------------------------ test_that("all args must be named", { call <- quote(matrix(1:10)) expect_error(call_modify(call, list(1)), "must be named") }) test_that("new args inserted at end", { call <- quote(matrix(1:10)) out <- call_modify(call, list(nrow = 3)) expect_equal(out, quote(matrix(data = 1:10, nrow = 3))) }) test_that("new args replace old", { call <- quote(matrix(1:10)) out <- call_modify(call, list(data = 3)) expect_equal(out, quote(matrix(data = 3))) }) lazyeval/tests/testthat/test-language.R0000644000176200001440000000147013004065022017760 0ustar liggesuserscontext("language") test_that("NULL is a valid language object", { expect_true(is_lang(NULL)) }) # coercion ---------------------------------------------------------------- test_that("as_name 
produces names", { expect_equal(as_name("a"), quote(a)) expect_equal(as_name(quote(a)), quote(a)) expect_equal(as_name(quote(a())), quote(a)) expect_equal(as_name(~ a), quote(a)) expect_equal(as_name(~ a()), quote(a)) expect_error(as_name(c("a", "b")), "Can not coerce character vector of length > 1") }) test_that("as_call produces calls", { expect_equal(as_call(quote(a)), quote(a())) expect_equal(as_call(quote(a())), quote(a())) expect_equal(as_call("a()"), quote(a())) expect_equal(as_call(~ a()), quote(a())) expect_error(as_call(c("a", "b")), "Can not coerce character vector of length > 1") }) lazyeval/src/0000755000176200001440000000000013171753670012702 5ustar liggesuserslazyeval/src/utils.c0000644000176200001440000000315413171753670014211 0ustar liggesusers#define R_NO_REMAP #include #include #include bool is_symbol_str(SEXP sym, const char* f) { return !strcmp(CHAR(PRINTNAME(sym)), f); } bool is_call_to(SEXP x, const char* f) { if (!Rf_isLanguage(x)) return false; SEXP fun = CAR(x); if (Rf_isSymbol(fun)) return is_symbol_str(fun, f); else return false; } bool is_lazy_load(SEXP x) { if (TYPEOF(x) != PROMSXP) return false; return is_call_to(PREXPR(x), "lazyLoadDBfetch"); } SEXP findLast(SEXP x) { SEXP cons = x; while(CDR(cons) != R_NilValue) cons = CDR(cons); return cons; } // Formulas -------------------------------------------------------------------- bool is_formula(SEXP x) { return TYPEOF(x) == LANGSXP && Rf_inherits(x, "formula"); } SEXP rhs(SEXP f) { if (!is_formula(f)) Rf_errorcall(R_NilValue, "`x` is not a formula"); switch (Rf_length(f)) { case 2: return CADR(f); case 3: return CADDR(f); default: Rf_errorcall(R_NilValue, "Invalid formula"); } } SEXP lhs(SEXP f) { if (!is_formula(f)) Rf_errorcall(R_NilValue, "`x` is not a formula"); switch (Rf_length(f)) { case 2: return R_NilValue; case 3: return CADR(f); default: Rf_errorcall(R_NilValue, "Invalid formula"); } } SEXP env(SEXP f) { if (!is_formula(f)) Rf_errorcall(R_NilValue, "`x` is not a formula"); 
return Rf_getAttrib(f, Rf_install(".Environment")); } SEXP make_formula1(SEXP rhs, SEXP env) { SEXP f = PROTECT(Rf_lang2(Rf_install("~"), rhs)); Rf_setAttrib(f, R_ClassSymbol, Rf_mkString("formula")); Rf_setAttrib(f, Rf_install(".Environment"), env); UNPROTECT(1); return f; } lazyeval/src/name.c0000644000176200001440000000300213171753670013761 0ustar liggesusers#define R_NO_REMAP #include #include #include "utils.h" // Returns a CHARSXP SEXP as_name(SEXP x) { switch(TYPEOF(x)) { case STRSXP: if (Rf_length(x) != 1) Rf_errorcall(R_NilValue, "LHS must evaluate to a single string"); return STRING_ELT(x, 0); case SYMSXP: return PRINTNAME(x); case LANGSXP: if (!is_formula(x) || Rf_length(x) != 2) Rf_errorcall(R_NilValue, "RHS of LHS must be a single-sided formula"); return as_name(rhs(x)); default: Rf_errorcall(R_NilValue, "LHS must evaluate to a string or name"); } } SEXP lhs_name(SEXP x) { if (TYPEOF(x) != VECSXP) Rf_errorcall(R_NilValue, "`x` must be a list (not a %s)", Rf_type2char(TYPEOF(x))); int n = Rf_length(x); SEXP x2 = PROTECT(Rf_shallow_duplicate(x)); SEXP names = Rf_getAttrib(x2, R_NamesSymbol); // Hush rchk false positives PROTECT(names); if (names == R_NilValue) { names = Rf_allocVector(STRSXP, n); Rf_setAttrib(x2, R_NamesSymbol, names); } for (int i = 0; i < n; ++i) { SEXP xi = VECTOR_ELT(x2, i); if (!is_formula(xi) || Rf_length(xi) != 3) continue; // Hush rchk false positives SEXP p_lhs = PROTECT(lhs(xi)); SEXP p_env = PROTECT(env(xi)); // set name SEXP name = PROTECT(Rf_eval(p_lhs, p_env)); if (TYPEOF(name) != NILSXP) SET_STRING_ELT(names, i, as_name(name)); // replace with RHS of formula SET_VECTOR_ELT(x2, i, make_formula1(CADDR(xi), env(xi))); UNPROTECT(3); } UNPROTECT(2); return x2; } lazyeval/src/lazy.c0000644000176200001440000000562313171753670014033 0ustar liggesusers#include #include #include "utils.h" SEXP promise_as_lazy(SEXP promise, SEXP env, int follow_symbols) { // recurse until we find the real promise, not a promise of a promise 
while(TYPEOF(promise) == PROMSXP) { if (PRENV(promise) == R_NilValue) { Rf_error("Promise has already been forced"); } env = PRENV(promise); promise = PREXPR(promise); // If the promise is threaded through multiple functions, we'll // get some symbols along the way. If the symbol is bound to a promise // keep going on up if (follow_symbols && TYPEOF(promise) == SYMSXP) { SEXP obj = findVar(promise, env); if (obj == R_MissingArg || obj == R_UnboundValue) break; if (TYPEOF(obj) == PROMSXP && is_lazy_load(obj)) break; promise = obj; } } // Make named list for output SEXP lazy = PROTECT(allocVector(VECSXP, 2)); MARK_NOT_MUTABLE(promise); SET_VECTOR_ELT(lazy, 0, promise); SET_VECTOR_ELT(lazy, 1, env); SEXP names = PROTECT(allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, mkChar("expr")); SET_STRING_ELT(names, 1, mkChar("env")); setAttrib(lazy, install("names"), names); setAttrib(lazy, install("class"), PROTECT(mkString("lazy"))); UNPROTECT(3); return lazy; } SEXP make_lazy(SEXP name, SEXP env, SEXP follow_symbols_) { SEXP promise = findVar(name, env); int follow_symbols = asLogical(follow_symbols_); return promise_as_lazy(promise, env, follow_symbols); } int is_missing(SEXP x) { return TYPEOF(x) == SYMSXP && x == R_MissingArg; } SEXP make_lazy_dots(SEXP env, SEXP follow_symbols_, SEXP ignore_empty_) { SEXP dots = findVar(R_DotsSymbol, env); int follow_symbols = asLogical(follow_symbols_); int ignore_empty = asLogical(ignore_empty_); // Hush rchk false positive PROTECT(dots); if (dots == R_MissingArg) { SEXP out = PROTECT(Rf_allocVector(VECSXP, 0)); setAttrib(out, install("class"), PROTECT(mkString("lazy_dots"))); UNPROTECT(3); return out; } // Figure out how many elements in dots int n = 0; for(SEXP nxt = dots; nxt != R_NilValue; nxt = CDR(nxt)) { if (ignore_empty && is_missing(CAR(nxt))) continue; n++; } // Allocate list to store results SEXP lazy_dots = PROTECT(allocVector(VECSXP, n)); SEXP names = PROTECT(allocVector(STRSXP, n)); // Iterate through all elements of 
dots, converting promises into lazy exprs int i = 0; for(SEXP nxt = dots; nxt != R_NilValue; nxt = CDR(nxt)) { SEXP promise = CAR(nxt); if (ignore_empty && is_missing(promise)) continue; SEXP lazy = promise_as_lazy(promise, env, follow_symbols); SET_VECTOR_ELT(lazy_dots, i, lazy); if (TAG(nxt) != R_NilValue) SET_STRING_ELT(names, i, PRINTNAME(TAG(nxt))); i++; } setAttrib(lazy_dots, install("names"), names); setAttrib(lazy_dots, install("class"), PROTECT(mkString("lazy_dots"))); UNPROTECT(4); return lazy_dots; } lazyeval/src/interp.c0000644000176200001440000000212513171753670014347 0ustar liggesusers#define R_NO_REMAP #include #include #include "utils.h" SEXP interp_walk(SEXP x, SEXP env, SEXP data) { if (!Rf_isLanguage(x)) return x; if (is_call_to(x, "uq")) { SEXP uq_call = PROTECT(Rf_lang3(Rf_install("uq"), CADR(x), data)); SEXP res = PROTECT(Rf_eval(uq_call, env)); UNPROTECT(2); return res; } if (is_call_to(x, "uqf")) { return Rf_eval(x, env); } // Recursive case for(SEXP cur = x; cur != R_NilValue; cur = CDR(cur)) { SETCAR(cur, interp_walk(CAR(cur), env, data)); SEXP nxt = CDR(cur); if (is_call_to(CAR(nxt), "uqs")) { // uqs() does error checking and returns a pair list SEXP args_pl = Rf_eval(CAR(nxt), env); // Insert args_pl into existing pairlist of args SEXP last_arg = findLast(args_pl); SETCDR(last_arg, CDR(nxt)); SETCDR(cur, args_pl); } } return x; } SEXP interp_(SEXP x, SEXP env, SEXP data) { if (!Rf_isLanguage(x)) return x; if (!Rf_isEnvironment(env)) Rf_error("`env` must be an environment"); return interp_walk(Rf_duplicate(x), env, data); } lazyeval/src/utils.h0000644000176200001440000000044113171753670014212 0ustar liggesusers#define R_NO_REMAP #include #include #include bool is_lazy_load(SEXP x); bool is_call_to(SEXP x, const char* f); bool is_formula(SEXP x); SEXP rhs(SEXP f); SEXP lhs(SEXP f); SEXP env(SEXP f); SEXP findLast(SEXP x); SEXP make_formula1(SEXP rhs, SEXP env); lazyeval/src/expr.c0000644000176200001440000000350713171753670014031 0ustar 
liggesusers#define R_NO_REMAP #include #include #include "utils.h" SEXP base_promise(SEXP promise, SEXP env) { // recurse until we find the real promise, not a promise of a promise while(TYPEOF(promise) == PROMSXP) { env = PRENV(promise); promise = PREXPR(promise); // promise has already been forced so can't go further if (env == R_NilValue) break; // If the promise is threaded through multiple functions, we'll // get some symbols along the way. If the symbol is bound to a promise // keep going on up if (TYPEOF(promise) == SYMSXP) { SEXP obj = Rf_findVar(promise, env); if (TYPEOF(obj) != PROMSXP) break; if (is_lazy_load(obj)) break; promise = obj; } } return promise; } // Return NULL if not a promise or has already been forced SEXP base_promise_env(SEXP promise, SEXP env) { if (TYPEOF(promise) != PROMSXP) return R_NilValue; // recurse until we find the real promise, not a promise of a promise while(TYPEOF(promise) == PROMSXP) { env = PRENV(promise); promise = PREXPR(promise); // promise has already been forced so can't go further if (env == R_NilValue) return R_NilValue; // If the promise is threaded through multiple functions, we'll // get some symbols along the way. 
If the symbol is bound to a promise // keep going on up if (TYPEOF(promise) == SYMSXP) { SEXP obj = Rf_findVar(promise, env); if (TYPEOF(obj) != PROMSXP) break; if (is_lazy_load(obj)) break; promise = obj; } } return env; } SEXP expr_find_(SEXP name, SEXP env) { SEXP promise = Rf_findVar(name, env); return base_promise(promise, env); } SEXP expr_env_(SEXP name, SEXP env) { SEXP promise = Rf_findVar(name, env); return base_promise_env(promise, env); } lazyeval/src/init.c0000644000176200001440000000210613171753670014010 0ustar liggesusers#include #include extern SEXP env(SEXP); extern SEXP expr_env_(SEXP, SEXP); extern SEXP expr_find_(SEXP, SEXP); extern SEXP interp_(SEXP, SEXP, SEXP); extern SEXP lhs(SEXP); extern SEXP lhs_name(SEXP); extern SEXP make_lazy(SEXP, SEXP, SEXP); extern SEXP make_lazy_dots(SEXP, SEXP, SEXP); extern SEXP rhs(SEXP); static const R_CallMethodDef call_entries[] = { {"lazyeval_env", (DL_FUNC) &env, 1}, {"lazyeval_expr_env_", (DL_FUNC) &expr_env_, 2}, {"lazyeval_expr_find_", (DL_FUNC) &expr_find_, 2}, {"lazyeval_interp_", (DL_FUNC) &interp_, 3}, {"lazyeval_lhs", (DL_FUNC) &lhs, 1}, {"lazyeval_lhs_name", (DL_FUNC) &lhs_name, 1}, {"lazyeval_make_lazy", (DL_FUNC) &make_lazy, 3}, {"lazyeval_make_lazy_dots", (DL_FUNC) &make_lazy_dots, 3}, {"lazyeval_rhs", (DL_FUNC) &rhs, 1}, {NULL, NULL, 0} }; void R_init_lazyeval(DllInfo* dll) { R_registerRoutines(dll, NULL, call_entries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } lazyeval/NAMESPACE0000644000176200001440000000376013171753463013340 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("$",complain) S3method("$<-",lazy_dots) S3method("[",lazy_dots) S3method("[<-",lazy_dots) S3method("[[",complain) S3method(as.lazy,call) S3method(as.lazy,character) S3method(as.lazy,formula) S3method(as.lazy,lazy) S3method(as.lazy,logical) S3method(as.lazy,name) S3method(as.lazy,numeric) S3method(as.lazy_dots,"NULL") S3method(as.lazy_dots,call) S3method(as.lazy_dots,character) 
S3method(as.lazy_dots,formula) S3method(as.lazy_dots,lazy) S3method(as.lazy_dots,lazy_dots) S3method(as.lazy_dots,list) S3method(as.lazy_dots,name) S3method(as_call,call) S3method(as_call,character) S3method(as_call,formula) S3method(as_call,name) S3method(as_name,call) S3method(as_name,character) S3method(as_name,formula) S3method(as_name,name) S3method(c,lazy_dots) S3method(find_data,"NULL") S3method(find_data,data.frame) S3method(find_data,default) S3method(find_data,list) S3method(has_name,default) S3method(has_name,environment) S3method(interp,call) S3method(interp,character) S3method(interp,formula) S3method(interp,lazy) S3method(interp,name) S3method(print,lazy) export("f_env<-") export("f_lhs<-") export("f_rhs<-") export(all_dots) export(as.lazy) export(as.lazy_dots) export(as_call) export(as_f_list) export(as_name) export(ast) export(ast_) export(auto_name) export(call_modify) export(call_new) export(call_standardise) export(common_env) export(dots_capture) export(expr_env) export(expr_find) export(expr_label) export(expr_text) export(f_capture) export(f_env) export(f_eval) export(f_eval_lhs) export(f_eval_rhs) export(f_interp) export(f_label) export(f_lhs) export(f_list) export(f_new) export(f_rhs) export(f_text) export(f_unwrap) export(find_data) export(function_new) export(interp) export(is_atomic) export(is_call) export(is_formula) export(is_lang) export(is_name) export(is_pairlist) export(lazy) export(lazy_) export(lazy_dots) export(lazy_eval) export(make_call) export(missing_arg) export(uq) export(uqf) export(uqs) useDynLib(lazyeval, .registration = TRUE) lazyeval/NEWS.md0000644000176200001440000000624213171753463013215 0ustar liggesusers# lazyeval 0.2.1 This is a maintenance release. The lazyeval package is no longer developed as the tidyverse is switching to tidy evaluation. * Use new registration system. 
* Switch from `SET_NAMED()` to `MARK_NOT_MUTABLE()` in prevision of an API change in R core * No longer check the type of the sides of the formula. # lazyeval 0.2.0 ## Formula-based lazy evaluation Lazyeval has a new system for lazy-eval based on formulas, described in depth in the new `lazyeval` vignette. This system is still a little experimental - it hasn't seen much use outside of the vignette, so it certainly may change a little in the future. However, long-term goal is to use these tools across all of my packages (ggplot2, tidyr, dplyr, etc), and I am fairly confident that this is a robust system that won't need major changes. There are three key components: * `f_eval()` evaluates a formula in the environment where it was defined. If supplied, values are first looked for in an optional `data` argument. Pronouns `.data` and `.env` can be used to resolve ambiguity in this case. (#43). Longer forms `f_eval_rhs()` and `f_eval_lhs()` emphasise the side of the formula that you want to evaluate (#64). * `f_interp()` provides a full quasiquoting system using `uq()` for unquote and `uqs()` for unquote-splice (#36). * `f_capture()` and `dots_capture()` make it easy to turn promises and `...` into explicit formulas. These should be used sparingly, as generally lazy-eval is preferred to non-standard eval. * For functions that work with `...`, `f_list()` and `as_f_list()` make it possible to use the evaluated LHS of a formula to name the elements of a list (#59). The core components are accompanied by a number of helper functions: * Identify a formula with `is_formula()`. * Create a formula from a quoted call and an environment with `f_new()`. * "Unwrap" a formula removing one level from the stack of parent environments with `f_unwrap()`. * Get or set either side of a formula with `f_rhs()` or `f_lhs()`, and the environment with `f_env()`. * Convert to text/label with `f_text()` and `f_label()`. 
I've also added `expr_find()`, `expr_text()` and `expr_label()` explicitly to find the expression associated with a function argument, and label it for output (#58). This is one of the primary uses cases for NSE. `expr_env()` is a similar helper that returns the environment associated with a promise (#67). ## Fixes to existing functions * `lazy_dots()` gains `.ignore_empty` argument to drop extra arguments (#32). * `interp.formula()` only accepts single-sided formulas (#37). * `interp()` accepts an environment in `.values` (#35). * `interp.character()` always produes a single string, regardless of input length (#27). * Fixed an infinite loop in `lazy_dots(.follow_symbols = TRUE)` (#22, #24) * `lazy()` now fails with an informative error when it is applied on an object that has already been evaluated (#23, @lionel-). * `lazy()` no longer follows the expressions of lazily loaded objects (#18, @lionel-). # lazyeval 0.1.10 * `as.lazy_dots()` gains a method for NULL, returning a zero-length list. * `auto_names()` no longer truncates symbols (#19, #20) lazyeval/R/0000755000176200001440000000000013171753670012314 5ustar liggesuserslazyeval/R/f-eval.R0000644000176200001440000000574013004065022013575 0ustar liggesusers#' @export #' @rdname f_eval f_eval_rhs <- function(f, data = NULL) { if (!is_formula(f)) { stop("`f` is not a formula", call. = FALSE) } expr <- f_rhs(f_interp(f, data = data)) eval_expr(expr, f_env(f), data) } #' @export #' @rdname f_eval f_eval_lhs <- function(f, data = NULL) { if (!is_formula(f)) { stop("`f` is not a formula", call. = FALSE) } expr <- f_lhs(f_interp(f, data = data)) eval_expr(expr, f_env(f), data) } #' Evaluate a formula #' #' \code{f_eval_rhs} evaluates the RHS of a formula and \code{f_eval_lhs} #' evaluates the LHS. \code{f_eval} is a shortcut for \code{f_eval_rhs} since #' that is what you most commonly need. 
#'
#' If \code{data} is specified, variables will be looked for first in this
#' object and, if not found there, in the environment of the formula.
#'
#' @section Pronouns:
#' When used with \code{data}, \code{f_eval} provides two pronouns to make it
#' possible to be explicit about where you want values to come from:
#' \code{.env} and \code{.data}. These are thin wrappers around the formula's
#' environment and \code{data} that throw errors if you try to access
#' non-existent values.
#'
#' @param f A formula. Any expressions wrapped in \code{ uq() } will
#'   be "unquoted", i.e. they will be evaluated, and the results inserted
#'   back into the formula. See \code{\link{f_interp}} for more details.
#' @param data A list (or data frame). \code{find_data} is a generic used to
#'   find the data associated with a given object. If you want to make
#'   \code{f_eval} work for your own objects, you can define a method for this
#'   generic.
#' @param x An object for which you want to find associated data.
#' @export
#' @examples
#' f_eval(~ 1 + 2 + 3)
#'
#' # formulas automatically capture their enclosing environment
#' foo <- function(x) {
#'   y <- 10
#'   ~ x + y
#' }
#' f <- foo(1)
#' f
#' f_eval(f)
#'
#' # If you supply data, f_eval will look there first:
#' f_eval(~ cyl, mtcars)
#'
#' # To avoid ambiguity, you can use .env and .data pronouns to be
#' # explicit:
#' cyl <- 10
#' f_eval(~ .data$cyl, mtcars)
#' f_eval(~ .env$cyl, mtcars)
#'
#' # Imagine you are computing the mean of a variable:
#' f_eval(~ mean(cyl), mtcars)
#' # How can you change the variable that's being computed?
#' # The easiest way is "unquote" with uq()
#' # See ?f_interp for more details
#' var <- ~ cyl
#' f_eval(~ mean( uq(var) ), mtcars)
f_eval <- f_eval_rhs

# Evaluate `expr`, looking names up in `data` first and then in `env`.
# The `.env` and `.data` pronouns are bound in a fresh child of `env`, so
# `env` itself is not modified.
eval_expr <- function(expr, env, data) {
  # Normalise `data` through the find_data() generic (NULL becomes list())
  data <- find_data(data)

  expr_env <- new.env(parent = env)
  expr_env$.env <- complain(env, "Object '%s' not found in environment")
  expr_env$.data <- complain(data, "Variable '%s' not found in data")

  eval(expr, data, expr_env)
}

#' @rdname f_eval
#' @export
find_data <- function(x) UseMethod("find_data")

# NULL means "no data": use an empty list.
#' @export
find_data.NULL <- function(x) list()
# Lists and data frames are already usable as data and are returned as is.
#' @export
find_data.list <- function(x) x
#' @export
find_data.data.frame <- function(x) x
# Any other class has no known data; fail with an explicit error.
#' @export
find_data.default <- function(x) {
  stop("Do not know how to find data associated with `x`", call. = FALSE)
}
lazyeval/R/utils.R0000644000176200001440000000130313046425567013576 0ustar liggesusers
# Null-default operator: `x` unless it is NULL, in which case `y`.
"%||%" <- function(x, y) if(is.null(x)) y else x

# TRUE when typeof(x) is one of the six atomic vector types.
is_atomic <- function(x) {
  typeof(x) %in% c("logical", "integer", "double", "complex", "character", "raw")
}

# TRUE for atomic vectors and lists.
is_vector <- function(x) {
  is_atomic(x) || is.list(x)
}

# Logical vector with one element per element of `x`: TRUE where the name is
# neither NA nor "". An object without names yields all FALSE.
has_names <- function(x) {
  nms <- names(x)
  if (is.null(nms)) {
    rep(FALSE, length(x))
  } else {
    !(is.na(nms) | nms == "")
  }
}

# substitute() for an expression held in a variable: builds the call
# substitute(<value of x>, env) and evaluates it.
substitute_ <- function(x, env) {
  if (identical(env, globalenv())) {
    # substitute() leaves symbols untouched when given the global
    # environment, so pass its contents as a list instead
    env <- as.list(env)
  }

  call <- substitute(substitute(x, env), list(x = x))
  eval(call)
}

#' Generate a missing argument.
#'
#' @export
#' @examples
#' f_interp(~f(x = uq(missing_arg())))
#' f_interp(~f(x = uq(NULL)))
missing_arg <- function() {
  quote(expr = )
}
lazyeval/R/expr.R0000644000176200001440000000453013171753463013417 0ustar liggesusers
#' Find the expression associated with an argument
#'
#' \code{expr_find()} finds the full expression; \code{expr_text()} turns the
#' expression into a single string; \code{expr_label()} formats it nicely for
#' use in messages. \code{expr_env()} finds the environment associated with
#' the expression.
#'
#' These functions never force promises, and will work even if a promise has
#' previously been forced.
#'
#' @param x A promise (function argument)
#' @export
#' @examples
#' # Unlike substitute(), expr_find() finds the original expression
#' f <- function(x) g(x)
#' g <- function(y) h(y)
#' h <- function(z) list(substitute(z), expr_find(z))
#'
#' f(1 + 2 + 3)
#'
#' expr_label(10)
#' # Names are quoted with ``
#' expr_label(x)
#' # Strings are encoded
#' expr_label("a\nb")
#' # Expressions are captured
#' expr_label(a + b + c)
#' # Long expressions are collapsed
#' expr_label(foo({
#'   1 + 2
#'   print(x)
#' }))
expr_label <- function(x) {
  expr_label_(expr_find(x))
}

# Standard-evaluation worker behind expr_label(): format an already-captured
# expression for use in a message.
expr_label_ <- function(x) {
  if (is.character(x)) {
    return(encodeString(x, quote = '"'))
  }
  if (is.atomic(x)) {
    return(format(x))
  }
  if (is.name(x)) {
    return(paste0("`", as.character(x), "`"))
  }

  deparsed <- deparse(x)
  if (length(deparsed) > 1) {
    # Multi-line expression: keep only the head of the call, as `head(...)`
    abbreviated <- call_new(x[[1]], quote(...))
    deparsed <- paste(deparse(abbreviated), collapse = "\n")
  }
  paste0("`", deparsed, "`")
}

#' @export
#' @rdname expr_label
#' @param width Width of each line
#' @param nlines Maximum number of lines to extract.
expr_text <- function(x, width = 60L, nlines = Inf) {
  expr_text_(expr_find(x), width = width, nlines = nlines)
}

# Standard-evaluation worker behind expr_text(): deparse `x` and, when the
# result has more than `nlines` lines, keep the first `nlines - 1` and end
# with "...".
expr_text_ <- function(x, width = 60L, nlines = Inf) {
  lines <- deparse(x, width.cutoff = width)

  too_long <- length(lines) > nlines
  if (too_long) {
    kept <- lines[seq_len(nlines - 1)]
    lines <- c(kept, "...")
  }

  paste(lines, collapse = "\n")
}

#' @export
#' @rdname expr_label
expr_find <- function(x) {
  .Call(lazyeval_expr_find_, quote(x), environment())
}

#' @param default_env If supplied, \code{expr_env} will return this if the
#'   promise has already been forced. Otherwise it will throw an error.
#' @export
#' @rdname expr_label
expr_env <- function(x, default_env) {
  # The C routine is handed the name of the promise and this frame; a NULL
  # result is treated below as "the promise has already been forced"
  env <- .Call(lazyeval_expr_env_, quote(x), environment())

  if (is.null(env)) {
    if (missing(default_env)) {
      stop("Promise has already been forced")
    } else {
      default_env
    }
  } else {
    env
  }
}
lazyeval/R/lazy-names.R0000644000176200001440000000212612726271770014521 0ustar liggesusers
#' Automatically name all components of a lazy dots.
#'
#' Any components missing a name will automatically get a name added by
#' looking at the first \code{max_width} characters of the deparsed expression.
#'
#' @param x A \code{\link{lazy_dots}}
#' @param max_width Maximum number of characters to use
#' @keywords internal
#' @export
#' @examples
#' x <- lazy_dots(1 + 2, mean(mpg))
#' auto_name(x)
#'
#' auto_name(list(~f, quote(x)))
auto_name <- function(x, max_width = 40) {
  names(x) <- auto_names(x, max_width = max_width)
  x
}

# Compute the names auto_name() assigns: existing names are kept, and empty
# ones are replaced by the (possibly truncated) deparsed expression.
auto_names <- function(x, max_width = 40) {
  x <- as.lazy_dots(x)

  nms <- names(x) %||% rep("", length(x))

  missing <- nms == ""
  expr <- lapply(x[missing], `[[`, "expr")
  nms[missing] <- vapply(expr, deparse_trunc, width = max_width,
    FUN.VALUE = character(1), USE.NAMES = FALSE)

  nms
}

# Deparse `x` to a single string of at most `width` characters.
# Symbols are returned in full. Other expressions are returned as is when
# they fit on one line of up to `width` characters; otherwise the first line
# is cut to `width - 3` characters and "..." is appended.
deparse_trunc <- function(x, width = getOption("width")) {
  if (is.symbol(x)) {
    return(as.character(x))
  }

  text <- deparse(x, width.cutoff = width)
  # `<=`, not `<`: a line of exactly `width` characters already fits and
  # must not lose its last three characters to "..."
  if (length(text) == 1 && nchar(text) <= width) return(text)

  paste0(substr(text[1], 1, width - 3), "...")
}
lazyeval/R/lazyeval.R0000644000176200001440000000006213171753463014264 0ustar liggesusers
#' @useDynLib lazyeval, .registration = TRUE
NULL
lazyeval/R/lazy-eval.R0000644000176200001440000000131512726271770014344 0ustar liggesusers
#' Evaluate a lazy expression.
#'
#' @param x A lazy object or a formula.
#' @param data Optional, a data frame or list in which to preferentially look
#'   for variables before using the environment associated with the lazy
#'   object.
#' @export
#' @examples
#' f <- function(x) {
#'   z <- 100
#'   ~ x + z
#' }
#' z <- 10
#' lazy_eval(f(10))
#' lazy_eval(f(10), list(x = 100))
#' lazy_eval(f(10), list(x = 1, z = 1))
#'
#' lazy_eval(lazy_dots(a = x, b = z), list(x = 10))
lazy_eval <- function(x, data = NULL) {
  # A lazy_dots is evaluated element by element, giving a list of results
  if (is.lazy_dots(x)) {
    return(lapply(x, lazy_eval, data = data))
  }

  lazy_obj <- as.lazy(x)

  if (is.null(data)) {
    eval(lazy_obj$expr, lazy_obj$env, emptyenv())
  } else {
    # `data` is searched first, the captured environment second
    eval(lazy_obj$expr, data, lazy_obj$env)
  }
}
lazyeval/R/formula.R0000644000176200001440000000677113171753463014117 0ustar liggesusers
#' Create a formula object by "hand".
#'
#' @param lhs,rhs A call, name, or atomic vector.
#' @param env An environment
#' @return A formula object
#' @export
#' @examples
#' f_new(quote(a))
#' f_new(quote(a), quote(b))
f_new <- function(rhs, lhs = NULL, env = parent.frame()) {
  if (!is.environment(env)) {
    stop("`env` must be an environment", call. = FALSE)
  }

  # One-sided when there is no LHS, two-sided otherwise
  tilde_call <- if (is.null(lhs)) call_new("~", rhs) else call_new("~", lhs, rhs)

  structure(tilde_call, class = "formula", .Environment = env)
}

#' Is object a formula?
#'
#' @param x Object to test
#' @export
#' @examples
#' is_formula(~ 10)
#' is_formula(10)
is_formula <- function(x) {
  is.call(x) && inherits(x, "formula")
}

#' Get/set formula components.
#'
#' \code{f_rhs} extracts the righthand side, \code{f_lhs} extracts the
#' lefthand side, and \code{f_env} extracts the environment. All functions
#' throw an error if \code{f} is not a formula.
#'
#' @param f,x A formula
#' @param value The value to replace with.
#' @export
#' @return \code{f_rhs} and \code{f_lhs} return language objects (i.e.
#'   atomic vectors of length 1, a name, or a call). \code{f_env}
#'   returns an environment.
#' @examples
#' f_rhs(~ 1 + 2 + 3)
#' f_rhs(~ x)
#' f_rhs(~ "A")
#' f_rhs(1 ~ 2)
#'
#' f_lhs(~ y)
#' f_lhs(x ~ y)
#'
#' f_env(~ x)
f_rhs <- function(f) {
  .Call(lazyeval_rhs, f)
}

# The replacement forms rebuild the formula with f_new(), swapping in the
# new component and carrying over the other two.
#' @export
#' @rdname f_rhs
`f_rhs<-` <- function(x, value) {
  stopifnot(is_formula(x))
  f_new(value, f_lhs(x), f_env(x))
}

#' @export
#' @rdname f_rhs
f_lhs <- function(f) {
  .Call(lazyeval_lhs, f)
}

#' @export
#' @rdname f_rhs
`f_lhs<-` <- function(x, value) {
  stopifnot(is_formula(x))
  f_new(f_rhs(x), value, f_env(x))
}

#' @export
#' @rdname f_rhs
f_env <- function(f) {
  .Call(lazyeval_env, f)
}

#' @export
#' @rdname f_rhs
`f_env<-` <- function(x, value) {
  stopifnot(is_formula(x))
  f_new(f_rhs(x), f_lhs(x), value)
}

#' Turn RHS of formula into a string/label.
#'
#' Equivalent of \code{\link{expr_text}()} and \code{\link{expr_label}()} for
#' formulas.
#'
#' @param x A formula.
#' @inheritParams expr_text
#' @export
#' @examples
#' f <- ~ a + b + bc
#' f_text(f)
#' f_label(f)
#'
#' # Names are quoted with ``
#' f_label(~ x)
#' # Strings are encoded
#' f_label(~ "a\nb")
#' # Long expressions are collapsed
#' f_label(~ foo({
#'   1 + 2
#'   print(x)
#' }))
f_text <- function(x, width = 60L, nlines = Inf) {
  expr_text_(f_rhs(x), width = width, nlines = nlines)
}

#' @export
#' @rdname f_text
f_label <- function(x) {
  expr_label_(f_rhs(x))
}

#' Unwrap a formula
#'
#' This interpolates values in the formula that are defined in its environment,
#' replacing the environment with its parent.
#'
#' @export
#' @param f A formula to unwrap.
#' @examples
#' n <- 100
#' f <- ~ x + n
#' f_unwrap(f)
f_unwrap <- function(f) {
  stopifnot(is_formula(f))

  e <- environment(f)
  if (identical(e, emptyenv())) {
    # The empty environment has no parent, so there is nothing to unwrap
    f
  } else {
    f_new(substitute_(f_rhs(f), e), f_lhs(f), parent.env(e))
  }
}

#' Build a named list from the LHS of formulas
#'
#' \code{f_list} makes a new list; \code{as_f_list} takes an existing list.
#' Both take the LHS of any two-sided formulas and evaluate it, replacing the
#' current name with the result.
#'
#' @param ... Named arguments.
#' @param x An existing list
#' @return A named list.
#' @export
#' @examples
#' f_list("y" ~ x)
#' f_list(a = "y" ~ a, ~ b, c = ~c)
f_list <- function(...) {
  .Call(lazyeval_lhs_name, list(...))
}

#' @export
#' @rdname f_list
as_f_list <- function(x) {
  .Call(lazyeval_lhs_name, x)
}
lazyeval/R/f-interp.R0000644000176200001440000000362213171753463014166 0ustar liggesusers
#' Interpolate a formula
#'
#' Interpolation replaces sub-expressions of the form \code{uq(x)} with
#' the evaluated value of \code{x}, and inlines sub-expressions of
#' the form \code{uqs(x)}.
#'
#' @section Theory:
#' Formally, \code{f_interp} is a quasiquote function, \code{uq()} is the
#' unquote operator, and \code{uqs()} is the unquote splice operator.
#' These terms have a rich history in LISP, and live on in modern languages
#' like \href{http://docs.julialang.org/en/release-0.1/manual/metaprogramming/}{Julia}
#' and \href{https://docs.racket-lang.org/reference/quasiquote.html}{Racket}.
#'
#' @param f A one-sided formula.
#' @param x For \code{uq} and \code{uqf}, a formula. For \code{uqs},
#'   a vector.
#' @param data When called from inside \code{f_eval}, this is used to pass on
#'   the data so that nested formulas are evaluated in the correct environment.
#' @export
#' @aliases uq uqs
#' @examples
#' f_interp(x ~ 1 + uq(1 + 2 + 3) + 10)
#'
#' # Use uqs() if you want to add multiple arguments to a function
#' # It must evaluate to a list
#' args <- list(1:10, na.rm = TRUE)
#' f_interp(~ mean( uqs(args) ))
#'
#' # You can combine the two
#' var <- quote(xyz)
#' extra_args <- list(trim = 0.9)
#' f_interp(~ mean( uq(var) , uqs(extra_args) ))
#'
#' foo <- function(n) {
#'   ~ 1 + uq(n)
#' }
#' f <- foo(10)
#' f
#' f_interp(f)
f_interp <- function(f, data = NULL) {
  # Interpolate the RHS in the formula's own environment, then write the
  # result back into the formula
  interpolated <- .Call(lazyeval_interp_, f_rhs(f), f_env(f), data)
  f_rhs(f) <- interpolated
  f
}

#' @export
#' @rdname f_interp
uq <- function(x, data = NULL) {
  # Anything that is not a formula is already a value
  if (!is_formula(x)) {
    return(x)
  }

  if (is.null(data)) f_rhs(f_interp(x)) else f_eval(x, data = data)
}

#' @export
#' @rdname f_interp
uqf <- function(x) {
  if (is_formula(x)) {
    return(x)
  }

  stop("`x` must be a formula", call. = FALSE)
}

#' @export
#' @rdname f_interp
uqs <- function(x) {
  if (is_vector(x)) {
    return(as.pairlist(x))
  }

  stop("`x` must be a vector")
}
lazyeval/R/function.R0000644000176200001440000000237712726271770014276 0ustar liggesusers
#' Create a function by "hand"
#'
#' This constructs a new function given its three components:
#' list of arguments, body code and parent environment.
#'
#' @param args A named list of default arguments. Note that if you want
#'   arguments that don't have defaults, you'll need to use the special function
#'   \code{\link{alist}}, e.g. \code{alist(a = , b = 1)}
#' @param body A language object representing the code inside the function.
#'   Usually this will be most easily generated with \code{\link{quote}}
#' @param env The parent environment of the function, defaults to the calling
#'   environment of \code{function_new}
#' @export
#' @examples
#' f <- function(x) x + 3
#' g <- function_new(alist(x = ), quote(x + 3))
#'
#' # The components of the functions are identical
#' identical(formals(f), formals(g))
#' identical(body(f), body(g))
#' identical(environment(f), environment(g))
#'
#' # But the functions are not identical because f has src code reference
#' identical(f, g)
#'
#' attr(f, "srcref") <- NULL
#' # Now they are:
#' stopifnot(identical(f, g))
function_new <- function(args, body, env = parent.frame()) {
  stopifnot(all(has_names(args)), is_lang(body), is.environment(env))

  # Build the call `function(<args>) <body>` and evaluate it in `env`, which
  # becomes the environment of the resulting closure
  definition <- call("function", as.pairlist(args), body)
  eval(definition, env)
}
lazyeval/R/lazy.R0000644000176200001440000000407113171753463013420 0ustar liggesusers
#' Capture expression for later lazy evaluation.
#'
#' \code{lazy()} uses non-standard evaluation to turn promises into lazy
#' objects; \code{lazy_()} does standard evaluation and is suitable for
#' programming.
#'
#' Use \code{lazy()} like you'd use \code{\link{substitute}()}
#' to capture an unevaluated promise. Compared to \code{substitute()} it
#' also captures the environment associated with the promise, so that you
#' can correctly replay it in the future.
#'
#' @param expr Expression to capture. For \code{lazy_} must be a name
#'   or a call.
#' @param env Environment in which to evaluate expr.
#' @param .follow_symbols If \code{TRUE}, the default, follows promises across
#'   function calls. See \code{vignette("chained-promises")} for details.
#' @export
#' @examples
#' lazy_(quote(a + x), globalenv())
#'
#' # Lazy is designed to be used inside a function - you should
#' # give it the name of a function argument (a promise)
#' f <- function(x = b - a) {
#'   lazy(x)
#' }
#' f()
#' f(a + b / c)
#'
#' # Lazy also works when called from the global environment.
#' # This makes it easy to play with interactively.
#' lazy(a + b / c)
#'
#' # By default, lazy will climb all the way back to the initial promise
#' # This is handy if you have nested functions:
#' g <- function(y) f(y)
#' h <- function(z) g(z)
#' f(a + b)
#' g(a + b)
#' h(a + b)
#'
#' # To avoid this behaviour, set .follow_symbols = FALSE
#' # See vignette("chained-promises") for details
lazy_ <- function(expr, env) {
  stopifnot(is.call(expr) || is.name(expr) || is.atomic(expr))

  structure(list(expr = expr, env = env), class = "lazy")
}

#' @rdname lazy_
#' @export
lazy <- function(expr, env = parent.frame(), .follow_symbols = TRUE) {
  # The C routine receives the name of the promise and this frame, so
  # `expr` is not forced here
  .Call(lazyeval_make_lazy, quote(expr), environment(), .follow_symbols)
}

# TRUE if `x` inherits from class "lazy".
is.lazy <- function(x) inherits(x, "lazy")

# Print the expression (first deparsed line only, followed by "..." when
# there are more) and the environment. Returns `x` invisibly, as print
# methods conventionally do.
#' @export
print.lazy <- function(x, ...) {
  code <- deparse(x$expr)
  if (length(code) > 1) {
    code <- paste(code[[1]], "...")
  }

  cat("\n")
  cat(" expr: ", code, "\n", sep = "")
  cat(" env: ", format(x$env), "\n", sep = "")

  invisible(x)
}
lazyeval/R/ast.R0000644000176200001440000000445712726271770013241 0ustar liggesusers
#' Display a call (or expression) as a tree.
#'
#' \code{ast_} takes a quoted expression; \code{ast} does the quoting
#' for you.
#'
#' @param x Quoted call, list of calls, or expression to display.
#' @param width Display width, defaults to current width as reported by
#'   \code{getOption("width")}.
#' @export
#' @examples
#' ast(f(x, 1, g(), h(i())))
#' ast(if (TRUE) 3 else 4)
#' ast(function(a = 1, b = 2) {a + b + 10})
#' ast(f(x)(y)(z))
#'
#' ast_(quote(f(x, 1, g(), h(i()))))
#' ast_(quote(if (TRUE) 3 else 4))
#' ast_(expression(1, 2, 3))
ast_ <- function(x, width = getOption("width")) {
  if (is.expression(x) || is.list(x)) {
    # One tree per element, separated by a blank line
    trees <- vapply(x, tree, character(1), width = width)
    out <- paste0(trees, collapse = "\n\n")
  } else {
    out <- tree(x, width = width)
  }

  cat(out, "\n")
}

#' @rdname ast_
#' @export
ast <- function(x) ast_(expr_find(x))

# Render `x` as a single string, one node per line, recursing into calls and
# pairlists. `level` sets the indentation depth, `branch` is the prefix
# printed before the node label, and `width` shrinks by 3 per nesting level.
tree <- function(x, level = 1, width = getOption("width"), branch = "\u2517 ") {
  if (is_atomic(x) && length(x) == 1) {
    label <- paste0(" ", deparse(x)[1])
    children <- NULL
  } else if (is_name(x)) {
    x <- as.character(x)
    if (x == "") {
      # Special case the missing argument
      label <- "`MISSING"
    } else {
      label <- paste0("`", as.character(x))
    }
    children <- NULL
  } else if (is_call(x)) {
    label <- "()"
    children <- vapply(as.list(x), tree, character(1),
      level = level + 1, width = width - 3)
  } else if (is_pairlist(x)) {
    label <- "[]"

    # Each child of a pairlist is prefixed with its (padded) name
    branches <- paste("\u2517", format(names(x)), "=")
    children <- character(length(x))
    for (i in seq_along(x)) {
      children[i] <- tree(x[[i]], level = level + 1, width = width - 3,
        branch = branches[i])
    }
  } else {
    # Special case for srcrefs, since they're commonly seen
    # NOTE(review): the srcref label is an empty string, while every other
    # unhandled type is shown as "<type>" - confirm this is intended
    if (inherits(x, "srcref")) {
      label <- ""
    } else {
      label <- paste0("<", typeof(x), ">")
    }
    children <- NULL
  }

  indent <- paste0(str_dup(" ", level - 1), branch)
  label <- str_trunc(label, width - 3)

  if (is.null(children)) {
    paste0(indent, label)
  } else {
    paste0(indent, label, "\n", paste0(children, collapse = "\n"))
  }
}

# Shorten strings longer than `width` to `width - 3` characters plus "...".
str_trunc <- function(x, width = getOption("width")) {
  ifelse(nchar(x) <= width, x, paste0(substr(x, 1, width - 3), "..."))
}

# Concatenate `n` copies of `x` into one string.
str_dup <- function(x, n) {
  paste0(rep(x, n), collapse = "")
}
lazyeval/R/lazy-interp.R0000644000176200001440000000515212726271770014721 0ustar liggesusers
#' Interpolate values into an expression.
#'
#' This is useful if you want to build an expression up from a mixture of
#' constants and variables.
#'
#' @param _obj An object to modify: can be a call, name, formula,
#'   \code{\link{lazy}}, or a string.
#' @param ...,.values Either individual name-value pairs, or a list
#'   (or environment) of values.
#' @export
#' @examples
#' # Interp works with formulas, lazy objects, quoted calls and strings
#' interp(~ x + y, x = 10)
#' interp(lazy(x + y), x = 10)
#' interp(quote(x + y), x = 10)
#' interp("x + y", x = 10)
#'
#' # Use as.name if you have a character string that gives a
#' # variable name
#' interp(~ mean(var), var = as.name("mpg"))
#' # or supply the quoted name directly
#' interp(~ mean(var), var = quote(mpg))
#'
#' # Or a function!
#' interp(~ f(a, b), f = as.name("+"))
#' # Remember every action in R is a function call:
#' # http://adv-r.had.co.nz/Functions.html#all-calls
#'
#' # If you've built up a list of values through some other
#' # mechanism, use .values
#' interp(~ x + y, .values = list(x = 10))
#'
#' # You can also interpolate variables defined in the current
#' # environment, but this is a little risky.
#' y <- 10
#' interp(~ x + y, .values = environment())
interp <- function(`_obj`, ..., .values) {
  UseMethod("interp")
}

#' @export
interp.call <- function(`_obj`, ..., .values) {
  replacements <- all_values(.values, ...)
  substitute_(`_obj`, replacements)
}

#' @export
interp.name <- function(`_obj`, ..., .values) {
  replacements <- all_values(.values, ...)
  substitute_(`_obj`, replacements)
}

#' @export
interp.formula <- function(`_obj`, ..., .values) {
  if (length(`_obj`) != 2) {
    stop("Must use one-sided formula.", call. = FALSE)
  }

  replacements <- all_values(.values, ...)

  # Element 2 of a one-sided formula is its right-hand side
  rhs <- substitute_(`_obj`[[2]], replacements)
  `_obj`[[2]] <- rhs
  `_obj`
}

#' @export
interp.lazy <- function(`_obj`, ..., .values) {
  replacements <- all_values(.values, ...)

  new_expr <- substitute_(`_obj`$expr, replacements)
  `_obj`$expr <- new_expr
  `_obj`
}

#' @export
interp.character <- function(`_obj`, ..., .values) {
  replacements <- all_values(.values, ...)

  # Only the first parsed expression is interpolated; the result is
  # deparsed back into a single string
  parsed <- parse(text = `_obj`)[[1]]
  interpolated <- substitute_(parsed, replacements)
  paste(deparse(interpolated), collapse = "\n")
}

# Collect the values to interpolate: `...` when `.values` is not supplied,
# otherwise `.values` (the global environment is converted to a list).
# In a list of values, lazy objects are replaced by their expressions.
all_values <- function(.values, ...) {
  values <- if (missing(.values)) {
    list(...)
  } else if (identical(.values, globalenv())) {
    # substitute doesn't want to replace in globalenv
    as.list(globalenv())
  } else {
    .values
  }

  if (!is.list(values)) {
    return(values)
  }

  # Replace lazy objects with their expressions
  lazy_idx <- vapply(values, is.lazy, logical(1))
  values[lazy_idx] <- lapply(values[lazy_idx], function(v) v[["expr"]])
  values
}
lazyeval/R/complain.R0000644000176200001440000000170113004065015014220 0ustar liggesusers
# Wrap a list or environment in class "complain" so that `$` and `[[` raise
# an error, formatted from `message`, for names that do not exist.
# NULL is passed through; environments are cloned before being modified.
complain <- function(x, message = "object '%s' not found") {
  if (is.null(x)) {
    return(NULL)
  }

  target <- if (is.environment(x)) clone_env(x) else x
  structure(target, message = message, class = c("complain", class(target)))
}

# Copy all bindings of `x` into a new environment with the same parent.
clone_env <- function(x) {
  bindings <- as.list(x, all.names = TRUE)
  list2env(bindings, parent = parent.env(x))
}

#' @export
`$.complain` <- function(x, name) {
  if (has_name(x, name)) {
    return(x[[name]])
  }

  stop(sprintf(attr(x, "message"), name), call. = FALSE)
}

#' @export
`[[.complain` <- function(x, i, ...) {
  if (!is.character(i) || length(i) != 1) {
    stop("Must subset with a string", call. = FALSE)
  }

  found <- has_name(x, i)
  if (!found) {
    stop(sprintf(attr(x, "message"), i), call. = FALSE)
  }

  NextMethod()
}

# Does `x` have an element or binding called `name`?
has_name <- function(x, name) {
  UseMethod("has_name")
}

#' @export
has_name.default <- function(x, name) {
  name %in% names(x)
}

#' @export
has_name.environment <- function(x, name) {
  # Only the environment itself is searched, not its parents
  exists(name, envir = x, inherits = FALSE)
}
lazyeval/R/language.R0000644000176200001440000000446513004065022014211 0ustar liggesusers
#' Is an object a language object?
#'
#' These helpers are consistent wrappers around their base R equivalents.
#' A language object is either an atomic vector (typically a scalar), a
#' name (aka a symbol), a call, or a pairlist (used for function arguments).
#'
#' @param x An object to test.
#' @seealso \code{\link{as_name}()} and \code{\link{as_call}()} for coercion
#'   functions.
#' @export
#' @examples
#' q1 <- quote(1)
#' is_lang(q1)
#' is_atomic(q1)
#'
#' q2 <- quote(x)
#' is_lang(q2)
#' is_name(q2)
#'
#' q3 <- quote(x + 1)
#' is_lang(q3)
#' is_call(q3)
is_lang <- function(x) {
  is_call(x) || is_pairlist(x) || is_atomic(x) || is_name(x) || is.null(x)
}

#' @rdname is_lang
#' @export
is_name <- function(x) {
  typeof(x) == "symbol"
}

#' @rdname is_lang
#' @export
is_call <- function(x) {
  typeof(x) == "language"
}

#' @rdname is_lang
#' @export
is_pairlist <- function(x) {
  typeof(x) == "pairlist"
}

#' @rdname is_lang
#' @export
is_atomic <- function(x) {
  typeof(x) %in% c("logical", "integer", "double", "complex", "character", "raw")
}

#' Coerce an object to a name or call.
#'
#' These are S3 generics with built-in methods for names, calls, formulas,
#' and strings. The distinction between a name and a call is particularly
#' important when coercing from a string. Coercing to a call will parse the
#' string, coercing to a name will create a (potentially) non-syntactic name.
#'
#' @param x An object to coerce
#' @export
#' @examples
#' as_name("x + y")
#' as_call("x + y")
#'
#' as_call(~ f)
#' as_name(~ f())
as_name <- function(x) UseMethod("as_name")

#' @export
as_name.name <- function(x) x

#' @export
as_name.character <- function(x) {
  if (length(x) > 1) {
    stop("Can not coerce character vector of length > 1 to name", call. = FALSE)
  }

  as.name(x)
}

# The name of a call is its first element (the function being called).
#' @export
as_name.call <- function(x) x[[1]]

#' @export
as_name.formula <- function(x) {
  as_name(f_rhs(x))
}

#' @export
#' @rdname as_name
as_call <- function(x) {
  UseMethod("as_call")
}

# A name becomes a call to that name with no arguments.
#' @export
as_call.name <- function(x) {
  call_new(x)
}

#' @export
as_call.call <- function(x) {
  x
}

# A string is parsed; only the first parsed expression is returned.
#' @export
as_call.character <- function(x) {
  if (length(x) > 1) {
    # This is the call coercion, so the message must say "call", not "name"
    stop("Can not coerce character vector of length > 1 to call", call. = FALSE)
  }

  parse(text = x)[[1]]
}

#' @export
as_call.formula <- function(x) {
  as_call(f_rhs(x))
}
lazyeval/R/lazy-as.R0000644000176200001440000000541312726271770014023 0ustar liggesusers
#' Convert an object to a lazy expression or lazy dots.
#'
#' @param x An R object. Current methods for \code{as.lazy()} convert formulas,
#'   character vectors, calls and names. Methods for \code{as.lazy_dots()}
#'   convert lists and character vectors (by calling \code{\link{lapply}()}
#'   with \code{as.lazy()}.)
#' @param env Environment to use for objects that don't already have
#'   associated environment.
#' @export
#' @examples
#' as.lazy(~ x + 1)
#' as.lazy(quote(x + 1), globalenv())
#' as.lazy("x + 1", globalenv())
#'
#' as.lazy_dots(list(~x, y = ~z + 1))
#' as.lazy_dots(c("a", "b", "c"), globalenv())
#' as.lazy_dots(~x)
#' as.lazy_dots(quote(x), globalenv())
#' as.lazy_dots(quote(f()), globalenv())
#' as.lazy_dots(lazy(x))
as.lazy <- function(x, env = baseenv()) UseMethod("as.lazy")

#' @export
as.lazy.lazy <- function(x, env = baseenv()) x

# A formula carries its own environment, so `env` is not used.
#' @export
as.lazy.formula <- function(x, env = baseenv()) lazy_(x[[2]], environment(x))

#' @export
as.lazy.character <- function(x, env = baseenv()) lazy_(parse(text = x)[[1]], env)

#' @export
as.lazy.call <- function(x, env = baseenv()) lazy_(x, env)

#' @export
as.lazy.name <- function(x, env = baseenv()) lazy_(x, env)

#' @export
as.lazy.numeric <- function(x, env = baseenv()) {
  if (length(x) > 1) {
    warning("Truncating vector to length 1", call. = FALSE)
    x <- x[1]
  }
  lazy_(x, env)
}
#' @export
as.lazy.logical <- as.lazy.numeric

#' @export
#' @rdname as.lazy
as.lazy_dots <- function(x, env) UseMethod("as.lazy_dots")

#' @export
as.lazy_dots.NULL <- function(x, env = baseenv()) {
  structure(list(), class = "lazy_dots")
}

#' @export
as.lazy_dots.list <- function(x, env = baseenv()) {
  structure(lapply(x, as.lazy, env = env), class = "lazy_dots")
}

# A single name, formula or call becomes a lazy_dots of length one.
#' @export
as.lazy_dots.name <- function(x, env = baseenv()) {
  structure(list(as.lazy(x, env)), class = "lazy_dots")
}
#' @export
as.lazy_dots.formula <- as.lazy_dots.name
#' @export
as.lazy_dots.call <- as.lazy_dots.name

#' @export
as.lazy_dots.lazy <- function(x, env = baseenv()) {
  structure(list(x), class = "lazy_dots")
}

#' @export
as.lazy_dots.character <- function(x, env = baseenv()) {
  structure(lapply(x, as.lazy, env = env), class = "lazy_dots")
}

#' @export
as.lazy_dots.lazy_dots <- function(x, env = baseenv()) {
  x
}

#' Combine explicit and implicit dots.
#'
#' @param ... Individual lazy objects
#' @param .dots A list of lazy objects
#' @param all_named If \code{TRUE}, uses \code{\link{auto_name}} to ensure
#'   every component has a name.
#' @return A \code{\link{lazy_dots}}
#' @keywords internal
#' @export
all_dots <- function(.dots, ..., all_named = FALSE) {
  dots <- as.lazy_dots(list(...))
  if (!missing(.dots)) {
    # Explicit `.dots` are appended after the objects passed through `...`
    dots2 <- as.lazy_dots(.dots)
    dots <- c(dots, dots2)
  }

  if (all_named) {
    dots <- auto_name(dots)
  }

  dots

}
lazyeval/R/lazy-call.R0000644000176200001440000000555512726271770014342 0ustar liggesusers
#' Make a call with \code{lazy_dots} as arguments.
#'
#' In order to exactly replay the original call, the environment must be the
#' same for all of the dots. This function circumvents that a little,
#' falling back to the \code{\link{baseenv}()} if all environments aren't
#' the same.
#'
#' @param fun Function as symbol or quoted call.
#' @param args Arguments to function; must be a \code{lazy_dots} object,
#'   or something \code{\link{as.lazy_dots}()} can coerce.
#' @return A list:
#'   \item{env}{The common environment for all elements}
#'   \item{expr}{The expression}
#' @export
#' @examples
#' make_call(quote(f), lazy_dots(x = 1, 2))
#' make_call(quote(f), list(x = 1, y = ~x))
#' make_call(quote(f), ~x)
#'
#' # If no known or no common environment, falls back to baseenv()
#' make_call(quote(f), quote(x))
make_call <- function(fun, args) {
  stopifnot(is.call(fun) || is.name(fun))

  args <- as.lazy_dots(args)
  arg_exprs <- lapply(args, function(arg) arg[["expr"]])

  # `fun` followed by the captured expressions, keeping argument names
  new_call <- as.call(c(fun, arg_exprs))
  lazy_(new_call, common_env(args))
}

#' Find common environment in list of lazy objects.
#'
#' If no common environment is found, will return \code{baseenv()}.
#'
#' @param dots A list of lazy objects
#' @keywords internal
#' @export
#' @examples
#' common_env(lazy_dots(a, b, c))
#'
#' f <- function(x) ~x
#' common_env(list(f(1)))
#' common_env(list(f(1), f(2)))
common_env <- function(dots) {
  if (!is.list(dots)) stop("dots must be a list", call. = FALSE)
  if (length(dots) == 0) return(baseenv())

  dots <- as.lazy_dots(dots)
  first_env <- dots[[1]]$env

  # Every element must share the environment of the first one
  shares_env <- vapply(
    dots,
    function(dot) identical(first_env, dot$env),
    logical(1)
  )

  if (all(shares_env)) first_env else baseenv()
}

# ------------------------------------------------------------------------------

#' Evaluate a call with \code{lazy_dots} as argument.
#'
#' This simulates the original call as closely as possible by creating
#' a temporary environment where each \code{lazy} object is bound to
#' a promise by \code{\link{delayedAssign}}.
#'
#' @noRd
#' @param env Environment in which to evaluate call. Defaults to
#'   \code{\link{parent.frame}()}.
#' @examples
#' make_env <- function(...)
#'   list2env(list(...), parent = emptyenv())
#'
#' f1 <- as.lazy(quote(a()), make_env(a = function() {message("!"); 1}))
#' f2 <- as.lazy(quote(a), make_env(a = 10))
#' args <- as.lazy_dots(list(f1, f2))
#'
#' a <- 100
#' eval_call(quote(`+`), args)
eval_call <- function(fun, dots, env = parent.frame()) {

  # One placeholder variable (x1, x2, ...) per dot, keeping the dot names so
  # that named arguments stay named in the call built below
  vars <- paste0("x", seq_along(dots))
  names(vars) <- names(dots)

  # Create environment containing promises
  env <- new.env(parent = env)
  for(i in seq_along(dots)) {
    dot <- dots[[i]]

    # Splice the captured expression into a delayedAssign() call so that the
    # placeholder becomes a promise evaluated in the dot's own environment
    assign_call <- substitute(
      delayedAssign(vars[i], expr, dot$env, assign.env = env),
      list(expr = dot$expr)
    )
    eval(assign_call)
  }

  args <- lapply(vars, as.symbol)
  call <- as.call(c(fun, args))

  eval(call, env)
}
lazyeval/R/call.R0000644000176200001440000000410213011153647013336 0ustar liggesusers
#' Create a call by "hand"
#'
#' @param f Function to call. For \code{call_new}, either a string, a symbol
#'   or a quoted call. For \code{do_call}, a bare function name or call.
#' @param ...,.args Arguments to the call either in or out of a list
#' @export
#' @examples
#' # f can either be a string, a symbol or a call
#' call_new("f", a = 1)
#' call_new(quote(f), a = 1)
#' call_new(quote(f()), a = 1)
#'
#' # Can supply arguments individually or in a list
#' call_new(quote(f), a = 1, b = 2)
#' call_new(quote(f), .args = list(a = 1, b = 2))
call_new <- function(f, ..., .args = list()) {
  if (is.character(f)) {
    if (length(f) != 1) {
      stop("Character `f` must be length 1", call. = FALSE)
    }
    f <- as.name(f)
  }

  # Arguments from `...` come first, followed by those in `.args`
  args <- c(list(...), as.list(.args))
  as.call(c(f, args))
}

#' Modify the arguments of a call.
#'
#' @param call A call to modify. It is first standardised with
#'   \code{\link{call_standardise}}.
#' @param env Environment in which to look up call value.
#' @param new_args A named list of expressions (constants, names or calls)
#'   used to modify the call. Use \code{NULL} to remove arguments.
#' @export
#' @examples
#' call <- quote(mean(x, na.rm = TRUE))
#' call_standardise(call)
#'
#' # Modify an existing argument
#' call_modify(call, list(na.rm = FALSE))
#' call_modify(call, list(x = quote(y)))
#'
#' # Remove an argument
#' call_modify(call, list(na.rm = NULL))
#'
#' # Add a new argument
#' call_modify(call, list(trim = 0.1))
#'
#' # Add an explicit missing argument
#' call_modify(call, list(na.rm = quote(expr = )))
call_modify <- function(call, new_args, env = parent.frame()) {
  stopifnot(is.call(call), is.list(new_args))

  # Standardise first so existing arguments can be addressed by name
  call <- call_standardise(call, env)

  if (!all(has_names(new_args))) {
    stop("All new arguments must be named", call. = FALSE)
  }

  # Assigning by name replaces or adds an argument; assigning NULL removes it
  for (nm in names(new_args)) {
    call[[nm]] <- new_args[[nm]]
  }
  call
}

#' @rdname call_modify
#' @export
call_standardise <- function(call, env = parent.frame()) {
  stopifnot(is_call(call))

  # Look up the function being called so its formals can be matched
  f <- eval(call[[1]], env)
  # Calls to primitive functions are returned unchanged
  if (is.primitive(f)) return(call)

  match.call(f, call)
}
lazyeval/R/lazy-dots.R0000644000176200001440000000275213171753463014373 0ustar liggesusers
#' Capture ... (dots) for later lazy evaluation.
#'
#' @param ... Dots from another function
#' @param .ignore_empty If \code{TRUE}, empty arguments will be ignored.
#' @return A named list of \code{\link{lazy}} expressions.
#' @inheritParams lazy
#' @export
#' @examples
#' lazy_dots(x = 1)
#' lazy_dots(a, b, c * 4)
#'
#' f <- function(x = a + b, ...) {
#'   lazy_dots(x = x, y = a + b, ...)
#' }
#' f(z = a + b)
#' f(z = a + b, .follow_symbols = TRUE)
#'
#' # .follow_symbols is off by default because it causes problems
#' # with lazy loaded objects
#' lazy_dots(letters)
#' lazy_dots(letters, .follow_symbols = TRUE)
#'
#' # You can also modify a dots like a list. Anything on the RHS will
#' # be coerced to a lazy.
#' l <- lazy_dots(x = 1)
#' l$y <- quote(f)
#' l[c("y", "x")]
#' l["z"] <- list(~g)
#'
#' c(lazy_dots(x = 1), lazy_dots(f))
lazy_dots <- function(..., .follow_symbols = FALSE, .ignore_empty = FALSE) {
  # environment() is this call's own frame; it is handed to the C routine
  # together with the two flags (presumably so the routine can read the
  # promises bound to `...` -- the C side is not visible here).
  .Call(lazyeval_make_lazy_dots, environment(), .follow_symbols, .ignore_empty)
}

# Test whether `x` inherits from the "lazy_dots" class.
is.lazy_dots <- function(x) inherits(x, "lazy_dots")

#' @export
`[.lazy_dots` <- function(x, i) {
  # Re-apply the class to whatever the next `[` method returns, so a subset
  # of a lazy_dots is still a lazy_dots.
  structure(NextMethod(), class = "lazy_dots")
}

#' @export
`$<-.lazy_dots` <- function(x, i, value) {
  # Coerce the new element with as.lazy(), passing the caller's environment
  # (parent.frame()) as its second argument.
  value <- as.lazy(value, parent.frame())
  x[[i]] <- value
  x
}

#' @export
`[<-.lazy_dots` <- function(x, i, value) {
  # Same coercion as `$<-`, applied to every element of the replacement
  # list before the default `[<-` method does the assignment.
  value <- lapply(value, as.lazy, env = parent.frame())
  NextMethod()
}

#' @export
c.lazy_dots <- function(..., recursive = FALSE) {
  # Combine with the next c() method, then restore the class on the result.
  structure(NextMethod(), class = "lazy_dots")
}
lazyeval/R/f-capture.R0000644000176200001440000000232213171753463014324 0ustar liggesusers
#' Make a promise explicit by converting into a formula.
#'
#' This should be used sparingly if you want to implement true non-standard
#' evaluation with 100\% magic. I recommend avoiding this unless you have
#' strong reasons otherwise since requiring arguments to be formulas only
#' adds one extra character to the inputs, and otherwise makes life much much
#' simpler.
#'
#' @param x,... Unevaluated promises.
#' @param .ignore_empty If \code{TRUE}, empty arguments will be silently
#'   dropped.
#' @export
#' @return \code{f_capture} returns a formula; \code{dots_capture}
#'   returns a list of formulas.
#' @examples
#' f_capture(a + b)
#' dots_capture(a + b, c + d, e + f)
#'
#' # These functions will follow a chain of promises back to the
#' # original definition
#' f <- function(x) g(x)
#' g <- function(y) h(y)
#' h <- function(z) f_capture(z)
#' f(a + b + c)
f_capture <- function(x) {
  # Hand the C routine the argument's name and this call's frame. The
  # trailing TRUE is presumably the follow-symbols flag (the routine's
  # signature is not visible here -- confirm against src/lazy.c).
  lazy <- .Call(lazyeval_make_lazy, quote(x), environment(), TRUE)
  # Rebuild the captured expression/environment pair as a formula.
  f_new(lazy$expr, env = lazy$env)
}

#' @export
#' @rdname f_capture
dots_capture <- function(..., .ignore_empty = TRUE) {
  # Same routine that lazy_dots() calls; TRUE sits in the position where
  # lazy_dots() passes .follow_symbols.
  lazies <- .Call(lazyeval_make_lazy_dots, environment(), TRUE, .ignore_empty)
  # Convert each captured expression/environment pair into a formula.
  lapply(lazies, function(x) f_new(x$expr, env = x$env))
}
lazyeval/vignettes/0000755000176200001440000000000013171753670014123 5ustar liggesuserslazyeval/vignettes/lazyeval-old.Rmd0000644000176200001440000001337713171350764017202 0ustar liggesusers--- title: "Lazyeval: a new approach to NSE" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Lazyeval: a new approach to NSE} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} --- ```{r, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") rownames(mtcars) <- NULL ``` This document outlines my previous approach to non-standard evaluation (NSE). You should avoid it unless you are working with an older version of dplyr or tidyr. There are three key ideas: * Instead of using `substitute()`, use `lazyeval::lazy()` to capture both expression and environment. (Or use `lazyeval::lazy_dots(...)` to capture promises in `...`) * Every function that uses NSE should have a standard evaluation (SE) escape hatch that does the actual computation. The SE-function name should end with `_`. * The SE-function has a flexible input specification to make it easy for people to program with.
## `lazy()` The key tool that makes this approach possible is `lazy()`, an equivalent to `substitute()` that captures both expression and environment associated with a function argument: ```{r} library(lazyeval) f <- function(x = a - b) { lazy(x) } f() f(a + b) ``` As a complement to `eval()`, the lazyeval package provides `lazy_eval()` that uses the environment associated with the lazy object: ```{r} a <- 10 b <- 1 lazy_eval(f()) lazy_eval(f(a + b)) ``` The second argument to `lazy_eval()` is a list or data frame where names should be looked up first: ```{r} lazy_eval(f(), list(a = 1)) ``` `lazy_eval()` also works with formulas, since they contain the same information as a lazy object: an expression (only the RHS is used by convention) and an environment: ```{r} lazy_eval(~ a + b) h <- function(i) { ~ 10 + i } lazy_eval(h(1)) ``` ## Standard evaluation Whenever we need a function that does non-standard evaluation, always write the standard evaluation version first. For example, let's implement our own version of `subset()`: ```{r} subset2_ <- function(df, condition) { r <- lazy_eval(condition, df) r <- r & !is.na(r) df[r, , drop = FALSE] } subset2_(mtcars, lazy(mpg > 31)) ``` `lazy_eval()` will always coerce its first argument into a lazy object, so a variety of specifications will work: ```{r} subset2_(mtcars, ~mpg > 31) subset2_(mtcars, quote(mpg > 31)) subset2_(mtcars, "mpg > 31") ``` Note that quoted calls and strings don't have environments associated with them, so `as.lazy()` defaults to using `baseenv()`. This will work if the expression is self-contained (i.e. doesn't contain any references to variables in the local environment), and will otherwise fail quickly and robustly. ## Non-standard evaluation With the SE version in hand, writing the NSE version is easy.
We just use `lazy()` to capture the unevaluated expression and corresponding environment: ```{r} subset2 <- function(df, condition) { subset2_(df, lazy(condition)) } subset2(mtcars, mpg > 31) ``` This standard evaluation escape hatch is very important because it allows us to implement different NSE approaches. For example, we could create a subsetting function that finds all rows where a variable is above a threshold: ```{r} above_threshold <- function(df, var, threshold) { cond <- interp(~ var > x, var = lazy(var), x = threshold) subset2_(df, cond) } above_threshold(mtcars, mpg, 31) ``` Here we're using `interp()` to modify a formula. We use the value of `threshold` and the expression captured by `var`. ## Scoping Because `lazy()` captures the environment associated with the function argument, we automatically avoid a subtle scoping bug present in `subset()`: ```{r} x <- 31 f1 <- function(...) { x <- 30 subset(mtcars, ...) } # Uses 30 instead of 31 f1(mpg > x) f2 <- function(...) { x <- 30 subset2(mtcars, ...) } # Correctly uses 31 f2(mpg > x) ``` `lazy()` has another advantage over `substitute()` - by default, it follows promises across function invocations. This simplifies the casual use of NSE. ```{r, eval = FALSE} x <- 31 g1 <- function(comp) { x <- 30 subset(mtcars, comp) } g1(mpg > x) #> Error: object 'mpg' not found ``` ```{r} g2 <- function(comp) { x <- 30 subset2(mtcars, comp) } g2(mpg > x) ``` Note that `g2()` doesn't have a standard-evaluation escape hatch, so it's not suitable for programming with in the same way that `subset2_()` is. ## Chained promises Take the following example: ```{r} library(lazyeval) f1 <- function(x) lazy(x) g1 <- function(y) f1(y) g1(a + b) ``` `lazy()` returns `a + b` because it always tries to find the top-level promise. In this case the process looks like this: 1. Find the object that `x` is bound to. 2.
It's a promise, so find the expr it's bound to (`y`, a symbol) and the environment in which it should be evaluated (the environment of `g1()`). 3. Since the expression is a symbol (`y`), look up its value: it's bound to a promise. 4. That promise has expression `a + b` and should be evaluated in the global environment. 5. The expression is not a symbol, so stop. Occasionally, you want to avoid this recursive behaviour, so you can use `.follow_symbols = FALSE`: ```{r} f2 <- function(x) lazy(x, .follow_symbols = FALSE) g2 <- function(y) f2(y) g2(a + b) ``` Either way, if you evaluate the lazy expression you'll get the same result: ```{r} a <- 10 b <- 1 lazy_eval(g1(a + b)) lazy_eval(g2(a + b)) ``` Note that the resolution of chained promises only works with unevaluated objects. This is because R deletes the information about the environment associated with a promise when it has been forced, so that the garbage collector is allowed to remove the environment from memory in case it is no longer used. `lazy()` will fail with an error in such situations. ```{r, error = TRUE, purl = FALSE} var <- 0 f3 <- function(x) { force(x) lazy(x) } f3(var) ``` lazyeval/vignettes/lazyeval.Rmd0000644000176200001440000005105613171350764016422 0ustar liggesusers--- title: "Non-standard evaluation" author: "Hadley Wickham" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Non-standard evaluation} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} library(lazyeval) knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` This document describes lazyeval, a package that provides principled tools to perform non-standard evaluation (NSE) in R. You should read this vignette if you want to program with packages like dplyr and ggplot2[^1], or you want a principled way of working with delayed expressions in your own package.
As the name suggests, non-standard evaluation breaks away from the standard evaluation (SE) rules in order to do something special. There are three common uses of NSE: 1. __Labelling__ enhances plots and tables by using the expressions supplied to a function, rather than their values. For example, note the axis labels in this plot: ```{r, fig.width = 4, fig.height = 2.5} par(mar = c(4.5, 4.5, 1, 0.5)) grid <- seq(0, 2 * pi, length = 100) plot(grid, sin(grid), type = "l") ``` 1. __Non-standard scoping__ looks for objects in places other than the current environment. For example, base R has `with()`, `subset()`, and `transform()` that look for objects in a data frame (or list) before the current environment: ```{r} df <- data.frame(x = c(1, 5, 4, 2, 3), y = c(2, 1, 5, 4, 3)) with(df, mean(x)) subset(df, x == y) transform(df, z = x + y) ``` 1. __Metaprogramming__ is a catch-all term that covers all other uses of NSE (such as in `bquote()` and `library()`). Metaprogramming is so called because it involves computing on the unevaluated code in some way. This document is broadly organised according to the three types of non-standard evaluation described above. The main difference is that after [labelling], we'll take a detour to learn more about [formulas]. You're probably familiar with formulas from linear models (e.g. `lm(mpg ~ displ, data = mtcars)`) but formulas are more than just a tool for modelling: they are a general way of capturing an unevaluated expression. The approaches recommended here are quite different to my previous generation of recommendations. I am fairly confident these new approaches are correct, and will not have to change substantially again. The current tools make it easy to solve a number of practical problems that were previously challenging and are rooted in [long-standing theory](http://repository.readscheme.org/ftp/papers/pepm99/bawden.pdf). 
[^1]: Currently neither ggplot2 nor dplyr actually use these tools since I've only just figured it out. But I'll be working hard to make sure all my packages are consistent in the near future. ## Labelling In base R, the classic way to turn an argument into a label is to use `deparse(substitute(x))`: ```{r} my_label <- function(x) deparse(substitute(x)) my_label(x + y) ``` There are two potential problems with this approach: 1. For some long expressions, `deparse()` generates a character vector with length > 1: ```{r} my_label({ a + b c + d }) ``` 1. `substitute()` only looks one level up, so you lose the original label if the function isn't called directly: ```{r} my_label2 <- function(x) my_label(x) my_label2(a + b) ``` Both of these problems are resolved by `lazyeval::expr_text()`: ```{r} my_label <- function(x) expr_text(x) my_label2 <- function(x) my_label(x) my_label({ a + b c + d }) my_label2(a + b) ``` There are two variations on the theme of `expr_text()`: * `expr_find()` finds the underlying expression. It works similarly to `substitute()` but will follow a chain of promises back up to the original expression. This is often useful for [metaprogramming]. * `expr_label()` is a customised version of `expr_text()` that produces labels designed to be used in messages to the user: ```{r} expr_label(x) expr_label(a + b + c) expr_label(foo({ x + y })) ``` ### Exercises 1. `plot()` uses `deparse(substitute(x))` to generate labels for the x and y axes. Can you generate input that causes it to display bad labels? Write your own wrapper around `plot()` that uses `expr_label()` to compute `xlab` and `ylab`. 1. Create a simple implementation of `mean()` that stops with an informative error message if the argument is not numeric: ```{r, eval = FALSE} x <- c("a", "b", "c") my_mean(x) #> Error: `x` is not a numeric vector. my_mean(x == "a") #> Error: `x == "a"` is not a numeric vector. my_mean("a") #> Error: "a" is not a numeric vector. ``` 1.
Read the source code for `expr_text()`. How does it work? What additional arguments to `deparse()` does it use? ## Formulas Non-standard scoping is probably the most useful NSE tool, but before we can talk about a solid approach, we need to take a detour to talk about formulas. Formulas are a familiar tool from linear models, but their utility is not limited to models. In fact, formulas are a powerful, general purpose tool, because a formula captures two things: 1. An unevaluated expression. 1. The context (environment) in which the expression was created. `~` is a single character that allows you to say: "I want to capture the meaning of this code, without evaluating it right away". For that reason, the formula can be thought of as a "quoting" operator. ### Definition of a formula Technically, a formula is a "language" object (i.e. an unevaluated expression) with a class of "formula" and an attribute that stores the environment: ```{r} f <- ~ x + y + z typeof(f) attributes(f) ``` The structure of the underlying object is slightly different depending on whether you have a one-sided or two-sided formula: * One-sided formulas have length two: ```{r} length(f) # The 1st element is always ~ f[[1]] # The 2nd element is the RHS f[[2]] ``` * Two-sided formulas have length three: ```{r} g <- y ~ x + z length(g) # The 1st element is still ~ g[[1]] # But now the 2nd element is the LHS g[[2]] # And the 3rd element is the RHS g[[3]] ``` To abstract away these differences, lazyeval provides `f_rhs()` and `f_lhs()` to access either side of the formula, and `f_env()` to access its environment: ```{r} f_rhs(f) f_lhs(f) f_env(f) f_rhs(g) f_lhs(g) f_env(g) ``` ### Evaluating a formula A formula delays the evaluation of an expression so you can later evaluate it with `f_eval()`: ```{r} f <- ~ 1 + 2 + 3 f f_eval(f) ``` This allows you to use a formula as a robust way of delaying evaluation, cleanly separating the creation of the formula from its evaluation.
Because formulas capture the code and context, you get the correct result even when a formula is created and evaluated in different places. In the following example, note that the value of `x` inside `add_1000()` is used: ```{r} x <- 1 add_1000 <- function(x) { ~ 1000 + x } add_1000(3) f_eval(add_1000(3)) ``` It can be hard to see what's going on when looking at a formula because important values are stored in the environment, which is largely opaque. You can use `f_unwrap()` to replace names with their corresponding values: ```{r} f_unwrap(add_1000(3)) ``` ### Non-standard scoping `f_eval()` has an optional second argument: a named list (or data frame) that overrides values found in the formula's environment. ```{r} y <- 100 f_eval(~ y) f_eval(~ y, data = list(y = 10)) # Can mix variables in environment and data argument f_eval(~ x + y, data = list(x = 10)) # Can even supply functions f_eval(~ f(y), data = list(f = function(x) x * 3)) ``` This makes it very easy to implement non-standard scoping: ```{r} f_eval(~ mean(cyl), data = mtcars) ``` One challenge with non-standard scoping is that we've introduced some ambiguity. For example, in the code below does `x` come from `mydata` or the environment? ```{r, eval = FALSE} f_eval(~ x, data = mydata) ``` You can't tell without knowing whether or not `mydata` has a variable called `x`. To overcome this problem, `f_eval()` provides two pronouns: * `.data` is bound to the data frame. * `.env` is bound to the formula environment. They both start with `.` to minimise the chances of clashing with existing variables. 
With these pronouns we can rewrite the previous formula to remove the ambiguity: ```{r} mydata <- data.frame(x = 100, y = 1) x <- 10 f_eval(~ .env$x, data = mydata) f_eval(~ .data$x, data = mydata) ``` If the variable or object doesn't exist, you'll get an informative error: ```{r, error = TRUE} f_eval(~ .env$z, data = mydata) f_eval(~ .data$z, data = mydata) ``` ### Unquoting `f_eval()` has one more useful trick up its sleeve: unquoting. Unquoting allows you to write functions where the user supplies part of the formula. For example, the following function allows you to compute the mean of any column (or any function of a column): ```{r} df_mean <- function(df, variable) { f_eval(~ mean(uq(variable)), data = df) } df_mean(mtcars, ~ cyl) df_mean(mtcars, ~ disp * 0.01638) df_mean(mtcars, ~ sqrt(mpg)) ``` To see how this works, we can use `f_interp()` which `f_eval()` calls internally (you shouldn't call it in your own code, but it's useful for debugging). The key is `uq()`: `uq()` evaluates its first (and only) argument and inserts the value into the formula: ```{r} variable <- ~cyl f_interp(~ mean(uq(variable))) variable <- ~ disp * 0.01638 f_interp(~ mean(uq(variable))) ``` Unquoting allows you to create code "templates", where you write most of the expression, while still allowing the user to control important components. You can even use `uq()` to change the function being called: ```{r} f <- ~ mean f_interp(~ uq(f)(uq(variable))) ``` Note that `uq()` only takes the RHS of a formula, which makes it difficult to insert literal formulas into a call: ```{r} formula <- y ~ x f_interp(~ lm(uq(formula), data = df)) ``` You can instead use `uqf()` which uses the whole formula, not just the RHS: ```{r} f_interp(~ lm(uqf(formula), data = df)) ``` Unquoting is powerful, but it only allows you to modify a single argument: it doesn't allow you to add an arbitrary number of arguments. To do that, you'll need "unquote-splice", or `uqs()`. 
The first (and only) argument to `uqs()` should be a list of arguments to be spliced into the call: ```{r} variable <- ~ x extra_args <- list(na.rm = TRUE, trim = 0.9) f_interp(~ mean(uq(variable), uqs(extra_args))) ``` ### Exercises 1. Create a wrapper around `lm()` that allows the user to supply the response and predictors as two separate formulas. 1. Compare and contrast `f_eval()` with `with()`. 1. Why does this code work even though `f` is defined in two places? (And one of them is not a function.) ```{r} f <- function(x) x + 1 f_eval(~ f(10), list(f = "a")) ``` ## Non-standard scoping Non-standard scoping (NSS) is an important part of R because it makes it easy to write functions tailored for interactive data exploration. These functions require less typing, at the cost of some ambiguity and "magic". This is a good trade-off for interactive data exploration because you want to get ideas out of your head and into the computer as quickly as possible. If a function does make a bad guess, you'll spot it quickly because you're working interactively. There are three challenges to implementing non-standard scoping: 1. You must correctly delay the evaluation of a function argument, capturing both the computation (the expression), and the context (the environment). I recommend making this explicit by requiring the user to "quote" any NSS arguments with `~`, and then evaluating explicitly with `f_eval()`. 1. When writing functions that use NSS-functions, you need some way to avoid the automatic lookup and be explicit about where objects should be found. `f_eval()` solves this problem with the `.data` and `.env` pronouns. 1. You need some way to allow the user to supply parts of a formula. `f_eval()` solves this with unquoting. To illustrate these challenges, I will implement a `sieve()` function that works similarly to `base::subset()` or `dplyr::filter()`.
The goal of `sieve()` is to make it easy to select observations that match criteria defined by a logical expression. `sieve()` has three advantages over `[`: 1. It is much more compact when the condition uses many variables, because you don't need to repeat the name of the data frame many times. 1. It drops rows where the condition evaluates to `NA`, rather than filling them with `NA`s. 1. It always returns a data frame. The implementation of `sieve()` is straightforward. First we use `f_eval()` to perform NSS. We then check that we have a logical vector, replace `NA`s with `FALSE`, and subset with `[`. ```{R} sieve <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. = FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } df <- data.frame(x = 1:5, y = 5:1) sieve(df, ~ x <= 2) sieve(df, ~ x == y) ``` ### Programming with `sieve()` Imagine that you've written some code that looks like this: ```{r, eval = FALSE} sieve(march, ~ x > 100) sieve(april, ~ x > 50) sieve(june, ~ x > 45) sieve(july, ~ x > 17) ``` (This is a contrived example, but it illustrates all of the important issues you'll need to consider when writing more useful functions.) Instead of continuing to copy-and-paste your code, you decide to wrap up the common behaviour in a function: ```{r} threshold_x <- function(df, threshold) { sieve(df, ~ x > threshold) } threshold_x(df, 3) ``` There are two ways that this function might fail: 1. The data frame might not have a variable called `x`. This will fail unless there's a variable called `x` hanging around in the global environment: ```{r, error = TRUE} rm(x) df2 <- data.frame(y = 5:1) # Throws an error threshold_x(df2, 3) # Silently gives the incorrect result! x <- 5 threshold_x(df2, 3) ``` 1.
The data frame might have a variable called `threshold`: ```{r} df3 <- data.frame(x = 1:5, y = 5:1, threshold = 4) threshold_x(df3, 3) ``` These failures are particularly pernicious because instead of throwing an error they silently produce the wrong answer. Both failures arise because `f_eval()` introduces ambiguity by looking in two places for each name: the supplied data and the formula environment. To make `threshold_x()` more reliable, we need to be more explicit by using the `.data` and `.env` pronouns: ```{r, error = TRUE} threshold_x <- function(df, threshold) { sieve(df, ~ .data$x > .env$threshold) } threshold_x(df2, 3) threshold_x(df3, 3) ``` Here `.env` is bound to the environment where `~` is evaluated, namely the inside of `threshold_x()`. ### Adding arguments The `threshold_x()` function is not very useful because it's bound to a specific variable. It would be more powerful if we could vary both the threshold and the variable it applies to. We can do that by taking an additional argument to specify which variable to use. One simple approach is to use a string and `[[`: ```{r} threshold <- function(df, variable, threshold) { stopifnot(is.character(variable), length(variable) == 1) sieve(df, ~ .data[[.env$variable]] > .env$threshold) } threshold(df, "x", 4) ``` This is a simple and robust solution, but only allows us to use an existing variable, not an arbitrary expression like `sqrt(x)`. A more general solution is to allow the user to supply a formula, and use unquoting: ```{r} threshold <- function(df, variable = ~x, threshold = 0) { sieve(df, ~ uq(variable) > .env$threshold) } threshold(df, ~ x, 4) threshold(df, ~ abs(x - y), 2) ``` In this case, it's the responsibility of the user to ensure the `variable` is specified unambiguously.
`f_eval()` is designed so that `.data` and `.env` work even when evaluated inside of `uq()`: ```{r} x <- 3 threshold(df, ~ .data$x - .env$x, 0) ``` ### Dot-dot-dot There is one more tool that you might find useful for functions that take `...`. For example, the code below implements a function similar to `dplyr::mutate()` or `base::transform()`. ```{r} mogrify <- function(`_df`, ...) { args <- list(...) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ``` (NB: the first argument is a non-syntactic name (i.e. it requires quoting with `` ` ``) so it doesn't accidentally match one of the names of the new variables.) `mogrify()` makes it easy to add new variables to a data frame: ```{r} df <- data.frame(x = 1:5, y = sample(5)) mogrify(df, z = ~ x + y, z2 = ~ z * 2) ``` One problem with this implementation is that it's hard to specify the names of the generated variables. Imagine you want a function where the name and expression are in separate variables. This is awkward because the variable name is supplied as an argument name to `mogrify()`: ```{r} add_variable <- function(df, name, expr) { do.call("mogrify", c(list(df), setNames(list(expr), name))) } add_variable(df, "z", ~ x + y) ``` Lazyeval provides the `f_list()` function to make writing this sort of function a little easier. It takes a list of formulas and evaluates the LHS of each formula (if present) to rename the elements: ```{r} f_list("x" ~ y, z = ~z) ``` If we tweak `mogrify()` to use `f_list()` instead of `list()`: ```{r} mogrify <- function(`_df`, ...) { args <- f_list(...) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } ``` `add_variable()` becomes much simpler: ```{r} add_variable <- function(df, name, expr) { mogrify(df, name ~ uq(expr)) } add_variable(df, "z", ~ x + y) ``` ### Exercises 1. Write a function that selects all rows of `df` where `variable` is greater than its mean.
Make the function more general by allowing the user to specify a function to use instead of `mean()` (e.g. `median()`). 1. Create a version of `mogrify()` where the first argument is `x`. What happens if you try to create a new variable called `x`? ## Non-standard evaluation In some situations you might want to eliminate the formula altogether, and allow the user to type expressions directly. I was once much enamoured with this approach (witness ggplot2, dplyr, ...). However, I now think that it should be used sparingly because explicit quoting with `~` leads to simpler code, and makes it more clear to the user that something special is going on. That said, lazyeval does allow you to eliminate the `~` if you really want to. In this case, I recommend having both an NSE and an SE version of the function. The SE version, which takes formulas, should have suffix `_`: ```{r} sieve_ <- function(df, condition) { rows <- f_eval(condition, df) if (!is.logical(rows)) { stop("`condition` must be logical.", call. = FALSE) } rows[is.na(rows)] <- FALSE df[rows, , drop = FALSE] } ``` Then create the NSE version which doesn't need the explicit formula. The key is the use of `f_capture()` which takes an unevaluated argument (a promise) and captures it as a formula: ```{r} sieve <- function(df, expr) { sieve_(df, f_capture(expr)) } sieve(df, x == 1) ``` If you're familiar with `substitute()` you might expect the same drawbacks to apply. However, `f_capture()` is smart enough to follow a chain of promises back to the original value, so, for example, this code works fine: ```{r} scramble <- function(df) { df[sample(nrow(df)), , drop = FALSE] } subscramble <- function(df, expr) { scramble(sieve(df, expr)) } subscramble(df, x < 4) ``` ### Dot-dot-dot If you want a `...` function that doesn't require formulas, I recommend that the SE version take a list of arguments, and the NSE version uses `dots_capture()` to capture multiple arguments as a list of formulas.
```{r} mogrify_ <- function(`_df`, args) { args <- as_f_list(args) for (nm in names(args)) { `_df`[[nm]] <- f_eval(args[[nm]], `_df`) } `_df` } mogrify <- function(`_df`, ...) { mogrify_(`_df`, dots_capture(...)) } ``` ### Exercises 1. Recreate `subscramble()` using `base::subset()` instead of `sieve()`. Why does it fail? ## Metaprogramming The final use of non-standard evaluation is to do metaprogramming. This is a catch-all term that encompasses any function that does computation on an unevaluated expression. You can learn about metaprogramming in <http://adv-r.had.co.nz/Expressions.html>, particularly <http://adv-r.had.co.nz/Expressions.html#ast-funs>. Over time, the goal is to move all useful metaprogramming helper functions into this package, and discuss metaprogramming more here. lazyeval/README.md0000644000176200001440000000204213171753463013370 0ustar liggesusers# lazyeval [![Build Status](https://travis-ci.org/hadley/lazyeval.png?branch=master)](https://travis-ci.org/hadley/lazyeval) [![Coverage Status](http://codecov.io/github/hadley/lazyeval/coverage.svg?branch=master)](http://codecov.io/github/hadley/lazyeval?branch=master) The lazyeval package provides tools that make it easier to correctly implement non-standard evaluation (NSE) in R. You use lazy evaluation by requiring the user to "quote" specially evaluated arguments with `~`, and then using the lazyeval package to compute with those formulas. It is also possible to eliminate the use of the `~` by converting promises to formulas. This does make programming with such functions a little harder, but it can be worth it in certain situations. See `vignette("lazyeval")` for more details.
## Installation Install the released version from CRAN with: ```R install.packages("lazyeval") ``` Install the development version from github with: ```R # install.packages("devtools") devtools::install_github("hadley/lazyeval", build_vignettes = TRUE) ``` lazyeval/MD50000644000176200001440000001033413175456530012423 0ustar liggesusersb83aa4df133ff1780f623af003771305 *DESCRIPTION 076be75efe0b6fdb85434c1d6ba4a81e *NAMESPACE 9f6939434f914a3b7f539ce217df3326 *NEWS.md d9cc209f4c0c733a11ed93e673ae0d9c *R/ast.R befb1c81988ed1357c416f919528980d *R/call.R e3159cd804827c6d85885c17b284b5fb *R/complain.R b2388d07483695bad432af4278771633 *R/expr.R 57e40b0dd45196bae1bd50f405a23129 *R/f-capture.R c1e8a4f2908cc985b4faed2c04bebc7f *R/f-eval.R 213fb437c2e4a967086694e3e2db8974 *R/f-interp.R 1d0f8e3b2eaf491d129a04ebe3084280 *R/formula.R ea161e248423ef0d17885123c1065258 *R/function.R be4330310f7efddd7fefe5052db9a8e0 *R/language.R d9a423351a93e1514bfac54255d4eb28 *R/lazy-as.R f2b5984a77262840aea4bf29785ed9b9 *R/lazy-call.R b9ffe3bc4e4a0476f20941d79767c947 *R/lazy-dots.R 66bfb57dbc11072d7581559830ba1994 *R/lazy-eval.R 41ed7c6a852958828eb44d08374bff84 *R/lazy-interp.R 3cbae872f5df31e044ddf79731550d93 *R/lazy-names.R 82c4b4197e5df808c9eca0b9b0bd9faa *R/lazy.R eaab07fb1d658479edaa8e1385a7ae05 *R/lazyeval.R 4501ecd06bc9f3d7513cc558cc195440 *R/utils.R 0f459b4372bceb994acda6f5836a8cc7 *README.md add6e0109119aa6cf267084dd15b2c49 *build/vignette.rds 01e2dd08598e3e44bab0fb7a03a40d6c *inst/doc/lazyeval-old.R fd0e6bc6fa61549d20cc69f641dd2ffd *inst/doc/lazyeval-old.Rmd a908e650997ce0208a616ad5f7c69d35 *inst/doc/lazyeval-old.html 5f895ae6e564ecd4855e5ae17b1e1216 *inst/doc/lazyeval.R 036dd3ea1f555238004da3c88ffb2373 *inst/doc/lazyeval.Rmd 750bd2b0dab394af791c04c9169a68fa *inst/doc/lazyeval.html 2c6ee8d0f2d6cf9583df82824633a7ec *man/all_dots.Rd 2f412970013bfff969553cc5cdbf7414 *man/as.lazy.Rd 4fa1680ecbd33e9f2accbed41fc3baa8 *man/as_name.Rd 4901deef27855e3ef5092ac2e4057a75 *man/ast_.Rd 
1310232e76533965b20420bf90e30409 *man/auto_name.Rd 1340d8a3bedefed84920b71fedc792b2 *man/call_modify.Rd 42f82e62a13b78b7b320d47f22306e8b *man/call_new.Rd 1a8c04aa15f32ebd4f1b7134e5e47177 *man/common_env.Rd f1a86b0424eda1d67dcfe1d087ed4ba0 *man/expr_label.Rd 3a2dfc1787f0e86465012f83b49f37fd *man/f_capture.Rd 723f0f8420caa731cc4728018ece4da2 *man/f_eval.Rd fdfbf4920db2dd295a7776ff8b96c8bb *man/f_interp.Rd fb3fd925c09ff6f29b25e47b8d0669b4 *man/f_list.Rd ab43799b3040137e6469cc910bfc641a *man/f_new.Rd f0a2c22aa74c49a34b1e3dc1932fe493 *man/f_rhs.Rd 884e3f93ec645eaff34d3b9f741f8738 *man/f_text.Rd 49fc315564f56e6c446f4473f18c0171 *man/f_unwrap.Rd 0478841ebc4f4a9dd82b4e7e2f5f69fd *man/function_new.Rd a1aebb42db9afefd71c6d00bc831e54c *man/interp.Rd eb6f14cec04ba1c8edcc1a1248197b7e *man/is_formula.Rd 4d2fea5d64ef556504c187239d3245df *man/is_lang.Rd 5197c686f549745ac6a6978c4b2fe2b1 *man/lazy_.Rd 0db962cf222cbe82f959174dd660805b *man/lazy_dots.Rd e4dd5b05f00327e486789b26be8f674a *man/lazy_eval.Rd 4557247dd1c9ce7d6a78520aa770f55b *man/make_call.Rd 4342c540bfd816fc182175e5b0608cd2 *man/missing_arg.Rd 5a213920da3263a55dc830932737b7e0 *src/expr.c 7aa5384ba84c293274b4add5c53118aa *src/init.c e4155ab688f6e7c66de0d688ece032ac *src/interp.c 99f2d28770a76e1c6a6e1559ddfd1db4 *src/lazy.c 5ff30a1fc26006eb8377347e329b1d13 *src/name.c 83f89c0bd554828bf1472d59758a824f *src/utils.c 26ca4f69c34debf19d0665cc61dcea80 *src/utils.h b43a93ea47dc4f6a64a4625fb67b87f6 *tests/testthat.R 28ab6d5c95cce52309b007a83e869cd1 *tests/testthat/ast-irregular.txt eda899de62882fc656bfbd548171ee04 *tests/testthat/ast-sample.txt eaca1b22c21c0bd595aa7be2b8de7c6c *tests/testthat/test-ast.R 2a7ae04e75bf6ca648d1d8b7f0f0f255 *tests/testthat/test-call.R bd8c6bcb7b287fa288af9d8dd003c46c *tests/testthat/test-complain.R c62da31fc3eed1f72d5e9926d90d5e3e *tests/testthat/test-dots.R c31c22fe7c0f3ab3723dcf15b574e365 *tests/testthat/test-expr.R 03a6e2c3ec4675634e9d46be2794138d *tests/testthat/test-f-capture.R 
943b81371b45f3ab141370765197be09 *tests/testthat/test-f-eval.R b5ea255aacfa712ab108dff7bc031543 *tests/testthat/test-f-interp.R f13ad194342bbc3166d97bae4d2c0f0a *tests/testthat/test-f-list.R 11bfa6279aa0e9589ea9d4f1b6d62a6e *tests/testthat/test-f-unwrap.R 81dc0f3c29e0df764289059a52026e02 *tests/testthat/test-formula.R cf2bc1d7126c6275bb6a66200c86cdc4 *tests/testthat/test-function.R 4e12b82cde56263135ae960627dce9cf *tests/testthat/test-language.R 7c37c5cf3fab082e9dfda9942e2f72e0 *tests/testthat/test-lazy.R dcd46b347706159f3b53165d13ee9272 *tests/testthat/test-names.R fd0e6bc6fa61549d20cc69f641dd2ffd *vignettes/lazyeval-old.Rmd 036dd3ea1f555238004da3c88ffb2373 *vignettes/lazyeval.Rmd lazyeval/build/0000755000176200001440000000000013171753670013212 5ustar liggesuserslazyeval/build/vignette.rds0000644000176200001440000000037513171753670015556 0ustar liggesusersun0M ~B0;r xrR'i~ńQ@ЦabC׾A#kr~1V!we& Gj]nf6Z*9. HѦ&Sisp4Baԇiڪϑ??= 3.1.0) Suggests: knitr, rmarkdown (>= 0.2.65), testthat, covr VignetteBuilder: knitr RoxygenNote: 6.0.1 NeedsCompilation: yes Packaged: 2017-10-18 22:37:44 UTC; lionel Author: Hadley Wickham [aut, cre], RStudio [cph] Maintainer: Hadley Wickham Repository: CRAN Date/Publication: 2017-10-29 22:59:36 UTC lazyeval/man/0000755000176200001440000000000013171753463012666 5ustar liggesuserslazyeval/man/f_text.Rd0000644000176200001440000000123313171753463014445 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{f_text} \alias{f_text} \alias{f_label} \title{Turn RHS of formula into a string/label.} \usage{ f_text(x, width = 60L, nlines = Inf) f_label(x) } \arguments{ \item{x}{A formula.} \item{width}{Width of each line} \item{nlines}{Maximum number of lines to extract.} } \description{ Equivalent of \code{\link{expr_text}()} and \code{\link{expr_label}()} for formulas. 
} \examples{ f <- ~ a + b + bc f_text(f) f_label(f) # Names are quoted with `` f_label(~ x) # Strings are encoded f_label(~ "a\\nb") # Long expressions are collapsed f_label(~ foo({ 1 + 2 print(x) })) } lazyeval/man/missing_arg.Rd0000644000176200001440000000046313171753463015462 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{missing_arg} \alias{missing_arg} \title{Generate a missing argument.} \usage{ missing_arg() } \description{ Generate a missing argument. } \examples{ f_interp(~f(x = uq(missing_arg()))) f_interp(~f(x = uq(NULL))) } lazyeval/man/f_new.Rd0000644000176200001440000000066513171753463014262 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{f_new} \alias{f_new} \title{Create a formula object by "hand".} \usage{ f_new(rhs, lhs = NULL, env = parent.frame()) } \arguments{ \item{lhs, rhs}{A call, name, or atomic vector.} \item{env}{An environment} } \value{ A formula object } \description{ Create a formula object by "hand". } \examples{ f_new(quote(a)) f_new(quote(a), quote(b)) } lazyeval/man/call_modify.Rd0000644000176200001440000000200213171753463015431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/call.R \name{call_modify} \alias{call_modify} \alias{call_standardise} \title{Modify the arguments of a call.} \usage{ call_modify(call, new_args, env = parent.frame()) call_standardise(call, env = parent.frame()) } \arguments{ \item{call}{A call to modify. It is first standardised with \code{\link{call_standardise}}.} \item{new_args}{A named list of expressions (constants, names or calls) used to modify the call. Use \code{NULL} to remove arguments.} \item{env}{Environment in which to look up call value.} } \description{ Modify the arguments of a call.
} \examples{ call <- quote(mean(x, na.rm = TRUE)) call_standardise(call) # Modify an existing argument call_modify(call, list(na.rm = FALSE)) call_modify(call, list(x = quote(y))) # Remove an argument call_modify(call, list(na.rm = NULL)) # Add a new argument call_modify(call, list(trim = 0.1)) # Add an explicit missing argument call_modify(call, list(na.rm = quote(expr = ))) } lazyeval/man/as_name.Rd0000644000176200001440000000120213171753463014553 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/language.R \name{as_name} \alias{as_name} \alias{as_call} \title{Coerce an object to a name or call.} \usage{ as_name(x) as_call(x) } \arguments{ \item{x}{An object to coerce} } \description{ These are S3 generics with built-in methods for names, calls, formulas, and strings. The distinction between a name and a call is particularly important when coercing from a string. Coercing to a call will parse the string; coercing to a name will create a (potentially) non-syntactic name. } \examples{ as_name("x + y") as_call("x + y") as_call(~ f) as_name(~ f()) } lazyeval/man/f_interp.Rd0000644000176200001440000000275513171753463014774 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/f-interp.R \name{f_interp} \alias{f_interp} \alias{uq} \alias{uqs} \alias{uq} \alias{uqf} \alias{uqs} \title{Interpolate a formula} \usage{ f_interp(f, data = NULL) uq(x, data = NULL) uqf(x) uqs(x) } \arguments{ \item{f}{A one-sided formula.} \item{data}{When called from inside \code{f_eval}, this is used to pass on the data so that nested formulas are evaluated in the correct environment.} \item{x}{For \code{uq} and \code{uqf}, a formula. For \code{uqs}, a vector.} } \description{ Interpolation replaces sub-expressions of the form \code{uq(x)} with the evaluated value of \code{x}, and inlines sub-expressions of the form \code{uqs(x)}.
} \section{Theory}{ Formally, \code{f_interp} is a quasiquote function, \code{uq()} is the unquote operator, and \code{uqs()} is the unquote splice operator. These terms have a rich history in LISP, and live on in modern languages like \href{http://docs.julialang.org/en/release-0.1/manual/metaprogramming/}{Julia} and \href{https://docs.racket-lang.org/reference/quasiquote.html}{Racket}. } \examples{ f_interp(x ~ 1 + uq(1 + 2 + 3) + 10) # Use uqs() if you want to add multiple arguments to a function # It must evaluate to a list args <- list(1:10, na.rm = TRUE) f_interp(~ mean( uqs(args) )) # You can combine the two var <- quote(xyz) extra_args <- list(trim = 0.9) f_interp(~ mean( uq(var) , uqs(extra_args) )) foo <- function(n) { ~ 1 + uq(n) } f <- foo(10) f f_interp(f) } lazyeval/man/f_rhs.Rd0000644000176200001440000000154413171753463014262 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{f_rhs} \alias{f_rhs} \alias{f_rhs<-} \alias{f_lhs} \alias{f_lhs<-} \alias{f_env} \alias{f_env<-} \title{Get/set formula components.} \usage{ f_rhs(f) f_rhs(x) <- value f_lhs(f) f_lhs(x) <- value f_env(f) f_env(x) <- value } \arguments{ \item{f, x}{A formula} \item{value}{The value to replace with.} } \value{ \code{f_rhs} and \code{f_lhs} return language objects (i.e. atomic vectors of length 1, a name, or a call). \code{f_env} returns an environment. } \description{ \code{f_rhs} extracts the right-hand side, \code{f_lhs} extracts the left-hand side, and \code{f_env} extracts the environment. All functions throw an error if \code{f} is not a formula.
} \examples{ f_rhs(~ 1 + 2 + 3) f_rhs(~ x) f_rhs(~ "A") f_rhs(1 ~ 2) f_lhs(~ y) f_lhs(x ~ y) f_env(~ x) } lazyeval/man/f_unwrap.Rd0000644000176200001440000000061413171753463014777 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{f_unwrap} \alias{f_unwrap} \title{Unwrap a formula} \usage{ f_unwrap(f) } \arguments{ \item{f}{A formula to unwrap.} } \description{ This interpolates values in the formula that are defined in its environment, replacing the environment with its parent. } \examples{ n <- 100 f <- ~ x + n f_unwrap(f) } lazyeval/man/f_eval.Rd0000644000176200001440000000410313171753463014407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/f-eval.R \name{f_eval_rhs} \alias{f_eval_rhs} \alias{f_eval_lhs} \alias{f_eval} \alias{find_data} \title{Evaluate a formula} \usage{ f_eval_rhs(f, data = NULL) f_eval_lhs(f, data = NULL) f_eval(f, data = NULL) find_data(x) } \arguments{ \item{f}{A formula. Any expressions wrapped in \code{ uq() } will be "unquoted", i.e. they will be evaluated, and the results inserted back into the formula. See \code{\link{f_interp}} for more details.} \item{data}{A list (or data frame). \code{find_data} is a generic used to find the data associated with a given object. If you want to make \code{f_eval} work for your own objects, you can define a method for this generic.} \item{x}{An object for which you want to find associated data.} } \description{ \code{f_eval_rhs} evaluates the RHS of a formula and \code{f_eval_lhs} evaluates the LHS. \code{f_eval} is a shortcut for \code{f_eval_rhs} since that is what you most commonly need. } \details{ If \code{data} is specified, variables will be looked for first in this object, and, if not found there, in the environment of the formula.
} \section{Pronouns}{ When used with \code{data}, \code{f_eval} provides two pronouns to make it possible to be explicit about where you want values to come from: \code{.env} and \code{.data}. These are thin wrappers around \code{.data} and \code{.env} that throw errors if you try to access non-existent values. } \examples{ f_eval(~ 1 + 2 + 3) # formulas automatically capture their enclosing environment foo <- function(x) { y <- 10 ~ x + y } f <- foo(1) f f_eval(f) # If you supply data, f_eval will look there first: f_eval(~ cyl, mtcars) # To avoid ambiguity, you can use .env and .data pronouns to be # explicit: cyl <- 10 f_eval(~ .data$cyl, mtcars) f_eval(~ .env$cyl, mtcars) # Imagine you are computing the mean of a variable: f_eval(~ mean(cyl), mtcars) # How can you change the variable that's being computed? # The easiest way is to "unquote" with uq() # See ?f_interp for more details var <- ~ cyl f_eval(~ mean( uq(var) ), mtcars) } lazyeval/man/as.lazy.Rd0000644000176200001440000000170713171753463014543 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-as.R \name{as.lazy} \alias{as.lazy} \alias{as.lazy_dots} \title{Convert an object to a lazy expression or lazy dots.} \usage{ as.lazy(x, env = baseenv()) as.lazy_dots(x, env) } \arguments{ \item{x}{An R object. Current methods for \code{as.lazy()} convert formulas, character vectors, calls and names. Methods for \code{as.lazy_dots()} convert lists and character vectors (by calling \code{\link{lapply}()} with \code{as.lazy()}.)} \item{env}{Environment to use for objects that don't already have an associated environment.} } \description{ Convert an object to a lazy expression or lazy dots.
} \examples{ as.lazy(~ x + 1) as.lazy(quote(x + 1), globalenv()) as.lazy("x + 1", globalenv()) as.lazy_dots(list(~x, y = ~z + 1)) as.lazy_dots(c("a", "b", "c"), globalenv()) as.lazy_dots(~x) as.lazy_dots(quote(x), globalenv()) as.lazy_dots(quote(f()), globalenv()) as.lazy_dots(lazy(x)) } lazyeval/man/f_capture.Rd0000644000176200001440000000203513171753463015125 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/f-capture.R \name{f_capture} \alias{f_capture} \alias{dots_capture} \title{Make a promise explicit by converting it into a formula.} \usage{ f_capture(x) dots_capture(..., .ignore_empty = TRUE) } \arguments{ \item{x, ...}{Unevaluated promises} \item{.ignore_empty}{If \code{TRUE}, empty arguments will be silently dropped.} } \value{ \code{f_capture} returns a formula; \code{dots_capture} returns a list of formulas. } \description{ This should be used sparingly if you want to implement true non-standard evaluation with 100\% magic. I recommend avoiding this unless you have strong reasons otherwise since requiring arguments to be formulas only adds one extra character to the inputs, and otherwise makes life much much simpler. } \examples{ f_capture(a + b) dots_capture(a + b, c + d, e + f) # These functions will follow a chain of promises back to the # original definition f <- function(x) g(x) g <- function(y) h(y) h <- function(z) f_capture(z) f(a + b + c) } lazyeval/man/f_list.Rd0000644000176200001440000000107613171753463014441 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{f_list} \alias{f_list} \alias{as_f_list} \title{Build a named list from the LHS of formulas} \usage{ f_list(...) as_f_list(x) } \arguments{ \item{...}{Named arguments.} \item{x}{An existing list} } \value{ A named list. } \description{ \code{f_list} makes a new list; \code{as_f_list} takes an existing list.
Both take the LHS of any two-sided formula and evaluate it, replacing the current name with the result. } \examples{ f_list("y" ~ x) f_list(a = "y" ~ a, ~ b, c = ~c) } lazyeval/man/ast_.Rd0000644000176200001440000000130213171753463014077 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ast.R \name{ast_} \alias{ast_} \alias{ast} \title{Display a call (or expression) as a tree.} \usage{ ast_(x, width = getOption("width")) ast(x) } \arguments{ \item{x}{Quoted call, list of calls, or expression to display.} \item{width}{Display width, defaults to current width as reported by \code{getOption("width")}.} } \description{ \code{ast_} takes a quoted expression; \code{ast} does the quoting for you. } \examples{ ast(f(x, 1, g(), h(i()))) ast(if (TRUE) 3 else 4) ast(function(a = 1, b = 2) {a + b + 10}) ast(f(x)(y)(z)) ast_(quote(f(x, 1, g(), h(i())))) ast_(quote(if (TRUE) 3 else 4)) ast_(expression(1, 2, 3)) } lazyeval/man/lazy_eval.Rd0000644000176200001440000000120313171753463015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-eval.R \name{lazy_eval} \alias{lazy_eval} \title{Evaluate a lazy expression.} \usage{ lazy_eval(x, data = NULL) } \arguments{ \item{x}{A lazy object or a formula.} \item{data}{Optional: a data frame or list in which to preferentially look for variables before using the environment associated with the lazy object.} } \description{ Evaluate a lazy expression.
} \examples{ f <- function(x) { z <- 100 ~ x + z } z <- 10 lazy_eval(f(10)) lazy_eval(f(10), list(x = 100)) lazy_eval(f(10), list(x = 1, z = 1)) lazy_eval(lazy_dots(a = x, b = z), list(x = 10)) } lazyeval/man/all_dots.Rd0000644000176200001440000000100013171753463014745 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-as.R \name{all_dots} \alias{all_dots} \title{Combine explicit and implicit dots.} \usage{ all_dots(.dots, ..., all_named = FALSE) } \arguments{ \item{.dots}{A list of lazy objects} \item{...}{Individual lazy objects} \item{all_named}{If \code{TRUE}, uses \code{\link{auto_name}} to ensure every component has a name.} } \value{ A \code{\link{lazy_dots}} } \description{ Combine explicit and implicit dots. } \keyword{internal} lazyeval/man/common_env.Rd0000644000176200001440000000073613171753463015323 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-call.R \name{common_env} \alias{common_env} \title{Find common environment in list of lazy objects.} \usage{ common_env(dots) } \arguments{ \item{dots}{A list of lazy objects} } \description{ If no common environment is found, will return \code{baseenv()}. 
} \examples{ common_env(lazy_dots(a, b, c)) f <- function(x) ~x common_env(list(f(1))) common_env(list(f(1), f(2))) } \keyword{internal} lazyeval/man/make_call.Rd0000644000176200001440000000167213171753463015073 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-call.R \name{make_call} \alias{make_call} \title{Make a call with \code{lazy_dots} as arguments.} \usage{ make_call(fun, args) } \arguments{ \item{fun}{Function as symbol or quoted call.} \item{args}{Arguments to function; must be a \code{lazy_dots} object, or something \code{\link{as.lazy_dots}()} can coerce.} } \value{ A list: \item{env}{The common environment for all elements} \item{expr}{The expression} } \description{ In order to exactly replay the original call, the environment must be the same for all of the dots. This function circumvents that a little, falling back to the \code{\link{baseenv}()} if the environments aren't all the same. } \examples{ make_call(quote(f), lazy_dots(x = 1, 2)) make_call(quote(f), list(x = 1, y = ~x)) make_call(quote(f), ~x) # If no known or no common environment, falls back to baseenv() make_call(quote(f), quote(x)) } lazyeval/man/function_new.Rd0000644000176200001440000000227513171753463015661 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/function.R \name{function_new} \alias{function_new} \title{Create a function by "hand"} \usage{ function_new(args, body, env = parent.frame()) } \arguments{ \item{args}{A named list of default arguments. Note that if you want arguments that don't have defaults, you'll need to use the special function \code{\link{alist}}, e.g. \code{alist(a = , b = 1)}} \item{body}{A language object representing the code inside the function.
Usually this will be most easily generated with \code{\link{quote}}} \item{env}{The parent environment of the function, defaults to the calling environment of \code{function_new}} } \description{ This constructs a new function given its three components: list of arguments, body code and parent environment. } \examples{ f <- function(x) x + 3 g <- function_new(alist(x = ), quote(x + 3)) # The components of the functions are identical identical(formals(f), formals(g)) identical(body(f), body(g)) identical(environment(f), environment(g)) # But the functions are not identical because f has a source code reference identical(f, g) attr(f, "srcref") <- NULL # Now they are: stopifnot(identical(f, g)) } lazyeval/man/lazy_dots.Rd0000644000176200001440000000222113171753463015162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-dots.R \name{lazy_dots} \alias{lazy_dots} \title{Capture ... (dots) for later lazy evaluation.} \usage{ lazy_dots(..., .follow_symbols = FALSE, .ignore_empty = FALSE) } \arguments{ \item{...}{Dots from another function} \item{.follow_symbols}{If \code{TRUE}, follows promises across function calls; the default is \code{FALSE}. See \code{vignette("chained-promises")} for details.} \item{.ignore_empty}{If \code{TRUE}, empty arguments will be ignored.} } \value{ A named list of \code{\link{lazy}} expressions. } \description{ Capture ... (dots) for later lazy evaluation. } \examples{ lazy_dots(x = 1) lazy_dots(a, b, c * 4) f <- function(x = a + b, ...) { lazy_dots(x = x, y = a + b, ...) } f(z = a + b) f(z = a + b, .follow_symbols = TRUE) # .follow_symbols is off by default because it causes problems # with lazy loaded objects lazy_dots(letters) lazy_dots(letters, .follow_symbols = TRUE) # You can also modify a dots like a list. Anything on the RHS will # be coerced to a lazy.
l <- lazy_dots(x = 1) l$y <- quote(f) l[c("y", "x")] l["z"] <- list(~g) c(lazy_dots(x = 1), lazy_dots(f)) } lazyeval/man/is_lang.Rd0000644000176200001440000000144613171753463014576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/language.R \name{is_lang} \alias{is_lang} \alias{is_name} \alias{is_call} \alias{is_pairlist} \alias{is_atomic} \title{Is an object a language object?} \usage{ is_lang(x) is_name(x) is_call(x) is_pairlist(x) is_atomic(x) } \arguments{ \item{x}{An object to test.} } \description{ These helpers are consistent wrappers around their base R equivalents. A language object is either an atomic vector (typically a scalar), a name (aka a symbol), a call, or a pairlist (used for function arguments). } \examples{ q1 <- quote(1) is_lang(q1) is_atomic(q1) q2 <- quote(x) is_lang(q2) is_name(q2) q3 <- quote(x + 1) is_lang(q3) is_call(q3) } \seealso{ \code{\link{as_name}()} and \code{\link{as_call}()} for coercion functions. } lazyeval/man/is_formula.Rd0000644000176200001440000000045313171753463015317 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/formula.R \name{is_formula} \alias{is_formula} \title{Is object a formula?} \usage{ is_formula(x) } \arguments{ \item{x}{Object to test} } \description{ Is object a formula? } \examples{ is_formula(~ 10) is_formula(10) } lazyeval/man/lazy_.Rd0000644000176200001440000000312213171753463014271 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy.R \name{lazy_} \alias{lazy_} \alias{lazy} \title{Capture expression for later lazy evaluation.} \usage{ lazy_(expr, env) lazy(expr, env = parent.frame(), .follow_symbols = TRUE) } \arguments{ \item{expr}{Expression to capture. For \code{lazy_} must be a name or a call.} \item{env}{Environment in which to evaluate expr.} \item{.follow_symbols}{If \code{TRUE}, the default, follows promises across function calls. 
See \code{vignette("chained-promises")} for details.} } \description{ \code{lazy()} uses non-standard evaluation to turn promises into lazy objects; \code{lazy_()} does standard evaluation and is suitable for programming. } \details{ Use \code{lazy()} like you'd use \code{\link{substitute}()} to capture an unevaluated promise. Compared to \code{substitute()} it also captures the environment associated with the promise, so that you can correctly replay it in the future. } \examples{ lazy_(quote(a + x), globalenv()) # Lazy is designed to be used inside a function - you should # give it the name of a function argument (a promise) f <- function(x = b - a) { lazy(x) } f() f(a + b / c) # Lazy also works when called from the global environment. This makes it # easy to play with interactively. lazy(a + b / c) # By default, lazy will climb all the way back to the initial promise # This is handy if you have nested functions: g <- function(y) f(y) h <- function(z) g(z) f(a + b) g(a + b) h(a + b) # To avoid this behaviour, set .follow_symbols = FALSE # See vignette("chained-promises") for details } lazyeval/man/call_new.Rd0000644000176200001440000000132613171753463014743 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/call.R \name{call_new} \alias{call_new} \title{Create a call by "hand"} \usage{ call_new(f, ..., .args = list()) } \arguments{ \item{f}{Function to call. For \code{make_call}, either a string, a symbol or a quoted call.
For \code{do_call}, a bare function name or call.} \item{..., .args}{Arguments to the call either in or out of a list} } \description{ Create a call by "hand" } \examples{ # f can either be a string, a symbol or a call call_new("f", a = 1) call_new(quote(f), a = 1) call_new(quote(f()), a = 1) #' Can supply arguments individually or in a list call_new(quote(f), a = 1, b = 2) call_new(quote(f), .args = list(a = 1, b = 2)) } lazyeval/man/expr_label.Rd0000644000176200001440000000255413171753463015300 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/expr.R \name{expr_label} \alias{expr_label} \alias{expr_text} \alias{expr_find} \alias{expr_env} \title{Find the expression associated with an argument} \usage{ expr_label(x) expr_text(x, width = 60L, nlines = Inf) expr_find(x) expr_env(x, default_env) } \arguments{ \item{x}{A promise (function argument)} \item{width}{Width of each line} \item{nlines}{Maximum number of lines to extract.} \item{default_env}{If supplied, \code{expr_env} will return this if the promise has already been forced. Otherwise it will throw an error.} } \description{ \code{expr_find()} finds the full expression; \code{expr_text()} turns the expression into a single string; \code{expr_label()} formats it nicely for use in messages. \code{expr_env()} finds the environment associated with the expression. } \details{ These functions never force promises, and will work even if a promise has previously been forced. 
} \examples{ # Unlike substitute(), expr_find() finds the original expression f <- function(x) g(x) g <- function(y) h(y) h <- function(z) list(substitute(z), expr_find(z)) f(1 + 2 + 3) expr_label(10) # Names are quoted with `` expr_label(x) # Strings are encoded expr_label("a\\nb") # Expressions are captured expr_label(a + b + c) # Long expressions are collapsed expr_label(foo({ 1 + 2 print(x) })) } lazyeval/man/interp.Rd0000644000176200001440000000246613171753463014466 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-interp.R \name{interp} \alias{interp} \title{Interpolate values into an expression.} \usage{ interp(`_obj`, ..., .values) } \arguments{ \item{_obj}{An object to modify: can be a call, name, formula, \code{\link{lazy}}, or a string.} \item{..., .values}{Either individual name-value pairs, or a list (or environment) of values.} } \description{ This is useful if you want to build an expression up from a mixture of constants and variables. } \examples{ # Interp works with formulas, lazy objects, quoted calls and strings interp(~ x + y, x = 10) interp(lazy(x + y), x = 10) interp(quote(x + y), x = 10) interp("x + y", x = 10) # Use as.name if you have a character string that gives a # variable name interp(~ mean(var), var = as.name("mpg")) # or supply the quoted name directly interp(~ mean(var), var = quote(mpg)) # Or a function! interp(~ f(a, b), f = as.name("+")) # Remember every action in R is a function call: # http://adv-r.had.co.nz/Functions.html#all-calls # If you've built up a list of values through some other # mechanism, use .values interp(~ x + y, .values = list(x = 10)) # You can also interpolate variables defined in the current # environment, but this is a little risky.
y <- 10 interp(~ x + y, .values = environment()) } lazyeval/man/auto_name.Rd0000644000176200001440000000112713171753463015126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lazy-names.R \name{auto_name} \alias{auto_name} \title{Automatically name all components of a lazy dots.} \usage{ auto_name(x, max_width = 40) } \arguments{ \item{x}{A \code{\link{lazy_dots}}} \item{max_width}{Maximum number of characters to use} } \description{ Any components missing a name will automatically get a name added by looking at the first \code{max_width} characters of the deparsed expression. } \examples{ x <- lazy_dots(1 + 2, mean(mpg)) auto_name(x) auto_name(list(~f, quote(x))) } \keyword{internal}