rvest/0000755000176200001440000000000015054331142011413 5ustar liggesusersrvest/tests/0000755000176200001440000000000015051434444012563 5ustar liggesusersrvest/tests/testthat/0000755000176200001440000000000015054331142014415 5ustar liggesusersrvest/tests/testthat/test-utils.R0000644000176200001440000000040013775334175016670 0ustar liggesuserstest_that("can truncate strings", { expect_equal(str_trunc("abcdef", 10), "abcdef") expect_equal(str_trunc("abcdef", 4), "a...") }) test_that("minimal html doesn't change unexpectedly", { expect_snapshot(cat(as.character(minimal_html("

Hi")))) }) rvest/tests/testthat/test-text.R0000644000176200001440000000644613775423635016534 0ustar liggesuserstest_that("html_text returns raw html", { html <- minimal_html("

x\ny
z

") p <- html_elements(html, "p") expect_equal(html_text(p), "x\nyz") }) # html_text2 -------------------------------------------------------------- test_that("handles block containing only inline elements", { html <- minimal_html("

a b c

") expect_equal(html_text2(html), "a b c") # internal newlines are trimmed html <- minimal_html("

a\n\nb\nc

") expect_equal(html_text2(html), "a b c") }) test_that("handles multiple paragraphs with line breaks", { html <- minimal_html("

a

b
c ") expect_equal(html_text2(html), "a\n\nb\nc") expect_equal(html_text2(html_elements(html, "p")), c("a", "b\nc")) }) test_that("handles table", { html <- minimal_html("
ab
12
23
") expect_equal(html_text2(html), "a\tb\n1\t2\n2\t3") }) test_that("handles mixed block as well as can be expected", { html <- minimal_html("

a

b
") expect_equal(html_text2(html_element(html, "div")), "a\n\nb\n") }) test_that("returns NA for xml_missing", { expect_equal(html_text2(xml2::xml_missing()), NA_character_) }) test_that("breaks as expected", { expect_identical(tag_margin("p"), 2L) expect_identical(tag_margin("li"), 1L) expect_identical(tag_margin("b"), 0L) }) # inline ------------------------------------------------------------------ test_that("handle single line of text", { html <- minimal_html("

a b c

") expect_equal(html_text_inline(html_element(html, "p")), "a b c") # collapses space across nodes html <- minimal_html("

a b c

") expect_equal(html_text_inline(html_element(html, "p")), "a b c") }) test_that("converts br to \n", { html <- minimal_html("


x

") expect_equal(html_text_inline(html_element(html, "p")), "\nx") html <- minimal_html("

x

") expect_equal(html_text_inline(html_element(html, "p")), "x\n") html <- minimal_html("



") expect_equal(html_text_inline(html_element(html, "p")), "\n\n") }) test_that("empty block returns empty string", { html <- minimal_html("

") expect_equal(html_text_inline(html_element(html, "p")), "") }) test_that("collapse whitespace handles single line", { expect_equal(collapse_whitespace("\n\tx\t\n"), "x") expect_equal(collapse_whitespace("x y"), "x y") }) test_that("optionally preserve nbsp", { expect_equal(collapse_whitespace("x \u00a0 y"), "x y") expect_equal(collapse_whitespace("x\u00a0y", TRUE), "x\u00a0y") }) # PaddedText -------------------------------------------------------------- test_that("margins only added within text", { text <- PaddedText$new() text$add_margin(1) text$add_text("x") text$add_margin(1) expect_equal(text$output(), "x") }) test_that("margins are collapsed", { text <- PaddedText$new() text$add_text("x") text$add_margin(1) expect_equal(text$lines, 1) text$add_margin(2) expect_equal(text$lines, 2) text$add_text("y") expect_equal(text$output(), "x\n\ny") }) test_that("empty text is ignored", { text <- PaddedText$new() text$add_text("") text$add_margin(1) text$add_text("x") expect_equal(text$output(), "x") }) rvest/tests/testthat/test-html.R0000644000176200001440000000106614554042640016472 0ustar liggesuserstest_that("forwards to xml2 functions", { html <- minimal_html("

Hello children

") p <- html_elements(html, "p") expect_equal(html_name(p), "p") expect_equal(html_attr(p, "id"), "x") expect_equal(html_attr(p, "id2"), NA_character_) expect_equal(html_attrs(p), list(c(id = "x"))) expect_equal(html_children(p), html_elements(html, "i")) }) test_that("validates inputs", { html <- minimal_html("

Hello children

") expect_snapshot(error = TRUE, { html_attr(html, 1) html_attr(html, "id", 1) }) }) rvest/tests/testthat/test-rename.R0000644000176200001440000000172413775436633017013 0ustar liggesuserstest_that("xml functions are deprecated", { x <- minimal_html("

Hello

") expect_snapshot(. <- xml_tag(x)) expect_snapshot(. <- xml_node(x, "p")) expect_snapshot(. <- xml_nodes(x, "p")) }) test_that("html_node(s) is superseded (no warnings)", { x <- minimal_html("

Hello

") expect_equal(html_node(x, "p"), html_element(x, "p")) expect_equal(html_nodes(x, "p"), html_elements(x, "p")) }) test_that("set_values() is deprecated", { html <- minimal_html('
') form <- html_form(html)[[1]] expect_snapshot(set_values(form, text = "abc")) }) test_that("prefixless session functions are deprecated", { expect_snapshot({ s <- html_session("http://rvest.tidyverse.org/") . <- follow_link(s, i = 1) s <- jump_to(s, "https://rvest.tidyverse.org/reference/index.html") s <- back(s) s <- forward(s) }) }) # session_submit() is tested in form-submit because it needs a test server rvest/tests/testthat/test-table.R0000644000176200001440000001105314554031036016607 0ustar liggesuserstest_that("can parse simple table", { html <- minimal_html('
xyz
1EveJackson
2JohnDoe
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("strips whitespace", { html <- minimal_html('
x
x
x
x
') table <- html_table(html)[[1]] expect_equal(table$x, c("x", "x", "x")) }) test_that("can parse with colspan", { html <- minimal_html('
xyz
1
12
12
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can parse with rowspan", { html <- minimal_html('
xyz
123
23
3
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle wobbling rowspan", { html <- minimal_html('
xyz
1a1b1c
2b
3a3c
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle trailing rowspans", { html <- minimal_html('
xyz
1 2 3
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle blank colspans", { html <- minimal_html('
xy
1 2
3
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle blank rowspans", { html <- minimal_html('
xy
1 2
3
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle empty row", { html <- minimal_html('
x
2
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("defaults to minimal name repair", { html <- minimal_html('
xx
') table <- html_table(html)[[1]] expect_named(table, c("x", "x", "")) }) test_that("adds names if needed", { html <- minimal_html('
12
') table <- html_table(html)[[1]] expect_named(table, c("X1", "X2")) }) test_that("passes arguments to type.convert", { html <- minimal_html("
xy
NA1,2
") table <- html_table(html, na.strings = "")[[1]] expect_equal(table$x, "NA") table <- html_table(html, dec = ",")[[1]] expect_equal(table$y, 1.2) }) test_that("no conversion", { html <- minimal_html('
xy
001100.0
') table <- html_table(html, convert = FALSE)[[1]] expect_snapshot_output(table) }) test_that("fill = FALSE is deprecated", { html <- minimal_html('
x
1
') expect_snapshot({ . <- html_table(html, fill = FALSE) . <- html_table(html, fill = TRUE) }) }) test_that("can handle empty tables", { html <- minimal_html('
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle tables consisting of a single empty row", { html <- minimal_html('
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) test_that("can handle tables consisting of only empty rows", { html <- minimal_html('
') table <- html_table(html)[[1]] expect_snapshot_output(table) }) rvest/tests/testthat/test-session.R0000644000176200001440000000567215051434445017221 0ustar liggesuserstest_that("basic session process works as expected", { expect_snapshot({ s <- session("http://hadley.nz/") s expect_true(is.session(s)) s <- session_follow_link(s, css = "p a") session_history(s) }, transform = function(x) gsub("Size: .*", "Size: ", x)) }) test_that("session caches xml parsing and sets base url", { s <- session("https://rvest.tidyverse.org/") expect_equal(s$cache$html, NULL) html <- read_html(s) expect_true(rlang::is_reference(s$cache$html, html)) expect_equal(xml2::xml_url(html), "https://rvest.tidyverse.org/") }) test_that("errors if try to access HTML from non-HTML page", { expect_snapshot(error = TRUE, { s <- session("https://rvest.tidyverse.org/logo.png") read_html(s) }) }) test_that("session responds to httr and rvest methods", { # skip_on_cran() s <- session("http://hadley.nz/") expect_silent(html_form(s)) expect_silent(html_table(s)) expect_silent(html_element(s, "body")) expect_silent(html_element(s, "body")) expect_silent(status_code(s)) expect_silent(headers(s)) expect_silent(cookies(s)) }) test_that("informative errors for bad inputs", { expect_snapshot_error(check_form(1)) expect_snapshot_error(check_session(1)) }) # navigation -------------------------------------------------------------- test_that("can navigate back and forward", { s <- session("https://hadley.nz/") expect_equal(s$back, character()) expect_equal(s$forward, character()) expect_snapshot_error(session_back(s)) expect_snapshot_error(session_forward(s)) s <- session_jump_to(s, "https://r4ds.hadley.nz/") expect_equal(s$back, "https://hadley.nz/") expect_equal(s$forward, character()) expect_equal(session_forward(session_back(s))$url, s$url) s <- session_back(s) expect_equal(s$back, character()) expect_equal(s$forward, "https://r4ds.hadley.nz/") s <- session_forward(s) expect_equal(s$back, "https://hadley.nz/") expect_equal(s$forward, character()) }) test_that("can find link by position, content, css, or xpath", { html <- minimal_html(" a b ") expect_equal(find_href(html, i = 1), "a") expect_equal(find_href(html, i = "b"), "b") expect_equal(find_href(html, css = "a.b"), "b") # Failure modes expect_snapshot(find_href(html, i = 1, css = "a"), error = TRUE) expect_snapshot(find_href(html, i = TRUE), error = TRUE) expect_snapshot(find_href(html, i = "c"), error = TRUE) expect_snapshot(find_href(html, css = "p a"), error = TRUE) }) test_that("can submit a form", { app <- local_test_app() html <- minimal_html('
') form <- html_form(html, base_url = app$url())[[1]] s <- session("http://hadley.nz/") s <- session_submit(s, form) expect_s3_class(s, "rvest_session") resp <- httr::content(s$response) expect_equal(resp$query, "x=1&y=2") }) rvest/tests/testthat/test.html0000644000176200001440000000013412362565206016271 0ustar liggesusers

rvest/tests/testthat/helper.R0000644000176200001440000000275715054317500016034 0ustar liggesuserslocal_test_app <- function(envir = parent.frame()) { skip_if_not_installed("webfakes") webfakes::local_app_process(app_request(), .local_envir = envir) } app_request <- function() { req_json <- function(req, res) { out <- list( method = req$method, query = req$query_string, type = req$headers$`Content-Type` %||% NA_character_, body = rawToChar(req$.body %||% raw()) ) res$send_json(out, auto_unbox = TRUE) } app <- webfakes::new_app() app$post("/", req_json) app$get("/", req_json) app } show_response <- function(x) { strip_divider <- function(x) { gsub("-{3,}[A-Za-z0-9-]+", "---{divider}", x) } x <- httr::content(x) cat_line(toupper(x$method), " ", strip_divider(x$type)) cat_line("Query string: ", x$query) cat_line(strip_divider(x$body)) } # chromote ---------------------------------------------------------------- skip_if_no_chromote <- function() { skip_on_cran() skip_if(lacks_chromote(), "chromote not available") # On CI we have to opt-in to testlive skip_if(Sys.getenv("CI") == "true" && Sys.getenv('testlive') == "") } lacks_chromote <- function() { # We try twice because in particular Windows on GHA seems to need it, # but it doesn't otherwise hurt. More details at # https://github.com/rstudio/shinytest2/issues/209 env_cache(the, "lacks_chromote", !has_chromote() && !has_chromote()) } html_test_path <- function(name) { paste0("file://", normalizePath(test_path(paste0("html/", name, ".html")))) } rvest/tests/testthat/test-live.R0000644000176200001440000000735315051552265016474 0ustar liggesuserstest_that("has print method", { skip_if_no_chromote() bullets <- read_html_live(html_test_path("bullets")) expect_snapshot(bullets) }) test_that("can find multiple elements", { skip_if_no_chromote() bullets <- read_html_live(html_test_path("bullets")) # can extract from page ul <- bullets |> html_elements("ul") expect_length(ul, 1) # or with xpath ul <- bullets |> html_elements(xpath = ".//ul") expect_length(ul, 1) # can extract from other elements li <- ul |> html_elements("li") expect_length(li, 4) }) test_that("can extract tables", { skip_if_no_chromote() page <- read_html_live(html_test_path("table")) tables <- page |> html_table() expect_equal(dim(tables[[1]]), c(2, 3)) }) test_that("can find single element", { skip_if_no_chromote() dynamic <- read_html_live("https://rvest.tidyverse.org/articles/starwars.html") static <- read_html("https://rvest.tidyverse.org/articles/starwars.html") expect_equal(html_element(dynamic, "p"), html_element(static, "p")) expect_equal(html_element(dynamic, "xyz"), html_element(static, "xyz")) }) test_that("can click a button", { skip_if_no_chromote() sess <- read_html_live(html_test_path("click")) sess$click("button") expect_equal(html_text(html_element(sess, "p")), "clicked") sess$click("button", 2) expect_equal(html_text(html_element(sess, "p")), "double clicked") }) test_that("can scroll in various ways", { skip_if_no_chromote() sess <- read_html_live(html_test_path("scroll")) expect_equal(sess$get_scroll_position(), list(x = 0, y = 0)) sess$scroll_to(500) Sys.sleep(0.2) expect_equal(sess$get_scroll_position(), list(x = 0, y = 500)) sess$scroll_by(-250) Sys.sleep(0.2) expect_equal(sess$get_scroll_position(), list(x = 0, y = 250)) sess$scroll_into_view("#bottom") Sys.sleep(0.2) expect_equal(sess$get_scroll_position(), list(x = 0, y = 685)) }) test_that("can type text", { skip_if_no_chromote() sess <- read_html_live(html_test_path("type")) sess$type("#inputText", "hello") expect_equal(html_text(html_element(sess, "#replicatedText")), "hello") }) test_that("can press special keys",{ skip_if_no_chromote() sess <- read_html_live(html_test_path("press")) sess$press("#inputBox", "ArrowRight") expect_equal(html_text(html_element(sess, "#keyInfo")), "ArrowRight/ArrowRight") sess$press("#inputBox", "BracketRight") expect_equal(html_text(html_element(sess, "#keyInfo")), "]/BracketRight") }) test_that("can find elements after click that navigates", { skip_if_no_chromote() sess <- read_html_live(html_test_path("navigate1")) sess$click("a") expect_equal(html_text2(html_element(sess, "p")), "Success!") }) # as_key_desc ------------------------------------------------------------- test_that("gracefully errors on bad inputs", { expect_snapshot(error = TRUE, { as_key_desc("xyz") as_key_desc("X", "Malt") }) }) test_that("automatically adjusts for shift key", { # str(Filter(\(x) has_name(x, "shiftKey"), keydefs)) expect_equal(as_key_desc("KeyA")$key, "a") expect_equal(as_key_desc("KeyA", "Shift")$key, "A") # str(Filter(\(x) has_name(x, "shiftKeyCode"), keydefs)) expect_equal(as_key_desc("Numpad0")$windowsVirtualKeyCode, 45) expect_equal(as_key_desc("Numpad0", "Shift")$windowsVirtualKeyCode, 96) }) test_that("don't send text if modifier pushed", { expect_equal(as_key_desc("KeyA")$text, "a") expect_equal(as_key_desc("KeyA", "Shift")$text, "a") expect_equal(as_key_desc("KeyA", "Alt")$text, "") expect_equal(as_key_desc("KeyA", "Meta")$text, "") expect_equal(as_key_desc("KeyA", "Control")$text, "") }) test_that("modifiers are bitflag", { expect_equal(as_key_desc("KeyA", "Shift")$modifiers, 8) expect_equal(as_key_desc("KeyA", c("Alt", "Control"))$modifiers, 3) }) rvest/tests/testthat/test-encoding.R0000644000176200001440000000133114553750033017310 0ustar liggesuserstest_that("can guess encoding", { skip("currently broken") skip_on_os("linux") # some hidden dependency on system library path <- system.file("html-ex", "bad-encoding.html", package = "rvest") x <- read_html(path) expect_snapshot(html_encoding_guess(x)) # deprecated expect_snapshot(guess_encoding(x)) }) test_that("encoding repair is deprecated", { skip("currently broken") skip_on_cran() skip_on_os("linux") # some hidden dependency on system library path <- system.file("html-ex", "bad-encoding.html", package = "rvest") x <- read_html(path) text <- html_text(html_element(x, "p")) expect_snapshot(repair_encoding(text), error = TRUE) expect_snapshot(repair_encoding(text, "ISO-8859-1")) }) rvest/tests/testthat/_snaps/0000755000176200001440000000000015051434445015707 5ustar liggesusersrvest/tests/testthat/_snaps/live.md0000644000176200001440000000104215051433725017165 0ustar liggesusers# has print method Code bullets Output {xml_nodeset (2)} [1] Simple Bulleted List [2]