selectr/0000755000176200001440000000000015120470422011710 5ustar liggesusersselectr/tests/0000755000176200001440000000000015120446446013063 5ustar liggesusersselectr/tests/testthat/0000755000176200001440000000000015120452724014717 5ustar liggesusersselectr/tests/testthat/test-querySelector-XML.R0000755000176200001440000001412415107555231021332 0ustar liggesuserscontext("querySelector-XML") test_that("querySelector returns a single node or NULL", { library(XML) doc <- xmlRoot(xmlParse('')) p <- function(x) { if (is.null(x)) return(x) saveXML(x, file = NULL) } expect_that(p(querySelector(doc, "a")), equals(p(getNodeSet(doc, "//a")[[1]]))) expect_that(p(querySelector(doc, "*", prefix = "")), equals(p(getNodeSet(doc, "*")[[1]]))) expect_that(p(querySelector(doc, "d")), equals(NULL)) expect_that(p(querySelector(doc, "c")), equals(p(getNodeSet(doc, "//c")[[1]]))) # do the same again but on the xml doc itself doc <- xmlParse('') expect_that(p(querySelector(doc, "a")), equals(p(getNodeSet(xmlRoot(doc), "//a")[[1]]))) expect_that(p(querySelector(doc, "*", prefix = "")), equals(p(getNodeSet(xmlRoot(doc), "*")[[1]]))) expect_that(p(querySelector(doc, "d")), equals(NULL)) expect_that(p(querySelector(doc, "c")), equals(p(getNodeSet(xmlRoot(doc), "//c")[[1]]))) }) test_that("querySelectorAll returns expected nodes", { library(XML) doc <- xmlRoot(xmlParse('')) p <- function(x) { lapply(x, function(node) saveXML(node, file = NULL)) } expect_that(p(querySelectorAll(doc, "a")), equals(p(getNodeSet(doc, "//a")))) expect_that(p(querySelectorAll(doc, "*", prefix = "")), equals(p(getNodeSet(doc, "*")))) expect_that(p(querySelectorAll(doc, "c")), equals(p(getNodeSet(doc, "//c")))) # do the same again but on the xml doc itself doc <- xmlParse('') expect_that(p(querySelectorAll(doc, "a")), equals(p(getNodeSet(xmlRoot(doc), "//a")))) expect_that(p(querySelectorAll(doc, "*", prefix = "")), equals(p(getNodeSet(xmlRoot(doc), "*")))) expect_that(p(querySelectorAll(doc, "c")), equals(p(getNodeSet(xmlRoot(doc), "//c")))) }) test_that("querySelectorAll returns empty list for no match", { library(XML) doc <- xmlRoot(xmlParse('')) p <- function(x) { lapply(x, function(node) saveXML(node, file = NULL)) } expect_that(p(querySelectorAll(doc, "d")), equals(p(getNodeSet(doc, "//d")))) }) test_that("querySelector handles namespaces", { library(XML) doc <- xmlRoot(xmlParse('')) p <- function(x) { if (is.null(x)) x else saveXML(x, file = NULL) } expect_that(querySelector(doc, "circle"), equals(NULL)) expect_that(querySelector(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg")), equals(NULL)) expect_that(p(querySelector(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))[[1]]))) # now with querySelectorNS expect_that(querySelectorNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg")), equals(NULL)) expect_that(p(querySelectorNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))[[1]]))) }) test_that("querySelectorAll handles namespaces", { library(XML) doc <- xmlRoot(xmlParse('')) p <- function(x) { lapply(x, function(node) saveXML(node, file = NULL)) } expect_that(p(querySelectorAll(doc, "circle")), equals(p(getNodeSet(doc, "//circle")))) expect_that(p(querySelectorAll(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))))) expect_that(p(querySelectorAll(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))))) # now with querySelectorAllNS expect_that(p(querySelectorAllNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))))) expect_that(p(querySelectorAllNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))))) }) test_that("querySelector methods handle invalid arguments", { library(XML) doc <- xmlParse('') expect_error(querySelector(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorAll(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a"), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a"), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE) }) selectr/tests/testthat/test-xpath.R0000755000176200001440000001645715107555231017165 0ustar liggesuserscontext("xpath") test_that("XPathExpr objects print correctly", { shw <- function(x) trimws(capture.output(x$show())) xp <- XPathExpr$new() expect_that(xp$repr(), equals("XPathExpr[*]")) expect_that(shw(xp), equals("XPathExpr[*]")) xp <- XPathExpr$new("//") expect_that(xp$repr(), equals("XPathExpr[//*]")) expect_that(shw(xp), equals("XPathExpr[//*]")) xp <- XPathExpr$new(element = "a") expect_that(xp$repr(), equals("XPathExpr[a]")) expect_that(shw(xp), equals("XPathExpr[a]")) xp <- XPathExpr$new("//a/", "b") expect_that(xp$repr(), equals("XPathExpr[//a/b]")) expect_that(shw(xp), equals("XPathExpr[//a/b]")) }) test_that("Generic translator validates language arguments", { translator <- GenericTranslator$new() expect_that(translator$css_to_xpath("xml:lang(en)"), equals("descendant-or-self::xml[(lang('en'))]")) expect_that(translator$css_to_xpath("xml:lang(en-nz)"), equals("descendant-or-self::xml[(lang('en-nz'))]")) expect_error(translator$css_to_xpath("xml:lang()"), "Expected at least one argument.*") expect_error(translator$css_to_xpath("xml:lang(1)"), "Expected string, ident, or \\* arguments.*") # Multiple languages with OR logic expect_that(translator$css_to_xpath("xml:lang(en, fr)"), equals("descendant-or-self::xml[((lang('en') or lang('fr')))]")) expect_that(translator$css_to_xpath("xml:lang(en, de, fr)"), equals("descendant-or-self::xml[((lang('en') or lang('de') or lang('fr')))]")) }) test_that("HTML translator validates language arguments", { translator <- HTMLTranslator$new() expect_that(translator$css_to_xpath("html:lang(en)"), equals("descendant-or-self::html[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')])]")) expect_that(translator$css_to_xpath("html:lang(en-nz)"), equals("descendant-or-self::html[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-nz-')])]")) expect_error(translator$css_to_xpath("html:lang()"), "Expected at least one argument.*") expect_error(translator$css_to_xpath("html:lang(1)"), "Expected string, ident, or \\* arguments.*") # Multiple languages with OR logic expect_that(translator$css_to_xpath("html:lang(en, fr)"), equals("descendant-or-self::html[((ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')] or ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'fr-')]))]")) }) test_that("Generic translator handles :lang() wildcards and comma lists", { translator <- GenericTranslator$new() # Simple languages still work expect_that(translator$css_to_xpath("div:lang(en)"), equals("descendant-or-self::div[(lang('en'))]")) # Wildcard * matches everything expect_that(translator$css_to_xpath('div:lang(*)'), equals("descendant-or-self::div[(true())]")) # Wildcard suffix like en-* for prefix matching expect_that(translator$css_to_xpath('div:lang(en-*)'), equals("descendant-or-self::div[(lang('en-'))]")) expect_that(translator$css_to_xpath('div:lang(fr-*)'), equals("descendant-or-self::div[(lang('fr-'))]")) # Comma-separated lists with OR logic expect_that(translator$css_to_xpath('div:lang(en, fr)'), equals("descendant-or-self::div[((lang('en') or lang('fr')))]")) expect_that(translator$css_to_xpath('div:lang(en, de, fr)'), equals("descendant-or-self::div[((lang('en') or lang('de') or lang('fr')))]")) # Mixed wildcards and regular languages expect_that(translator$css_to_xpath('div:lang(en-*, fr)'), equals("descendant-or-self::div[((lang('en-') or lang('fr')))]")) expect_that(translator$css_to_xpath('div:lang(*, de)'), equals("descendant-or-self::div[((true() or lang('de')))]")) }) test_that("HTML translator handles :lang() wildcards and comma lists", { translator <- HTMLTranslator$new() # Wildcard * matches any element with lang attribute expect_that(translator$css_to_xpath('div:lang(*)'), equals("descendant-or-self::div[(ancestor-or-self::*[@lang])]")) # Wildcard suffix for prefix matching expect_that(translator$css_to_xpath('div:lang(en-*)'), equals("descendant-or-self::div[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')])]")) # Multiple values with OR logic expect_that(translator$css_to_xpath('div:lang(en, fr)'), equals("descendant-or-self::div[((ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')] or ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'fr-')]))]")) }) test_that("Generic translator handles :dir() function", { translator <- GenericTranslator$new() # :dir() uses "never matches" pattern (requires runtime directionality detection) expect_that(translator$css_to_xpath("div:dir(ltr)"), equals("descendant-or-self::div[(0)]")) expect_that(translator$css_to_xpath("div:dir(rtl)"), equals("descendant-or-self::div[(0)]")) expect_that(translator$css_to_xpath(":dir(ltr)"), equals("descendant-or-self::*[(0)]")) expect_error(translator$css_to_xpath("div:dir()"), "Expected at least one argument.*") expect_error(translator$css_to_xpath("div:dir(1)"), "Expected string, ident, or \\* arguments.*") }) test_that("HTML translator handles :dir() function", { translator <- HTMLTranslator$new() # :dir() uses "never matches" pattern (requires runtime directionality detection) expect_that(translator$css_to_xpath("div:dir(ltr)"), equals("descendant-or-self::div[(0)]")) expect_that(translator$css_to_xpath("div:dir(rtl)"), equals("descendant-or-self::div[(0)]")) expect_that(translator$css_to_xpath(":dir(ltr)"), equals("descendant-or-self::*[(0)]")) expect_error(translator$css_to_xpath("div:dir()"), "Expected at least one argument.*") expect_error(translator$css_to_xpath("div:dir(1)"), "Expected string, ident, or \\* arguments.*") }) test_that("unimplemented methods throw errors", { translator <- GenericTranslator$new() expect_error(translator$css_to_xpath("*:nth-of-type(2n)"), ".* is not implemented") expect_error(translator$css_to_xpath("*:nth-last-of-type(2n)"), ".* is not implemented") expect_error(translator$css_to_xpath("*:first-of-type"), ".* is not implemented") expect_error(translator$css_to_xpath("*:last-of-type"), ".* is not implemented") expect_error(translator$css_to_xpath("*:only-of-type"), ".* is not implemented") }) test_that("contains method only takes string arguments", { expect_that(css_to_xpath("a:contains(b)"), equals("descendant-or-self::a[(contains(., 'b'))]")) expect_that(css_to_xpath("a:contains('b')"), equals("descendant-or-self::a[(contains(., 'b'))]")) expect_error(css_to_xpath("a:contains(1)"), "Expected a single string or ident for :contains\\(\\), got .*") }) selectr/tests/testthat/test-main.R0000755000176200001440000001052215107555231016750 0ustar liggesuserscontext("main") # We know that the results are correct via other tests, just check that # this produces the correct results with respect to its arguments test_that("css_to_xpath vectorises arguments", { expect_that(css_to_xpath("a b"), equals("descendant-or-self::a//b")) expect_that(css_to_xpath("a b", prefix = ""), equals("a//b")) expect_that(css_to_xpath("a b", prefix = c("descendant-or-self::", "")), equals(c("descendant-or-self::a//b", "a//b"))) expect_that(css_to_xpath("a:checked", prefix = "", translator = c("generic", "html", "xhtml")), equals(c("a[(0)]", "a[((@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) = 'command')and (@type = 'checkbox' or @type = 'radio')))]", "a[((@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) = 'command')and (@type = 'checkbox' or @type = 'radio')))]"))) expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a//b", "b//c"))) }) test_that("css_to_xpath handles bad arguments", { # must have a selector arg provided expect_error(css_to_xpath(), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(css_to_xpath(NULL), "A valid selector (character vector) must be provided.", fixed = TRUE) # should complain about incorrect vector type expect_error(css_to_xpath(1), "The 'selector' argument.*") expect_error(css_to_xpath("a", prefix = 1), "The 'prefix' argument.*") expect_error(css_to_xpath("a", translator = 1), "The 'translator' argument.*") # should strip the NA values out expect_that(length(css_to_xpath(c("a", NA))), equals(1)) expect_that(length(css_to_xpath("a", prefix = c("", NA))), equals(1)) expect_that(length(css_to_xpath("a", translator = c("generic", NA))), equals(1)) # expect NAs to be stripped out resulting in zero length args (unusable) expect_error(css_to_xpath(NA_character_), "Zero length character vector.*") expect_error(css_to_xpath("a", prefix = NA_character_), "Zero length character vector.*") expect_error(css_to_xpath("a", translator = NA_character_), "Zero length character vector.*") # performs partial matching expect_that(css_to_xpath("a", translator = "g"), equals("descendant-or-self::a")) expect_that(css_to_xpath("a", translator = "gEnErIC"), equals("descendant-or-self::a")) expect_that(css_to_xpath("a", translator = "h"), equals("descendant-or-self::a")) expect_that(css_to_xpath("a", translator = "x"), equals("descendant-or-self::a")) expect_that(css_to_xpath("a", translator = c("g", "h", "x")), equals(rep("descendant-or-self::a", 3))) # errors anything not matching generic, html, xhtml expect_error(css_to_xpath("a", translator = ""), "'arg' should be one of.*") expect_error(css_to_xpath("a", translator = "a"), "'arg' should be one of.*") expect_error(css_to_xpath("a", translator = c("generic", "a")), "'arg' should be one of.*") }) test_that("namespace handling works correctly", { # formatNS must return a NULL or a named vector expect_that(formatNS(NULL), equals(NULL)) expect_that(formatNS(list(a = "b")), equals(c(a = "b"))) expect_that(formatNS(c(a = "b")), equals(c(a = "b"))) # bad input handling expect_error(formatNS(1), "A namespace object must be.*") expect_error(formatNS(TRUE), "A namespace object must be.*") expect_error(formatNS("a"), "The namespace object either missing some or all names.*") expect_error(formatNS(c(a = "a", "b")), "The namespace object either missing some or all names.*") tmp <- letters names(tmp) <- letters[1:5] expect_error(formatNS(tmp), "The namespace object either missing some or all names.*") expect_error(formatNS(list(a = 1, b = 2)), "The values in the namespace object.*") # formatNSPrefix must return a pipe separated string of namespace prefixes expect_that(formatNSPrefix(c(svg = "svg"), ""), equals("(//svg:*)/")) expect_that(formatNSPrefix(c(svg = "svg"), "asd"), equals("(//svg:*)/asd")) expect_that(formatNSPrefix(c(svg = "svg", math = "mathml"), ""), equals("(//svg:*|//math:*)/")) expect_that(formatNSPrefix(c(svg = "svg", math = "mathml"), "asd"), equals("(//svg:*|//math:*)/asd")) }) selectr/tests/testthat/test-nth-child.R0000755000176200001440000005373715107555231017715 0ustar liggesuserscontext(":nth-child() and :nth-last-child() pseudo-classes") test_that(":nth-child() generates correct XPath", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # :nth-child(1) - first child result <- xpath("li:nth-child(1)") expect_true(grepl("count\\(preceding-sibling::\\*\\) = 0", result)) # :nth-child(2) - second child result <- xpath("li:nth-child(2)") expect_true(grepl("count\\(preceding-sibling::\\*\\) = 1", result)) # :nth-child(odd) - odd children result <- xpath("li:nth-child(odd)") expect_true(grepl("count\\(preceding-sibling::\\*\\)", result)) expect_true(grepl("mod 2", result)) # :nth-child(even) - even children result <- xpath("li:nth-child(even)") expect_true(grepl("count\\(preceding-sibling::\\*\\)", result)) expect_true(grepl("mod 2", result)) # :nth-child(2n) - every 2nd child (even) result <- xpath("li:nth-child(2n)") expect_true(grepl("count\\(preceding-sibling::\\*\\)", result)) # :nth-child(3n+1) - every 3rd starting from 1st result <- xpath("li:nth-child(3n+1)") expect_true(grepl("count\\(preceding-sibling::\\*\\)", result)) # :nth-child(n) - all children (simplifies to just the element) result <- xpath("li:nth-child(n)") expect_that(result, equals("li")) # :nth-child(-n+3) - first 3 children result <- xpath("li:nth-child(-n+3)") expect_true(grepl("count\\(preceding-sibling::\\*\\)", result)) }) test_that(":nth-last-child() generates correct XPath", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # :nth-last-child(1) - last child result <- xpath("li:nth-last-child(1)") expect_true(grepl("count\\(following-sibling::\\*\\) = 0", result)) # :nth-last-child(2) - second from last result <- xpath("li:nth-last-child(2)") expect_true(grepl("count\\(following-sibling::\\*\\) = 1", result)) # :nth-last-child(odd) - odd from end result <- xpath("li:nth-last-child(odd)") expect_true(grepl("count\\(following-sibling::\\*\\)", result)) expect_true(grepl("mod 2", result)) # :nth-last-child(even) - even from end result <- xpath("li:nth-last-child(even)") expect_true(grepl("count\\(following-sibling::\\*\\)", result)) expect_true(grepl("mod 2", result)) # :nth-last-child(-n+2) - last 2 children result <- xpath("li:nth-last-child(-n+2)") expect_true(grepl("count\\(following-sibling::\\*\\)", result)) }) test_that(":nth-child() works correctly with XML documents", { library(XML) html <- paste0( '', '
    ', '
  • Item 1
  • ', '
  • Item 2
  • ', '
  • Item 3
  • ', '
  • Item 4
  • ', '
  • Item 5
  • ', '
', '
    ', '
  • A
  • ', '
  • B
  • ', '
  • C
  • ', '
', '
' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # First child expect_that(get_ids("li:nth-child(1)"), equals(c("li1", "li6"))) # Second child expect_that(get_ids("li:nth-child(2)"), equals(c("li2", "li7"))) # Third child expect_that(get_ids("li:nth-child(3)"), equals(c("li3", "li8"))) # Odd children (1, 3, 5) expect_that(get_ids("li:nth-child(odd)"), equals(c("li1", "li3", "li5", "li6", "li8"))) # Even children (2, 4) expect_that(get_ids("li:nth-child(even)"), equals(c("li2", "li4", "li7"))) # Every 2nd child starting from 2 (same as even) expect_that(get_ids("li:nth-child(2n)"), equals(c("li2", "li4", "li7"))) # Every 2nd child starting from 1 (same as odd) expect_that(get_ids("li:nth-child(2n+1)"), equals(c("li1", "li3", "li5", "li6", "li8"))) # Every 3rd child starting from 1 (1, 4) expect_that(get_ids("li:nth-child(3n+1)"), equals(c("li1", "li4", "li6"))) # Every 3rd child starting from 2 (2, 5) expect_that(get_ids("li:nth-child(3n+2)"), equals(c("li2", "li5", "li7"))) # First 3 children expect_that(get_ids("li:nth-child(-n+3)"), equals(c("li1", "li2", "li3", "li6", "li7", "li8"))) # All children (n matches all positive integers) all_ids <- get_ids("li:nth-child(n)") expect_that(length(all_ids), equals(8)) }) test_that(":nth-last-child() works correctly with XML documents", { library(XML) html <- paste0( '', '
    ', '
  • Item 1
  • ', '
  • Item 2
  • ', '
  • Item 3
  • ', '
  • Item 4
  • ', '
  • Item 5
  • ', '
', '
    ', '
  • A
  • ', '
  • B
  • ', '
  • C
  • ', '
', '
' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # Last child expect_that(get_ids("li:nth-last-child(1)"), equals(c("li5", "li8"))) # Second from last expect_that(get_ids("li:nth-last-child(2)"), equals(c("li4", "li7"))) # Third from last expect_that(get_ids("li:nth-last-child(3)"), equals(c("li3", "li6"))) # Odd from end (last=1, 3rd-last=3, 5th-last=5) expect_that(get_ids("li:nth-last-child(odd)"), equals(c("li1", "li3", "li5", "li6", "li8"))) # Even from end (2nd-last=2, 4th-last=4) expect_that(get_ids("li:nth-last-child(even)"), equals(c("li2", "li4", "li7"))) # Last 2 children expect_that(get_ids("li:nth-last-child(-n+2)"), equals(c("li4", "li5", "li7", "li8"))) # Last 3 children expect_that(get_ids("li:nth-last-child(-n+3)"), equals(c("li3", "li4", "li5", "li6", "li7", "li8"))) }) test_that(":nth-child() works correctly with xml2 documents", { library(xml2) html <- paste0( '', '
    ', '
  • Item 1
  • ', '
  • Item 2
  • ', '
  • Item 3
  • ', '
  • Item 4
  • ', '
  • Item 5
  • ', '
', '
    ', '
  • A
  • ', '
  • B
  • ', '
  • C
  • ', '
', '
' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # First child expect_that(get_ids("li:nth-child(1)"), equals(c("li1", "li6"))) # Second child expect_that(get_ids("li:nth-child(2)"), equals(c("li2", "li7"))) # Odd children expect_that(get_ids("li:nth-child(odd)"), equals(c("li1", "li3", "li5", "li6", "li8"))) # Even children expect_that(get_ids("li:nth-child(even)"), equals(c("li2", "li4", "li7"))) # First 3 children expect_that(get_ids("li:nth-child(-n+3)"), equals(c("li1", "li2", "li3", "li6", "li7", "li8"))) }) test_that(":nth-last-child() works correctly with xml2 documents", { library(xml2) html <- paste0( '', '
    ', '
  • Item 1
  • ', '
  • Item 2
  • ', '
  • Item 3
  • ', '
  • Item 4
  • ', '
  • Item 5
  • ', '
', '
' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # Last child expect_that(get_ids("li:nth-last-child(1)"), equals("li5")) # Second from last expect_that(get_ids("li:nth-last-child(2)"), equals("li4")) # Last 2 children expect_that(get_ids("li:nth-last-child(-n+2)"), equals(c("li4", "li5"))) }) test_that(":nth-child() and :nth-last-child() can be combined", { library(XML) html <- paste0( '', '
    ', '
  • 1
  • ', '
  • 2
  • ', '
  • 3
  • ', '
  • 4
  • ', '
  • 5
  • ', '
', '
' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # Second child AND second from last (middle element in list of 5) expect_that(get_ids("li:nth-child(2):nth-last-child(4)"), equals("li2")) # Middle element (3rd child AND 3rd from last) expect_that(get_ids("li:nth-child(3):nth-last-child(3)"), equals("li3")) # First child that's also last child (only child) # This won't match in our test case since we have 5 items expect_that(length(querySelectorAll(doc, "li:nth-child(1):nth-last-child(1)")), equals(0)) }) test_that(":nth-child() edge cases", { library(XML) # Empty list html1 <- '
    ' doc1 <- xmlRoot(xmlParse(html1)) expect_that(length(querySelectorAll(doc1, "li:nth-child(1)")), equals(0)) # Single child html2 <- '
    • Only
    ' doc2 <- xmlRoot(xmlParse(html2)) # Should match as first child result <- querySelectorAll(doc2, "li:nth-child(1)") expect_that(length(result), equals(1)) expect_that(xmlGetAttr(result[[1]], "id"), equals("only")) # Should also match as last child result2 <- querySelectorAll(doc2, "li:nth-last-child(1)") expect_that(length(result2), equals(1)) expect_that(xmlGetAttr(result2[[1]], "id"), equals("only")) # Mixed element types html3 <- paste0( '', '
    ', '

    Para

    ', '
    Div
    ', '

    Para

    ', ' Span', '
    ', '
    ' ) doc3 <- xmlRoot(xmlParse(html3)) get_ids <- function(css) { results <- querySelectorAll(doc3, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # First child (p element that is first child) expect_that(get_ids("p:nth-child(1)"), equals("p1")) # Second child (div element that is second child) expect_that(get_ids("div:nth-child(2)"), equals("d1")) # All p elements that are odd children expect_that(get_ids("p:nth-child(odd)"), equals(c("p1", "p2"))) }) test_that(":nth-child() with querySelector returns first match", { library(xml2) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    ', '
      ', '
    • 3
    • ', '
    • 4
    • ', '
    ', '
    ' ) doc <- read_xml(html) # Should return first element that's a first child (li1) result <- querySelector(doc, "li:nth-child(1)") expect_that(xml_attr(result, "id"), equals("li1")) # Should return first element that's a second child (li2) result2 <- querySelector(doc, "li:nth-child(2)") expect_that(xml_attr(result2, "id"), equals("li2")) }) test_that(":nth-child() with different element types", { library(XML) # Test that :nth-child counts all siblings, not just same type html <- paste0( '', '
    ', '

    Heading

    ', '

    Para 1

    ', '

    Para 2

    ', ' Span', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # p:nth-child(2) - p that is 2nd child (p1) expect_that(get_ids("p:nth-child(2)"), equals("p1")) # p:nth-child(3) - p that is 3rd child (p2) expect_that(get_ids("p:nth-child(3)"), equals("p2")) # p:nth-child(1) - p that is first child (none) expect_that(length(querySelectorAll(doc, "p:nth-child(1)")), equals(0)) # span:nth-child(4) - span that is 4th child (s1) expect_that(get_ids("span:nth-child(4)"), equals("s1")) }) test_that(":nth-child() with complex selectors", { library(xml2) html <- paste0( '', ' ', '' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # Class selector with :nth-child expect_that(get_ids(".item:nth-child(1)"), equals("li1")) # Multiple classes with :nth-child expect_that(get_ids(".item.active:nth-child(2)"), equals("li2")) # Descendant combinator with :nth-child expect_that(get_ids(".menu li:nth-child(2)"), equals("li2")) # Child combinator with :nth-child expect_that(get_ids(".menu > li:nth-child(3)"), equals("li3")) }) test_that(":nth-child() early-exit condition 1: a=1, b-1<=0 (matches all)", { library(XML) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    • 4
    • ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # :nth-child(n) -> a=1, b=0, b-1=-1<=0 -> matches all expect_that(get_ids("li:nth-child(n)"), equals(c("li1", "li2", "li3", "li4"))) # :nth-child(1n+0) -> a=1, b=0, b-1=-1<=0 -> matches all expect_that(get_ids("li:nth-child(1n+0)"), equals(c("li1", "li2", "li3", "li4"))) # :nth-child(n+1) -> a=1, b=1, b-1=0<=0 -> matches all expect_that(get_ids("li:nth-child(n+1)"), equals(c("li1", "li2", "li3", "li4"))) # :nth-child(1n+1) -> a=1, b=1, b-1=0<=0 -> matches all expect_that(get_ids("li:nth-child(1n+1)"), equals(c("li1", "li2", "li3", "li4"))) # :nth-child(n-1) -> a=1, b=-1, b-1=-2<=0 -> matches all expect_that(get_ids("li:nth-child(n-1)"), equals(c("li1", "li2", "li3", "li4"))) # :nth-child(n-5) -> a=1, b=-5, b-1=-6<=0 -> matches all expect_that(get_ids("li:nth-child(n-5)"), equals(c("li1", "li2", "li3", "li4"))) }) test_that(":nth-child() early-exit condition 1 with selector list", { library(xml2) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    • 4
    • ', '
    ', '
    ' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # :nth-child(n of .item) -> a=1, b=0 -> but filtered by .item expect_that(get_ids("li:nth-child(n of .item)"), equals(c("li1", "li2", "li4"))) # :nth-child(1n+0 of .item) -> same as above expect_that(get_ids("li:nth-child(1n+0 of .item)"), equals(c("li1", "li2", "li4"))) # :nth-child(n+1 of .item) -> a=1, b=1 -> filtered by .item expect_that(get_ids("li:nth-child(n+1 of .item)"), equals(c("li1", "li2", "li4"))) }) test_that(":nth-last-child() early-exit condition 1: a=1, b-1<=0", { library(XML) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # :nth-last-child(n) -> a=1, b=0, b-1=-1<=0 -> matches all expect_that(get_ids("li:nth-last-child(n)"), equals(c("li1", "li2", "li3"))) # :nth-last-child(n+1) -> a=1, b=1, b-1=0<=0 -> matches all expect_that(get_ids("li:nth-last-child(n+1)"), equals(c("li1", "li2", "li3"))) # :nth-last-child(n of .special) -> filtered by .special expect_that(get_ids("li:nth-last-child(n of .special)"), equals("li2")) }) test_that(":nth-child() early-exit condition 2: a<0, b-1<0 (matches none)", { library(XML) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) # :nth-child(-n) -> a=-1, b=0, b-1=-1<0 -> impossible, matches none expect_that(length(querySelectorAll(doc, "li:nth-child(-n)")), equals(0)) # :nth-child(-n-1) -> a=-1, b=-1, b-1=-2<0 -> impossible, matches none expect_that(length(querySelectorAll(doc, "li:nth-child(-n-1)")), equals(0)) # :nth-child(-2n-1) -> a=-2, b=-1, b-1=-2<0 -> impossible, matches none expect_that(length(querySelectorAll(doc, "li:nth-child(-2n-1)")), equals(0)) # :nth-child(-3n-5) -> a=-3, b=-5, b-1=-6<0 -> impossible, matches none expect_that(length(querySelectorAll(doc, "li:nth-child(-3n-5)")), equals(0)) # Verify XPath contains "0" condition xpath <- css_to_xpath("li:nth-child(-n)") expect_true(grepl("\\[.*0.*\\]", xpath)) }) test_that(":nth-child() early-exit condition 2 with selector list", { library(xml2) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    ', '
    ' ) doc <- read_xml(html) # :nth-child(-n of .item) -> a=-1, b=0 -> impossible even with selector expect_that(length(querySelectorAll(doc, "li:nth-child(-n of .item)")), equals(0)) # :nth-child(-2n-1 of .item) -> a=-2, b=-1 -> impossible expect_that(length(querySelectorAll(doc, "li:nth-child(-2n-1 of .item)")), equals(0)) # Verify XPath contains both "0" condition and selector check xpath <- css_to_xpath("li:nth-child(-n of .item)") expect_true(grepl("0", xpath)) expect_true(grepl("item", xpath)) }) test_that(":nth-last-child() early-exit condition 2: a<0, b-1<0", { library(XML) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) # :nth-last-child(-n) -> a=-1, b=0, b-1=-1<0 -> impossible expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n)")), equals(0)) # :nth-last-child(-n-1) -> a=-1, b=-1, b-1=-2<0 -> impossible expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n-1)")), equals(0)) # :nth-last-child(-n of .special) -> impossible even with selector expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n of .special)")), equals(0)) }) test_that(":nth-child() boundary between early-exit conditions", { library(xml2) html <- paste0( '', '
      ', '
    • 1
    • ', '
    • 2
    • ', '
    • 3
    • ', '
    ', '
    ' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # :nth-child(-n+0) -> a=-1, b=0, b-1=-1 -> NOT early-exit (b-1<0 but a<0 not b-1<0) # This should match nothing (0 or fewer siblings) expect_that(length(querySelectorAll(doc, "li:nth-child(-n+0)")), equals(0)) # :nth-child(-n+1) -> a=-1, b=1, b-1=0 -> NOT early-exit (b-1 not <0) # This should match first child only expect_that(get_ids("li:nth-child(-n+1)"), equals("li1")) # :nth-child(-n+2) -> a=-1, b=2, b-1=1 -> matches first 2 expect_that(get_ids("li:nth-child(-n+2)"), equals(c("li1", "li2"))) # :nth-child(-2n+2) -> a=-2, b=2, b-1=1 -> matches 2nd child expect_that(get_ids("li:nth-child(-2n+2)"), equals("li2")) # :nth-child(-2n+0) -> a=-2, b=0, b-1=-1<0 -> early-exit condition 2 expect_that(length(querySelectorAll(doc, "li:nth-child(-2n+0)")), equals(0)) }) selectr/tests/testthat/test-series.R0000755000176200001440000000217115107555231017317 0ustar liggesuserscontext("series") test_that("parser generates correct series", { series <- function(css) { selector <- parse(paste0(":nth-child(", css, ")"))[[1]] args <- selector$parsed_tree$arguments parse_series(args) } expect_that(series("1n+3"), equals(c(1, 3))) expect_that(series("1n +3"), equals(c(1, 3))) expect_that(series("1n + 3"), equals(c(1, 3))) expect_that(series("1n+ 3"), equals(c(1, 3))) expect_that(series("1n-3"), equals(c(1, -3))) expect_that(series("1n -3"), equals(c(1, -3))) expect_that(series("1n - 3"), equals(c(1, -3))) expect_that(series("1n- 3"), equals(c(1, -3))) expect_that(series("n-5"), equals(c(1, -5))) expect_that(series("odd"), equals(c(2, 1))) expect_that(series("even"), equals(c(2, 0))) expect_that(series("3n"), equals(c(3, 0))) expect_that(series("n"), equals(c(1, 0))) expect_that(series("+n"), equals(c(1, 0))) expect_that(series("-n"), equals(c(-1, 0))) expect_that(series("5"), equals(c(0, 5))) expect_that(series("foo"), equals(NULL)) expect_that(series("n+"), equals(NULL)) }) selectr/tests/testthat/test-xmllang-xml2.R0000755000176200001440000000341215107555231020346 0ustar liggesuserscontext("lang-xml2") test_that("xml lang function matches correct elements", { xmlLangText <- paste0('', 'a', 'b', 'c', 'd', 'e', 'f', '', '') library(xml2) xmldoc <- read_xml(xmlLangText) gt <- GenericTranslator$new() pid <- function(selector) { xpath <- gt$css_to_xpath(selector) items <- xml_find_all(xmldoc, xpath) n <- length(items) if (!n) return(NULL) result <- character(n) for (i in seq_len(n)) { element <- items[[i]] tmp <- xml_attrs(element)["id"] if (is.null(tmp)) tmp <- "nil" result[i] <- tmp } result } expect_that(pid(':lang("EN")'), equals(c('first', 'second', 'third', 'fourth'))) expect_that(pid(':lang("en-us")'), equals(c('second', 'fourth'))) expect_that(pid(':lang(en-nz)'), equals('third')) expect_that(pid(':lang(fr)'), equals('fifth')) expect_that(pid(':lang(ru)'), equals('sixth')) expect_that(pid(":lang('ZH')"), equals('eighth')) expect_that(pid(':lang(de) :lang(zh)'), equals('eighth')) expect_that(pid(':lang(en), :lang(zh)'), equals(c('first', 'second', 'third', 'fourth', 'eighth'))) expect_that(pid(":lang(es)"), equals(NULL)) }) selectr/tests/testthat/test-adjacent-sibling.R0000755000176200001440000001452515107555231021231 0ustar liggesuserscontext("adjacent sibling combinator") test_that("adjacent sibling combinator generates simplified XPath", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # Simple element + element expect_that(xpath('a + b'), equals("a/following-sibling::*[1][self::b]")) # With attribute on right side expect_that(xpath('a + b[id]'), equals("a/following-sibling::*[1][self::b][(@id)]")) # With class on right side expect_that(xpath('a + b.test'), equals("a/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test '))]")) # With ID on right side expect_that(xpath('a + b#myid'), equals("a/following-sibling::*[1][self::b][(@id = 'myid')]")) # With multiple attributes on right side expect_that(xpath('a + b[id][title]'), equals("a/following-sibling::*[1][self::b][(@id) and (@title)]")) # With class and attribute on right side expect_that(xpath('a + b.test[title]'), equals("a/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test ')) and (@title)]")) # With conditions on both sides expect_that(xpath('a.link + b[id]'), equals("a[(@class and contains(concat(' ', normalize-space(@class), ' '), ' link '))]/following-sibling::*[1][self::b][(@id)]")) expect_that(xpath('a[href] + b.test'), equals("a[(@href)]/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test '))]")) # With ID on left, class and attribute on right expect_that(xpath('div#main + p.intro[title]'), equals("div[(@id = 'main')]/following-sibling::*[1][self::p][(@class and contains(concat(' ', normalize-space(@class), ' '), ' intro ')) and (@title)]")) # Universal selector on right expect_that(xpath('h1 + *[rel=up]'), equals("h1/following-sibling::*[1][self::*][(@rel = 'up')]")) # Combined with child combinator expect_that(xpath('div > h1 + p'), equals("div/h1/following-sibling::*[1][self::p]")) expect_that(xpath('div#main > h1 + p[class]'), equals("div[(@id = 'main')]/h1/following-sibling::*[1][self::p][(@class)]")) # With descendant combinator expect_that(xpath('section a + b'), equals("section//a/following-sibling::*[1][self::b]")) # Complex: multiple combinators and conditions expect_that(xpath('article.post > h2.title + p.intro[data-info]'), equals("article[(@class and contains(concat(' ', normalize-space(@class), ' '), ' post '))]/h2[(@class and contains(concat(' ', normalize-space(@class), ' '), ' title '))]/following-sibling::*[1][self::p][(@class and contains(concat(' ', normalize-space(@class), ' '), ' intro ')) and (@data-info)]")) }) test_that("adjacent sibling combinator works correctly with querySelector", { skip_if_not_installed("XML") library(XML) # Test with immediate adjacent siblings doc1 <- htmlParse('AB') results1 <- querySelectorAll(doc1, "a + b") expect_equal(length(results1), 1) expect_equal(xmlGetAttr(results1[[1]], "id"), "b1") # Test with intervening element (should NOT match) doc2 <- htmlParse('ACB') results2 <- querySelectorAll(doc2, "a + b") expect_equal(length(results2), 0) # Test with attributes on right side doc3 <- htmlParse(' Link1B1 Link2B2 ') results3 <- querySelectorAll(doc3, "a + b[id]") expect_equal(length(results3), 1) expect_equal(xmlGetAttr(results3[[1]], "id"), "b1") # Test with classes on both sides doc4 <- htmlParse(' LinkB1 Link2B2 ') results4 <- querySelectorAll(doc4, "a.link + b.text") expect_equal(length(results4), 1) # Test with multiple adjacent pairs doc5 <- htmlParse(' A1B1 A2B2 ') results5 <- querySelectorAll(doc5, "a + b") expect_equal(length(results5), 2) expect_equal(xmlGetAttr(results5[[1]], "id"), "b1") expect_equal(xmlGetAttr(results5[[2]], "id"), "b2") }) test_that("adjacent sibling maintains correct semantics", { skip_if_not_installed("XML") library(XML) # Verify it only matches IMMEDIATE adjacent siblings doc <- htmlParse('

    Title

    Immediate

    Intervening

    Not immediate

    Subtitle

    Immediate

    ') results <- querySelectorAll(doc, "h1 + p, h2 + p") expect_equal(length(results), 2) ids <- sapply(results, xmlGetAttr, "id") expect_true("p1" %in% ids) expect_true("p3" %in% ids) expect_false("p2" %in% ids) # Test that it respects element type doc2 <- htmlParse(' LinkBC ') results_b <- querySelectorAll(doc2, "a + b") expect_equal(length(results_b), 1) expect_equal(xmlName(results_b[[1]]), "b") results_c <- querySelectorAll(doc2, "a + c") expect_equal(length(results_c), 0) # c is not immediately after a results_star <- querySelectorAll(doc2, "a + *") expect_equal(length(results_star), 1) expect_equal(xmlName(results_star[[1]]), "b") }) test_that("adjacent sibling with pseudo-classes", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # Adjacent sibling with pseudo-class on right expect_that(xpath('h1 + p:first-child'), equals("h1/following-sibling::*[1][self::p][(count(preceding-sibling::*) = 0)]")) # Adjacent sibling with nth-child expect_that(xpath('h1 + p:nth-child(2)'), equals("h1/following-sibling::*[1][self::p][(count(preceding-sibling::*) = 1)]")) }) selectr/tests/testthat/test-shakespeare-xml2.R0000755000176200001440000005320715107555231021206 0ustar liggesuserscontext("shakespeare-test-xml2") test_that("selection works correctly on a shakespearean document", { HTML_SHAKESPEARE <- paste( "", "", #"", "", "\t
    ", "\t
    ", "\t

    As You Like It

    ", "\t
    ", "\t by William Shakespeare", "\t
    ", "\t
    ", "\t

    ACT I, SCENE III. A room in the palace.

    ", "\t
    ", "\t
    Enter CELIA and ROSALIND
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Why, cousin! why, Rosalind! Cupid have mercy! not a word?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Not one to throw at a dog.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    No, thy words are too precious to be cast away upon
    ", "\t
    curs; throw some of them at me; come, lame me with reasons.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    CELIA
    ", "\t
    ", "\t
    But is all this for your father?
    ", "\t
    ", "\t
    ", "\t
    Then there were two cousins laid up; when the one
    ", "\t
    should be lamed with reasons and the other mad
    ", "\t
    without any.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    No, some of it is for my child's father. O, how
    ", "\t
    full of briers is this working-day world!
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    They are but burs, cousin, thrown upon thee in
    ", "\t
    holiday foolery: if we walk not in the trodden
    ", "\t
    paths our very petticoats will catch them.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I could shake them off my coat: these burs are in my heart.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Hem them away.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I would try, if I could cry 'hem' and have him.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Come, come, wrestle with thy affections.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    O, they take the part of a better wrestler than myself!
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    O, a good wish upon you! you will try in time, in
    ", "\t
    despite of a fall. But, turning these jests out of
    ", "\t
    service, let us talk in good earnest: is it
    ", "\t
    possible, on such a sudden, you should fall into so
    ", "\t
    strong a liking with old Sir Rowland's youngest son?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    The duke my father loved his father dearly.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Doth it therefore ensue that you should love his son
    ", "\t
    dearly? By this kind of chase, I should hate him,
    ", "\t
    for my father hated his father dearly; yet I hate
    ", "\t
    not Orlando.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    No, faith, hate him not, for my sake.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Why should I not? doth he not deserve well?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Let me love him for that, and do you love him
    ", "\t
    because I do. Look, here comes the duke.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    With his eyes full of anger.
    ", "\t
    Enter DUKE FREDERICK, with Lords
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Mistress, dispatch you with your safest haste
    ", "\t
    And get you from our court.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Me, uncle?
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    You, cousin
    ", "\t
    Within these ten days if that thou be'st found
    ", "\t
    So near our public court as twenty miles,
    ", "\t
    Thou diest for it.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I do beseech your grace,
    ", "\t
    Let me the knowledge of my fault bear with me:
    ", "\t
    If with myself I hold intelligence
    ", "\t
    Or have acquaintance with mine own desires,
    ", "\t
    If that I do not dream or be not frantic,--
    ", "\t
    As I do trust I am not--then, dear uncle,
    ", "\t
    Never so much as in a thought unborn
    ", "\t
    Did I offend your highness.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Thus do all traitors:
    ", "\t
    If their purgation did consist in words,
    ", "\t
    They are as innocent as grace itself:
    ", "\t
    Let it suffice thee that I trust thee not.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Yet your mistrust cannot make me a traitor:
    ", "\t
    Tell me whereon the likelihood depends.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Thou art thy father's daughter; there's enough.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    So was I when your highness took his dukedom;
    ", "\t
    So was I when your highness banish'd him:
    ", "\t
    Treason is not inherited, my lord;
    ", "\t
    Or, if we did derive it from our friends,
    ", "\t
    What's that to me? my father was no traitor:
    ", "\t
    Then, good my liege, mistake me not so much
    ", "\t
    To think my poverty is treacherous.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Dear sovereign, hear me speak.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Ay, Celia; we stay'd her for your sake,
    ", "\t
    Else had she with her father ranged along.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    I did not then entreat to have her stay;
    ", "\t
    It was your pleasure and your own remorse:
    ", "\t
    I was too young that time to value her;
    ", "\t
    But now I know her: if she be a traitor,
    ", "\t
    Why so am I; we still have slept together,
    ", "\t
    Rose at an instant, learn'd, play'd, eat together,
    ", "\t
    And wheresoever we went, like Juno's swans,
    ", "\t
    Still we went coupled and inseparable.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    She is too subtle for thee; and her smoothness,
    ", "\t
    Her very silence and her patience
    ", "\t
    Speak to the people, and they pity her.
    ", "\t
    Thou art a fool: she robs thee of thy name;
    ", "\t
    And thou wilt show more bright and seem more virtuous
    ", "\t
    When she is gone. Then open not thy lips:
    ", "\t
    Firm and irrevocable is my doom
    ", "\t
    Which I have pass'd upon her; she is banish'd.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Pronounce that sentence then on me, my liege:
    ", "\t
    I cannot live out of her company.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    You are a fool. You, niece, provide yourself:
    ", "\t
    If you outstay the time, upon mine honour,
    ", "\t
    And in the greatness of my word, you die.
    ", "\t
    Exeunt DUKE FREDERICK and Lords
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    O my poor Rosalind, whither wilt thou go?
    ", "\t
    Wilt thou change fathers? I will give thee mine.
    ", "\t
    I charge thee, be not thou more grieved than I am.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I have more cause.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Thou hast not, cousin;
    ", "\t
    Prithee be cheerful: know'st thou not, the duke
    ", "\t
    Hath banish'd me, his daughter?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    That he hath not.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    No, hath not? Rosalind lacks then the love
    ", "\t
    Which teacheth thee that thou and I am one:
    ", "\t
    Shall we be sunder'd? shall we part, sweet girl?
    ", "\t
    No: let my father seek another heir.
    ", "\t
    Therefore devise with me how we may fly,
    ", "\t
    Whither to go and what to bear with us;
    ", "\t
    And do not seek to take your change upon you,
    ", "\t
    To bear your griefs yourself and leave me out;
    ", "\t
    For, by this heaven, now at our sorrows pale,
    ", "\t
    Say what thou canst, I'll go along with thee.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Why, whither shall we go?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    To seek my uncle in the forest of Arden.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Alas, what danger will it be to us,
    ", "\t
    Maids as we are, to travel forth so far!
    ", "\t
    Beauty provoketh thieves sooner than gold.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    I'll put myself in poor and mean attire
    ", "\t
    And with a kind of umber smirch my face;
    ", "\t
    The like do you: so shall we pass along
    ", "\t
    And never stir assailants.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Were it not better,
    ", "\t
    Because that I am more than common tall,
    ", "\t
    That I did suit me all points like a man?
    ", "\t
    A gallant curtle-axe upon my thigh,
    ", "\t
    A boar-spear in my hand; and--in my heart
    ", "\t
    Lie there what hidden woman's fear there will--
    ", "\t
    We'll have a swashing and a martial outside,
    ", "\t
    As many other mannish cowards have
    ", "\t
    That do outface it with their semblances.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    What shall I call thee when thou art a man?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I'll have no worse a name than Jove's own page;
    ", "\t
    And therefore look you call me Ganymede.
    ", "\t
    But what will you be call'd?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Something that hath a reference to my state
    ", "\t
    No longer Celia, but Aliena.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    But, cousin, what if we assay'd to steal
    ", "\t
    The clownish fool out of your father's court?
    ", "\t
    Would he not be a comfort to our travel?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    He'll go along o'er the wide world with me;
    ", "\t
    Leave me alone to woo him. Let's away,
    ", "\t
    And get our jewels and our wealth together,
    ", "\t
    Devise the fittest time and safest way
    ", "\t
    To hide us from pursuit that will be made
    ", "\t
    After my flight. Now go we in content
    ", "\t
    To liberty and not to banishment.
    ", "\t
    Exeunt
    ", "\t
    ", "\t
    ", "\t
    ", "
    ", "", "", sep = "\n") library(xml2) document <- read_xml(HTML_SHAKESPEARE) xml_ns_strip(document) body <- xml_find_first(document, "//body") gt <- GenericTranslator$new() count <- function(selector) { xpath <- gt$css_to_xpath(selector) results <- xml_find_all(body, xpath) length(results) } # Data borrowed from http://mootools.net/slickspeed/ ## Changed from original; probably because I'm only ## searching the body. #expect_that(count('*'), equals(252)) expect_that(count('*'), equals(246)) expect_that(count('div:contains(CELIA)'), equals(26)) expect_that(count('div:only-child'), equals(22)) # ? expect_that(count('div:nth-child(even)'), equals(106)) expect_that(count('div:nth-child(2n)'), equals(106)) expect_that(count('div:nth-child(odd)'), equals(137)) expect_that(count('div:nth-child(2n+1)'), equals(137)) expect_that(count('div:nth-child(n)'), equals(243)) expect_that(count('div:last-child'), equals(53)) expect_that(count('div:first-child'), equals(51)) expect_that(count('div > div'), equals(242)) expect_that(count('div + div'), equals(190)) expect_that(count('div ~ div'), equals(190)) expect_that(count('body'), equals(1)) expect_that(count('body div'), equals(243)) expect_that(count('div'), equals(243)) expect_that(count('div div'), equals(242)) expect_that(count('div div div'), equals(241)) expect_that(count('div, div, div'), equals(243)) expect_that(count('div, a, span'), equals(243)) expect_that(count('.dialog'), equals(51)) expect_that(count('div.dialog'), equals(51)) expect_that(count('div .dialog'), equals(51)) expect_that(count('div.character, div.dialog'), equals(99)) expect_that(count('div.direction.dialog'), equals(0)) expect_that(count('div.dialog.direction'), equals(0)) expect_that(count('div.dialog.scene'), equals(1)) expect_that(count('div.scene.scene'), equals(1)) expect_that(count('div.scene .scene'), equals(0)) expect_that(count('div.direction .dialog '), equals(0)) expect_that(count('div .dialog .direction'), equals(4)) expect_that(count('div.dialog .dialog .direction'), equals(4)) expect_that(count('#speech5'), equals(1)) expect_that(count('div#speech5'), equals(1)) expect_that(count('div #speech5'), equals(1)) expect_that(count('div.scene div.dialog'), equals(49)) expect_that(count('div#scene1 div.dialog div'), equals(142)) expect_that(count('#scene1 #speech1'), equals(1)) expect_that(count('div[class]'), equals(103)) expect_that(count('div[class=dialog]'), equals(50)) expect_that(count('div[class^=dia]'), equals(51)) expect_that(count('div[class$=log]'), equals(50)) expect_that(count('div[class*=sce]'), equals(1)) expect_that(count('div[class|=dialog]'), equals(50)) # ? Seems right expect_that(count('div[class!=madeup]'), equals(243)) # ? Seems right expect_that(count('div[class~=dialog]'), equals(51)) # ? Seems right }) selectr/tests/testthat/test-method-registration.R0000755000176200001440000000046415107555231022020 0ustar liggesuserscontext("methods") test_that("method registration occurs correctly", { library(XML) xdoc <- xmlParse("") library(xml2) x2doc <- read_xml("") results <- querySelector(xdoc, "circle") results <- querySelector(x2doc, "circle") }) selectr/tests/testthat/test-translation.R0000755000176200001440000001736615120450063020367 0ustar liggesuserscontext("translation") test_that("translation from parsed objects to XPath works", { gt <- GenericTranslator$new() xpath <- function(css) { gt$css_to_xpath(css, prefix = "") } expect_that(xpath("*"), equals("*")) expect_that(xpath("e"), equals("e")) expect_that(xpath("*|e"), equals("e")) expect_that(xpath("e|f"), equals("e:f")) expect_that(xpath("e[foo]"), equals("e[(@foo)]")) expect_that(xpath("e[foo|bar]"), equals("e[(@foo:bar)]")) expect_that(xpath('e[foo="bar"]'), equals("e[(@foo = 'bar')]")) expect_that(xpath('e[foo!="bar"]'), equals("e[(not(@foo) or @foo != 'bar')]")) expect_that(xpath("e[foo='(test)']"), equals("e[(@foo = '(test)')]")) expect_that(xpath('e[foo="(test)"]'), equals("e[(@foo = '(test)')]")) expect_that(xpath("e[foo='(abc)']"), equals("e[(@foo = '(abc)')]")) expect_that(xpath("e[foo='(e2e)']"), equals("e[(@foo = '(e2e)')]")) expect_that(xpath('e[foo="(e2e)"]'), equals("e[(@foo = '(e2e)')]")) expect_that(xpath("e[foo='(123)']"), equals("e[(@foo = '(123)')]")) expect_that(xpath("e[foo='(12345)']"), equals("e[(@foo = '(12345)')]")) # Six hex digits (max for CSS unicode escape) expect_that(xpath("e[foo='(abcdef)']"), equals("e[(@foo = '(abcdef)')]")) expect_that(xpath("e[foo='(123456)']"), equals("e[(@foo = '(123456)')]")) # Seven hex digits (exceeds max, so not unicode escape required) expect_that(xpath("e[foo='(1234567)']"), equals("e[(@foo = '(1234567)')]")) expect_that(xpath("e[foo='(AbCdEf)']"), equals("e[(@foo = '(AbCdEf)')]")) expect_that(xpath("e[foo='(E2E)']"), equals("e[(@foo = '(E2E)')]")) expect_that(xpath("e[foo='(o2o)']"), equals("e[(@foo = '(o2o)')]")) expect_that(xpath('e[foo="(o2o)"]'), equals("e[(@foo = '(o2o)')]")) expect_that(xpath("e[foo='(xyz)']"), equals("e[(@foo = '(xyz)')]")) expect_that(xpath("e[foo='(test123)']"), equals("e[(@foo = '(test123)')]")) expect_that(xpath("e[foo='(abc)(def)']"), equals("e[(@foo = '(abc)(def)')]")) expect_that(xpath("e[foo='(abc )']"), equals("e[(@foo = '(abc )')]")) expect_that(xpath('e[foo~="bar"]'), equals("e[(@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar '))]")) expect_that(xpath('e[foo^="bar"]'), equals("e[(@foo and starts-with(@foo, 'bar'))]")) expect_that(xpath('e[foo$="bar"]'), equals("e[(@foo and substring(@foo, string-length(@foo)-2) = 'bar')]")) expect_that(xpath('e[foo*="bar"]'), equals("e[(@foo and contains(@foo, 'bar'))]")) expect_that(xpath('e[hreflang|="en"]'), equals("e[(@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-')))]")) expect_that(xpath('e:nth-child(1)'), equals("e[(count(preceding-sibling::*) = 0)]")) expect_that(xpath('e:nth-child(3n+2)'), equals("e[(count(preceding-sibling::*) >= 1 and (count(preceding-sibling::*) +2) mod 3 = 0)]")) expect_that(xpath('e:nth-child(3n-2)'), equals("e[(count(preceding-sibling::*) mod 3 = 0)]")) expect_that(xpath('e:nth-child(-n+6)'), equals("e[(count(preceding-sibling::*) <= 5)]")) expect_that(xpath('e:nth-last-child(1)'), equals("e[(count(following-sibling::*) = 0)]")) expect_that(xpath('e:nth-last-child(2n)'), equals("e[((count(following-sibling::*) +1) mod 2 = 0)]")) expect_that(xpath('e:nth-last-child(2n+1)'), equals("e[(count(following-sibling::*) mod 2 = 0)]")) expect_that(xpath('e:nth-last-child(2n+2)'), equals("e[(count(following-sibling::*) >= 1 and (count(following-sibling::*) +1) mod 2 = 0)]")) expect_that(xpath('e:nth-last-child(3n+1)'), equals("e[(count(following-sibling::*) mod 3 = 0)]")) expect_that(xpath('e:nth-last-child(-n+2)'), equals("e[(count(following-sibling::*) <= 1)]")) expect_that(xpath('e:nth-of-type(1)'), equals("e[(count(preceding-sibling::e) = 0)]")) expect_that(xpath('e:nth-last-of-type(1)'), equals("e[(count(following-sibling::e) = 0)]")) expect_that(xpath('div e:nth-last-of-type(1) .aclass'), equals("div//e[(count(following-sibling::e) = 0)]//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass '))]")) expect_that(xpath('e:first-child'), equals("e[(count(preceding-sibling::*) = 0)]")) expect_that(xpath('e:last-child'), equals("e[(count(following-sibling::*) = 0)]")) expect_that(xpath('e:first-of-type'), equals("e[(count(preceding-sibling::e) = 0)]")) expect_that(xpath('e:last-of-type'), equals("e[(count(following-sibling::e) = 0)]")) expect_that(xpath('e:only-child'), equals("e[(count(parent::*/child::*) = 1)]")) expect_that(xpath('e:only-of-type'), equals("e[(count(parent::*/child::e) = 1)]")) expect_that(xpath('e:empty'), equals("e[(not(*) and not(string-length()))]")) expect_that(xpath('e:EmPTY'), equals("e[(not(*) and not(string-length()))]")) expect_that(xpath('e:root'), equals("e[(not(parent::*))]")) expect_that(xpath('e:hover'), equals("e[(0)]")) #never matches expect_that(xpath('e:contains("foo")'), equals("e[(contains(., 'foo'))]")) expect_that(xpath('e:ConTains(foo)'), equals("e[(contains(., 'foo'))]")) expect_that(xpath('e.warning'), equals("e[(@class and contains(concat(' ', normalize-space(@class), ' '), ' warning '))]")) expect_that(xpath('e#myid'), equals("e[(@id = 'myid')]")) expect_that(xpath('e:not(:nth-child(odd))'), equals("e[(not((count(preceding-sibling::*) mod 2 = 0)))]")) expect_that(xpath('e:nOT(*)'), equals("e[(0)]")) # never matches expect_that(xpath('e f'), equals("e//f")) expect_that(xpath('e > f'), equals("e/f")) expect_that(xpath('e + f'), equals("e/following-sibling::*[1][self::f]")) expect_that(xpath('e ~ f'), equals("e/following-sibling::f")) expect_that(xpath('e ~ f:nth-child(3)'), equals("e/following-sibling::f[(count(preceding-sibling::*) = 2)]")) expect_that(xpath('div#container p'), equals("div[(@id = 'container')]//p")) # expect that the following do nothing for the generic translator expect_that(xpath('a:any-link'), equals("a[(0)]")) expect_that(xpath('a:link'), equals("a[(0)]")) expect_that(xpath('a:visited'), equals("a[(0)]")) expect_that(xpath('a:hover'), equals("a[(0)]")) expect_that(xpath('a:active'), equals("a[(0)]")) expect_that(xpath('a:focus'), equals("a[(0)]")) expect_that(xpath('a:target'), equals("a[(0)]")) expect_that(xpath('a:target-within'), equals("a[(0)]")) expect_that(xpath('a:local-link'), equals("a[(0)]")) expect_that(xpath('a:enabled'), equals("a[(0)]")) expect_that(xpath('a:disabled'), equals("a[(0)]")) expect_that(xpath('a:checked'), equals("a[(0)]")) # Invalid characters in XPath element names charsets <- localeToCharset() if (!anyNA(charsets) && charsets[1] == "UTF-8") { expect_that(xpath('di\ua0v'), equals("*[(name() = 'di v')]")) # div\ua0v expect_that(xpath('[h\ua0ref]'), equals("*[(attribute::*[name() = 'h ref'])]")) # h\ua0ref } expect_that(xpath('di\\[v'), equals("*[(name() = 'di[v')]")) expect_that(xpath('[h\\]ref]'), equals("*[(attribute::*[name() = 'h]ref'])]")) }) selectr/tests/testthat/test-xmllang-XML.R0000755000176200001440000000341615107555231020130 0ustar liggesuserscontext("lang-XML") test_that("xml lang function matches correct elements", { xmlLangText <- paste0('', 'a', 'b', 'c', 'd', 'e', 'f', '', '') library(XML) xmldoc <- xmlRoot(xmlParse(xmlLangText)) gt <- GenericTranslator$new() pid <- function(selector) { xpath <- gt$css_to_xpath(selector) items <- getNodeSet(xmldoc, xpath) n <- length(items) if (!n) return(NULL) result <- character(n) for (i in seq_len(n)) { element <- items[[i]] tmp <- xmlAttrs(element)["id"] if (is.null(tmp)) tmp <- "nil" result[i] <- tmp } result } expect_that(pid(':lang("EN")'), equals(c('first', 'second', 'third', 'fourth'))) expect_that(pid(':lang("en-us")'), equals(c('second', 'fourth'))) expect_that(pid(':lang(en-nz)'), equals('third')) expect_that(pid(':lang(fr)'), equals('fifth')) expect_that(pid(':lang(ru)'), equals('sixth')) expect_that(pid(":lang('ZH')"), equals('eighth')) expect_that(pid(':lang(de) :lang(zh)'), equals('eighth')) expect_that(pid(':lang(en), :lang(zh)'), equals(c('first', 'second', 'third', 'fourth', 'eighth'))) expect_that(pid(":lang(es)"), equals(NULL)) }) selectr/tests/testthat/test-where.R0000755000176200001440000002415615107555231017146 0ustar liggesuserscontext(":where() pseudo-class") test_that(":where() generates correct XPath", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # Simple :where() with single selector expect_that(xpath("div:where(p)"), equals("div[((name() = 'p'))]")) # :where() with class selector expect_that(xpath("div:where(.foo)"), equals("div[((@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')))]")) # :where() with ID selector expect_that(xpath("section:where(#main)"), equals("section[((@id = 'main'))]")) # :where() with attribute selector expect_that(xpath("input:where([required])"), equals("input[((@required))]")) # :where() with multiple selectors (OR logic) expect_that(xpath("div:where(p, span)"), equals("div[((name() = 'p')) or ((name() = 'span'))]")) # :where() with element and class (both conditions must match) expect_that(xpath("*:where(div.content)"), equals("*[((@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')) and (name() = 'div'))]")) # Multiple :where() selectors - currently treated as OR, not AND # Note: parser combines them into a single :where() expect_that(xpath("div:where(p):where(span)"), equals("div[((name() = 'p')) or ((name() = 'span'))]")) # :where() on universal selector expect_that(xpath("*:where(.highlight)"), equals("*[((@class and contains(concat(' ', normalize-space(@class), ' '), ' highlight ')))]")) # :where() with multiple classes expect_that(xpath("div:where(.foo, .bar)"), equals("div[((@class and contains(concat(' ', normalize-space(@class), ' '), ' foo '))) or ((@class and contains(concat(' ', normalize-space(@class), ' '), ' bar ')))]")) # Complex: :where() with mix of selectors expect_that(xpath("p:where(.highlight, #special, [data-key])"), equals("p[((@class and contains(concat(' ', normalize-space(@class), ' '), ' highlight '))) or ((@id = 'special')) or ((@data-key))]")) }) test_that(":where() works correctly with XML documents", { library(XML) html <- paste0( '', '
    Div 1
    ', ' ', '

    Para 1

    ', '

    Para 2

    ', ' Span 1', '
    ', '
    Article
    ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # Elements matching div OR p (via :where) expect_that(get_ids("*:where(div, p)"), equals(c("d1", "d2", "p1", "p2"))) # Elements with class content (any element type) expect_that(get_ids("*:where(.content)"), equals(c("d1", "p1", "s1"))) # Div elements that are either content or sidebar expect_that(get_ids("div:where(.content, .sidebar)"), equals(c("d1", "d2"))) # Elements matching specific ID # Note: returns all ancestors in XML, so we check for inclusion ids <- get_ids("*:where(#p1)") expect_that("p1" %in% ids, equals(TRUE)) # :where() with element that has specific class expect_that(get_ids("*:where(p.highlight)"), equals("p2")) # :where() matches nothing if conditions don't align expect_that(length(querySelectorAll(doc, "div:where(p)")), equals(0)) }) test_that(":where() works correctly with xml2 documents", { library(xml2) html <- paste0( '', '
    Div 1
    ', ' ', '

    Para 1

    ', '

    Para 2

    ', ' Span 1', '
    ', '
    Article
    ', '
    ', '
    ' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # Elements matching div OR p (via :where) expect_that(get_ids("*:where(div, p)"), equals(c("d1", "d2", "p1", "p2"))) # Elements with class content (any element type) expect_that(get_ids("*:where(.content)"), equals(c("d1", "p1", "s1"))) # Div elements that are either content or sidebar expect_that(get_ids("div:where(.content, .sidebar)"), equals(c("d1", "d2"))) # Elements matching specific ID # Note: returns all ancestors, so we check for inclusion ids <- get_ids("*:where(#p1)") expect_that("p1" %in% ids, equals(TRUE)) # :where() with element that has specific class expect_that(get_ids("*:where(p.highlight)"), equals("p2")) # :where() matches nothing if conditions don't align expect_that(length(querySelectorAll(doc, "div:where(p)")), equals(0)) }) test_that(":where() has zero specificity", { library(XML) html <- paste0( '', '
    Content
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) # All of these should match the same element # :where() doesn't add specificity regardless of what's inside expect_that(length(querySelectorAll(doc, "div:where(#test)")), equals(1)) expect_that(length(querySelectorAll(doc, "div:where(.foo)")), equals(1)) expect_that(length(querySelectorAll(doc, ":where(div)")), equals(1)) expect_that(length(querySelectorAll(doc, ":where(#test, .foo, div)")), equals(1)) # Specificity is handled in parser/specificity calculation # Here we just verify matching works }) test_that(":where() handles edge cases correctly", { library(XML) # Empty document case html1 <- '' doc1 <- xmlRoot(xmlParse(html1)) expect_that(length(querySelectorAll(doc1, "*:where(div)")), equals(0)) # Multiple classes html2 <- paste0( '', '
    A
    ', '
    B
    ', '
    C
    ', '
    ' ) doc2 <- xmlRoot(xmlParse(html2)) # Divs with foo OR bar class result <- querySelectorAll(doc2, "div:where(.foo, .bar)") expect_that(length(result), equals(3)) # :where() with universal selector inside html4 <- '

    ' doc4 <- xmlRoot(xmlParse(html4)) # This matches elements that are any type (essentially all elements plus root) result3 <- querySelectorAll(doc4, "*:where(*)") # Returns root plus all descendants expect_that(length(result3) >= 3, equals(TRUE)) }) test_that(":where() works with querySelector (returns first match)", { library(xml2) html <- paste0( '', '

    First
    ', '

    Second

    ', ' Third', '' ) doc <- read_xml(html) # Should return first element with class foo result <- querySelector(doc, "*:where(.foo)") expect_that(xml_attr(result, "id"), equals("d1")) # Should return first div or p result2 <- querySelector(doc, "*:where(div, p)") expect_that(xml_attr(result2, "id"), equals("d1")) # Should return NULL when no match result_none <- querySelector(doc, "*:where(article)") expect_that(result_none, equals(NULL)) }) test_that(":where() and :is() behave similarly in matching", { library(XML) html <- paste0( '', '
    Div
    ', '

    Para

    ', ' Span', '
    ' ) doc <- xmlRoot(xmlParse(html)) get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # :where() and :is() should match the same elements # (only difference is specificity, which doesn't affect matching) where_result <- get_ids("*:where(div, p)") is_result <- get_ids("*:is(div, p)") expect_that(where_result, equals(is_result)) where_result2 <- get_ids("*:where(.content)") is_result2 <- get_ids("*:is(.content)") expect_that(where_result2, equals(is_result2)) }) test_that(":where() can be combined with other selectors", { library(xml2) html <- paste0( '', '
    ', '
    Div 1
    ', '

    Para 1

    ', '
    ', ' ', '
    ' ) doc <- read_xml(html) get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # Descendant combinator: section containing divs or ps # Note: returns all matching descendants including ancestors ids <- get_ids("section *:where(div, p)") expect_that("d1" %in% ids && "p1" %in% ids, equals(TRUE)) # Class selector before :where() # Elements with class content that are divs or ps # (all 4 elements match: d1,p1 have .content, and :where checks div|p) result <- get_ids(".content:where(div, p)") expect_that("d1" %in% result && "p1" %in% result, equals(TRUE)) # Child combinator expect_that(get_ids("section > *:where(.content)"), equals(c("d1", "p1"))) }) selectr/tests/testthat/test-tokenizer.R0000755000176200001440000000202715107555231020037 0ustar liggesuserscontext("tokenizer") test_that("tokenizer extracts correct representation", { tokens <- tokenize('E > f [a~="y\\"x"]:nth(/* fu /]* */-3.7)') tokens <- unlist(lapply(tokens, function(x) x$repr())) expected_tokens <- c("", "", "' at 3>", "", "", "", "", "", "", "", "", "", "", "", "", "") expect_that(tokens, equals(expected_tokens)) }) selectr/tests/testthat/test-select-XML.R0000755000176200001440000002570515107555231017752 0ustar liggesuserscontext("large-test") test_that("selection works correctly on a large barrage of tests", { HTML_IDS <- paste0( c("", " ", " ", "", "
    ", " ", " link", " ", " link", "
      ", "
    1. content
    2. ", "
    3. ", "
      ", "
      ", "
    4. ", "
    5. ", "
    6. ", "
    7. ", "
    8. ", "
    9. ", "
    ", "

    ", " hi there", " guy", " ", " ", " ", " ", " ", " ", " ", "

    ", " ", " ", "
    ", "

    ", "
      ", "
    ", " ", " ", " ", " ", "
    ", "
    ", ""), collapse = "\n") library(XML) document <- xmlRoot(xmlParse(HTML_IDS)) gt <- GenericTranslator$new() ht <- HTMLTranslator$new() select_ids <- function(selector, html_only) { if (html_only) { xpath <- ht$css_to_xpath(selector) items <- getNodeSet(document, xpath) } else { xpath <- gt$css_to_xpath(selector) items <- getNodeSet(document, xpath) } n <- length(items) if (!n) return(NULL) result <- character(n) for (i in seq_len(n)) { element <- items[[i]] tmp <- xmlAttrs(element)["id"] if (is.null(tmp)) tmp <- "nil" result[i] <- tmp } result } pcss <- function(main, selectors = NULL, html_only = FALSE) { result <- select_ids(main, html_only) if (!is.null(selectors) && length(selectors)) { n <- length(selectors) for (i in seq_len(n)) { tmp_res <- select_ids(selectors[i], html_only = html_only) if (!is.null(result) && !is.null(tmp_res) && !identical(tmp_res, result)) stop("Difference between results of selectors") } } result } all_ids <- pcss('*') expect_that(all_ids[1:6], equals(c('html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'))) expect_that(tail(all_ids, 1), equals('foobar-span')) expect_that(pcss('div'), equals(c('outer-div', 'li-div', 'foobar-div'))) expect_that(pcss('DIV', html_only = TRUE), equals(c('outer-div', 'li-div', 'foobar-div'))) # case-insensitive in HTML expect_that(pcss('div div'), equals('li-div')) expect_that(pcss('div, div div'), equals(c('outer-div', 'li-div', 'foobar-div'))) expect_that(pcss('a[name]'), equals('name-anchor')) expect_that(pcss('a[NAme]', html_only = TRUE), equals('name-anchor')) # case-insensitive in HTML: expect_that(pcss('a[rel]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[rel="tag"]'), equals('tag-anchor')) expect_that(pcss('a[href*="localhost"]'), equals('tag-anchor')) expect_that(pcss('a[href*=""]'), equals(NULL)) expect_that(pcss('a[href^="http"]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[href^="http:"]'), equals('tag-anchor')) expect_that(pcss('a[href^=""]'), equals(NULL)) expect_that(pcss('a[href$="org"]'), equals('nofollow-anchor')) expect_that(pcss('a[href$=""]'), equals(NULL)) expect_that(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]'), equals('foobar-div')) expect_that(pcss('[foobar~="ab bc"]', c('[foobar~=""]', '[foobar~=" \t"]')), equals(NULL)) expect_that(pcss('div[foobar~="cd"]'), equals(NULL)) expect_that(pcss('*[lang|="En"]', '[lang|="En-us"]'), equals('second-li')) # Attribute values are case sensitive expect_that(pcss('*[lang|="en"]', '[lang|="en-US"]'), equals(NULL)) expect_that(pcss('*[lang|="e"]'), equals(NULL)) # ... :lang() is not. expect_that(pcss(':lang("EN")', '*:lang(en-US)', html_only = TRUE), equals(c('second-li', 'li-div'))) expect_that(pcss(':lang("e")', html_only = TRUE), equals(NULL)) expect_that(pcss('li:nth-child(-n)'), equals(NULL)) expect_that(pcss('li:nth-child(n)'), equals(c('first-li', 'second-li', 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(3)'), equals('third-li')) expect_that(pcss('li:nth-child(10)'), equals(NULL)) expect_that(pcss('li:nth-child(2n)', c('li:nth-child(even)', 'li:nth-child(2n+0)')), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), equals(c('first-li', 'third-li', 'fifth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(2n+4)'), equals(c('fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-child(3n+1)'), equals(c('first-li', 'fourth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(-n+3)'), equals(c('first-li', 'second-li', 'third-li'))) expect_that(pcss('li:nth-child(-2n+4)'), equals(c('second-li', 'fourth-li'))) expect_that(pcss('li:nth-last-child(0)'), equals(NULL)) expect_that(pcss('li:nth-last-child(1)'), equals('seventh-li')) expect_that(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-last-child(2n+2)'), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('ol:first-of-type'), equals('first-ol')) expect_that(pcss('ol:nth-child(1)'), equals(NULL)) expect_that(pcss('ol:nth-of-type(2)'), equals('second-ol')) expect_that(pcss('ol:nth-last-of-type(1)'), equals('second-ol')) expect_that(pcss('span:only-child'), equals('foobar-span')) expect_that(pcss('li div:only-child'), equals('li-div')) expect_that(pcss('div *:only-child'), equals(c('li-div', 'foobar-span'))) #self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') expect_that(pcss('p:only-of-type'), equals('paragraph')) expect_that(pcss('a:empty', 'a:EMpty'), equals('name-anchor')) expect_that(pcss('li:empty'), equals(c('third-li', 'fourth-li', 'fifth-li', 'sixth-li'))) expect_that(pcss(':root', 'html:root'), equals('html')) expect_that(pcss('li:root', '* :root'), equals(NULL)) expect_that(pcss('*:contains("link")', ':CONtains("link")'), equals(c('html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor'))) expect_that(pcss('*:contains("LInk")'), equals(NULL)) # case sensitive expect_that(pcss('*:contains("e")'), equals(c('html', 'nil', 'outer-div', 'first-ol', 'first-li', 'paragraph', 'p-em'))) expect_that(pcss('*:contains("E")'), equals(NULL)) # case-sensitive expect_that(pcss('.a', c('.b', '*.a', 'ol.a')), equals('first-ol')) expect_that(pcss('.c', '*.c'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss('ol *.c', c('ol li.c', 'li ~ li.c', 'ol > li.c')), equals(c('third-li', 'fourth-li'))) expect_that(pcss('#first-li', c('li#first-li', '*#first-li')), equals('first-li')) expect_that(pcss('li div', c('li > div', 'div div')), equals('li-div')) expect_that(pcss('div > div'), equals(NULL)) expect_that(pcss('div>.c', 'div > .c'), equals('first-ol')) expect_that(pcss('div + div'), equals('foobar-div')) expect_that(pcss('a ~ a'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[rel="tag"] ~ a'), equals('nofollow-anchor')) expect_that(pcss('ol#first-ol li:last-child'), equals('seventh-li')) expect_that(pcss('ol#first-ol *:last-child'), equals(c('li-div', 'seventh-li'))) expect_that(pcss('#outer-div:first-child'), equals('outer-div')) expect_that(pcss('#outer-div :first-child'), equals(c('name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-fieldset-disabled', 'area-href'))) expect_that(pcss('a[href]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss(':not(*)'), equals(NULL)) expect_that(pcss('a:not([href])'), equals('name-anchor')) expect_that(pcss('ol :Not(li[class])'), equals(c('first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'))) expect_that(pcss(':is(#first-li, #second-li)'), equals(c('first-li', 'second-li'))) expect_that(pcss('a:is(#name-anchor, #tag-anchor)'), equals(c('name-anchor', 'tag-anchor'))) expect_that(pcss(':is(.c)'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss(':matches(#first-li, #second-li)'), equals(c('first-li', 'second-li'))) expect_that(pcss('a:matches(#name-anchor, #tag-anchor)'), equals(c('name-anchor', 'tag-anchor'))) expect_that(pcss(':matches(.c)'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss('ol:has(li)'), equals('first-ol')) # :has(.c) matches all ancestors of elements with class 'c' expect_that(pcss(':has(.c)'), equals(c('html', 'nil', 'outer-div', 'first-ol'))) # Invalid characters in XPath element names, should not crash expect_that(pcss('di\ua0v', 'div\\['), equals(NULL)) expect_that(pcss('[h\ua0ref]', '[h\\]ref]'), equals(NULL)) ## HTML-specific expect_that(pcss(':link', html_only = TRUE), equals(c('link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'))) expect_that(pcss(':visited', html_only = TRUE), equals(NULL)) expect_that(pcss(':enabled', html_only = TRUE), equals(c('link-href', 'tag-anchor', 'nofollow-anchor', 'checkbox-unchecked', 'text-checked', 'checkbox-checked', 'area-href'))) expect_that(pcss(':disabled', html_only = TRUE), equals(c('checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', 'checkbox-fieldset-disabled'))) expect_that(pcss(':checked', html_only = TRUE), equals(c('checkbox-checked', 'checkbox-disabled-checked'))) }) selectr/tests/testthat/test-quoting.R0000755000176200001440000000130715107555231017513 0ustar liggesuserscontext("quoting") test_that("quote characters are escaped", { gt <- GenericTranslator$new() css <- function(x) gt$css_to_xpath(x) expect_that(css('*[aval="\'"]'), equals('descendant-or-self::*[(@aval = "\'")]')) expect_that(css('*[aval="\'\'\'"]'), equals("descendant-or-self::*[(@aval = \"'''\")]")) expect_that(css('*[aval=\'"\']'), equals("descendant-or-self::*[(@aval = '\"')]")) expect_that(css('*[aval=\'"""\']'), equals("descendant-or-self::*[(@aval = '\"\"\"')]")) expect_that(css('*[aval=\'"\\\'"\']'), equals("descendant-or-self::*[(@aval = concat('\"',\"'\",'\"'))]")) }) selectr/tests/testthat/test-parser.R0000755000176200001440000002112615107555231017322 0ustar liggesuserscontext("parser") test_that("parser parses canonical test expressions", { parse_many <- function(css) { selectors <- lapply(css, function(x) parse(x)) n <- length(selectors) results <- list() for (i in seq_len(n)) { selector <- selectors[[i]] if (is.list(selector)) { results[[i]] <- unlist(lapply(selector, function(x) x$repr())) } else { results[[i]] <- selector$repr() } } if (n) unlist(results) else character(0) } expect_that(parse_many("*"), equals("Element[*]")) expect_that(parse_many("*|*"), equals("Element[*]")) expect_that(parse_many("*|foo"), equals("Element[foo]")) expect_that(parse_many("|foo"), equals("Element[foo]")) expect_that(parse_many("foo|*"), equals("Element[foo|*]")) expect_that(parse_many("foo|bar"), equals("Element[foo|bar]")) expect_that(parse_many('foo[lang|="zh"]'), equals("Attrib[Element[foo][lang |= 'zh']]")) # This will never match, but it is valid: expect_that(parse_many("#foo#bar"), equals("Hash[Hash[Element[*]#foo]#bar]")) expect_that(parse_many(c("div>.foo", "div> .foo", "div >.foo", "div > .foo", "div > .foo", "div \n> \t \t .foo", "div\r>\n\n\n.foo", "div\f>\f.foo")), equals(rep("CombinedSelector[Element[div] > Class[Element[*].foo]]", 8))) expect_that(parse_many(c("td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar")), equals(rep(c("Class[Element[td].foo]", "Class[Element[*].bar]"), 3))) expect_that(parse_many(c("div, td.foo, div.bar span")), equals(c("Element[div]", "Class[Element[td].foo]", "CombinedSelector[Class[Element[div].bar] Element[span]]"))) expect_that(parse_many("div > p"), equals("CombinedSelector[Element[div] > Element[p]]")) expect_that(parse_many("td:first"), equals("Pseudo[Element[td]:first]")) expect_that(parse_many("td :first"), equals("CombinedSelector[Element[td] Pseudo[Element[*]:first]]")) expect_that(parse_many(c("a[name]", "a[ name\t]")), equals(rep("Attrib[Element[a][name]]", 2))) expect_that(parse_many("a [name]"), equals("CombinedSelector[Element[a] Attrib[Element[*][name]]]")) expect_that(parse_many(c('a[rel="include"]', 'a[rel = include]')), equals(rep("Attrib[Element[a][rel = 'include']]", 2))) expect_that(parse_many(c("a[hreflang |= 'en']", "a[hreflang|=en]")), equals(rep("Attrib[Element[a][hreflang |= 'en']]", 2))) expect_that(parse_many("div:nth-child(10)"), equals("Function[Element[div]:nth-child(['10'])]")) expect_that(parse_many(":nth-child(2n+2)"), equals("Function[Element[*]:nth-child(['2', 'n', '+2'])]")) expect_that(parse_many("div:nth-of-type(10)"), equals("Function[Element[div]:nth-of-type(['10'])]")) expect_that(parse_many("div div:nth-of-type(10) .aclass"), equals("CombinedSelector[CombinedSelector[Element[div] Function[Element[div]:nth-of-type(['10'])]] Class[Element[*].aclass]]")) expect_that(parse_many("label:only"), equals("Pseudo[Element[label]:only]")) expect_that(parse_many("a:lang(fr)"), equals("Function[Element[a]:lang(['fr'])]")) expect_that(parse_many('div:contains("foo")'), equals("Function[Element[div]:contains(['foo'])]")) expect_that(parse_many("div#foobar"), equals("Hash[Element[div]#foobar]")) expect_that(parse_many("div:not(div.foo)"), equals("Negation[Element[div]:not(Class[Element[div].foo])]")) # :not() with multiple arguments expect_that(parse_many("div:not(.foo, .bar)"), equals("Negation[Element[div]:not(Class[Element[*].foo], Class[Element[*].bar])]")) expect_that(parse_many("p:not(.foo, #bar)"), equals("Negation[Element[p]:not(Class[Element[*].foo], Hash[Element[*]#bar])]")) expect_that(parse_many(":not(p, span, div)"), equals("Negation[Element[*]:not(Element[p], Element[span], Element[div])]")) expect_that(parse_many("div:not([disabled], .hidden)"), equals("Negation[Element[div]:not(Attrib[Element[*][disabled]], Class[Element[*].hidden])]")) expect_that(parse_many(":not(:hover, :visited, :active)"), equals("Negation[Element[*]:not(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited], Pseudo[Element[*]:active])]")) expect_that(parse_many("a:not(.link, [href], #special)"), equals("Negation[Element[a]:not(Class[Element[*].link], Attrib[Element[*][href]], Hash[Element[*]#special])]")) expect_that(parse_many("div:is(.foo, #bar)"), equals("Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]")) expect_that(parse_many(":is(:hover, :visited)"), equals("Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]")) expect_that(parse_many("div:matches(.foo, #bar)"), equals("Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]")) expect_that(parse_many(":matches(:hover, :visited)"), equals("Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]")) expect_that(parse_many("div:where(.foo, #bar)"), equals("Where[Element[div]:where(Class[Element[*].foo], Hash[Element[*]#bar])]")) expect_that(parse_many(":where(:hover, :visited)"), equals("Where[Element[*]:where(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]")) expect_that(parse_many("div:has(.foo)"), equals("Has[Element[div]:has(Class[Element[*].foo])]")) expect_that(parse_many("ul:has(li)"), equals("Has[Element[ul]:has(Element[li])]")) expect_that(parse_many(":has(p, div)"), equals("Has[Element[*]:has(Element[p], Element[div])]")) expect_that(parse_many("td ~ th"), equals("CombinedSelector[Element[td] ~ Element[th]]")) # handle comments expect_that(parse_many("a /* test */"), equals("Element[a]")) expect_that(parse_many("a/* test */"), equals("Element[a]")) expect_that(parse_many("/* test */ a"), equals("Element[a]")) expect_that(parse_many("/* test */a"), equals("Element[a]")) expect_that(parse_many("a /* test */ b"), equals("CombinedSelector[Element[a] Element[b]]")) expect_that(parse_many("a /* test "), equals("Element[a]")) }) test_that("parsed elements print correctly", { shw <- function(x) trimws(capture.output(parse(x)[[1]]$show())) expect_that(shw("a"), equals("Element[a]")) expect_that(shw(".test"), equals("Class[Element[*].test]")) expect_that(shw(":active"), equals("Pseudo[Element[*]:active]")) expect_that(shw("a:not(.toggle)"), equals("Negation[Element[a]:not(Class[Element[*].toggle])]")) # :not() with multiple arguments print tests expect_that(shw("div:not(.foo, .bar)"), equals("Negation[Element[div]:not(Class[Element[*].foo], Class[Element[*].bar])]")) expect_that(shw("p:not(span, div, a)"), equals("Negation[Element[p]:not(Element[span], Element[div], Element[a])]")) expect_that(shw("[href]"), equals("Attrib[Element[*][href]]")) expect_that(shw("#id"), equals("Hash[Element[*]#id]")) }) test_that("compiled regex parsing functions behave as expected", { m_whitespace <- compile_('[ \t\r\n\f]+') m_number <- compile_('[+-]?(?:[0-9]*\\.[0-9]+|[0-9]+)') m_hash <- compile_(paste0("^#([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+")) m_ident <- compile_(paste0("^([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+")) expect_that(m_whitespace("a b"), equals(match_whitespace("a b"))) expect_that(m_number("a 1"), equals(match_number("a 1"))) expect_that(m_hash("a #test"), equals(match_hash("a #test"))) expect_that(m_ident(" test"), equals(match_ident(" test"))) }) selectr/tests/testthat/test-select-xml2.R0000755000176200001440000002567715107555231020204 0ustar liggesuserscontext("large-test") test_that("selection works correctly on a large barrage of tests", { HTML_IDS <- paste0( c("", " ", " ", "", "
    ", " ", " link", " ", " link", "
      ", "
    1. content
    2. ", "
    3. ", "
      ", "
      ", "
    4. ", "
    5. ", "
    6. ", "
    7. ", "
    8. ", "
    9. ", "
    ", "

    ", " hi there", " guy", " ", " ", " ", " ", " ", " ", " ", "

    ", " ", " ", "
    ", "

    ", "
      ", "
    ", " ", " ", " ", " ", "
    ", "
    ", ""), collapse = "\n") library(xml2) document <- read_xml(HTML_IDS) gt <- GenericTranslator$new() ht <- HTMLTranslator$new() select_ids <- function(selector, html_only) { if (html_only) { xpath <- ht$css_to_xpath(selector) items <- xml_find_all(document, xpath) } else { xpath <- gt$css_to_xpath(selector) items <- xml_find_all(document, xpath) } n <- length(items) if (!n) return(NULL) result <- character(n) for (i in seq_len(n)) { element <- items[[i]] tmp <- xml_attr(element, "id") if (is.na(tmp)) tmp <- "nil" result[i] <- tmp } result } pcss <- function(main, selectors = NULL, html_only = FALSE) { result <- select_ids(main, html_only) if (!is.null(selectors) && length(selectors)) { n <- length(selectors) for (i in seq_len(n)) { tmp_res <- select_ids(selectors[i], html_only = html_only) if (!is.null(result) && !is.null(tmp_res) && !identical(tmp_res, result)) stop("Difference between results of selectors") } } result } all_ids <- pcss('*') expect_that(all_ids[1:6], equals(c('html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div'))) expect_that(tail(all_ids, 1), equals('foobar-span')) expect_that(pcss('div'), equals(c('outer-div', 'li-div', 'foobar-div'))) expect_that(pcss('DIV', html_only = TRUE), equals(c('outer-div', 'li-div', 'foobar-div'))) # case-insensitive in HTML expect_that(pcss('div div'), equals('li-div')) expect_that(pcss('div, div div'), equals(c('outer-div', 'li-div', 'foobar-div'))) expect_that(pcss('a[name]'), equals('name-anchor')) expect_that(pcss('a[NAme]', html_only = TRUE), equals('name-anchor')) # case-insensitive in HTML: expect_that(pcss('a[rel]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[rel="tag"]'), equals('tag-anchor')) expect_that(pcss('a[href*="localhost"]'), equals('tag-anchor')) expect_that(pcss('a[href*=""]'), equals(NULL)) expect_that(pcss('a[href^="http"]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[href^="http:"]'), equals('tag-anchor')) expect_that(pcss('a[href^=""]'), equals(NULL)) expect_that(pcss('a[href$="org"]'), equals('nofollow-anchor')) expect_that(pcss('a[href$=""]'), equals(NULL)) expect_that(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]'), equals('foobar-div')) expect_that(pcss('[foobar~="ab bc"]', c('[foobar~=""]', '[foobar~=" \t"]')), equals(NULL)) expect_that(pcss('div[foobar~="cd"]'), equals(NULL)) expect_that(pcss('*[lang|="En"]', '[lang|="En-us"]'), equals('second-li')) # Attribute values are case sensitive expect_that(pcss('*[lang|="en"]', '[lang|="en-US"]'), equals(NULL)) expect_that(pcss('*[lang|="e"]'), equals(NULL)) # ... :lang() is not. expect_that(pcss(':lang("EN")', '*:lang(en-US)', html_only = TRUE), equals(c('second-li', 'li-div'))) expect_that(pcss(':lang("e")', html_only = TRUE), equals(NULL)) expect_that(pcss('li:nth-child(-n)'), equals(NULL)) expect_that(pcss('li:nth-child(n)'), equals(c('first-li', 'second-li', 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(3)'), equals('third-li')) expect_that(pcss('li:nth-child(10)'), equals(NULL)) expect_that(pcss('li:nth-child(2n)', c('li:nth-child(even)', 'li:nth-child(2n+0)')), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), equals(c('first-li', 'third-li', 'fifth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(2n+4)'), equals(c('fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-child(3n+1)'), equals(c('first-li', 'fourth-li', 'seventh-li'))) expect_that(pcss('li:nth-child(-n+3)'), equals(c('first-li', 'second-li', 'third-li'))) expect_that(pcss('li:nth-child(-2n+4)'), equals(c('second-li', 'fourth-li'))) expect_that(pcss('li:nth-last-child(0)'), equals(NULL)) expect_that(pcss('li:nth-last-child(1)'), equals('seventh-li')) expect_that(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('li:nth-last-child(2n+2)'), equals(c('second-li', 'fourth-li', 'sixth-li'))) expect_that(pcss('ol:first-of-type'), equals('first-ol')) expect_that(pcss('ol:nth-child(1)'), equals(NULL)) expect_that(pcss('ol:nth-of-type(2)'), equals('second-ol')) expect_that(pcss('ol:nth-last-of-type(1)'), equals('second-ol')) expect_that(pcss('span:only-child'), equals('foobar-span')) expect_that(pcss('li div:only-child'), equals('li-div')) expect_that(pcss('div *:only-child'), equals(c('li-div', 'foobar-span'))) #self.assertRaises(ExpressionError, pcss, 'p *:only-of-type') expect_that(pcss('p:only-of-type'), equals('paragraph')) expect_that(pcss('a:empty', 'a:EMpty'), equals('name-anchor')) expect_that(pcss('li:empty'), equals(c('third-li', 'fourth-li', 'fifth-li', 'sixth-li'))) expect_that(pcss(':root', 'html:root'), equals('html')) expect_that(pcss('li:root', '* :root'), equals(NULL)) expect_that(pcss('*:contains("link")', ':CONtains("link")'), equals(c('html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor'))) expect_that(pcss('*:contains("LInk")'), equals(NULL)) # case sensitive expect_that(pcss('*:contains("e")'), equals(c('html', 'nil', 'outer-div', 'first-ol', 'first-li', 'paragraph', 'p-em'))) expect_that(pcss('*:contains("E")'), equals(NULL)) # case-sensitive expect_that(pcss('.a', c('.b', '*.a', 'ol.a')), equals('first-ol')) expect_that(pcss('.c', '*.c'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss('ol *.c', c('ol li.c', 'li ~ li.c', 'ol > li.c')), equals(c('third-li', 'fourth-li'))) expect_that(pcss('#first-li', c('li#first-li', '*#first-li')), equals('first-li')) expect_that(pcss('li div', c('li > div', 'div div')), equals('li-div')) expect_that(pcss('div > div'), equals(NULL)) expect_that(pcss('div>.c', 'div > .c'), equals('first-ol')) expect_that(pcss('div + div'), equals('foobar-div')) expect_that(pcss('a ~ a'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss('a[rel="tag"] ~ a'), equals('nofollow-anchor')) expect_that(pcss('ol#first-ol li:last-child'), equals('seventh-li')) expect_that(pcss('ol#first-ol *:last-child'), equals(c('li-div', 'seventh-li'))) expect_that(pcss('#outer-div:first-child'), equals('outer-div')) expect_that(pcss('#outer-div :first-child'), equals(c('name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-fieldset-disabled', 'area-href'))) expect_that(pcss('a[href]'), equals(c('tag-anchor', 'nofollow-anchor'))) expect_that(pcss(':not(*)'), equals(NULL)) expect_that(pcss('a:not([href])'), equals('name-anchor')) expect_that(pcss('ol :Not(li[class])'), equals(c('first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li'))) expect_that(pcss(':is(#first-li, #second-li)'), equals(c('first-li', 'second-li'))) expect_that(pcss('a:is(#name-anchor, #tag-anchor)'), equals(c('name-anchor', 'tag-anchor'))) expect_that(pcss(':is(.c)'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss(':matches(#first-li, #second-li)'), equals(c('first-li', 'second-li'))) expect_that(pcss('a:matches(#name-anchor, #tag-anchor)'), equals(c('name-anchor', 'tag-anchor'))) expect_that(pcss(':matches(.c)'), equals(c('first-ol', 'third-li', 'fourth-li'))) expect_that(pcss('ol:has(li)'), equals('first-ol')) # :has(.c) matches all ancestors of elements with class 'c' expect_that(pcss(':has(.c)'), equals(c('html', 'nil', 'outer-div', 'first-ol'))) # Invalid characters in XPath element names, should not crash expect_that(pcss('di\ua0v', 'div\\['), equals(NULL)) expect_that(pcss('[h\ua0ref]', '[h\\]ref]'), equals(NULL)) ## HTML-specific expect_that(pcss(':link', html_only = TRUE), equals(c('link-href', 'tag-anchor', 'nofollow-anchor', 'area-href'))) expect_that(pcss(':visited', html_only = TRUE), equals(NULL)) expect_that(pcss(':enabled', html_only = TRUE), equals(c('link-href', 'tag-anchor', 'nofollow-anchor', 'checkbox-unchecked', 'text-checked', 'checkbox-checked', 'area-href'))) expect_that(pcss(':disabled', html_only = TRUE), equals(c('checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', 'checkbox-fieldset-disabled'))) expect_that(pcss(':checked', html_only = TRUE), equals(c('checkbox-checked', 'checkbox-disabled-checked'))) }) selectr/tests/testthat/test-querySelector-default.R0000755000176200001440000000070115107555231022312 0ustar liggesuserscontext("querySelector-default") test_that("querySelector methods present an error on non-XML/xml2 objects", { expect_error(querySelector(list()), "The object given to querySelector.*") expect_error(querySelectorAll(list()), "The object given to querySelector.*") expect_error(querySelectorNS(list()), "The object given to querySelector.*") expect_error(querySelectorAllNS(list()), "The object given to querySelector.*") }) selectr/tests/testthat/test-pseudo.R0000755000176200001440000000614415107555231017330 0ustar liggesuserscontext("pseudo") test_that("parser parses canonical pseudo element expressions", { parse_pseudo <- function(css) { selectors <- lapply(css, function(x) parse(x)) n <- length(selectors) results <- list() for (i in seq_len(n)) { selector <- selectors[[i]] if (is.list(selector)) { results[[i]] <- lapply(selector, function(x) { el <- x pseudo <- x$pseudo_element el$pseudo_element <- NULL list(el$repr(), pseudo) }) } else { pseudo <- selector$pseudo_element selector$pseudo_element <- NULL results[[i]] <- list(list(selector$repr(), pseudo)) } } if (n) results[[seq_len(n)]] else results } parse_one <- function(css) { result <- parse_pseudo(css) #if (length(result) != 1) # stop("More than one result attempting to be parsed.") result[[1]] } expect_that(parse_one("foo"), equals(list("Element[foo]", NULL))) expect_that(parse_one("*"), equals(list("Element[*]", NULL))) expect_that(parse_one(":empty"), equals(list("Pseudo[Element[*]:empty]", NULL))) # Special cases for CSS 2.1 pseudo-elements expect_that(parse_one(":BEfore"), equals(list("Element[*]", "before"))) expect_that(parse_one(":aftER"), equals(list("Element[*]", "after"))) expect_that(parse_one(":First-Line"), equals(list("Element[*]", "first-line"))) expect_that(parse_one(":First-Letter"), equals(list("Element[*]", "first-letter"))) expect_that(parse_one("::befoRE"), equals(list("Element[*]", "before"))) expect_that(parse_one("::AFter"), equals(list("Element[*]", "after"))) expect_that(parse_one("::firsT-linE"), equals(list("Element[*]", "first-line"))) expect_that(parse_one("::firsT-letteR"), equals(list("Element[*]", "first-letter"))) expect_that(parse_one("::Selection"), equals(list("Element[*]", "selection"))) expect_that(parse_one("foo:after"), equals(list("Element[foo]", "after"))) expect_that(parse_one("foo::selection"), equals(list("Element[foo]", "selection"))) expect_that(parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection"), equals(list("CombinedSelector[Hash[Element[lorem]#ipsum] ~ Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]", "selection"))) expect_that(parse_pseudo("foo:before, bar, baz:after"), equals(list(list("Element[foo]", "before"), list("Element[bar]", NULL), list("Element[baz]", "after")))) }) selectr/tests/testthat/test-querySelector-xml2.R0000755000176200001440000001176615107555231021565 0ustar liggesuserscontext("querySelector-xml2") test_that("querySelector returns a single node or NULL", { library(xml2) doc <- read_xml('') p <- function(x) { if (is.null(x)) x else as.character(x) } expect_that(p(querySelector(doc, "a")), equals(p(xml_find_first(doc, "//a")))) expect_that(p(querySelector(doc, "*", prefix = "")), equals(p(xml_find_first(doc, "*")))) expect_that(p(querySelector(doc, "d")), equals(NULL)) expect_that(p(querySelector(doc, "c")), equals(p(xml_find_first(doc, "//c")))) }) test_that("querySelectorAll returns expected nodes", { library(xml2) doc <- read_xml('') p <- function(x) { lapply(x, function(node) as.character(node)) } expect_that(p(querySelectorAll(doc, "a")), equals(p(xml_find_all(doc, "//a")))) expect_that(p(querySelectorAll(doc, "*", prefix = "")), equals(p(xml_find_all(doc, "*")))) expect_that(p(querySelectorAll(doc, "c")), equals(p(xml_find_all(doc, "//c")))) }) test_that("querySelectorAll returns empty list for no match", { library(xml2) doc <- read_xml('') p <- function(x) { lapply(x, function(node) as.character(node)) } expect_that(p(querySelectorAll(doc, "d")), equals(p(xml_find_all(doc, "//d")))) }) test_that("querySelector handles namespaces", { library(xml2) doc <- read_xml('') p <- function(x) { if (is.null(x)) x else as.character(x) } expect_that(querySelector(doc, "circle"), equals(NULL)) expect_that(querySelector(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg")), equals(NULL)) expect_that(p(querySelector(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//svg:circle", ns = c(svg = "http://www.w3.org/2000/svg"))[[1]]))) # now with querySelectorNS expect_that(querySelectorNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg")), equals(NULL)) expect_that(p(querySelectorNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//svg:circle", ns = c(svg = "http://www.w3.org/2000/svg"))[[1]]))) }) test_that("querySelectorAll handles namespaces", { library(xml2) doc <- read_xml('') p <- function(x) { lapply(x, function(node) as.character(node)) } expect_that(p(querySelectorAll(doc, "circle")), equals(p(xml_find_all(doc, "//circle")))) expect_that(p(querySelectorAll(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//circle", ns = c(svg = "http://www.w3.org/2000/svg"))))) expect_that(p(querySelectorAll(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//svg:circle", ns = c(svg = "http://www.w3.org/2000/svg"))))) # now with querySelectorAllNS expect_that(p(querySelectorAllNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//circle", ns = c(svg = "http://www.w3.org/2000/svg"))))) expect_that(p(querySelectorAllNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))), equals(p(xml_find_all(doc, "//svg:circle", ns = c(svg = "http://www.w3.org/2000/svg"))))) }) test_that("querySelector methods handle invalid arguments", { library(xml2) doc <- read_xml('') expect_error(querySelector(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorAll(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a"), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a"), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE) expect_error(querySelectorAllNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE) }) selectr/tests/testthat/test-specificity.R0000755000176200001440000001016615107555231020343 0ustar liggesuserscontext("tokenizer") test_that("parser creates correct specificity", { spec <- function(css) { selectors <- parse(css) if (length(selectors) != 1) stop("More than one result attempting to be parsed.") selectors[[1]]$specificity() } expect_that(spec("*"), equals(rep(0, 3))) expect_that(spec(" foo"), equals(c(0, 0, 1))) expect_that(spec(":empty"), equals(c(0, 1, 0))) expect_that(spec(":before"), equals(c(0, 0, 1))) expect_that(spec("*:before"), equals(c(0, 0, 1))) expect_that(spec(":nth-child(2)"), equals(c(0, 1, 0))) expect_that(spec(".bar"), equals(c(0, 1, 0))) expect_that(spec("[baz]"), equals(c(0, 1, 0))) expect_that(spec('[baz="4"]'), equals(c(0, 1, 0))) expect_that(spec('[baz^="4"]'), equals(c(0, 1, 0))) expect_that(spec("#lipsum"), equals(c(1, 0, 0))) expect_that(spec(":not(*)"), equals(c(0, 0, 0))) expect_that(spec(":not(foo)"), equals(c(0, 0, 1))) expect_that(spec(":not(.foo)"), equals(c(0, 1, 0))) expect_that(spec(":not([foo])"), equals(c(0, 1, 0))) expect_that(spec(":not(:empty)"), equals(c(0, 1, 0))) expect_that(spec(":not(#foo)"), equals(c(1, 0, 0))) # :not() with multiple arguments - takes max specificity per CSS4 expect_that(spec(":not(*, foo)"), equals(c(0, 0, 1))) expect_that(spec(":not(.foo, .bar)"), equals(c(0, 1, 0))) expect_that(spec(":not(.foo, #bar)"), equals(c(1, 0, 0))) expect_that(spec(":not(foo, .bar)"), equals(c(0, 1, 0))) expect_that(spec(":not(foo, #bar)"), equals(c(1, 0, 0))) expect_that(spec(":not(.foo, .bar, .baz)"), equals(c(0, 1, 0))) expect_that(spec(":not(#foo, #bar, #baz)"), equals(c(1, 0, 0))) expect_that(spec(":not(p, span, div)"), equals(c(0, 0, 1))) expect_that(spec(":not([foo], [bar])"), equals(c(0, 1, 0))) expect_that(spec(":not(:hover, :visited)"), equals(c(0, 1, 0))) expect_that(spec(":not(.foo, [bar], #baz)"), equals(c(1, 0, 0))) # :not() with multiple arguments in combinations expect_that(spec("div:not(.foo, #bar)"), equals(c(1, 0, 1))) expect_that(spec("p:not(span, .foo)"), equals(c(0, 1, 1))) expect_that(spec("#main:not(.foo, .bar)"), equals(c(1, 1, 0))) expect_that(spec(".test:not(#foo, [bar])"), equals(c(1, 1, 0))) expect_that(spec(":is(.foo, #bar)"), equals(c(1, 0, 0))) expect_that(spec(":is(:hover, :visited)"), equals(c(0, 1, 0))) expect_that(spec(":matches(.foo, #bar)"), equals(c(1, 0, 0))) expect_that(spec(":matches(:hover, :visited)"), equals(c(0, 1, 0))) # :where() always has zero specificity expect_that(spec(":where(.foo, #bar)"), equals(c(0, 0, 0))) expect_that(spec(":where(:hover, :visited)"), equals(c(0, 0, 0))) expect_that(spec("div:where(.foo, #bar)"), equals(c(0, 0, 1))) expect_that(spec("p:where(span, .foo)"), equals(c(0, 0, 1))) expect_that(spec("#main:where(.foo, .bar)"), equals(c(1, 0, 0))) expect_that(spec(".test:where(#foo, [bar])"), equals(c(0, 1, 0))) # :has() takes the maximum specificity from its argument list expect_that(spec(":has(.foo, #bar)"), equals(c(1, 0, 0))) expect_that(spec(":has(:hover, :visited)"), equals(c(0, 1, 0))) expect_that(spec("div:has(.foo, #bar)"), equals(c(1, 0, 1))) expect_that(spec("p:has(span, .foo)"), equals(c(0, 1, 1))) expect_that(spec("#main:has(.foo, .bar)"), equals(c(1, 1, 0))) expect_that(spec(".test:has(#foo, [bar])"), equals(c(1, 1, 0))) expect_that(spec("foo:empty"), equals(c(0, 1, 1))) expect_that(spec("foo:before"), equals(c(0, 0, 2))) expect_that(spec("foo::before"), equals(c(0, 0, 2))) expect_that(spec("foo:empty::before"), equals(c(0, 1, 2))) # combinations expect_that(spec("* foo"), equals(c(0, 0, 1))) expect_that(spec("foo :empty"), equals(c(0, 1, 1))) expect_that(spec(":empty :before"), equals(c(0, 1, 1))) expect_that(spec(".bar [baz]"), equals(c(0, 2, 0))) expect_that(spec('[baz] [baz="4"]'), equals(c(0, 2, 0))) expect_that(spec('[baz="4"] [baz^="4"]'), equals(c(0, 2, 0))) expect_that(spec('[baz^="4"] #lipsum'), equals(c(1, 1, 0))) }) selectr/tests/testthat/test-shakespeare-XML.R0000755000176200001440000005331115107555231020760 0ustar liggesuserscontext("shakespeare-test-XML") test_that("selection works correctly on a shakespearean document", { HTML_SHAKESPEARE <- paste( "", "", "", "\t", "", "", "\t
    ", "\t
    ", "\t

    As You Like It

    ", "\t
    ", "\t by William Shakespeare", "\t
    ", "\t
    ", "\t

    ACT I, SCENE III. A room in the palace.

    ", "\t
    ", "\t
    Enter CELIA and ROSALIND
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Why, cousin! why, Rosalind! Cupid have mercy! not a word?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Not one to throw at a dog.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    No, thy words are too precious to be cast away upon
    ", "\t
    curs; throw some of them at me; come, lame me with reasons.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    CELIA
    ", "\t
    ", "\t
    But is all this for your father?
    ", "\t
    ", "\t
    ", "\t
    Then there were two cousins laid up; when the one
    ", "\t
    should be lamed with reasons and the other mad
    ", "\t
    without any.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    No, some of it is for my child's father. O, how
    ", "\t
    full of briers is this working-day world!
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    They are but burs, cousin, thrown upon thee in
    ", "\t
    holiday foolery: if we walk not in the trodden
    ", "\t
    paths our very petticoats will catch them.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I could shake them off my coat: these burs are in my heart.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Hem them away.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I would try, if I could cry 'hem' and have him.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Come, come, wrestle with thy affections.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    O, they take the part of a better wrestler than myself!
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    O, a good wish upon you! you will try in time, in
    ", "\t
    despite of a fall. But, turning these jests out of
    ", "\t
    service, let us talk in good earnest: is it
    ", "\t
    possible, on such a sudden, you should fall into so
    ", "\t
    strong a liking with old Sir Rowland's youngest son?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    The duke my father loved his father dearly.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Doth it therefore ensue that you should love his son
    ", "\t
    dearly? By this kind of chase, I should hate him,
    ", "\t
    for my father hated his father dearly; yet I hate
    ", "\t
    not Orlando.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    No, faith, hate him not, for my sake.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Why should I not? doth he not deserve well?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Let me love him for that, and do you love him
    ", "\t
    because I do. Look, here comes the duke.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    With his eyes full of anger.
    ", "\t
    Enter DUKE FREDERICK, with Lords
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Mistress, dispatch you with your safest haste
    ", "\t
    And get you from our court.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Me, uncle?
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    You, cousin
    ", "\t
    Within these ten days if that thou be'st found
    ", "\t
    So near our public court as twenty miles,
    ", "\t
    Thou diest for it.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I do beseech your grace,
    ", "\t
    Let me the knowledge of my fault bear with me:
    ", "\t
    If with myself I hold intelligence
    ", "\t
    Or have acquaintance with mine own desires,
    ", "\t
    If that I do not dream or be not frantic,--
    ", "\t
    As I do trust I am not--then, dear uncle,
    ", "\t
    Never so much as in a thought unborn
    ", "\t
    Did I offend your highness.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Thus do all traitors:
    ", "\t
    If their purgation did consist in words,
    ", "\t
    They are as innocent as grace itself:
    ", "\t
    Let it suffice thee that I trust thee not.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Yet your mistrust cannot make me a traitor:
    ", "\t
    Tell me whereon the likelihood depends.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Thou art thy father's daughter; there's enough.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    So was I when your highness took his dukedom;
    ", "\t
    So was I when your highness banish'd him:
    ", "\t
    Treason is not inherited, my lord;
    ", "\t
    Or, if we did derive it from our friends,
    ", "\t
    What's that to me? my father was no traitor:
    ", "\t
    Then, good my liege, mistake me not so much
    ", "\t
    To think my poverty is treacherous.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Dear sovereign, hear me speak.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    Ay, Celia; we stay'd her for your sake,
    ", "\t
    Else had she with her father ranged along.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    I did not then entreat to have her stay;
    ", "\t
    It was your pleasure and your own remorse:
    ", "\t
    I was too young that time to value her;
    ", "\t
    But now I know her: if she be a traitor,
    ", "\t
    Why so am I; we still have slept together,
    ", "\t
    Rose at an instant, learn'd, play'd, eat together,
    ", "\t
    And wheresoever we went, like Juno's swans,
    ", "\t
    Still we went coupled and inseparable.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    She is too subtle for thee; and her smoothness,
    ", "\t
    Her very silence and her patience
    ", "\t
    Speak to the people, and they pity her.
    ", "\t
    Thou art a fool: she robs thee of thy name;
    ", "\t
    And thou wilt show more bright and seem more virtuous
    ", "\t
    When she is gone. Then open not thy lips:
    ", "\t
    Firm and irrevocable is my doom
    ", "\t
    Which I have pass'd upon her; she is banish'd.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Pronounce that sentence then on me, my liege:
    ", "\t
    I cannot live out of her company.
    ", "\t
    ", "\t
    DUKE FREDERICK
    ", "\t
    ", "\t
    You are a fool. You, niece, provide yourself:
    ", "\t
    If you outstay the time, upon mine honour,
    ", "\t
    And in the greatness of my word, you die.
    ", "\t
    Exeunt DUKE FREDERICK and Lords
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    O my poor Rosalind, whither wilt thou go?
    ", "\t
    Wilt thou change fathers? I will give thee mine.
    ", "\t
    I charge thee, be not thou more grieved than I am.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I have more cause.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Thou hast not, cousin;
    ", "\t
    Prithee be cheerful: know'st thou not, the duke
    ", "\t
    Hath banish'd me, his daughter?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    That he hath not.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    No, hath not? Rosalind lacks then the love
    ", "\t
    Which teacheth thee that thou and I am one:
    ", "\t
    Shall we be sunder'd? shall we part, sweet girl?
    ", "\t
    No: let my father seek another heir.
    ", "\t
    Therefore devise with me how we may fly,
    ", "\t
    Whither to go and what to bear with us;
    ", "\t
    And do not seek to take your change upon you,
    ", "\t
    To bear your griefs yourself and leave me out;
    ", "\t
    For, by this heaven, now at our sorrows pale,
    ", "\t
    Say what thou canst, I'll go along with thee.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Why, whither shall we go?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    To seek my uncle in the forest of Arden.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Alas, what danger will it be to us,
    ", "\t
    Maids as we are, to travel forth so far!
    ", "\t
    Beauty provoketh thieves sooner than gold.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    I'll put myself in poor and mean attire
    ", "\t
    And with a kind of umber smirch my face;
    ", "\t
    The like do you: so shall we pass along
    ", "\t
    And never stir assailants.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    Were it not better,
    ", "\t
    Because that I am more than common tall,
    ", "\t
    That I did suit me all points like a man?
    ", "\t
    A gallant curtle-axe upon my thigh,
    ", "\t
    A boar-spear in my hand; and--in my heart
    ", "\t
    Lie there what hidden woman's fear there will--
    ", "\t
    We'll have a swashing and a martial outside,
    ", "\t
    As many other mannish cowards have
    ", "\t
    That do outface it with their semblances.
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    What shall I call thee when thou art a man?
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    I'll have no worse a name than Jove's own page;
    ", "\t
    And therefore look you call me Ganymede.
    ", "\t
    But what will you be call'd?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    Something that hath a reference to my state
    ", "\t
    No longer Celia, but Aliena.
    ", "\t
    ", "\t
    ROSALIND
    ", "\t
    ", "\t
    But, cousin, what if we assay'd to steal
    ", "\t
    The clownish fool out of your father's court?
    ", "\t
    Would he not be a comfort to our travel?
    ", "\t
    ", "\t
    CELIA
    ", "\t
    ", "\t
    He'll go along o'er the wide world with me;
    ", "\t
    Leave me alone to woo him. Let's away,
    ", "\t
    And get our jewels and our wealth together,
    ", "\t
    Devise the fittest time and safest way
    ", "\t
    To hide us from pursuit that will be made
    ", "\t
    After my flight. Now go we in content
    ", "\t
    To liberty and not to banishment.
    ", "\t
    Exeunt
    ", "\t
    ", "\t
    ", "\t
    ", "
    ", "", "", sep = "\n") library(XML) document <- xmlRoot(htmlParse(HTML_SHAKESPEARE)) body <- getNodeSet(document, "//body")[[1]] gt <- GenericTranslator$new() count <- function(selector) { xpath <- gt$css_to_xpath(selector) results <- getNodeSet(body, xpath) length(results) } # Data borrowed from http://mootools.net/slickspeed/ ## Changed from original; probably because I'm only ## searching the body. #expect_that(count('*'), equals(252)) expect_that(count('*'), equals(246)) expect_that(count('div:contains(CELIA)'), equals(26)) expect_that(count('div:only-child'), equals(22)) # ? expect_that(count('div:nth-child(even)'), equals(106)) expect_that(count('div:nth-child(2n)'), equals(106)) expect_that(count('div:nth-child(odd)'), equals(137)) expect_that(count('div:nth-child(2n+1)'), equals(137)) expect_that(count('div:nth-child(n)'), equals(243)) expect_that(count('div:last-child'), equals(53)) expect_that(count('div:first-child'), equals(51)) expect_that(count('div > div'), equals(242)) expect_that(count('div + div'), equals(190)) expect_that(count('div ~ div'), equals(190)) expect_that(count('body'), equals(1)) expect_that(count('body div'), equals(243)) expect_that(count('div'), equals(243)) expect_that(count('div div'), equals(242)) expect_that(count('div div div'), equals(241)) expect_that(count('div, div, div'), equals(243)) expect_that(count('div, a, span'), equals(243)) expect_that(count('.dialog'), equals(51)) expect_that(count('div.dialog'), equals(51)) expect_that(count('div .dialog'), equals(51)) expect_that(count('div.character, div.dialog'), equals(99)) expect_that(count('div.direction.dialog'), equals(0)) expect_that(count('div.dialog.direction'), equals(0)) expect_that(count('div.dialog.scene'), equals(1)) expect_that(count('div.scene.scene'), equals(1)) expect_that(count('div.scene .scene'), equals(0)) expect_that(count('div.direction .dialog '), equals(0)) expect_that(count('div .dialog .direction'), equals(4)) expect_that(count('div.dialog .dialog .direction'), equals(4)) expect_that(count('#speech5'), equals(1)) expect_that(count('div#speech5'), equals(1)) expect_that(count('div #speech5'), equals(1)) expect_that(count('div.scene div.dialog'), equals(49)) expect_that(count('div#scene1 div.dialog div'), equals(142)) expect_that(count('#scene1 #speech1'), equals(1)) expect_that(count('div[class]'), equals(103)) expect_that(count('div[class=dialog]'), equals(50)) expect_that(count('div[class^=dia]'), equals(51)) expect_that(count('div[class$=log]'), equals(50)) expect_that(count('div[class*=sce]'), equals(1)) expect_that(count('div[class|=dialog]'), equals(50)) # ? Seems right expect_that(count('div[class!=madeup]'), equals(243)) # ? Seems right expect_that(count('div[class~=dialog]'), equals(51)) # ? Seems right }) selectr/tests/testthat/test-parse-errors.R0000755000176200001440000001022015107555231020443 0ustar liggesuserscontext("parse-errors") test_that("useful errors are returned", { get_error <- function(css) { parse(css) NULL } expect_that(get_error("attributes(href)/html/body/a"), throws_error("Unexpected character")) expect_that(get_error("attributes(href)"), throws_error("Expected selector")) expect_that(get_error("html/body/a"), throws_error("Unexpected character")) expect_that(get_error(" "), throws_error("Expected selector, got ")) expect_that(get_error("div, "), throws_error("Expected selector, got ")) expect_that(get_error(" , div"), throws_error("Expected selector, got ")) expect_that(get_error("p, , div"), throws_error("Expected selector, got ")) expect_that(get_error("div > "), throws_error("Expected selector, got ")) expect_that(get_error(" > div"), throws_error("Expected selector, got ' at 3>")) expect_that(get_error("foo|#bar"), throws_error("Expected ident or '\\*'")) expect_that(get_error("#.foo"), throws_error("Expected selector, got ")) expect_that(get_error(".#foo"), throws_error("Expected ident, got ")) expect_that(get_error(":#foo"), throws_error("Expected ident, got ")) expect_that(get_error("[*]"), throws_error("Expected '|'")) expect_that(get_error("[foo|]"), throws_error("Expected ident, got ")) expect_that(get_error("[#]"), throws_error("Expected ident or '\\*', got ")) expect_that(get_error("[foo=#]"), throws_error("Expected string or ident, got ")) expect_that(get_error(":nth-child()"), throws_error("Expected at least one argument, got ")) expect_that(get_error("[href]a"), throws_error("Expected selector, got ")) expect_that(get_error("[rel=stylesheet]"), equals(NULL)) expect_that(get_error("[rel:stylesheet]"), throws_error("Operator expected, got ")) expect_that(get_error("[rel=stylesheet"), throws_error("Expected ']', got ")) expect_that(get_error(":lang(fr)"), equals(NULL)) expect_that(get_error(":lang(fr"), throws_error("Expected an argument, got ")) expect_that(get_error(':contains("foo'), throws_error("Unclosed string at 11")) expect_that(get_error(':contains("foo\\"'), throws_error("Unclosed string at 11")) expect_that(get_error("foo!"), throws_error("Unexpected character")) expect_that(get_error("a:not(b;)"), throws_error("Unexpected character")) # Mis-placed pseudo-elements expect_that(get_error("a:before:empty"), throws_error("Got pseudo-element ::before not at the end of a selector")) expect_that(get_error("li:before a"), throws_error("Got pseudo-element ::before not at the end of a selector")) expect_that(get_error(":not(:before)"), throws_error("Got pseudo-element ::before inside :not\\(\\) at 13")) expect_that(get_error(":not(a,)"), throws_error("Expected ')', got .*")) expect_that(get_error(":not(:not(a))"), throws_error("Got nested :not()")) expect_that(get_error(":is(:before)"), throws_error("Got pseudo-element ::before inside :is\\(\\) at 12")) expect_that(get_error(":is(a b)"), throws_error("Expected an argument, got ")) expect_that(get_error(":matches(:before)"), throws_error("Got pseudo-element ::before inside :matches\\(\\) at 17")) expect_that(get_error(":matches(a b)"), throws_error("Expected an argument, got ")) }) selectr/tests/testthat/test-has.R0000755000176200001440000001764115107555231016610 0ustar liggesuserscontext(":has() pseudo-class") test_that(":has() generates correct XPath", { xpath <- function(css) { css_to_xpath(css, prefix = "") } # Simple :has() with element expect_that(xpath("div:has(p)"), equals("div[(.//*[(name() = 'p')])]")) # :has() with class selector expect_that(xpath("div:has(.foo)"), equals("div[(.//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' foo '))])]")) # :has() with ID selector expect_that(xpath("section:has(#main)"), equals("section[(.//*[(@id = 'main')])]")) # :has() with attribute selector expect_that(xpath("form:has([required])"), equals("form[(.//*[(@required)])]")) # :has() with multiple selectors (OR logic) expect_that(xpath("div:has(p, span)"), equals("div[(.//*[(name() = 'p')] | .//*[(name() = 'span')])]")) # Multiple :has() selectors expect_that(xpath("div:has(p):has(span)"), equals("div[(.//*[(name() = 'p')]) and (.//*[(name() = 'span')])]")) # :has() on universal selector expect_that(xpath("*:has(img)"), equals("*[(.//*[(name() = 'img')])]")) # Complex: :has() with class on descendant expect_that(xpath("section:has(div.content)"), equals("section[(.//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')) and (name() = 'div')])]")) }) test_that(":has() works correctly with XML documents", { library(XML) # Create test document html <- paste0( '', '
    ', '
    ', '

    Paragraph in section 1

    ', '
    ', '
    ', '
    ', ' ', '
    ', '
    ', '
    ', '

    Title

    ', '
    ', '
    ', '
    ', '

    Article paragraph

    ', '
    ', '
    ' ) doc <- xmlRoot(xmlParse(html)) # Helper to get IDs get_ids <- function(css) { results <- querySelectorAll(doc, css) sapply(results, function(x) xmlGetAttr(x, "id")) } # Section containing a p element expect_that(get_ids("section:has(p)"), equals("s1")) # Section containing a div expect_that(get_ids("section:has(div)"), equals(c("s1", "s2"))) # Section containing an h1 expect_that(get_ids("section:has(h1)"), equals("s3")) # Section with div.content expect_that(get_ids("section:has(div.content)"), equals("s1")) # Section with div.sidebar expect_that(get_ids("section:has(div.sidebar)"), equals("s2")) # Any element containing a p # Note: XML returns root element too since it's also ancestor ids <- get_ids(":has(p)") expect_that("s1" %in% ids && "a1" %in% ids, equals(TRUE)) # Multiple selectors: section with p OR span expect_that(get_ids("section:has(p, span)"), equals(c("s1", "s2"))) # Chained :has() - section with both div and p expect_that(get_ids("section:has(div):has(p)"), equals("s1")) # :has() should not match the element itself expect_that(length(querySelectorAll(doc, "p:has(p)")), equals(0)) }) test_that(":has() works correctly with xml2 documents", { library(xml2) # Create test document html <- paste0( '', '
    ', '
    ', '

    Paragraph in section 1

    ', '
    ', '
    ', '
    ', ' ', '
    ', '
    ', '
    ', '

    Title

    ', '
    ', '
    ', '
    ', '

    Article paragraph

    ', '
    ', '
    ' ) doc <- read_xml(html) # Helper to get IDs get_ids <- function(css) { results <- querySelectorAll(doc, css) xml_attr(results, "id") } # Section containing a p element expect_that(get_ids("section:has(p)"), equals("s1")) # Section containing a div expect_that(get_ids("section:has(div)"), equals(c("s1", "s2"))) # Section containing an h1 expect_that(get_ids("section:has(h1)"), equals("s3")) # Section with div.content expect_that(get_ids("section:has(div.content)"), equals("s1")) # Section with div.sidebar expect_that(get_ids("section:has(div.sidebar)"), equals("s2")) # Any element containing a p # Note: returns all ancestors including root ids <- get_ids(":has(p)") expect_that("s1" %in% ids && "a1" %in% ids, equals(TRUE)) # Multiple selectors: section with p OR span expect_that(get_ids("section:has(p, span)"), equals(c("s1", "s2"))) # Chained :has() - section with both div and p expect_that(get_ids("section:has(div):has(p)"), equals("s1")) # :has() should not match the element itself expect_that(length(querySelectorAll(doc, "p:has(p)")), equals(0)) }) test_that(":has() handles edge cases correctly", { library(XML) # Empty elements html1 <- '

    ' doc1 <- xmlRoot(xmlParse(html1)) # Only d2 has a p descendant result1 <- querySelectorAll(doc1, "div:has(p)") expect_that(length(result1), equals(1)) expect_that(xmlGetAttr(result1[[1]], "id"), equals("d2")) # Nested :has() html2 <- paste0( '', '
    ', '
    ', '
    ', '

    Text

    ', '
    ', '
    ', '
    ', '
    ', '
    ', '

    Text

    ', '
    ', '
    ', '
    ' ) doc2 <- xmlRoot(xmlParse(html2)) # Section containing article with div result2 <- querySelectorAll(doc2, "section:has(article:has(div))") expect_that(length(result2), equals(1)) expect_that(xmlGetAttr(result2[[1]], "id"), equals("s1")) # Section containing p.highlight result3 <- querySelectorAll(doc2, "section:has(p.highlight)") expect_that(length(result3), equals(1)) expect_that(xmlGetAttr(result3[[1]], "id"), equals("s1")) # :has() with universal selector html3 <- '
    ' doc3 <- xmlRoot(xmlParse(html3)) # Div that has any descendant result4 <- querySelectorAll(doc3, "div:has(*)") expect_that(length(result4), equals(1)) expect_that(xmlGetAttr(result4[[1]], "id"), equals("d1")) }) test_that(":has() works with querySelector (returns first match)", { library(xml2) html <- paste0( '', '

    First

    ', '

    Second

    ', '
    Third
    ', '
    ' ) doc <- read_xml(html) # Should return first section with p result <- querySelector(doc, "section:has(p)") expect_that(xml_attr(result, "id"), equals("s1")) # Should return NULL when no match result_none <- querySelector(doc, "section:has(article)") expect_that(result_none, equals(NULL)) }) selectr/tests/testthat/test-nth-child-of.R0000755000176200001440000000752215107555231020306 0ustar liggesuserscontext("nth-child with 'of S' selector list (CSS Level 4)") test_that(":nth-child(n of S) parses correctly", { parsed <- selectr:::parse("div:nth-child(2 of .foo)") expect_equal(length(parsed), 1) fn_obj <- parsed[[1]]$parsed_tree expect_equal(class(fn_obj)[1], "Function") expect_equal(fn_obj$name, "nth-child") expect_false(is.null(fn_obj$selector_list)) expect_equal(length(fn_obj$selector_list), 1) }) test_that(":nth-last-child(n of S) parses correctly", { parsed <- selectr:::parse("li:nth-last-child(3 of .important)") expect_equal(length(parsed), 1) fn_obj <- parsed[[1]]$parsed_tree expect_equal(class(fn_obj)[1], "Function") expect_equal(fn_obj$name, "nth-last-child") expect_false(is.null(fn_obj$selector_list)) expect_equal(length(fn_obj$selector_list), 1) }) test_that(":nth-child(n of S) with multiple selectors parses correctly", { parsed <- selectr:::parse("div:nth-child(2 of .foo, .bar)") expect_equal(length(parsed), 1) fn_obj <- parsed[[1]]$parsed_tree expect_equal(fn_obj$name, "nth-child") expect_equal(length(fn_obj$selector_list), 2) }) test_that(":nth-child(n of S) generates correct XPath", { xpath <- css_to_xpath("div:nth-child(2 of .foo)") # Should count siblings matching .foo expect_true(grepl("count\\(preceding-sibling::\\*\\[", xpath)) expect_true(grepl("@class", xpath)) expect_true(grepl("foo", xpath)) # Should also check that current element matches .foo expect_true(grepl("and.*@class", xpath)) }) test_that(":nth-last-child(n of S) generates correct XPath", { xpath <- css_to_xpath("li:nth-last-child(3 of .important)") # Should count following siblings matching .important expect_true(grepl("count\\(following-sibling::\\*\\[", xpath)) expect_true(grepl("@class", xpath)) expect_true(grepl("important", xpath)) # Should also check that current element matches .important expect_true(grepl("and.*@class", xpath)) }) test_that(":nth-child(An+B of S) with formula works", { xpath <- css_to_xpath("p:nth-child(2n+1 of .highlight)") # Should have modulo operation for 2n+1 expect_true(grepl("mod", xpath)) # Should filter by .highlight expect_true(grepl("highlight", xpath)) }) test_that(":nth-child(n of S1, S2) with multiple selectors generates OR condition", { xpath <- css_to_xpath("div:nth-child(1 of .foo, .bar)") # Should have both selectors expect_true(grepl("foo", xpath)) expect_true(grepl("bar", xpath)) # Should have OR condition expect_true(grepl("or", xpath)) }) test_that("Regular :nth-child without 'of' still works", { xpath1 <- css_to_xpath("div:nth-child(2)") xpath2 <- css_to_xpath("div:nth-last-child(3)") # Should not have class checks expect_false(grepl("@class", xpath1)) expect_false(grepl("@class", xpath2)) # Should have simple counting expect_true(grepl("count\\(preceding-sibling::\\*\\)", xpath1)) expect_true(grepl("count\\(following-sibling::\\*\\)", xpath2)) }) test_that(":nth-child(odd of S) works", { xpath <- css_to_xpath("div:nth-child(odd of .item)") # Should have modulo 2 expect_true(grepl("mod 2", xpath)) # Should filter by .item expect_true(grepl("item", xpath)) }) test_that(":nth-child(even of S) works", { xpath <- css_to_xpath("div:nth-child(even of .item)") # Should have modulo 2 expect_true(grepl("mod 2", xpath)) # Should filter by .item expect_true(grepl("item", xpath)) }) test_that(":nth-child with complex selector works", { xpath <- css_to_xpath("div:nth-child(2 of div.foo)") # Should check element name expect_true(grepl("name\\(\\) = 'div'", xpath)) # Should check class expect_true(grepl("foo", xpath)) }) selectr/tests/test-all.R0000755000176200001440000000007215107010654014726 0ustar liggesuserslibrary(testthat) library(selectr) test_check("selectr") selectr/.Rinstignore0000755000176200001440000000001215106553322014215 0ustar liggesusersMakefile selectr/MD50000644000176200001440000000442315120470422012223 0ustar liggesusersbafc12bd205aaeaf8c70ef489e183d5c *DESCRIPTION 3f1e4e2a111ed275ca25012cbd658b79 *LICENCE 307d573f78e96833a184da040a2b6d5e *NAMESPACE 530048c59d463df62909d02f460f15ba *R/main.R 9b2fbee07228b80976f856f508526f0f *R/parser.R de444ce30fd94501e7db948553419192 *R/xpath.R 396b43f4014677c2ad7a6320ffebbf41 *README.md 2dfbd9071b69ea0bec93d05c2ad8ee8a *inst/CITATION 6d693ae639e4e06274d6331c58c6087d *inst/NEWS.Rd 944df9b3445e090bb4ffa83c7312a87c *inst/demos/svg-mathml.svg c1681851d43f42c568c585e8a8a1cb52 *man/css_to_xpath.Rd e3d86a86846ca363b6bba9e24e95aa40 *man/querySelectorAll.Rd b697c111a02fec0b26c003fed8a53a96 *tests/test-all.R cec73ba8dffbd2ae44fb886f8015b85d *tests/testthat/test-adjacent-sibling.R 0bebbd1c7d33477f327ef17d109716c1 *tests/testthat/test-has.R 8f6dac3236d7a3f671a17f11f906202f *tests/testthat/test-main.R b27989f32bb62b10250558e7f0a1311d *tests/testthat/test-method-registration.R 7f9b1be32964dae08551c1d6879cf9e6 *tests/testthat/test-nth-child-of.R 48b57e0bdf1c5d82996042995d9b8d2c *tests/testthat/test-nth-child.R b891ff66e9a9f31367ccb11cbbd912ef *tests/testthat/test-parse-errors.R d288b47abadf6701be264072dfbda0b6 *tests/testthat/test-parser.R 3f891d698c3692a2e430642aa98aec40 *tests/testthat/test-pseudo.R c8833210c9c207c915543f229053c910 *tests/testthat/test-querySelector-XML.R 4b7c0b8ba712fdb819f18c1dfefc49ff *tests/testthat/test-querySelector-default.R ce2f6d685fec146b230c97ebccef235a *tests/testthat/test-querySelector-xml2.R 7dccc672e6ef4b611506f1d455c92ce1 *tests/testthat/test-quoting.R b4d30af3437df638dde5a93afab3d931 *tests/testthat/test-select-XML.R 404f84077b9fd9200a2bac77c1f94a4f *tests/testthat/test-select-xml2.R 961c5d64cb805af05b5476d2810fba0d *tests/testthat/test-series.R 4b0cd1a924953d1877bce40a1d75714f *tests/testthat/test-shakespeare-XML.R b31287d4d667b0052fc2d6b29e8e6ac2 *tests/testthat/test-shakespeare-xml2.R ae39299f1f36ccde24f059f10e7f1a61 *tests/testthat/test-specificity.R d1bca8335ea93159c1d4461f8573ebd3 *tests/testthat/test-tokenizer.R fc52232c5bad2bc580289894652ba22b *tests/testthat/test-translation.R f1c86041f6b35ad1d55789ef6437daac *tests/testthat/test-where.R 310eeb35bd29b394b5e89eabac5bb379 *tests/testthat/test-xmllang-XML.R dbd3cfbd807024584632eb78d041ecda *tests/testthat/test-xmllang-xml2.R ed31e3629ea9665d2a1a2924981d0b9b *tests/testthat/test-xpath.R selectr/R/0000755000176200001440000000000015110034155012107 5ustar liggesusersselectr/R/main.R0000644000176200001440000002050215107276636013177 0ustar liggesuserscss_to_xpath <- function(selector, prefix = "descendant-or-self::", translator = "generic") { if (missing(selector) || is.null(selector)) stop("A valid selector (character vector) must be provided.") if (!is.character(selector)) stop("The 'selector' argument must be a character vector") if (!is.character(prefix)) stop("The 'prefix' argument must be a character vector") if (!is.character(translator)) stop("The 'translator' argument must be a character vector") if (anyNA(selector)) { warning("NA values were found in the 'selector' argument, they have been removed") selector <- selector[!is.na(selector)] } if (anyNA(prefix)) { warning("NA values were found in the 'prefix' argument, they have been removed") prefix <- prefix[!is.na(prefix)] } if (anyNA(translator)) { warning("NA values were found in the 'translator' argument, they have been removed") translator <- translator[!is.na(translator)] } zeroLengthArgs <- character(0) if (!length(selector)) zeroLengthArgs <- c(zeroLengthArgs, "selector") if (!length(prefix)) zeroLengthArgs <- c(zeroLengthArgs, "prefix") if (!length(translator)) zeroLengthArgs <- c(zeroLengthArgs, "translator") if (length(zeroLengthArgs)) { plural <- if (length(zeroLengthArgs) > 1) "s" else "" stop("Zero length character vector found for the following argument", plural, ": ", paste0(zeroLengthArgs, collapse = ", ")) } translator <- sapply(translator, function(tran) { match.arg(tolower(tran), c("generic", "html", "xhtml")) }) maxArgLength <- max(length(selector), length(prefix), length(translator)) selector <- rep(selector, length.out = maxArgLength) prefix <- rep(prefix, length.out = maxArgLength) translator <- rep(translator, length.out = maxArgLength) results <- character(maxArgLength) for (i in seq_len(maxArgLength)) { sel <- selector[i] pref <- prefix[i] trans <- translator[i] tran <- if (trans == "html") { HTMLTranslator$new() } else if (trans == "xhtml") { HTMLTranslator$new(xhtml = TRUE) } else { GenericTranslator$new() } results[i] <- tran$css_to_xpath(sel, pref) } as.character(results) } querySelector <- function(doc, selector, ns = NULL, ...) { UseMethod("querySelector", doc) } querySelectorAll <- function(doc, selector, ns = NULL, ...) { UseMethod("querySelectorAll", doc) } querySelectorNS <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { UseMethod("querySelectorNS", doc) } querySelectorAllNS <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { UseMethod("querySelectorAllNS", doc) } querySelector.default <- function(doc, selector, ns = NULL, ...) { stop("The object given to querySelector() is not an 'XML' or 'xml2' document or node.") } querySelectorAll.default <- function(doc, selector, ns = NULL, ...) { stop("The object given to querySelectorAll() is not an 'XML' or 'xml2' document or node.") } querySelectorNS.default <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { stop("The object given to querySelectorNS() is not an 'XML' or 'xml2' document or node.") } querySelectorAllNS.default <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { stop("The object given to querySelectorAllNS() is not an 'XML' or 'xml2' document or node.") } querySelector.XMLInternalNode <- querySelector.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") results <- querySelectorAll(doc, selector, ns, ...) if (length(results)) results[[1]] else NULL } querySelectorAll.XMLInternalNode <- function(doc, selector, ns = NULL, ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") xpath <- css_to_xpath(selector, ...) if (!is.null(ns)) { ns <- formatNS(ns) XML::getNodeSet(doc, xpath, ns) } else { XML::getNodeSet(doc, xpath) } } querySelectorAll.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") doc <- XML::xmlRoot(doc) querySelectorAll(doc, selector, ns, ...) } querySelectorNS.XMLInternalNode <- querySelectorNS.XMLInternalDocument <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (missing(ns) || !length(ns)) stop("A namespace must be provided.") ns <- formatNS(ns) prefix <- formatNSPrefix(ns, prefix) querySelector(doc, selector, ns, prefix = prefix, ...) } querySelectorAllNS.XMLInternalNode <- querySelectorAllNS.XMLInternalDocument <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (missing(ns) || !length(ns)) stop("A namespace must be provided.") ns <- formatNS(ns) prefix <- formatNSPrefix(ns, prefix) querySelectorAll(doc, selector, ns, prefix = prefix, ...) } querySelector.xml_node <- function(doc, selector, ns = NULL, ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (is.null(ns)) ns <- xml2::xml_ns(doc) validateNS(ns) xpath <- css_to_xpath(selector, ...) result <- xml2::xml_find_first(doc, xpath, ns) if (length(result)) result else NULL } querySelectorAll.xml_node <- function(doc, selector, ns = NULL, ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (is.null(ns)) ns <- xml2::xml_ns(doc) validateNS(ns) xpath <- css_to_xpath(selector, ...) xml2::xml_find_all(doc, xpath, ns) } querySelectorNS.xml_node <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (missing(ns) || is.null(ns) || !length(ns)) stop("A namespace must be provided.") ns <- formatNS(ns) prefix <- formatNSPrefix(ns, prefix) querySelector(doc, selector, ns, prefix = prefix, ...) } querySelectorAllNS.xml_node <- function(doc, selector, ns, prefix = "descendant-or-self::", ...) { if (missing(selector)) stop("A valid selector (character vector) must be provided.") if (missing(ns) || is.null(ns) || !length(ns)) stop("A namespace must be provided.") ns <- formatNS(ns) prefix <- formatNSPrefix(ns, prefix) querySelectorAll(doc, selector, ns, prefix = prefix, ...) } # Takes a named vector or list and gives a named vector back formatNS <- function(ns) { if (is.null(ns)) return(NULL) if (!is.list(ns) && !is.character(ns)) stop("A namespace object must be either a named list or a named character vector.") nsNames <- names(ns) if (is.null(nsNames) || anyNA(nsNames) || !all(nzchar(nsNames))) stop("The namespace object either missing some or all names for each element in its collection.") ns <- unlist(ns) if (!is.character(ns)) stop("The values in the namespace object must be a character vector.") names(ns) <- nsNames ns } formatNSPrefix <- function(ns, prefix) { filters <- paste0("//", names(ns), ":*", collapse = "|") prefix <- paste0("(", filters, ")/", prefix) prefix } # Checks whether a vector is a valid character vector for namespaces validateNS <- function(ns) { if (!is.character(ns)) stop("A namespace object must be comprised of characters") nsNames <- names(ns) if (is.null(nsNames) || anyNA(nsNames)) stop("The namespace object either missing some or all names for each element in its collection.") } selectr/R/parser.R0000755000176200001440000010531015120447332013537 0ustar liggesusersescape <- paste0("\\\\([0-9a-f]{1,6})(\r\n|[ \n\r\t\f])?", "|\\\\[^\n\r\f0-9a-f]") nonascii <- "[^\1-\177]" hash_re <- "([_a-z0-9-]|\\\\([0-9a-f]{1,6})(\r\n|[ \n\r\t\f])?|[^\1-\177])" TokenMacros <- list(unicode_escape = "\\\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?", escape = escape, string_escape = paste0("\\\\(?:\n|\r\n|\r|\f)|", escape), nonascii = nonascii, nmchar = paste0("([_a-z0-9-]|", escape, "|", nonascii, ")"), nmstart = paste0("[_a-z]|", escape, "|", nonascii)) Selector <- R6Class("Selector", public = list( parsed_tree = NULL, pseudo_element = NULL, initialize = function(tree, pseudo_element = NULL) { self$parsed_tree <- tree if (!is.null(pseudo_element)) self$pseudo_element <- tolower(pseudo_element) }, repr = function() { pseudo_el <- if (is.null(self$pseudo_element)) "" else paste0("::", self$pseudo_element) paste0(self$parsed_tree$repr(), pseudo_el) }, specificity = function() { specs <- self$parsed_tree$specificity() if (!is.null(self$pseudo_element)) specs[3] <- specs[3] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) ClassSelector <- R6Class("ClassSelector", public = list( selector = NULL, class_name = NULL, initialize = function(selector, class_name) { self$selector <- selector self$class_name <- class_name }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ".", self$class_name, "]") }, specificity = function() { specs <- self$selector$specificity() specs[2] <- specs[2] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Function <- R6Class("Function", public = list( selector = NULL, name = NULL, arguments = NULL, selector_list = NULL, initialize = function(selector, name, arguments, selector_list = NULL) { self$selector <- selector self$name <- tolower(name) self$arguments <- arguments self$selector_list <- selector_list }, repr = function() { token_values <- lapply(self$arguments, function(token) paste0("'", token$value, "'")) token_values <- paste0(unlist(token_values), collapse = ", ") token_values <- paste0("[", token_values, "]") selector_list_repr <- "" if (!is.null(self$selector_list)) { selector_list_repr <- paste0( " of ", paste0(sapply(self$selector_list, function(s) s$repr()), collapse = ", ") ) } paste0( first_class_name(self), "[", self$selector$repr(), ":", self$name, "(", token_values, selector_list_repr, ")]") }, argument_types = function() { token_types <- lapply(self$arguments, function(token) token$type) unlist(token_types) }, specificity = function() { specs <- self$selector$specificity() specs[2] <- specs[2] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Pseudo <- R6Class("Pseudo", public = list( selector = NULL, ident = NULL, initialize = function(selector, ident) { self$selector <- selector self$ident <- tolower(ident) }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ":", self$ident, "]") }, specificity = function() { specs <- self$selector$specificity() specs[2] <- specs[2] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Negation <- R6Class("Negation", public = list( selector = NULL, selector_list = NULL, initialize = function(selector, selector_list) { self$selector <- selector self$selector_list <- selector_list }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ":not(", paste0( sapply(self$selector_list, function(s) s$repr()), collapse = ", " ), ")]") }, specificity = function() { specs <- self$selector$specificity() # according to CSS Selectors Level 4, :not() takes the specificity of # its most specific argument sub_specs <- sapply(self$selector_list, function(s) s$specificity()) # sapply returns a matrix with each column being a selector's specificity if (is.matrix(sub_specs)) { # get rows as selectors sub_specs <- t(sub_specs) if (nrow(sub_specs) > 1) { # sort by specificity (id, class, element) descending sub_specs <- sub_specs[order(-sub_specs[, 1], -sub_specs[, 2], -sub_specs[, 3]), , drop = FALSE] } specs + sub_specs[1, ] } else { # single value case specs + sub_specs } }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Matching <- R6Class("Matching", public = list( selector = NULL, selector_list = NULL, initialize = function(selector, selector_list) { self$selector <- selector self$selector_list <- selector_list }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ":is(", paste0( sapply(self$selector_list, function(s) s$repr()), collapse = ", " ), ")]" ) }, specificity = function() { specs <- sapply(self$selector_list, function(s) s$specificity()) specs <- t(specs) specs <- specs[order(-specs[, 1], -specs[, 2], -specs[, 3]), ] specs[1, ] }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Where <- R6Class("Where", public = list( selector = NULL, selector_list = NULL, initialize = function(selector, selector_list) { self$selector <- selector self$selector_list <- selector_list }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ":where(", paste0( sapply(self$selector_list, function(s) s$repr()), collapse = ", " ), ")]" ) }, specificity = function() { # :where() always has zero specificity self$selector$specificity() }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Has <- R6Class("Has", public = list( selector = NULL, selector_list = NULL, initialize = function(selector, selector_list) { self$selector <- selector self$selector_list <- selector_list }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), ":has(", paste0( sapply(self$selector_list, function(s) s$repr()), collapse = ", " ), ")]" ) }, specificity = function() { specs <- sapply(self$selector_list, function(s) s$specificity()) specs <- t(specs) specs <- specs[order(-specs[, 1], -specs[, 2], -specs[, 3]), ] # Add the maximum specificity from the selector list to the base selector base_specs <- self$selector$specificity() base_specs + specs[1, ] }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Attrib <- R6Class("Attrib", public = list( selector = NULL, namespace = NULL, attrib = NULL, operator = NULL, value = NULL, initialize = function(selector, namespace, attrib, operator, value) { self$selector <- selector self$namespace <- namespace self$attrib <- attrib self$operator <- operator self$value <- value }, repr = function() { attr <- if (!is.null(self$namespace)) paste0(self$namespace, "|", self$attrib) else self$attrib if (self$operator == "exists") paste0( first_class_name(self), "[", self$selector$repr(), "[", attr, "]]") else paste0( first_class_name(self), "[", self$selector$repr(), "[", attr, " ", self$operator, " '", self$value, "']]") }, specificity = function() { specs <- self$selector$specificity() specs[2] <- specs[2] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Element <- R6Class("Element", public = list( namespace = NULL, element = NULL, initialize = function(namespace = NULL, element = NULL) { self$namespace <- namespace self$element <- element }, repr = function() { el <- if (!is.null(self$element)) self$element else "*" if (!is.null(self$namespace)) el <- paste0(self$namespace, "|", el) paste0(first_class_name(self), "[", el, "]") }, specificity = function() { if (!is.null(self$element)) c(0, 0, 1) else rep(0, 3) }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) Hash <- R6Class("Hash", public = list( selector = NULL, id = NULL, initialize = function(selector, id) { self$selector <- selector self$id <- id }, repr = function() { paste0( first_class_name(self), "[", self$selector$repr(), "#", self$id, "]") }, specificity = function() { specs <- self$selector$specificity() specs[1] <- specs[1] + 1 specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) CombinedSelector <- R6Class("CombinedSelector", public = list( selector = NULL, combinator = NULL, subselector = NULL, initialize = function(selector, combinator, subselector) { if (is.null(selector)) stop("'selector' cannot be NULL") self$selector <- selector self$combinator <- combinator self$subselector <- subselector }, repr = function() { comb <- if (self$combinator == " ") "" else self$combinator paste0( first_class_name(self), "[", self$selector$repr(), " ", comb, " ", self$subselector$repr(), "]") }, specificity = function() { specs <- self$selector$specificity() sub_specs <- self$subselector$specificity() specs + sub_specs }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) #### Parser # foo el_re <- "^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$" # foo#bar or #bar id_re <- "^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$" # foo.bar or .bar class_re <- "^[ \t\r\n\f]*([a-zA-Z]*)\\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$" parse <- function(css) { el_match <- str_match(css, el_re)[1, 2] if (!is.na(el_match)) return(list(Selector$new(Element$new(element = el_match)))) id_match <- str_match(css, id_re)[1, 2:3] if (!is.na(id_match[2])) return(list(Selector$new( Hash$new( Element$new( element = if (nzchar(id_match[1]) == 0) NULL else id_match[1]), id_match[2])))) class_match <- str_match(css, class_re)[1, 2:3] if (!is.na(class_match[3])) return(list(Selector$new( ClassSelector$new( Element$new( element = if (is.null(class_match[2]) || is.na(class_match[2])) NULL else class_match[2]), class_match[3])))) stream <- TokenStream$new(tokenize(css)) stream$source_text <- css parse_selector_group(stream) } parse_selector_group <- function(stream) { stream$skip_whitespace() i <- 1 results <- list() while (TRUE) { parsed_selector <- parse_selector(stream) results[[i]] <- Selector$new(parsed_selector$result, parsed_selector$pseudo_element) i <- i + 1 if (token_equality(stream$peek(), "DELIM", ",")) { stream$nxt() stream$skip_whitespace() } else { break } } results } token_equality <- function(token, t, val) { if (token$type != t) return(FALSE) # val can be NULL or (maybe) NA if (is.null(val) && is.null(token$value)) return(TRUE) if (is.na(val) && is.na(token$value)) return(TRUE) # Should be OK with regular equality token$value == val } parse_selector <- function(stream) { results <- parse_simple_selector(stream) result <- results$result pseudo_element <- results$pseudo_element while (TRUE) { stream$skip_whitespace() peek <- stream$peek() if (token_equality(peek, "EOF", NULL) || token_equality(peek, "DELIM", ",")) { break } if (!is.null(pseudo_element) && nzchar(pseudo_element)) { stop("Got pseudo-element ::", pseudo_element, " not at the end of a selector") } if (peek$is_delim(c("+", ">", "~"))) { # A combinator combinator <- stream$nxt()$value stream$skip_whitespace() } else { # By exclusion, the last parse_simple_selector() ended # at peek == ' ' combinator <- " " } stuff <- parse_simple_selector(stream) pseudo_element <- stuff$pseudo_element result <- CombinedSelector$new(result, combinator, stuff$result) } list(result = result, pseudo_element = pseudo_element) } parse_simple_selector <- function(stream, inside_negation = FALSE) { stream$skip_whitespace() selector_start <- length(stream$used) peek <- stream$peek() if (peek$type == "IDENT" || token_equality(peek, "DELIM", "*")) { if (peek$type == "IDENT") { namespace <- stream$nxt()$value } else { stream$nxt() namespace <- NULL } if (token_equality(stream$peek(), "DELIM", "|")) { stream$nxt() element <- stream$next_ident_or_star() } else { element <- namespace namespace <- NULL } } else { element <- namespace <- NULL } result <- Element$new(namespace, element) pseudo_element <- NULL while (TRUE) { peek <- stream$peek() if (any(peek$type == c("S", "EOF")) || peek$is_delim(c(",", "+", ">", "~")) || (inside_negation && token_equality(peek, "DELIM", ")"))) { break } if (!is.null(pseudo_element)) { stop("Got pseudo-element ::", pseudo_element, " not at the end of a selector") } if (peek$type == "HASH") { result <- Hash$new(result, stream$nxt()$value) } else if (token_equality(peek, "DELIM", ".")) { stream$nxt() result <- ClassSelector$new(result, stream$next_ident()) } else if (token_equality(peek, "DELIM", "|")) { stream$nxt() result <- Element$new(element = stream$next_ident()) } else if (token_equality(peek, "DELIM", "[")) { stream$nxt() result <- parse_attrib(result, stream) } else if (token_equality(peek, "DELIM", ":") || token_equality(peek, "DELIM", "::")) { if (token_equality(peek, "DELIM", "::")) { stream$nxt() pseudo_element <- stream$next_ident() next } else { stream$nxt() } ident <- stream$next_ident() if (tolower(ident) %in% c( "first-line", "first-letter", "before", "after")) { # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. pseudo_element <- ident next } if (!token_equality(stream$peek(), "DELIM", "(")) { result <- Pseudo$new(result, ident) next } stream$nxt() stream$skip_whitespace() if (tolower(ident) == "not") { if (inside_negation) { stop("Got nested :not()") } selectors <- parse_simple_selector_arguments(stream, "not") result <- Negation$new(result, selectors) } else if (any(tolower(ident) == c("matches", "is"))) { selectors <- parse_simple_selector_arguments(stream, tolower(ident)) result <- Matching$new(result, selectors) } else if (tolower(ident) == "where") { selectors <- parse_simple_selector_arguments(stream, "where") result <- Where$new(result, selectors) } else if (tolower(ident) == "has") { selectors <- parse_simple_selector_arguments(stream, "has") result <- Has$new(result, selectors) } else { arguments <- list() selector_list <- NULL i <- 1 # Parse the function arguments (e.g., "2n+1" for nth-child) # :lang() and :dir() can accept comma-separated lists allow_commas <- tolower(ident) %in% c("lang", "dir") while (TRUE) { nt <- stream$nxt() if (nt$type %in% c("IDENT", "STRING", "NUMBER") || (token_equality(nt, "DELIM", "+") || token_equality(nt, "DELIM", "-"))) { arguments[[i]] <- nt i <- i + 1 # Check if this is the 'of' keyword for nth-child/nth-last-child if (nt$type == "IDENT" && tolower(nt$value) == "of" && any(tolower(ident) == c("nth-child", "nth-last-child"))) { # Remove 'of' from arguments - it's a keyword, not an argument arguments <- arguments[-length(arguments)] # Parse the selector list that follows 'of' stream$skip_whitespace() selector_list <- parse_simple_selector_arguments(stream, ident) break } } else if (token_equality(nt, "DELIM", "*") && allow_commas) { # For :lang() and :dir(), allow * as a wildcard arguments[[i]] <- nt i <- i + 1 } else if (nt$type == "S") { next } else if (token_equality(nt, "DELIM", ",") && allow_commas) { # For :lang() and :dir(), commas separate multiple values stream$skip_whitespace() next } else if (token_equality(nt, "DELIM", ")")) { break } else { stop("Expected an argument, got ", nt$repr()) } } if (length(arguments) == 0) { stop("Expected at least one argument, got ", nt$repr()) } result <- Function$new(result, ident, arguments, selector_list) } } else { stop("Expected selector, got ", stream$peek()$repr()) } } if (length(stream$used) == selector_start) { stop("Expected selector, got ", stream$peek()$repr()) } list(result = result, pseudo_element = pseudo_element) } parse_simple_selector_arguments <- function(stream, function_name = NULL) { # nolint: object_length_linter. index <- 1 arguments <- list() while (TRUE) { results <- parse_simple_selector(stream, inside_negation = TRUE) result <- results$result pseudo_element <- results$pseudo_element if (!is.null(pseudo_element)) { if (!is.null(function_name)) { stop("Got pseudo-element ::", pseudo_element, " inside :", function_name, "() at ", stream$peeked$pos) } else { stop("Got pseudo-element ::", pseudo_element, " inside function") } } arguments[[index]] <- result index <- index + 1 stream$skip_whitespace() nt <- stream$nxt() if (token_equality(nt, "DELIM", ")")) { break } else if (token_equality(nt, "DELIM", ",")) { stream$skip_whitespace() # Check if there's actually a selector after the comma peek <- stream$peek() if (token_equality(peek, "DELIM", ")")) { # Trailing comma before closing paren stop("Expected ')', got ", nt$repr()) } # Continue to parse next selector } else { stop("Expected an argument, got ", nt$repr()) } } arguments } parse_attrib <- function(selector, stream) { stream$skip_whitespace() attrib <- stream$next_ident_or_star() if (is.null(attrib) && !token_equality(stream$peek(), "DELIM", "|")) stop("Expected '|', got ", stream$peek()$repr()) if (token_equality(stream$peek(), "DELIM", "|")) { stream$nxt() namespace <- attrib attrib <- stream$next_ident() op <- NULL } else if (token_equality(stream$peek(), "DELIM", "|=")) { namespace <- NULL stream$nxt() op <- "|=" } else { namespace <- op <- NULL } if (is.null(op)) { stream$skip_whitespace() nt <- stream$nxt() if (token_equality(nt, "DELIM", "]")) { return(Attrib$new(selector, namespace, attrib, "exists", NULL)) } else if (token_equality(nt, "DELIM", "=")) { op <- "=" } else if (nt$is_delim(c("^=", "$=", "*=", "~=", "|=", "!="))) { op <- nt$value } else { stop("Operator expected, got ", nt$repr()) } } stream$skip_whitespace() value <- stream$nxt() if (!value$type %in% c("IDENT", "STRING")) { stop("Expected string or ident, got ", value$repr()) } stream$skip_whitespace() nt <- stream$nxt() if (!token_equality(nt, "DELIM", "]")) { stop("Expected ']', got ", nt$repr()) } Attrib$new(selector, namespace, attrib, op, value$value) } str_int <- function(s) { suppressWarnings(as.integer(s)) } parse_series <- function(tokens) { for (token in tokens) { if (token$type == "STRING") stop("String tokens not allowed in series.") } s <- paste0(sapply(tokens, function(x) x$value), collapse = "") if (s == "odd") return(2:1) else if (s == "even") return(c(2, 0)) else if (s == "n") return(1:0) if (is.na(str_locate(s, "n")[1, 1])) { result <- str_int(s) if (is.na(result)) { return(NULL) } else { return(c(0, result)) } } ab <- str_split_fixed(s, "n", 2)[1, ] a <- str_trim(ab[1]) b <- str_trim(ab[2]) intb <- str_int(b) if (!nzchar(a) && is.na(intb)) return(NULL) if (!nzchar(a)) a <- 1 else if (a == "-" || a == "+") a <- str_int(paste0(a, "1")) else a <- str_int(a) if (!nzchar(b)) b <- 0 else b <- str_int(b) c(a, b) } Token <- R6Class("Token", public = list( type = "", value = NULL, pos = 1, initialize = function(type = "", value = NULL, pos = 1) { self$type <- type self$value <- value self$pos <- pos }, repr = function() { paste0("<", self$type, " '", self$value, "' at ", self$pos, ">") }, is_delim = function(values) { self$type == "DELIM" && self$value %in% values }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end ) ) EOFToken <- R6Class("EOFToken", inherit = Token, public = list( initialize = function(pos = 1, type = "EOF", value = NULL) { super$initialize(type, value, pos) }, repr = function() { paste0("<", self$type, " at ", self$pos, ">") }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end )) compile_ <- function(pattern) { function(x) { str_locate(x, pattern)[1, ] } } delims_2ch <- c("~=", "|=", "^=", "$=", "*=", "::", "!=") delims_1ch <- c(">", "+", "~", ",", ".", "*", "=", "[", "]", "(", ")", "|", ":", "#") delim_escapes <- paste0("\\", delims_1ch, collapse = "|") match_whitespace <- compile_("[ \t\r\n\f]+") match_number <- compile_("[+-]?(?:[0-9]*\\.[0-9]+|[0-9]+)") match_hash <- compile_(paste0("^#([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+")) match_ident <- compile_(paste0("^([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+")) match_string_by_quote <- list("'" = compile_(paste0("([^\n\r\f\\']|", TokenMacros$string_escape, ")*")), '"' = compile_(paste0('([^\n\r\f\\"]|', TokenMacros$string_escape, ")*"))) # Substitution for escaped chars sub_simple_escape <- function(x) gsub("\\\\(.)", "\\1", x) sub_unicode_escape <- function(x) gsub(TokenMacros$unicode_escape, "\\1", x, ignore.case = TRUE) sub_newline_escape <- function(x) gsub("\\\\(?:\n|\r\n|\r|\f)", "", x) tokenize <- function(s) { pos <- 1 i <- 1 len_s <- nchar(s) results <- list() while (pos <= len_s) { ss <- substring(s, pos, len_s) match <- match_whitespace(ss) if (!anyNA(match) && match[1] == 1) { results[[i]] <- Token$new("S", " ", pos) match_end <- match[2] pos <- pos + match_end i <- i + 1 next } match <- match_number(ss) if (!anyNA(match) && match[1] == 1) { match_start <- match[1] match_end <- max(match[1], match[2]) value <- substring(ss, match_start, match_end) results[[i]] <- Token$new("NUMBER", value, pos) pos <- pos + match_end i <- i + 1 next } match <- match_ident(ss) if (!anyNA(match) && match[1] == 1) { match_start <- match[1] match_end <- max(match[1], match[2]) value <- substring(ss, match_start, match_end) value <- sub_simple_escape(sub_unicode_escape(value)) results[[i]] <- Token$new("IDENT", value, pos) pos <- pos + match_end i <- i + 1 next } match <- match_hash(ss) if (!anyNA(match) && match[1] == 1) { match_start <- match[1] match_end <- max(match[1], match[2]) value <- substring(ss, match_start, match_end) value <- sub_simple_escape(sub_unicode_escape(value)) hash_id <- substring(value, 2) results[[i]] <- Token$new("HASH", hash_id, pos) pos <- pos + match_end i <- i + 1 next } # Testing presence of two char delims nc_inds <- seq_len(nchar(ss)) if (length(nc_inds) %% 2 == 1) nc_inds <- c(nc_inds, length(nc_inds) + 1) split_ss_2ch <- substring(ss, nc_inds[(nc_inds %% 2) == 1], nc_inds[(nc_inds %% 2) == 0]) delim_inds_2ch <- which(split_ss_2ch %in% delims_2ch) if (length(delim_inds_2ch) && delim_inds_2ch[1] == 1) { # We have a 2ch delim results[[i]] <- Token$new("DELIM", split_ss_2ch[1], pos) pos <- pos + 2 i <- i + 1 next } # Testing presence of single char delims split_ss_1ch <- substring(ss, nc_inds, nc_inds) delim_inds_1ch <- which(split_ss_1ch %in% delims_1ch) if (length(delim_inds_1ch) && delim_inds_1ch[1] == 1) { # We have a single char delim results[[i]] <- Token$new("DELIM", split_ss_1ch[1], pos) pos <- pos + 1 i <- i + 1 next } quote <- substring(s, pos, pos) if (quote %in% c("'", '"')) { ncs <- nchar(s) split_chars <- substring(s, (pos + 1):ncs, (pos + 1):ncs) matching_quotes <- which(split_chars == quote) is_escaped <- logical(length(matching_quotes)) if (length(matching_quotes)) { for (j in seq_along(matching_quotes)) { end_quote <- matching_quotes[j] if (end_quote > 1) { # Count consecutive backslashes before the quote # If odd number of backslashes, the quote is escaped backslash_count <- 0 check_pos <- end_quote - 1 while (check_pos >= 1 && split_chars[check_pos] == "\\") { backslash_count <- backslash_count + 1 check_pos <- check_pos - 1 } is_escaped[j] <- (backslash_count %% 2) == 1 } } if (all(is_escaped)) { stop("Unclosed string at ", pos) } end_quote <- matching_quotes[min(which(!is_escaped))] value <- substring(s, pos + 1, pos + end_quote - 1) value <- sub_simple_escape( sub_unicode_escape( sub_newline_escape(value))) results[[i]] <- Token$new("STRING", value, pos) pos <- pos + end_quote + 1 # one for each quote char i <- i + 1 } else { stop("Unclosed string at ", pos) } } # Remove comments pos1 <- pos + 1 if (substring(s, pos, pos1) == "/*") { rel_pos <- str_locate(ss, "\\*/")[1] pos <- if (is.na(rel_pos)) { len_s + 1 } else { pos + rel_pos + 1 } next } # Because we always call 'next', if we're here there must have # been an error tmp <- substring(ss, 1, 1) if (!tmp %in% c(delims_1ch, '"', "'")) { stop("Unexpected character '", tmp, "' found at position ", pos) } } results[[i]] <- EOFToken$new(pos) results } TokenStream <- R6Class("TokenStream", public = list( pos = 1, tokens = NULL, ntokens = 0, used = list(), source_text = NULL, peeked = list(), peeking = FALSE, initialize = function(tokens, source_text = NULL) { self$tokens <- tokens self$ntokens <- length(tokens) self$source_text <- source_text }, nxt = function() { if (self$peeking) { self$peeking <- FALSE self$used[[self$pos]] <- self$peeked self$peeked } else { nt <- self$next_token() self$used[[self$pos]] <- nt nt } }, next_token = function() { nt <- self$tokens[[self$pos]] self$pos <- self$pos + 1 nt }, peek = function() { if (!self$peeking) { self$peeked <- self$next_token() self$peeking <- TRUE } self$peeked }, next_ident = function() { nt <- self$nxt() if (nt$type != "IDENT") stop("Expected ident, got ", nt$repr()) nt$value }, next_ident_or_star = function() { nt <- self$nxt() if (nt$type == "IDENT") nt$value else if (token_equality(nt, "DELIM", "*")) NULL else stop("Expected ident or '*', got ", nt$repr()) }, skip_whitespace = function() { peek <- self$peek() if (peek$type == "S") self$nxt() } ) ) selectr/R/xpath.R0000755000176200001440000013603615107555231013403 0ustar liggesusersXPathExpr <- R6Class("XPathExpr", public = list( path = "", element = "*", condition = "", star_prefix = FALSE, initialize = function( path = "", element = "*", condition = "", star_prefix = FALSE) { self$path <- path self$element <- element self$condition <- if (nzchar(condition)) paste0("(", condition, ")") else condition self$star_prefix <- star_prefix }, str = function() { p <- paste0(self$path, self$element) if (nzchar(self$condition)) p <- paste0(p, "[", self$condition, "]") p }, repr = function() { paste0(first_class_name(self), "[", self$str(), "]") }, add_condition = function(condition, conjunction = "and") { self$condition <- if (nzchar(self$condition)) paste0(self$condition, " ", conjunction, " (", condition, ")") else paste0("(", condition, ")") }, add_name_test = function() { if (self$element == "*") return() self$add_condition(paste0("name() = ", xpath_literal(self$element))) self$element <- "*" }, join = function(combiner, other) { p <- paste0(self$str(), combiner) if (other$path != "*/") p <- paste0(p, other$path) self$path <- p self$element <- other$element self$condition <- other$condition self }, show = function() { # nocov start cat(self$repr(), "\n") } # nocov end )) is_safe_name <- function(name) { grepl("^[a-zA-Z_][a-zA-Z0-9_.-]*$", name) } first_class_name <- function(obj) { result <- class(obj)[1] # HACK! # R.oo clashes with our preferred use of 'Class' for the name of the # ClassSelector class, even though it is hidden in our package. # Because the name of the class is used in places, perform a # special case rename from ClassSelector to Class. if (result == "ClassSelector") "Class" else result } xpath_literal <- function(literal) { if (!is.character(literal) || length(literal) != 1) { stop("literal must be a single character string") } lenseq <- seq_len(nchar(literal)) split_chars <- substring(literal, lenseq, lenseq) if (!any(split_chars == "'")) { literal <- paste0("'", literal, "'") } else if (!any(split_chars == '"')) { literal <- paste0('"', literal, '"') } else { dq_inds <- which(split_chars == "'") sq_inds <- which(split_chars != "'") split_chars[dq_inds] <- paste0('"', split_chars[dq_inds], '"') split_chars[sq_inds] <- paste0("'", split_chars[sq_inds], "'") literal <- paste(split_chars, collapse = ",") literal <- paste0("concat(", literal, ")") } literal } GenericTranslator <- R6Class("GenericTranslator", public = list( combinator_mapping = c(" " = "descendant", ">" = "child", "+" = "direct_adjacent", "~" = "indirect_adjacent"), # != is not in Selectors Level 3, but included anyway attribute_operator_mapping = c("exists" = "exists", "=" = "equals", "~=" = "includes", "|=" = "dashmatch", "^=" = "prefixmatch", "$=" = "suffixmatch", "*=" = "substringmatch", "!=" = "different"), id_attribute = "id", lang_attribute = "xml:lang", lower_case_element_names = FALSE, lower_case_attribute_names = FALSE, lower_case_attribute_values = FALSE, css_to_xpath = function(css, prefix = "descendant-or-self::") { selectors <- parse(css) selectors <- if (is.null(selectors)) list() else if (!is.list(selectors)) list(selectors) else selectors lapply(selectors, function(selector) { if (first_class_name(selector) == "Selector" && !is.null(selector$pseudo_element)) stop("Pseudo-elements are not supported.") }) char_selectors <- sapply(selectors, function(selector) self$selector_to_xpath(selector, prefix)) paste0(char_selectors, collapse = " | ") }, selector_to_xpath = function(selector, prefix = "descendant-or-self::") { tree <- selector$parsed_tree xpath <- self$xpath(tree) if (!inherits(xpath, "XPathExpr")) stop("'xpath' is not an instance of 'XPathExpr'") paste0(if (!is.null(prefix)) prefix else "", xpath$str()) }, xpath = function(parsed_selector) { type_name <- first_class_name(parsed_selector) method_name <- paste0("xpath_", tolower(type_name)) if (method_name == "xpath_attrib") self$xpath_attrib(parsed_selector) else if (method_name == "xpath_class") self$xpath_class(parsed_selector) else if (method_name == "xpath_combinedselector") self$xpath_combinedselector(parsed_selector) else if (method_name == "xpath_element") self$xpath_element(parsed_selector) else if (method_name == "xpath_matching") self$xpath_matching(parsed_selector) else if (method_name == "xpath_where") self$xpath_where(parsed_selector) else if (method_name == "xpath_has") self$xpath_has(parsed_selector) else if (method_name == "xpath_function") self$xpath_function(parsed_selector) else if (method_name == "xpath_hash") self$xpath_hash(parsed_selector) else if (method_name == "xpath_negation") self$xpath_negation(parsed_selector) else if (method_name == "xpath_pseudo") self$xpath_pseudo(parsed_selector) else stop("Unknown method name '", type_name, "'") }, xpath_combinedselector = function(combined) { combinator <- paste0( "xpath_", self$combinator_mapping[combined$combinator], "_combinator") left_xpath <- self$xpath(combined$selector) right_xpath <- self$xpath(combined$subselector) if (combinator == "xpath_descendant_combinator") self$xpath_descendant_combinator( left = left_xpath, right = right_xpath) else if (combinator == "xpath_child_combinator") self$xpath_child_combinator( left = left_xpath, right = right_xpath) else if (combinator == "xpath_direct_adjacent_combinator") self$xpath_direct_adjacent_combinator( left = left_xpath, right = right_xpath) else if (combinator == "xpath_indirect_adjacent_combinator") self$xpath_indirect_adjacent_combinator( left = left_xpath, right = right_xpath) else stop("Unknown combinator '", self$combinator_mapping[combined$combinator], "'") }, xpath_negation = function(negation) { xpath <- self$xpath(negation$selector) # Collect all conditions from the selector list conditions <- character(0) for (subselector in negation$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } # Combine conditions with OR (any match means element is excluded) if (length(conditions) > 0) { combined_condition <- paste0(conditions, collapse = " or ") xpath$add_condition(paste0("not(", combined_condition, ")")) } else { xpath$add_condition("0") } xpath }, xpath_matching = function(matching) { xpath <- self$xpath(matching$selector) exprs <- sapply(matching$selector_list, function(s) self$xpath(s)) for (e in exprs) { e$add_name_test() if (nzchar(e$condition)) { xpath$add_condition(e$condition, "or") } } xpath }, xpath_where = function(where) { # :where() behaves exactly like :is() in terms of matching, # but has zero specificity (handled in the Where class itself) xpath <- self$xpath(where$selector) exprs <- sapply(where$selector_list, function(s) self$xpath(s)) for (e in exprs) { e$add_name_test() if (nzchar(e$condition)) { xpath$add_condition(e$condition, "or") } } xpath }, xpath_has = function(has) { # :has() matches elements that have descendants matching the selector list xpath <- self$xpath(has$selector) # Build conditions that check for the existence of descendants conditions <- character(0) for (subselector in has$selector_list) { sub_xpath <- self$xpath(subselector) # Build the full descendant path sub_xpath$add_name_test() desc_test <- paste0(".//", sub_xpath$element) if (nzchar(sub_xpath$condition)) { desc_test <- paste0(desc_test, "[", sub_xpath$condition, "]") } conditions <- c(conditions, desc_test) } # Combine conditions with OR (any descendant match means the element matches) if (length(conditions) > 0) { combined_condition <- paste0(conditions, collapse = " | ") xpath$add_condition(combined_condition) } xpath }, xpath_function = function(fn) { method_name <- paste0( "xpath_", gsub("-", "_", fn$name), "_function") xp <- self$xpath(fn$selector) if (method_name == "xpath_contains_function") self$xpath_contains_function(xp, fn) else if (method_name == "xpath_lang_function") self$xpath_lang_function(xp, fn) else if (method_name == "xpath_dir_function") self$xpath_dir_function(xp, fn) else if (method_name == "xpath_nth_child_function") self$xpath_nth_child_function(xp, fn) else if (method_name == "xpath_nth_last_child_function") self$xpath_nth_last_child_function(xp, fn) else if (method_name == "xpath_nth_of_type_function") self$xpath_nth_of_type_function(xp, fn) else if (method_name == "xpath_nth_last_of_type_function") self$xpath_nth_last_of_type_function(xp, fn) else stop("The pseudo-class :", gsub("-", "_", fn$name), "() is unknown") }, xpath_pseudo = function(pseudo) { method_name <- paste0( "xpath_", gsub("-", "_", pseudo$ident), "_pseudo") xp <- self$xpath(pseudo$selector) if (method_name == "xpath_root_pseudo") self$xpath_root_pseudo(xp) else if (method_name == "xpath_first_child_pseudo") self$xpath_first_child_pseudo(xp) else if (method_name == "xpath_last_child_pseudo") self$xpath_last_child_pseudo(xp) else if (method_name == "xpath_first_of_type_pseudo") self$xpath_first_of_type_pseudo(xp) else if (method_name == "xpath_last_of_type_pseudo") self$xpath_last_of_type_pseudo(xp) else if (method_name == "xpath_only_child_pseudo") self$xpath_only_child_pseudo(xp) else if (method_name == "xpath_only_of_type_pseudo") self$xpath_only_of_type_pseudo(xp) else if (method_name == "xpath_empty_pseudo") self$xpath_empty_pseudo(xp) else if (method_name == "xpath_any_link_pseudo") self$xpath_any_link_pseudo(xp) else if (method_name == "xpath_link_pseudo") self$xpath_link_pseudo(xp) else if (method_name == "xpath_visited_pseudo") self$xpath_visited_pseudo(xp) else if (method_name == "xpath_hover_pseudo") self$xpath_hover_pseudo(xp) else if (method_name == "xpath_active_pseudo") self$xpath_active_pseudo(xp) else if (method_name == "xpath_focus_pseudo") self$xpath_focus_pseudo(xp) else if (method_name == "xpath_target_pseudo") self$xpath_target_pseudo(xp) else if (method_name == "xpath_target_within_pseudo") self$xpath_target_within_pseudo(xp) else if (method_name == "xpath_local_link_pseudo") self$xpath_local_link_pseudo(xp) else if (method_name == "xpath_enabled_pseudo") self$xpath_enabled_pseudo(xp) else if (method_name == "xpath_disabled_pseudo") self$xpath_disabled_pseudo(xp) else if (method_name == "xpath_checked_pseudo") self$xpath_checked_pseudo(xp) else stop("The pseudo-class :", pseudo$ident, " is unknown") }, xpath_attrib = function(selector) { operator <- self$attribute_operator_mapping[selector$operator] method_name <- paste0("xpath_attrib_", operator) if (self$lower_case_attribute_names) { name <- tolower(selector$attrib) } else { name <- selector$attrib } safe <- is_safe_name(name) if (!is.null(selector$namespace)) { name <- paste0(selector$namespace, ":", name) } if (safe) { attrib <- paste0("@", name) } else { attrib <- paste0( "attribute::*[name() = ", xpath_literal(name), "]") } if (self$lower_case_attribute_names) { value <- tolower(selector$value) } else { value <- selector$value } xp <- self$xpath(selector$selector) if (method_name == "xpath_attrib_dashmatch") self$xpath_attrib_dashmatch(xp, attrib, value) else if (method_name == "xpath_attrib_different") self$xpath_attrib_different(xp, attrib, value) else if (method_name == "xpath_attrib_equals") self$xpath_attrib_equals(xp, attrib, value) else if (method_name == "xpath_attrib_exists") self$xpath_attrib_exists(xp, attrib, value) else if (method_name == "xpath_attrib_includes") self$xpath_attrib_includes(xp, attrib, value) else if (method_name == "xpath_attrib_prefixmatch") self$xpath_attrib_prefixmatch(xp, attrib, value) else if (method_name == "xpath_attrib_substringmatch") self$xpath_attrib_substringmatch(xp, attrib, value) else if (method_name == "xpath_attrib_suffixmatch") self$xpath_attrib_suffixmatch(xp, attrib, value) else stop("Unknown attribute operator '", operator, "'") }, # .foo is defined as [class~=foo] in the spec xpath_class = function(class_selector) { xpath <- self$xpath(class_selector$selector) self$xpath_attrib_includes(xpath, "@class", class_selector$class_name) xpath }, xpath_hash = function(id_selector) { xpath <- self$xpath(id_selector$selector) self$xpath_attrib_equals(xpath, "@id", id_selector$id) xpath }, xpath_element = function(selector) { element <- selector$element if (is.null(element)) { element <- "*" safe <- TRUE } else { safe <- is_safe_name(element) if (self$lower_case_element_names) element <- tolower(element) } if (!is.null(selector$namespace)) { # Namespace prefixes are case-sensitive. # https://www.w3.org/TR/css-namespaces-3/#prefixes element <- paste0(selector$namespace, ":", element) safe <- safe && is_safe_name(selector$namespace) } xpath <- XPathExpr$new(element = element) if (!safe) xpath$add_name_test() xpath }, xpath_descendant_combinator = function(left, right) { left$join("//", right) }, xpath_child_combinator = function(left, right) { left$join("/", right) }, xpath_direct_adjacent_combinator = function(left, right) { xpath <- left$join("/following-sibling::", right) target_element <- xpath$element existing_condition <- xpath$condition xpath$add_name_test() if (nzchar(existing_condition)) { # Has existing conditions from right selector (e.g., attributes) # Result: *[1][self::element][existing_condition] xpath$condition <- paste0("1][self::", target_element, "][", existing_condition) } else { # No existing conditions, just position and element test xpath$condition <- paste0("1][self::", target_element) } xpath }, xpath_indirect_adjacent_combinator = function(left, right) { left$join("/following-sibling::", right) }, xpath_nth_child_function = function(xpath, fn, last = FALSE, add_name_test = TRUE) { ab <- parse_series(fn$arguments) # Validate that parse_series returned valid results if (is.null(ab) || length(ab) != 2) { stop("Invalid nth-child expression") } a <- ab[1] b <- ab[2] # Validate that a and b are valid integers (not NA) if (is.na(a) || is.na(b)) { stop("Invalid nth-child expression: could not parse as valid integers") } # From https://www.w3.org/TR/selectors-4/#structural-pseudos: # # :nth-child(an+b) # an+b-1 siblings before # # :nth-last-child(an+b) # an+b-1 siblings after # # :nth-of-type(an+b) # an+b-1 siblings with the same expanded element name before # # :nth-last-of-type(an+b) # an+b-1 siblings with the same expanded element name after # # CSS Selectors Level 4 adds optional "of S" selector list: # :nth-child(an+b of S) - count only siblings that match selector S # # So, # for :nth-child and :nth-of-type # # count(preceding-sibling::) = an+b-1 # # for :nth-last-child and :nth-last-of-type # # count(following-sibling::) = an+b-1 # # therefore, # count(...) - (b-1) = 0 (mod a) # # if a == 0: # ~~~~~~~~~~ # count(...) = b-1 # # if a < 0: # ~~~~~~~~~ # count(...) - b +1 <= 0 # -> count(...) <= b-1 # # if a > 0: # ~~~~~~~~~ # count(...) - b +1 >= 0 # -> count(...) >= b-1 # work with b-1 instead b_min_1 <- b - 1 # early-exit condition 1: # ~~~~~~~~~~~~~~~~~~~~~~~ # for a == 1, nth-*(an+b) means n+b-1 siblings before/after, # and since n %in% {0, 1, 2, ...}, if b-1<=0, # there is always an "n" matching any number of siblings (maybe none) if (a == 1 && b_min_1 <= 0) { # CSS Level 4: When selector list is provided, ensure current element matches if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) { conditions <- character(0) for (subselector in fn$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } if (length(conditions) > 0) { # Current element must match at least one selector (OR) combined_condition <- paste0(conditions, collapse = " or ") xpath$add_condition(combined_condition) } } return(xpath) } # early-exit condition 2: # ~~~~~~~~~~~~~~~~~~~~~~~ # an+b-1 siblings with a<0 and (b-1)<0 is not possible if (a < 0 && b_min_1 < 0) { xpath$add_condition("0") # CSS Level 4: When selector list is provided, ensure current element matches # Even though the condition is always false, we should still check the selector if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) { conditions <- character(0) for (subselector in fn$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } if (length(conditions) > 0) { # Current element must match at least one selector (OR) combined_condition <- paste0(conditions, collapse = " or ") xpath$add_condition(combined_condition) } } return(xpath) } # `add_name_test` boolean is inverted and somewhat counter-intuitive: # # nth_of_type() calls nth_child(add_name_test=False) if (add_name_test) { nodetest <- "*" } else { nodetest <- xpath$element } # Build the predicate for selector list filtering (CSS Level 4) selector_predicate <- "" if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) { # Generate XPath conditions for each selector in the list conditions <- character(0) for (subselector in fn$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } if (length(conditions) > 0) { # Combine conditions with OR (any match counts the sibling) combined_condition <- paste0(conditions, collapse = " or ") selector_predicate <- paste0("[", combined_condition, "]") } } # count siblings before or after the element if (!last) { siblings_count <- paste0("count(preceding-sibling::", nodetest, selector_predicate, ")") } else { siblings_count <- paste0("count(following-sibling::", nodetest, selector_predicate, ")") } # special case of fixed position: nth-*(0n+b) # if a == 0: # ~~~~~~~~~~ # count(***-sibling::***) = b-1 if (a == 0) { xpath$add_condition(paste0(siblings_count, " = ", b_min_1)) # CSS Level 4: When selector list is provided, ensure current element matches if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) { conditions <- character(0) for (subselector in fn$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } if (length(conditions) > 0) { # Current element must match at least one selector (OR) combined_condition <- paste0(conditions, collapse = " or ") xpath$add_condition(combined_condition) } } return(xpath) } expr <- character(0) if (a > 0) { # siblings count, an+b-1, is always >= 0, # so if a>0, and (b-1)<=0, an "n" exists to satisfy this, # therefore, the predicate is only interesting if (b-1)>0 if (b_min_1 > 0) { expr <- c(expr, paste0(siblings_count, " >= ", b_min_1)) } } else { # if a<0, and (b-1)<0, no "n" satisfies this, # this is tested above as an early exist condition # otherwise, expr <- c(expr, paste0(siblings_count, " <= ", b_min_1)) } # operations modulo 1 or -1 are simpler, one only needs to verify: # # - either: # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc., # i.e. count(***-sibling::***) >= (b-1) # # - or: # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc., # i.e. count(***-sibling::***) <= (b-1) # we we just did above. # if (abs(a) != 1) { # count(***-sibling::***) - (b-1) = 0 (mod a) left <- siblings_count # apply "modulo a" on 2nd term, -(b-1), # to simplify things like "(... +6) % -3", # and also make it positive with |a| b_neg <- (-b_min_1) %% abs(a) if (b_neg != 0) { b_neg <- paste0("+", b_neg) left <- paste0("(", left, " ", b_neg, ")") } expr <- c(expr, paste0(left, " mod ", a, " = 0")) } if (length(expr)) { expr <- paste0(expr, collapse = " and ") xpath$add_condition(expr) } # CSS Level 4: When selector list is provided, ensure current element matches if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) { conditions <- character(0) for (subselector in fn$selector_list) { sub_xpath <- self$xpath(subselector) sub_xpath$add_name_test() if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) { conditions <- c(conditions, sub_xpath$condition) } } if (length(conditions) > 0) { # Current element must match at least one selector (OR) combined_condition <- paste0(conditions, collapse = " or ") xpath$add_condition(combined_condition) } } xpath }, xpath_nth_last_child_function = function(xpath, fn) { self$xpath_nth_child_function(xpath, fn, last = TRUE) }, xpath_nth_of_type_function = function(xpath, fn) { if (xpath$element == "*") { stop("*:nth-of-type() is not implemented") } self$xpath_nth_child_function(xpath, fn, add_name_test = FALSE) }, xpath_nth_last_of_type_function = function(xpath, fn) { if (xpath$element == "*") { stop("*:nth-last-of-type() is not implemented") } self$xpath_nth_child_function(xpath, fn, last = TRUE, add_name_test = FALSE) }, xpath_contains_function = function(xpath, fn) { if (!(fn$argument_types() %in% c("STRING", "IDENT"))) { stop("Expected a single string or ident for :contains(), got (", paste0(fn$argument_types(), collapse = ", "), ")") } value <- fn$arguments[[1]]$value xpath$add_condition(paste0( "contains(., ", xpath_literal(value), ")")) xpath }, xpath_lang_function = function(xpath, fn) { # Validate all arguments are STRING, IDENT, or * (DELIM) arg_types <- fn$argument_types() valid_types <- arg_types %in% c("STRING", "IDENT") | (arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*")) if (!all(valid_types)) { stop("Expected string, ident, or * arguments for :lang(), got ", fn$arguments[[1]]$repr()) } # Extract language values from arguments, combining IDENT-* patterns lang_values <- character(0) i <- 1 while (i <= length(fn$arguments)) { arg <- fn$arguments[[i]] # Check if this is an IDENT ending with '-' followed by a '*' DELIM if (arg$type %in% c("IDENT", "STRING") && grepl("-$", arg$value) && i < length(fn$arguments) && fn$arguments[[i + 1]]$type == "DELIM" && fn$arguments[[i + 1]]$value == "*") { # Combine them: "en-" + "*" = "en-*" lang_values <- c(lang_values, paste0(arg$value, "*")) i <- i + 2 # Skip the next token since we combined it } else { lang_values <- c(lang_values, arg$value) i <- i + 1 } } # Build conditions for each language value conditions <- character(0) for (value in lang_values) { if (value == "*") { # Wildcard * matches everything - use a condition that's always true conditions <- c(conditions, "true()") } else if (grepl("\\*$", value)) { # Wildcard suffix like "en-*" - match any language starting with prefix # Use XPath's lang() function which does prefix matching prefix <- sub("\\*$", "", value) # Remove trailing * conditions <- c(conditions, paste0("lang(", xpath_literal(prefix), ")")) } else { # Regular language tag conditions <- c(conditions, paste0("lang(", xpath_literal(value), ")")) } } # Combine conditions with OR if (length(conditions) == 1) { xpath$add_condition(conditions[1]) } else if (length(conditions) > 1) { combined <- paste0("(", paste(conditions, collapse = " or "), ")") xpath$add_condition(combined) } xpath }, xpath_dir_function = function(xpath, fn) { # validate all arguments are STRING, IDENT, or * (DELIM) arg_types <- fn$argument_types() valid_types <- arg_types %in% c("STRING", "IDENT") | (arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*")) if (!all(valid_types)) { stop("Expected string, ident, or * arguments for :dir(), got ", fn$arguments[[1]]$repr()) } # :dir() requires runtime directionality detection based on # document language, inherited dir attributes, and text analysis. # Not possible in static XPath, so we make it never match. xpath$add_condition("0") xpath }, xpath_root_pseudo = function(xpath) { xpath$add_condition("not(parent::*)") xpath }, xpath_first_child_pseudo = function(xpath) { xpath$add_condition("count(preceding-sibling::*) = 0") xpath }, xpath_last_child_pseudo = function(xpath) { xpath$add_condition("count(following-sibling::*) = 0") xpath }, xpath_first_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:first-of-type is not implemented") } xpath$add_condition(paste0( "count(preceding-sibling::", xpath$element, ") = 0")) xpath }, xpath_last_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:last-of-type is not implemented") } xpath$add_condition(paste0( "count(following-sibling::", xpath$element, ") = 0")) xpath }, xpath_only_child_pseudo = function(xpath) { xpath$add_condition("count(parent::*/child::*) = 1") xpath }, xpath_only_of_type_pseudo = function(xpath) { if (xpath$element == "*") { stop("*:only-of-type is not implemented") } xpath$add_condition(paste0( "count(parent::*/child::", xpath$element, ") = 1")) xpath }, xpath_empty_pseudo = function(xpath) { xpath$add_condition("not(*) and not(string-length())") xpath }, #pseudo_never_matches = function(xpath) { # xpath$add_condition("0") # xpath #}, # All are pseudo_never_matches() xpath_any_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_visited_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_hover_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_active_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_focus_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_target_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_target_within_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_local_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_enabled_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_disabled_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_checked_pseudo = function(xpath) { xpath$add_condition("0") ; xpath }, xpath_attrib_exists = function(xpath, name, value) { xpath$add_condition(name) xpath }, xpath_attrib_equals = function(xpath, name, value) { xpath$add_condition(paste0(name, " = ", xpath_literal(value))) xpath }, xpath_attrib_different = function(xpath, name, value) { xpath$add_condition(paste0("not(", name, ") or ", name, " != ", xpath_literal(value))) xpath }, xpath_attrib_includes = function(xpath, name, value) { if (!is.null(value) && nzchar(value) && grepl("^[^ \t\r\n\f]+$", value)) { xpath$add_condition(paste0( name, " and contains(concat(' ', normalize-space(", name, "), ' '), ", xpath_literal(paste0(" ", value, " ")), ")")) } else { xpath$add_condition("0") } xpath }, xpath_attrib_dashmatch = function(xpath, name, value) { xpath$add_condition(paste0( name, " and (", name, " = ", xpath_literal(value), " or starts-with(", name, ", ", xpath_literal(paste0(value, "-")), "))")) xpath }, xpath_attrib_prefixmatch = function(xpath, name, value) { if (!is.null(value) && nzchar(value)) { xpath$add_condition(paste0( name, " and starts-with(", name, ", ", xpath_literal(value), ")")) } else { xpath$add_condition("0") } xpath }, # In XPath there is starts-with but not ends-with, hence the oddness xpath_attrib_suffixmatch = function(xpath, name, value) { if (!is.null(value) && nzchar(value)) { xpath$add_condition(paste0( name, " and substring(", name, ", string-length(", name, ")-", nchar(value) - 1, ") = ", xpath_literal(value))) } else { xpath$add_condition("0") } xpath }, xpath_attrib_substringmatch = function(xpath, name, value) { if (!is.null(value) && nzchar(value)) { xpath$add_condition(paste0( name, " and contains(", name, ", ", xpath_literal(value), ")")) } else { xpath$add_condition("0") } xpath } ) ) HTMLTranslator <- R6Class("HTMLTranslator", inherit = GenericTranslator, public = list( xhtml = FALSE, initialize = function(xhtml = FALSE, ...) { self$xhtml <- xhtml if (!xhtml) { self$lower_case_element_names <- TRUE self$lower_case_attribute_names <- TRUE } self$lang_attribute <- "lang" }, xpath_checked_pseudo = function(xpath) { xpath$add_condition( paste0("(@selected and name(.) = 'option') or ", "(@checked ", "and (name(.) = 'input' or name(.) = 'command')", "and (@type = 'checkbox' or @type = 'radio'))")) xpath }, xpath_lang_function = function(xpath, fn) { # Validate all arguments are STRING, IDENT, or * (DELIM) arg_types <- fn$argument_types() valid_types <- arg_types %in% c("STRING", "IDENT") | (arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*")) if (!all(valid_types)) { stop("Expected string, ident, or * arguments for :lang(), got ", fn$arguments[[1]]$repr()) } # Extract language values from arguments, combining IDENT-* patterns lang_values <- character(0) i <- 1 while (i <= length(fn$arguments)) { arg <- fn$arguments[[i]] # Check if this is an IDENT ending with '-' followed by a '*' DELIM if (arg$type %in% c("IDENT", "STRING") && grepl("-$", arg$value) && i < length(fn$arguments) && fn$arguments[[i + 1]]$type == "DELIM" && fn$arguments[[i + 1]]$value == "*") { # Combine them: "en-" + "*" = "en-*" lang_values <- c(lang_values, paste0(arg$value, "*")) i <- i + 2 # Skip the next token since we combined it } else { lang_values <- c(lang_values, arg$value) i <- i + 1 } } # Build conditions for each language value conditions <- character(0) for (value in lang_values) { if (value == "*") { # Wildcard * matches any element with a lang attribute # Check for any ancestor-or-self with @lang attribute conditions <- c(conditions, paste0("ancestor-or-self::*[@", self$lang_attribute, "]")) } else if (grepl("\\*$", value)) { # Wildcard suffix like "en-*" - match any language starting with prefix prefix <- sub("\\*$", "", value) # Remove trailing * # Don't add '-' if prefix already ends with it search_prefix <- if (grepl("-$", prefix)) tolower(prefix) else paste0(tolower(prefix), "-") conditions <- c(conditions, paste0( "ancestor-or-self::*[@", self$lang_attribute, "][1][starts-with(concat(", "translate(@", self$lang_attribute, ", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ", "'abcdefghijklmnopqrstuvwxyz'), '-'), ", xpath_literal(search_prefix), ")]")) } else { # Regular language tag conditions <- c(conditions, paste0( "ancestor-or-self::*[@", self$lang_attribute, "][1][starts-with(concat(", "translate(@", self$lang_attribute, ", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ", "'abcdefghijklmnopqrstuvwxyz'), '-'), ", xpath_literal(paste0(tolower(value), "-")), ")]")) } } # Combine conditions with OR if (length(conditions) == 1) { xpath$add_condition(conditions[1]) } else if (length(conditions) > 1) { combined <- paste0("(", paste(conditions, collapse = " or "), ")") xpath$add_condition(combined) } xpath }, xpath_dir_function = function(xpath, fn) { # Validate all arguments are STRING, IDENT, or * (DELIM) arg_types <- fn$argument_types() valid_types <- arg_types %in% c("STRING", "IDENT") | (arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*")) if (!all(valid_types)) { stop("Expected string, ident, or * arguments for :dir(), got ", fn$arguments[[1]]$repr()) } # :dir() requires runtime directionality detection based on # document language, inherited dir attributes, and text analysis. # Not possible in static XPath, so we make it never match. xpath$add_condition("0") xpath }, xpath_link_pseudo = function(xpath) { xpath$add_condition("@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')") xpath }, xpath_disabled_pseudo = function(xpath) { xpath$add_condition( paste("(", "@disabled and", "(", "(name(.) = 'input' and @type != 'hidden') or", "name(.) = 'button' or", "name(.) = 'select' or", "name(.) = 'textarea' or", "name(.) = 'command' or", "name(.) = 'fieldset' or", "name(.) = 'optgroup' or", "name(.) = 'option'", ")", ") or (", "(", "(name(.) = 'input' and @type != 'hidden') or", "name(.) = 'button' or", "name(.) = 'select' or", "name(.) = 'textarea'", ")", "and ancestor::fieldset[@disabled]", ")")) xpath }, xpath_enabled_pseudo = function(xpath) { xpath$add_condition( paste("(@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area'))", "or", "((name(.) = 'command' or name(.) = 'fieldset' or name(.) = 'optgroup') and not(@disabled))", "or", "(((name(.) = 'input' and @type != 'hidden')", "or name(.) = 'button'", "or name(.) = 'select'", "or name(.) = 'textarea'", "or name(.) = 'keygen')", "and not (@disabled or ancestor::fieldset[@disabled]))", "or (name(.) = 'option' and not(@disabled or ancestor::optgroup[@disabled]))")) xpath } ) ) selectr/NAMESPACE0000755000176200001440000000170015110033126013123 0ustar liggesusersimport(methods) importFrom(stringr, str_locate, str_match, str_split_fixed, str_trim) importFrom(R6, R6Class) export(css_to_xpath) export(querySelector) export(querySelectorAll) export(querySelectorNS) export(querySelectorAllNS) S3method(querySelector, default) S3method(querySelectorAll, default) S3method(querySelectorNS, default) S3method(querySelectorAllNS, default) # XML package methods S3method(querySelector, XMLInternalNode) S3method(querySelector, XMLInternalDocument) S3method(querySelectorAll, XMLInternalNode) S3method(querySelectorAll, XMLInternalDocument) S3method(querySelectorNS, XMLInternalNode) S3method(querySelectorNS, XMLInternalDocument) S3method(querySelectorAllNS, XMLInternalNode) S3method(querySelectorAllNS, XMLInternalDocument) # xml2 package methods S3method(querySelector, xml_node) S3method(querySelectorAll, xml_node) S3method(querySelectorNS, xml_node) S3method(querySelectorAllNS, xml_node) selectr/inst/0000755000176200001440000000000015107410634012671 5ustar liggesusersselectr/inst/CITATION0000755000176200001440000000053615110016442014026 0ustar liggesusersbibentry( bibtype = "techreport", title = "Introducing the selectr Package", author = person("Simon", "Potter"), institution = "The University of Auckland", address = "Auckland, New Zealand", year = 2012, url = "https://stattech.wordpress.fos.auckland.ac.nz/2012/11/27/2012-10-introducing-the-selectr-package/" ) selectr/inst/demos/0000755000176200001440000000000015107001643013774 5ustar liggesusersselectr/inst/demos/svg-mathml.svg0000644000176200001440000000364615107001643016605 0ustar liggesusers a + b 2 selectr/inst/NEWS.Rd0000755000176200001440000003025215120452316013737 0ustar liggesusers\name{NEWS} \title{NEWS file for the selectr package} \encoding{UTF-8} \section{Changes in version 0.5-1}{ \subsection{BUG FIXES}{ \itemize{ \item Fixed handling of CSS unicode escapes in attribute values. This would be observed when the attribute value contained hexadecimal sequences like \code{(abcdef)} where the characters inside the parentheses were not properly escaped. This fix ensures that such sequences are correctly translated to their XPath equivalents. Thanks to André Veríssimo for reporting the issue. } } } \section{Changes in version 0.5-0}{ \subsection{NEW FEATURES}{ \itemize{ \item Added support for CSS Selectors Level 4 pseudo-classes \code{:is()}, \code{:where()}, and \code{:has()}. The \code{:is()} pseudo-class matches elements against a list of selectors, taking the maximum specificity from its arguments. The \code{:where()} pseudo-class works similarly to \code{:is()} but always has zero specificity. The \code{:has()} pseudo-class represents an element if any of the relative selectors match when anchored against that element, with specificity calculated from the maximum of its arguments. \item Added support for complex selectors in \code{:nth-child()} and \code{:nth-last-child()} using the \code{of S} syntax (e.g., \code{:nth-child(2 of .foo)}). This allows matching the nth child that matches a specific selector or selector list. \item Extended \code{:not()} to accept multiple selectors separated by commas (e.g., \code{:not(.foo, #bar)}), following CSS Selectors Level 4. Specificity is now calculated as the maximum specificity among all arguments, rather than the sum. \item Added support for additional CSS Selectors Level 4 pseudo-classes: \code{:any-link}, \code{:target-within}, and \code{:local-link}. These pseudo-classes do not match any elements in static XML/HTML documents and translate to XPath expressions that always evaluate to false. For now, most of the new Level 4 pseudo-classes that depend on dynamic document state e.g. \code{:user-valid} and \code{:placeholder-shown} are not implemented, but may be at a future date to be non-matching selectors. \item The \code{:lang()} and \code{:dir()} pseudo-classes now support multiple comma-separated arguments (e.g., \code{:lang(en, fr, de)}). \item Added \code{:matches()} as a backwards-compatible alias for \code{:is()}. } } \subsection{MINOR CHANGES}{ \itemize{ \item Improved sibling selector translation to use a more compact form. For the adjacent sibling combinator \code{a + b}, the generated XPath now uses \code{a/following-sibling::*[1][self::b]} instead of \code{a/following-sibling::*[(name() = 'b') and (position() = 1)]}. \item The descendant combinator \code{a b} now uses \code{a//b} instead of \code{a/descendant::b} for a more concise XPath. Unfortunately a similar optimisation cannot be applied to in general when attempting to replace \code{descendant-or-self::} with \code{.//a} as it would prevent root nodes being matched correctly. \item Improved validation of CSS selector arguments. Better error messages are now provided when pseudo-elements appear inside functional pseudo-classes where they are not permitted (e.g., inside \code{:is()}, \code{:matches()}, \code{:where()}, or \code{:has()}). \item Enhanced input validation for \code{:lang()} and \code{:dir()} pseudo-classes to ensure proper argument formatting and to reject invalid or empty language tags. \item Improved handling of edge cases in selector parsing, including better validation of class selector syntax and more robust handling of null or missing element components. \item Simplified method registration for XML and xml2 objects. No longer necessary to hook into package load/unload events. } } } \section{Changes in version 0.4-2}{ \subsection{MINOR CHANGES}{ \itemize{ \item Improve handling of vectors of length > 1 in logical comparison. Contributed by Garrick Aden-Buie. \item Minor improvements to error message construction. Contributed by Michael Chirico. } } } \section{Changes in version 0.4-1}{ \subsection{BUG FIXES}{ \itemize{ \item When the \pkg{R.oo} package is attached, the use of class selectors no longer worked. This is due to the use of the \code{Class} name for \pkg{R.oo}'s base class object, where \pkg{selectr} was also using it (but not exporting) the same name of \code{Class} for representing a class selector. Consequently, \pkg{selectr}'s code was changed to rename the class to avoid any clashing. Because it was not exported, this is purely an internal change. Thanks to Francois Lemaire-Sicre for reporting the issue. } } } \section{Changes in version 0.4-0}{ \subsection{MINOR CHANGES}{ \itemize{ \item Large rewrite of internals to use the R6 OO system instead of Reference Classes. This does not affect any external facing code as the results should be identical to the previous implementation, which is why this change is marked as minor. Initial and crude performance testing (by running the test suite) indicates that the R6 implementation is approximately twice as fast at generating XPath as the Reference Classes implementation. \item The minimum required version of R for \pkg{selectr} has been increased from \code{2.15.2} to \code{3.0} as that is the minimum required version of \pkg{R6}. \item Minor performance enhancements have been made. Not only is \pkg{R6} faster than Reference Classes, the use of string formatting has been replaced with string concatenation. Additionally dynamic calling of methods via \code{do.call()} has been replaced with direct method calls. } } \subsection{BUG FIXES}{ \itemize{ \item The issues in previous releases where methods can sometimes be missing should now be resolved. The bug appeared to lie in core Reference Classes code. By switching to \pkg{R6}, this type of issue should no longer be possible. } } } \section{Changes in version 0.3-2}{ \subsection{MINOR CHANGES}{ \itemize{ \item Improved method registration for \pkg{XML} and \pkg{xml2} objects. Avoids checks on each use and is only performed once per dependent package load/unload. } } \subsection{BUG FIXES}{ \itemize{ \item In some environments, reference class methods were missing at runtime. This appears to be due to some internal behaviour in them \pkg{methods} package where methods are registered on an objects when the \code{$} operator is used for a field or method. Instead, when a method is missing, they are manually bound to the object. } } } \section{Changes in version 0.3-1}{ \subsection{MINOR CHANGES}{ \itemize{ \item Enabled partial matching on the translator argument to \code{css_to_xpath()}. Instead of defaulting to a generic translator, a non-matching argument will be returned with an error. \item Introduced many more unit tests via the \pkg{covr} package. This enabled dead code to be trimmed and also identified areas of code which needed improvement. Minor enhancements include: tolerate whitespace within a \code{:not()}, more consistent results returned from parser methods, improvements to argument parsing. } } \subsection{BUG FIXES}{ \itemize{ \item The \code{|=} attribute matching operator was not being parsed correctly for the generic translator. \item Handle scenario where a CSS comment is unclosed. Results in everything after the comment start to be removed (which may or may not result in a valid selector). } } } \section{Changes in version 0.3-0}{ \subsection{MAJOR CHANGES}{ \itemize{ \item Added support for documents from the \pkg{xml2} package. \item selectr now also does not strictly depend on the XML package. If either the \pkg{XML} or \pkg{xml2} packages are present (which are required for the \code{querySelector} methods to work) then \code{querySelector} will begin to work for them. This also enables selectr to be used for translation-only. } } \subsection{BUG FIXES}{ \itemize{ \item Improve support for nth-*(an+b) selectors. Ported from cssselect. } } } \section{Changes in version 0.2-3}{ \subsection{MINOR CHANGES}{ \itemize{ \item Code cleanup contributed by Kun Ren (#1). \item Updated DESCRIPTION to include URL and BugReports fields. Also update email address. } } \subsection{BUG FIXES}{ \itemize{ \item Fix behaviour for nth-*(an+b) pseudo-class selectors for negative a's. Contributed to cssselect by Paul Tremberth, ported to R. \item Escape delimiting characters to support new version of the stringr package. Probably should have been done in the first place. Reported by Hadley Wickham (#5). } } } \section{Changes in version 0.2-2}{ \subsection{MINOR CHANGES}{ \itemize{ \item Corrected licence to BSD 3 clause. This was the licence in use previously, but has now been made more explicit. \item Removed 'Enhances' field because we import functions from \pkg{XML}. This choice is made because \pkg{XML} is a required package, rather than an optional package that can be worked with. This and the previous change have been made to keep up with recent changes in R-devel. } } } \section{Changes in version 0.2-1}{ \subsection{MINOR FEATURES}{ \itemize{ \item Added a 'CITATION' file which cites a technical report on the package. \item \code{show()} methods are now available on internal objects, making interactive extensibility and bug-fixing easier. This is simply wrapping the \code{repr()} methods (mirroring the Python source) that the same objects have. } } \subsection{BUG FIXES}{ \itemize{ \item Use the session character encoding to determine whether to run unicode tests. Tests break in non-unicode sessions otherwise. } } } \section{Changes in version 0.2-0}{ \subsection{NEW FEATURES}{ \itemize{ \item Introduced new functions \code{querySelectorNS()} and \code{querySelectorAllNS()} to ease the use of namespaces within a document. Previously this would have required knowledge of XPath. } } \subsection{BUG FIXES}{ \itemize{ \item Fix meaning of \code{:empty}, whitespace is not empty. \item Use \code{lang()} for XML documents with the \code{:lang()} CSS selector. \item \code{|ident} no longer produces a parsing error, but is now equivalent to just 'ident'. } } } \section{Changes in version 0.1-1}{ \subsection{BUG FIXES}{ \itemize{ \item Now testing unicode only in non-Windows platforms on package check. Output should still be consistent, just depends on the current charset being unicode. } } } \section{Changes in version 0.1-0}{ \subsection{NEW FEATURES}{ \itemize{ \item Initial port of the Python 'cssselect' package. Code is very literally ported, including the test suite. \item Wrapped translation functionality into a single function, \code{css_to_xpath()}. \item Created two convenience functions, \code{querySelector()} and \code{querySelectorAll()}. These mirror the behaviour of the same functions present in a web browser. \code{querySelector()} returns a node, while \code{querySelectorAll()} returns a list of nodes. } } } selectr/LICENCE0000755000176200001440000000013215061512244012677 0ustar liggesusersYEAR: 2022 COPYRIGHT HOLDER: Simon Potter, Simon Sapin, Ian Bicking ORGANIZATION: None selectr/README.md0000755000176200001440000000446715110016161013200 0ustar liggesusers# selectr [![License (3-Clause BSD)](https://img.shields.io/badge/license-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) [![GitHub Actions](https://github.com/sjp/selectr/actions/workflows/r.yml/badge.svg)](https://github.com/sjp/selectr/actions/workflows/r.yml) [![CRAN version](https://www.r-pkg.org/badges/version/selectr)](https://cran.r-project.org/package=selectr) [![codecov](https://codecov.io/gh/sjp/selectr/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sjp/selectr) ![Downloads per month](https://cranlogs.r-pkg.org/badges/last-month/selectr) selectr is a package which makes working with HTML and XML documents easier. It does this by performing translation of CSS selectors into XPath expressions so that you can query `XML` and `xml2` documents easily. ``` r library(selectr) xpath <- css_to_xpath("#selectr") xpath #> [1] "descendant-or-self::*[@id = 'selectr']" ``` ## Installation ### Install the release version from CRAN ``` r install.packages("selectr") ``` ### Install the development version from GitHub ``` r # install.packages("devtools") devtools::install_github("sjp/selectr") ``` ## Overview The key functions in selectr are: * Translate a CSS selector into an XPath expression with `css_to_xpath()`. * Query an `XML` or `xml2` document with `querySelector()` and its variants. * Find the first matching node with `querySelector()`. * Find all matching nodes with `querySelectorAll()`. * Find the first matching node in a namespaced document with `querySelectorNS()`. * Find all matching nodes in a namespaced document with `querySelectorAllNS()`. ## Examples Here is a simple example to demonstrate how to query an `XML` or `xml2` document with `querySelector()`. ``` r library(selectr) xmlText <- '' library(XML) doc <- xmlParse(xmlText) querySelector(doc, "baz") #> querySelectorAll(doc, "baz") #> [[1]] #> #> #> [[2]] #> #> #> attr(,"class") #> [1] "XMLNodeSet" library(xml2) doc <- read_xml(xmlText) querySelector(doc, "baz") #> {xml_node} #> querySelectorAll(doc, "baz") #> {xml_nodeset (2)} #> [1] #> [2] ``` selectr/man/0000755000176200001440000000000015061512244012466 5ustar liggesusersselectr/man/querySelectorAll.Rd0000755000176200001440000001441615110016365016263 0ustar liggesusers\name{querySelectorAll} \alias{querySelector} \alias{querySelectorAll} \alias{querySelectorNS} \alias{querySelectorAllNS} \title{ Find nodes that match a group of CSS selectors in an XML tree. } \description{ The purpose of these functions is to mimic the functionality of the \code{querySelector} and \code{querySelectorAll} functions present in Internet browsers. This is so we can succinctly query an XML tree for nodes matching a CSS selector. Namespaced functions \code{querySelectorNS} and \code{querySelectorAllNS} are also provided to search relative to a given namespace. } \usage{ querySelector(doc, selector, ns = NULL, ...) querySelectorAll(doc, selector, ns = NULL, ...) querySelectorNS(doc, selector, ns, prefix = "descendant-or-self::", ...) querySelectorAllNS(doc, selector, ns, prefix = "descendant-or-self::", ...) } \arguments{ \item{doc}{ The XML document or node to be evaluated against. } \item{selector}{ A selector used to query \code{doc}. This must be a single character string. } \item{ns}{ The namespace that the query will be filtered to. This is a named list or vector which has as its name a namespace, and its value is the namespace URI. This can be ignored for the un-namespaced functions. } \item{prefix}{ The prefix to apply to the resulting XPath expression. The default or \code{""} are most commonly used. } \item{...}{ Parameters to be passed onto \code{css_to_xpath}. } } \details{ The \code{querySelectorNS} and \code{querySelectorAllNS} functions are convenience functions for working with namespaced documents. They filter out all content that does not belong within the given namespaces. Note that when searching for particular elements in a selector, they must have a namespace prefix, e.g. \code{"svg|g"}. The namespace argument, \code{ns}, is simply passed on to \code{\link[XML]{getNodeSet}} or \code{\link[xml2]{xml_find_all}} if it is necessary to use a namespace present within the document. This can be ignored for content lacking a namespace, which is usually the case when using \code{querySelector} or \code{querySelectorAll}. } \value{ For \code{querySelector}, the result is a single node that represents the first matched node from a selector. If no matching nodes are found, \code{NULL} is returned. For \code{querySelectorAll}, the result is a list of XML nodes. This list may be empty in the case that no match is found. The \code{querySelectorNS} and \code{querySelectorAllNS} functions return the same type of content as their un-namespaced counterparts. } \references{ CSS Selectors Level 4 \url{https://www.w3.org/TR/selectors-4/}, XPath \url{https://www.w3.org/TR/xpath/}, querySelectorAll \url{https://developer.mozilla.org/en-US/docs/DOM/Document.querySelectorAll} and \url{https://www.w3.org/TR/selectors-api/#interface-definitions}. } \author{ Simon Potter } \examples{ hasXML <- require(XML) hasxml2 <- require(xml2) if (!hasXML && !hasxml2) return() # can't demo without XML or xml2 packages present parseFn <- if (hasXML) xmlParse else read_xml # Demo for working with the XML package (if present, otherwise xml2) exdoc <- parseFn('') querySelector(exdoc, "#anid") # Returns the matching node querySelector(exdoc, ".aclass") # Returns the matching node querySelector(exdoc, "b, c") # First match from grouped selection querySelectorAll(exdoc, "b, c") # Grouped selection querySelectorAll(exdoc, "b") # A list of length one querySelector(exdoc, "d") # No match querySelectorAll(exdoc, "d") # No match # Read in a document where two namespaces are being set: # SVG and MathML svgdoc <- parseFn(system.file("demos/svg-mathml.svg", package = "selectr")) # Search for