selectr/ 0000755 0001762 0000144 00000000000 15120470422 011710 5 ustar ligges users selectr/tests/ 0000755 0001762 0000144 00000000000 15120446446 013063 5 ustar ligges users selectr/tests/testthat/ 0000755 0001762 0000144 00000000000 15120452724 014717 5 ustar ligges users selectr/tests/testthat/test-querySelector-XML.R 0000755 0001762 0000144 00000014124 15107555231 021332 0 ustar ligges users context("querySelector-XML")
test_that("querySelector returns a single node or NULL", {
library(XML)
doc <- xmlRoot(xmlParse(''))
p <- function(x) {
if (is.null(x))
return(x)
saveXML(x, file = NULL)
}
expect_that(p(querySelector(doc, "a")),
equals(p(getNodeSet(doc, "//a")[[1]])))
expect_that(p(querySelector(doc, "*", prefix = "")),
equals(p(getNodeSet(doc, "*")[[1]])))
expect_that(p(querySelector(doc, "d")), equals(NULL))
expect_that(p(querySelector(doc, "c")), equals(p(getNodeSet(doc, "//c")[[1]])))
# do the same again but on the xml doc itself
doc <- xmlParse('')
expect_that(p(querySelector(doc, "a")),
equals(p(getNodeSet(xmlRoot(doc), "//a")[[1]])))
expect_that(p(querySelector(doc, "*", prefix = "")),
equals(p(getNodeSet(xmlRoot(doc), "*")[[1]])))
expect_that(p(querySelector(doc, "d")), equals(NULL))
expect_that(p(querySelector(doc, "c")), equals(p(getNodeSet(xmlRoot(doc), "//c")[[1]])))
})
test_that("querySelectorAll returns expected nodes", {
library(XML)
doc <- xmlRoot(xmlParse(''))
p <- function(x) {
lapply(x, function(node) saveXML(node, file = NULL))
}
expect_that(p(querySelectorAll(doc, "a")),
equals(p(getNodeSet(doc, "//a"))))
expect_that(p(querySelectorAll(doc, "*", prefix = "")),
equals(p(getNodeSet(doc, "*"))))
expect_that(p(querySelectorAll(doc, "c")),
equals(p(getNodeSet(doc, "//c"))))
# do the same again but on the xml doc itself
doc <- xmlParse('')
expect_that(p(querySelectorAll(doc, "a")),
equals(p(getNodeSet(xmlRoot(doc), "//a"))))
expect_that(p(querySelectorAll(doc, "*", prefix = "")),
equals(p(getNodeSet(xmlRoot(doc), "*"))))
expect_that(p(querySelectorAll(doc, "c")),
equals(p(getNodeSet(xmlRoot(doc), "//c"))))
})
test_that("querySelectorAll returns empty list for no match", {
library(XML)
doc <- xmlRoot(xmlParse(''))
p <- function(x) {
lapply(x, function(node) saveXML(node, file = NULL))
}
expect_that(p(querySelectorAll(doc, "d")),
equals(p(getNodeSet(doc, "//d"))))
})
test_that("querySelector handles namespaces", {
library(XML)
doc <- xmlRoot(xmlParse(''))
p <- function(x) {
if (is.null(x)) x else saveXML(x, file = NULL)
}
expect_that(querySelector(doc, "circle"), equals(NULL))
expect_that(querySelector(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg")),
equals(NULL))
expect_that(p(querySelector(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))[[1]])))
# now with querySelectorNS
expect_that(querySelectorNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg")), equals(NULL))
expect_that(p(querySelectorNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg"))[[1]])))
})
test_that("querySelectorAll handles namespaces", {
library(XML)
doc <- xmlRoot(xmlParse(''))
p <- function(x) {
lapply(x, function(node) saveXML(node, file = NULL))
}
expect_that(p(querySelectorAll(doc, "circle")),
equals(p(getNodeSet(doc, "//circle"))))
expect_that(p(querySelectorAll(doc, "circle", ns = c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//circle", namespaces = c(svg = "http://www.w3.org/2000/svg")))))
expect_that(p(querySelectorAll(doc, "svg|circle", ns = c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg")))))
# now with querySelectorAllNS
expect_that(p(querySelectorAllNS(doc, "circle", c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//circle", namespaces = c(svg = "http://www.w3.org/2000/svg")))))
expect_that(p(querySelectorAllNS(doc, "svg|circle", c(svg = "http://www.w3.org/2000/svg"))),
equals(p(getNodeSet(doc, "//svg:circle", namespaces = c(svg = "http://www.w3.org/2000/svg")))))
})
test_that("querySelector methods handle invalid arguments", {
library(XML)
doc <- xmlParse('')
expect_error(querySelector(doc), "A valid selector (character vector) must be provided.", fixed = TRUE)
expect_error(querySelectorAll(doc), "A valid selector (character vector) must be provided.", fixed = TRUE)
expect_error(querySelectorNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE)
expect_error(querySelectorAllNS(doc), "A valid selector (character vector) must be provided.", fixed = TRUE)
expect_error(querySelectorNS(doc, "a"), "A namespace must be provided.", fixed = TRUE)
expect_error(querySelectorNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE)
expect_error(querySelectorNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE)
expect_error(querySelectorAllNS(doc, "a"), "A namespace must be provided.", fixed = TRUE)
expect_error(querySelectorAllNS(doc, "a", NULL), "A namespace must be provided.", fixed = TRUE)
expect_error(querySelectorAllNS(doc, "a", character(0)), "A namespace must be provided.", fixed = TRUE)
})
selectr/tests/testthat/test-xpath.R 0000755 0001762 0000144 00000016457 15107555231 017165 0 ustar ligges users context("xpath")
test_that("XPathExpr objects print correctly", {
shw <- function(x) trimws(capture.output(x$show()))
xp <- XPathExpr$new()
expect_that(xp$repr(), equals("XPathExpr[*]"))
expect_that(shw(xp), equals("XPathExpr[*]"))
xp <- XPathExpr$new("//")
expect_that(xp$repr(), equals("XPathExpr[//*]"))
expect_that(shw(xp), equals("XPathExpr[//*]"))
xp <- XPathExpr$new(element = "a")
expect_that(xp$repr(), equals("XPathExpr[a]"))
expect_that(shw(xp), equals("XPathExpr[a]"))
xp <- XPathExpr$new("//a/", "b")
expect_that(xp$repr(), equals("XPathExpr[//a/b]"))
expect_that(shw(xp), equals("XPathExpr[//a/b]"))
})
test_that("Generic translator validates language arguments", {
translator <- GenericTranslator$new()
expect_that(translator$css_to_xpath("xml:lang(en)"), equals("descendant-or-self::xml[(lang('en'))]"))
expect_that(translator$css_to_xpath("xml:lang(en-nz)"), equals("descendant-or-self::xml[(lang('en-nz'))]"))
expect_error(translator$css_to_xpath("xml:lang()"), "Expected at least one argument.*")
expect_error(translator$css_to_xpath("xml:lang(1)"), "Expected string, ident, or \\* arguments.*")
# Multiple languages with OR logic
expect_that(translator$css_to_xpath("xml:lang(en, fr)"), equals("descendant-or-self::xml[((lang('en') or lang('fr')))]"))
expect_that(translator$css_to_xpath("xml:lang(en, de, fr)"), equals("descendant-or-self::xml[((lang('en') or lang('de') or lang('fr')))]"))
})
test_that("HTML translator validates language arguments", {
translator <- HTMLTranslator$new()
expect_that(translator$css_to_xpath("html:lang(en)"), equals("descendant-or-self::html[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')])]"))
expect_that(translator$css_to_xpath("html:lang(en-nz)"), equals("descendant-or-self::html[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-nz-')])]"))
expect_error(translator$css_to_xpath("html:lang()"), "Expected at least one argument.*")
expect_error(translator$css_to_xpath("html:lang(1)"), "Expected string, ident, or \\* arguments.*")
# Multiple languages with OR logic
expect_that(translator$css_to_xpath("html:lang(en, fr)"),
equals("descendant-or-self::html[((ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')] or ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'fr-')]))]"))
})
test_that("Generic translator handles :lang() wildcards and comma lists", {
translator <- GenericTranslator$new()
# Simple languages still work
expect_that(translator$css_to_xpath("div:lang(en)"), equals("descendant-or-self::div[(lang('en'))]"))
# Wildcard * matches everything
expect_that(translator$css_to_xpath('div:lang(*)'), equals("descendant-or-self::div[(true())]"))
# Wildcard suffix like en-* for prefix matching
expect_that(translator$css_to_xpath('div:lang(en-*)'), equals("descendant-or-self::div[(lang('en-'))]"))
expect_that(translator$css_to_xpath('div:lang(fr-*)'), equals("descendant-or-self::div[(lang('fr-'))]"))
# Comma-separated lists with OR logic
expect_that(translator$css_to_xpath('div:lang(en, fr)'), equals("descendant-or-self::div[((lang('en') or lang('fr')))]"))
expect_that(translator$css_to_xpath('div:lang(en, de, fr)'), equals("descendant-or-self::div[((lang('en') or lang('de') or lang('fr')))]"))
# Mixed wildcards and regular languages
expect_that(translator$css_to_xpath('div:lang(en-*, fr)'), equals("descendant-or-self::div[((lang('en-') or lang('fr')))]"))
expect_that(translator$css_to_xpath('div:lang(*, de)'), equals("descendant-or-self::div[((true() or lang('de')))]"))
})
test_that("HTML translator handles :lang() wildcards and comma lists", {
translator <- HTMLTranslator$new()
# Wildcard * matches any element with lang attribute
expect_that(translator$css_to_xpath('div:lang(*)'), equals("descendant-or-self::div[(ancestor-or-self::*[@lang])]"))
# Wildcard suffix for prefix matching
expect_that(translator$css_to_xpath('div:lang(en-*)'),
equals("descendant-or-self::div[(ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')])]"))
# Multiple values with OR logic
expect_that(translator$css_to_xpath('div:lang(en, fr)'),
equals("descendant-or-self::div[((ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'en-')] or ancestor-or-self::*[@lang][1][starts-with(concat(translate(@lang, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), '-'), 'fr-')]))]"))
})
test_that("Generic translator handles :dir() function", {
translator <- GenericTranslator$new()
# :dir() uses "never matches" pattern (requires runtime directionality detection)
expect_that(translator$css_to_xpath("div:dir(ltr)"), equals("descendant-or-self::div[(0)]"))
expect_that(translator$css_to_xpath("div:dir(rtl)"), equals("descendant-or-self::div[(0)]"))
expect_that(translator$css_to_xpath(":dir(ltr)"), equals("descendant-or-self::*[(0)]"))
expect_error(translator$css_to_xpath("div:dir()"), "Expected at least one argument.*")
expect_error(translator$css_to_xpath("div:dir(1)"), "Expected string, ident, or \\* arguments.*")
})
test_that("HTML translator handles :dir() function", {
translator <- HTMLTranslator$new()
# :dir() uses "never matches" pattern (requires runtime directionality detection)
expect_that(translator$css_to_xpath("div:dir(ltr)"), equals("descendant-or-self::div[(0)]"))
expect_that(translator$css_to_xpath("div:dir(rtl)"), equals("descendant-or-self::div[(0)]"))
expect_that(translator$css_to_xpath(":dir(ltr)"), equals("descendant-or-self::*[(0)]"))
expect_error(translator$css_to_xpath("div:dir()"), "Expected at least one argument.*")
expect_error(translator$css_to_xpath("div:dir(1)"), "Expected string, ident, or \\* arguments.*")
})
test_that("unimplemented methods throw errors", {
translator <- GenericTranslator$new()
expect_error(translator$css_to_xpath("*:nth-of-type(2n)"), ".* is not implemented")
expect_error(translator$css_to_xpath("*:nth-last-of-type(2n)"), ".* is not implemented")
expect_error(translator$css_to_xpath("*:first-of-type"), ".* is not implemented")
expect_error(translator$css_to_xpath("*:last-of-type"), ".* is not implemented")
expect_error(translator$css_to_xpath("*:only-of-type"), ".* is not implemented")
})
test_that("contains method only takes string arguments", {
expect_that(css_to_xpath("a:contains(b)"),
equals("descendant-or-self::a[(contains(., 'b'))]"))
expect_that(css_to_xpath("a:contains('b')"),
equals("descendant-or-self::a[(contains(., 'b'))]"))
expect_error(css_to_xpath("a:contains(1)"),
"Expected a single string or ident for :contains\\(\\), got .*")
})
selectr/tests/testthat/test-main.R 0000755 0001762 0000144 00000010522 15107555231 016750 0 ustar ligges users context("main")
# We know that the results are correct via other tests, just check that
# this produces the correct results with respect to its arguments
test_that("css_to_xpath vectorises arguments", {
expect_that(css_to_xpath("a b"), equals("descendant-or-self::a//b"))
expect_that(css_to_xpath("a b", prefix = ""), equals("a//b"))
expect_that(css_to_xpath("a b", prefix = c("descendant-or-self::", "")), equals(c("descendant-or-self::a//b", "a//b")))
expect_that(css_to_xpath("a:checked", prefix = "", translator = c("generic", "html", "xhtml")),
equals(c("a[(0)]", "a[((@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) = 'command')and (@type = 'checkbox' or @type = 'radio')))]", "a[((@selected and name(.) = 'option') or (@checked and (name(.) = 'input' or name(.) = 'command')and (@type = 'checkbox' or @type = 'radio')))]")))
expect_that(css_to_xpath(c("a b", "b c"), prefix = ""), equals(c("a//b", "b//c")))
})
test_that("css_to_xpath handles bad arguments", {
# must have a selector arg provided
expect_error(css_to_xpath(), "A valid selector (character vector) must be provided.", fixed = TRUE)
expect_error(css_to_xpath(NULL), "A valid selector (character vector) must be provided.", fixed = TRUE)
# should complain about incorrect vector type
expect_error(css_to_xpath(1), "The 'selector' argument.*")
expect_error(css_to_xpath("a", prefix = 1), "The 'prefix' argument.*")
expect_error(css_to_xpath("a", translator = 1), "The 'translator' argument.*")
# should strip the NA values out
expect_that(length(css_to_xpath(c("a", NA))), equals(1))
expect_that(length(css_to_xpath("a", prefix = c("", NA))), equals(1))
expect_that(length(css_to_xpath("a", translator = c("generic", NA))), equals(1))
# expect NAs to be stripped out resulting in zero length args (unusable)
expect_error(css_to_xpath(NA_character_), "Zero length character vector.*")
expect_error(css_to_xpath("a", prefix = NA_character_), "Zero length character vector.*")
expect_error(css_to_xpath("a", translator = NA_character_), "Zero length character vector.*")
# performs partial matching
expect_that(css_to_xpath("a", translator = "g"),
equals("descendant-or-self::a"))
expect_that(css_to_xpath("a", translator = "gEnErIC"),
equals("descendant-or-self::a"))
expect_that(css_to_xpath("a", translator = "h"),
equals("descendant-or-self::a"))
expect_that(css_to_xpath("a", translator = "x"),
equals("descendant-or-self::a"))
expect_that(css_to_xpath("a", translator = c("g", "h", "x")),
equals(rep("descendant-or-self::a", 3)))
# errors anything not matching generic, html, xhtml
expect_error(css_to_xpath("a", translator = ""), "'arg' should be one of.*")
expect_error(css_to_xpath("a", translator = "a"), "'arg' should be one of.*")
expect_error(css_to_xpath("a", translator = c("generic", "a")), "'arg' should be one of.*")
})
test_that("namespace handling works correctly", {
# formatNS must return a NULL or a named vector
expect_that(formatNS(NULL), equals(NULL))
expect_that(formatNS(list(a = "b")), equals(c(a = "b")))
expect_that(formatNS(c(a = "b")), equals(c(a = "b")))
# bad input handling
expect_error(formatNS(1), "A namespace object must be.*")
expect_error(formatNS(TRUE), "A namespace object must be.*")
expect_error(formatNS("a"), "The namespace object either missing some or all names.*")
expect_error(formatNS(c(a = "a", "b")), "The namespace object either missing some or all names.*")
tmp <- letters
names(tmp) <- letters[1:5]
expect_error(formatNS(tmp), "The namespace object either missing some or all names.*")
expect_error(formatNS(list(a = 1, b = 2)), "The values in the namespace object.*")
# formatNSPrefix must return a pipe separated string of namespace prefixes
expect_that(formatNSPrefix(c(svg = "svg"), ""), equals("(//svg:*)/"))
expect_that(formatNSPrefix(c(svg = "svg"), "asd"), equals("(//svg:*)/asd"))
expect_that(formatNSPrefix(c(svg = "svg", math = "mathml"), ""), equals("(//svg:*|//math:*)/"))
expect_that(formatNSPrefix(c(svg = "svg", math = "mathml"), "asd"), equals("(//svg:*|//math:*)/asd"))
})
selectr/tests/testthat/test-nth-child.R 0000755 0001762 0000144 00000053737 15107555231 017715 0 ustar ligges users context(":nth-child() and :nth-last-child() pseudo-classes")
test_that(":nth-child() generates correct XPath", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# :nth-child(1) - first child
result <- xpath("li:nth-child(1)")
expect_true(grepl("count\\(preceding-sibling::\\*\\) = 0", result))
# :nth-child(2) - second child
result <- xpath("li:nth-child(2)")
expect_true(grepl("count\\(preceding-sibling::\\*\\) = 1", result))
# :nth-child(odd) - odd children
result <- xpath("li:nth-child(odd)")
expect_true(grepl("count\\(preceding-sibling::\\*\\)", result))
expect_true(grepl("mod 2", result))
# :nth-child(even) - even children
result <- xpath("li:nth-child(even)")
expect_true(grepl("count\\(preceding-sibling::\\*\\)", result))
expect_true(grepl("mod 2", result))
# :nth-child(2n) - every 2nd child (even)
result <- xpath("li:nth-child(2n)")
expect_true(grepl("count\\(preceding-sibling::\\*\\)", result))
# :nth-child(3n+1) - every 3rd starting from 1st
result <- xpath("li:nth-child(3n+1)")
expect_true(grepl("count\\(preceding-sibling::\\*\\)", result))
# :nth-child(n) - all children (simplifies to just the element)
result <- xpath("li:nth-child(n)")
expect_that(result, equals("li"))
# :nth-child(-n+3) - first 3 children
result <- xpath("li:nth-child(-n+3)")
expect_true(grepl("count\\(preceding-sibling::\\*\\)", result))
})
test_that(":nth-last-child() generates correct XPath", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# :nth-last-child(1) - last child
result <- xpath("li:nth-last-child(1)")
expect_true(grepl("count\\(following-sibling::\\*\\) = 0", result))
# :nth-last-child(2) - second from last
result <- xpath("li:nth-last-child(2)")
expect_true(grepl("count\\(following-sibling::\\*\\) = 1", result))
# :nth-last-child(odd) - odd from end
result <- xpath("li:nth-last-child(odd)")
expect_true(grepl("count\\(following-sibling::\\*\\)", result))
expect_true(grepl("mod 2", result))
# :nth-last-child(even) - even from end
result <- xpath("li:nth-last-child(even)")
expect_true(grepl("count\\(following-sibling::\\*\\)", result))
expect_true(grepl("mod 2", result))
# :nth-last-child(-n+2) - last 2 children
result <- xpath("li:nth-last-child(-n+2)")
expect_true(grepl("count\\(following-sibling::\\*\\)", result))
})
test_that(":nth-child() works correctly with XML documents", {
library(XML)
html <- paste0(
'',
'
',
'
Item 1
',
'
Item 2
',
'
Item 3
',
'
Item 4
',
'
Item 5
',
'
',
'
',
'
A
',
'
B
',
'
C
',
'
',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# First child
expect_that(get_ids("li:nth-child(1)"),
equals(c("li1", "li6")))
# Second child
expect_that(get_ids("li:nth-child(2)"),
equals(c("li2", "li7")))
# Third child
expect_that(get_ids("li:nth-child(3)"),
equals(c("li3", "li8")))
# Odd children (1, 3, 5)
expect_that(get_ids("li:nth-child(odd)"),
equals(c("li1", "li3", "li5", "li6", "li8")))
# Even children (2, 4)
expect_that(get_ids("li:nth-child(even)"),
equals(c("li2", "li4", "li7")))
# Every 2nd child starting from 2 (same as even)
expect_that(get_ids("li:nth-child(2n)"),
equals(c("li2", "li4", "li7")))
# Every 2nd child starting from 1 (same as odd)
expect_that(get_ids("li:nth-child(2n+1)"),
equals(c("li1", "li3", "li5", "li6", "li8")))
# Every 3rd child starting from 1 (1, 4)
expect_that(get_ids("li:nth-child(3n+1)"),
equals(c("li1", "li4", "li6")))
# Every 3rd child starting from 2 (2, 5)
expect_that(get_ids("li:nth-child(3n+2)"),
equals(c("li2", "li5", "li7")))
# First 3 children
expect_that(get_ids("li:nth-child(-n+3)"),
equals(c("li1", "li2", "li3", "li6", "li7", "li8")))
# All children (n matches all positive integers)
all_ids <- get_ids("li:nth-child(n)")
expect_that(length(all_ids), equals(8))
})
test_that(":nth-last-child() works correctly with XML documents", {
library(XML)
html <- paste0(
'',
'
',
'
Item 1
',
'
Item 2
',
'
Item 3
',
'
Item 4
',
'
Item 5
',
'
',
'
',
'
A
',
'
B
',
'
C
',
'
',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# Last child
expect_that(get_ids("li:nth-last-child(1)"),
equals(c("li5", "li8")))
# Second from last
expect_that(get_ids("li:nth-last-child(2)"),
equals(c("li4", "li7")))
# Third from last
expect_that(get_ids("li:nth-last-child(3)"),
equals(c("li3", "li6")))
# Odd from end (last=1, 3rd-last=3, 5th-last=5)
expect_that(get_ids("li:nth-last-child(odd)"),
equals(c("li1", "li3", "li5", "li6", "li8")))
# Even from end (2nd-last=2, 4th-last=4)
expect_that(get_ids("li:nth-last-child(even)"),
equals(c("li2", "li4", "li7")))
# Last 2 children
expect_that(get_ids("li:nth-last-child(-n+2)"),
equals(c("li4", "li5", "li7", "li8")))
# Last 3 children
expect_that(get_ids("li:nth-last-child(-n+3)"),
equals(c("li3", "li4", "li5", "li6", "li7", "li8")))
})
test_that(":nth-child() works correctly with xml2 documents", {
library(xml2)
html <- paste0(
'',
'
',
'
Item 1
',
'
Item 2
',
'
Item 3
',
'
Item 4
',
'
Item 5
',
'
',
'
',
'
A
',
'
B
',
'
C
',
'
',
''
)
doc <- read_xml(html)
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# First child
expect_that(get_ids("li:nth-child(1)"),
equals(c("li1", "li6")))
# Second child
expect_that(get_ids("li:nth-child(2)"),
equals(c("li2", "li7")))
# Odd children
expect_that(get_ids("li:nth-child(odd)"),
equals(c("li1", "li3", "li5", "li6", "li8")))
# Even children
expect_that(get_ids("li:nth-child(even)"),
equals(c("li2", "li4", "li7")))
# First 3 children
expect_that(get_ids("li:nth-child(-n+3)"),
equals(c("li1", "li2", "li3", "li6", "li7", "li8")))
})
test_that(":nth-last-child() works correctly with xml2 documents", {
library(xml2)
html <- paste0(
'',
'
',
'
Item 1
',
'
Item 2
',
'
Item 3
',
'
Item 4
',
'
Item 5
',
'
',
''
)
doc <- read_xml(html)
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# Last child
expect_that(get_ids("li:nth-last-child(1)"),
equals("li5"))
# Second from last
expect_that(get_ids("li:nth-last-child(2)"),
equals("li4"))
# Last 2 children
expect_that(get_ids("li:nth-last-child(-n+2)"),
equals(c("li4", "li5")))
})
test_that(":nth-child() and :nth-last-child() can be combined", {
library(XML)
html <- paste0(
'',
'
',
'
1
',
'
2
',
'
3
',
'
4
',
'
5
',
'
',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# Second child AND second from last (middle element in list of 5)
expect_that(get_ids("li:nth-child(2):nth-last-child(4)"),
equals("li2"))
# Middle element (3rd child AND 3rd from last)
expect_that(get_ids("li:nth-child(3):nth-last-child(3)"),
equals("li3"))
# First child that's also last child (only child)
# This won't match in our test case since we have 5 items
expect_that(length(querySelectorAll(doc, "li:nth-child(1):nth-last-child(1)")),
equals(0))
})
test_that(":nth-child() edge cases", {
library(XML)
# Empty list
html1 <- '
'
doc2 <- xmlRoot(xmlParse(html2))
# Should match as first child
result <- querySelectorAll(doc2, "li:nth-child(1)")
expect_that(length(result), equals(1))
expect_that(xmlGetAttr(result[[1]], "id"), equals("only"))
# Should also match as last child
result2 <- querySelectorAll(doc2, "li:nth-last-child(1)")
expect_that(length(result2), equals(1))
expect_that(xmlGetAttr(result2[[1]], "id"), equals("only"))
# Mixed element types
html3 <- paste0(
'',
'
',
'
Para
',
'
Div
',
'
Para
',
' Span',
'
',
''
)
doc3 <- xmlRoot(xmlParse(html3))
get_ids <- function(css) {
results <- querySelectorAll(doc3, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# First child (p element that is first child)
expect_that(get_ids("p:nth-child(1)"),
equals("p1"))
# Second child (div element that is second child)
expect_that(get_ids("div:nth-child(2)"),
equals("d1"))
# All p elements that are odd children
expect_that(get_ids("p:nth-child(odd)"),
equals(c("p1", "p2")))
})
test_that(":nth-child() with querySelector returns first match", {
library(xml2)
html <- paste0(
'',
'
',
'
1
',
'
2
',
'
',
'
',
'
3
',
'
4
',
'
',
''
)
doc <- read_xml(html)
# Should return first element that's a first child (li1)
result <- querySelector(doc, "li:nth-child(1)")
expect_that(xml_attr(result, "id"), equals("li1"))
# Should return first element that's a second child (li2)
result2 <- querySelector(doc, "li:nth-child(2)")
expect_that(xml_attr(result2, "id"), equals("li2"))
})
test_that(":nth-child() with different element types", {
library(XML)
# Test that :nth-child counts all siblings, not just same type
html <- paste0(
'',
'
',
'
Heading
',
'
Para 1
',
'
Para 2
',
' Span',
'
',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# p:nth-child(2) - p that is 2nd child (p1)
expect_that(get_ids("p:nth-child(2)"),
equals("p1"))
# p:nth-child(3) - p that is 3rd child (p2)
expect_that(get_ids("p:nth-child(3)"),
equals("p2"))
# p:nth-child(1) - p that is first child (none)
expect_that(length(querySelectorAll(doc, "p:nth-child(1)")),
equals(0))
# span:nth-child(4) - span that is 4th child (s1)
expect_that(get_ids("span:nth-child(4)"),
equals("s1"))
})
test_that(":nth-child() with complex selectors", {
library(xml2)
html <- paste0(
'',
'
',
''
)
doc <- read_xml(html)
# :nth-child(-n of .item) -> a=-1, b=0 -> impossible even with selector
expect_that(length(querySelectorAll(doc, "li:nth-child(-n of .item)")),
equals(0))
# :nth-child(-2n-1 of .item) -> a=-2, b=-1 -> impossible
expect_that(length(querySelectorAll(doc, "li:nth-child(-2n-1 of .item)")),
equals(0))
# Verify XPath contains both "0" condition and selector check
xpath <- css_to_xpath("li:nth-child(-n of .item)")
expect_true(grepl("0", xpath))
expect_true(grepl("item", xpath))
})
test_that(":nth-last-child() early-exit condition 2: a<0, b-1<0", {
library(XML)
html <- paste0(
'',
'
',
'
1
',
'
2
',
'
3
',
'
',
''
)
doc <- xmlRoot(xmlParse(html))
# :nth-last-child(-n) -> a=-1, b=0, b-1=-1<0 -> impossible
expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n)")),
equals(0))
# :nth-last-child(-n-1) -> a=-1, b=-1, b-1=-2<0 -> impossible
expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n-1)")),
equals(0))
# :nth-last-child(-n of .special) -> impossible even with selector
expect_that(length(querySelectorAll(doc, "li:nth-last-child(-n of .special)")),
equals(0))
})
test_that(":nth-child() boundary between early-exit conditions", {
library(xml2)
html <- paste0(
'',
'
',
'
1
',
'
2
',
'
3
',
'
',
''
)
doc <- read_xml(html)
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# :nth-child(-n+0) -> a=-1, b=0, b-1=-1 -> NOT early-exit (b-1<0 but a<0 not b-1<0)
# This should match nothing (0 or fewer siblings)
expect_that(length(querySelectorAll(doc, "li:nth-child(-n+0)")),
equals(0))
# :nth-child(-n+1) -> a=-1, b=1, b-1=0 -> NOT early-exit (b-1 not <0)
# This should match first child only
expect_that(get_ids("li:nth-child(-n+1)"),
equals("li1"))
# :nth-child(-n+2) -> a=-1, b=2, b-1=1 -> matches first 2
expect_that(get_ids("li:nth-child(-n+2)"),
equals(c("li1", "li2")))
# :nth-child(-2n+2) -> a=-2, b=2, b-1=1 -> matches 2nd child
expect_that(get_ids("li:nth-child(-2n+2)"),
equals("li2"))
# :nth-child(-2n+0) -> a=-2, b=0, b-1=-1<0 -> early-exit condition 2
expect_that(length(querySelectorAll(doc, "li:nth-child(-2n+0)")),
equals(0))
})
selectr/tests/testthat/test-series.R 0000755 0001762 0000144 00000002171 15107555231 017317 0 ustar ligges users context("series")
test_that("parser generates correct series", {
series <- function(css) {
selector <- parse(paste0(":nth-child(", css, ")"))[[1]]
args <- selector$parsed_tree$arguments
parse_series(args)
}
expect_that(series("1n+3"), equals(c(1, 3)))
expect_that(series("1n +3"), equals(c(1, 3)))
expect_that(series("1n + 3"), equals(c(1, 3)))
expect_that(series("1n+ 3"), equals(c(1, 3)))
expect_that(series("1n-3"), equals(c(1, -3)))
expect_that(series("1n -3"), equals(c(1, -3)))
expect_that(series("1n - 3"), equals(c(1, -3)))
expect_that(series("1n- 3"), equals(c(1, -3)))
expect_that(series("n-5"), equals(c(1, -5)))
expect_that(series("odd"), equals(c(2, 1)))
expect_that(series("even"), equals(c(2, 0)))
expect_that(series("3n"), equals(c(3, 0)))
expect_that(series("n"), equals(c(1, 0)))
expect_that(series("+n"), equals(c(1, 0)))
expect_that(series("-n"), equals(c(-1, 0)))
expect_that(series("5"), equals(c(0, 5)))
expect_that(series("foo"), equals(NULL))
expect_that(series("n+"), equals(NULL))
})
selectr/tests/testthat/test-xmllang-xml2.R 0000755 0001762 0000144 00000003412 15107555231 020346 0 ustar ligges users context("lang-xml2")
test_that("xml lang function matches correct elements", {
xmlLangText <- paste0('',
'a',
'b',
'c',
'd',
'e',
'f',
'',
'')
library(xml2)
xmldoc <- read_xml(xmlLangText)
gt <- GenericTranslator$new()
pid <- function(selector) {
xpath <- gt$css_to_xpath(selector)
items <- xml_find_all(xmldoc, xpath)
n <- length(items)
if (!n)
return(NULL)
result <- character(n)
for (i in seq_len(n)) {
element <- items[[i]]
tmp <- xml_attrs(element)["id"]
if (is.null(tmp))
tmp <- "nil"
result[i] <- tmp
}
result
}
expect_that(pid(':lang("EN")'), equals(c('first', 'second', 'third', 'fourth')))
expect_that(pid(':lang("en-us")'), equals(c('second', 'fourth')))
expect_that(pid(':lang(en-nz)'), equals('third'))
expect_that(pid(':lang(fr)'), equals('fifth'))
expect_that(pid(':lang(ru)'), equals('sixth'))
expect_that(pid(":lang('ZH')"), equals('eighth'))
expect_that(pid(':lang(de) :lang(zh)'), equals('eighth'))
expect_that(pid(':lang(en), :lang(zh)'), equals(c('first', 'second', 'third', 'fourth', 'eighth')))
expect_that(pid(":lang(es)"), equals(NULL))
})
selectr/tests/testthat/test-adjacent-sibling.R 0000755 0001762 0000144 00000014525 15107555231 021231 0 ustar ligges users context("adjacent sibling combinator")
test_that("adjacent sibling combinator generates simplified XPath", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# Simple element + element
expect_that(xpath('a + b'),
equals("a/following-sibling::*[1][self::b]"))
# With attribute on right side
expect_that(xpath('a + b[id]'),
equals("a/following-sibling::*[1][self::b][(@id)]"))
# With class on right side
expect_that(xpath('a + b.test'),
equals("a/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test '))]"))
# With ID on right side
expect_that(xpath('a + b#myid'),
equals("a/following-sibling::*[1][self::b][(@id = 'myid')]"))
# With multiple attributes on right side
expect_that(xpath('a + b[id][title]'),
equals("a/following-sibling::*[1][self::b][(@id) and (@title)]"))
# With class and attribute on right side
expect_that(xpath('a + b.test[title]'),
equals("a/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test ')) and (@title)]"))
# With conditions on both sides
expect_that(xpath('a.link + b[id]'),
equals("a[(@class and contains(concat(' ', normalize-space(@class), ' '), ' link '))]/following-sibling::*[1][self::b][(@id)]"))
expect_that(xpath('a[href] + b.test'),
equals("a[(@href)]/following-sibling::*[1][self::b][(@class and contains(concat(' ', normalize-space(@class), ' '), ' test '))]"))
# With ID on left, class and attribute on right
expect_that(xpath('div#main + p.intro[title]'),
equals("div[(@id = 'main')]/following-sibling::*[1][self::p][(@class and contains(concat(' ', normalize-space(@class), ' '), ' intro ')) and (@title)]"))
# Universal selector on right
expect_that(xpath('h1 + *[rel=up]'),
equals("h1/following-sibling::*[1][self::*][(@rel = 'up')]"))
# Combined with child combinator
expect_that(xpath('div > h1 + p'),
equals("div/h1/following-sibling::*[1][self::p]"))
expect_that(xpath('div#main > h1 + p[class]'),
equals("div[(@id = 'main')]/h1/following-sibling::*[1][self::p][(@class)]"))
# With descendant combinator
expect_that(xpath('section a + b'),
equals("section//a/following-sibling::*[1][self::b]"))
# Complex: multiple combinators and conditions
expect_that(xpath('article.post > h2.title + p.intro[data-info]'),
equals("article[(@class and contains(concat(' ', normalize-space(@class), ' '), ' post '))]/h2[(@class and contains(concat(' ', normalize-space(@class), ' '), ' title '))]/following-sibling::*[1][self::p][(@class and contains(concat(' ', normalize-space(@class), ' '), ' intro ')) and (@data-info)]"))
})
test_that("adjacent sibling combinator works correctly with querySelector", {
skip_if_not_installed("XML")
library(XML)
# Test with immediate adjacent siblings
doc1 <- htmlParse('
AB')
results1 <- querySelectorAll(doc1, "a + b")
expect_equal(length(results1), 1)
expect_equal(xmlGetAttr(results1[[1]], "id"), "b1")
# Test with intervening element (should NOT match)
doc2 <- htmlParse('ACB')
results2 <- querySelectorAll(doc2, "a + b")
expect_equal(length(results2), 0)
# Test with attributes on right side
doc3 <- htmlParse('
Link1B1Link2B2
')
results3 <- querySelectorAll(doc3, "a + b[id]")
expect_equal(length(results3), 1)
expect_equal(xmlGetAttr(results3[[1]], "id"), "b1")
# Test with classes on both sides
doc4 <- htmlParse('
LinkB1Link2B2
')
results4 <- querySelectorAll(doc4, "a.link + b.text")
expect_equal(length(results4), 1)
# Test with multiple adjacent pairs
doc5 <- htmlParse('
A1B1A2B2
')
results5 <- querySelectorAll(doc5, "a + b")
expect_equal(length(results5), 2)
expect_equal(xmlGetAttr(results5[[1]], "id"), "b1")
expect_equal(xmlGetAttr(results5[[2]], "id"), "b2")
})
test_that("adjacent sibling maintains correct semantics", {
skip_if_not_installed("XML")
library(XML)
# Verify it only matches IMMEDIATE adjacent siblings
doc <- htmlParse('
Title
Immediate
Intervening
Not immediate
Subtitle
Immediate
')
results <- querySelectorAll(doc, "h1 + p, h2 + p")
expect_equal(length(results), 2)
ids <- sapply(results, xmlGetAttr, "id")
expect_true("p1" %in% ids)
expect_true("p3" %in% ids)
expect_false("p2" %in% ids)
# Test that it respects element type
doc2 <- htmlParse('
LinkBC
')
results_b <- querySelectorAll(doc2, "a + b")
expect_equal(length(results_b), 1)
expect_equal(xmlName(results_b[[1]]), "b")
results_c <- querySelectorAll(doc2, "a + c")
expect_equal(length(results_c), 0) # c is not immediately after a
results_star <- querySelectorAll(doc2, "a + *")
expect_equal(length(results_star), 1)
expect_equal(xmlName(results_star[[1]]), "b")
})
test_that("adjacent sibling with pseudo-classes", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# Adjacent sibling with pseudo-class on right
expect_that(xpath('h1 + p:first-child'),
equals("h1/following-sibling::*[1][self::p][(count(preceding-sibling::*) = 0)]"))
# Adjacent sibling with nth-child
expect_that(xpath('h1 + p:nth-child(2)'),
equals("h1/following-sibling::*[1][self::p][(count(preceding-sibling::*) = 1)]"))
})
selectr/tests/testthat/test-shakespeare-xml2.R 0000755 0001762 0000144 00000053207 15107555231 021206 0 ustar ligges users context("shakespeare-test-xml2")
test_that("selection works correctly on a shakespearean document", {
HTML_SHAKESPEARE <- paste(
"",
"",
#"",
"", "\t
", "\t
",
"\t
As You Like It
", "\t
", "\t by William Shakespeare",
"\t
", "\t
",
"\t
ACT I, SCENE III. A room in the palace.
", "\t
",
"\t
Enter CELIA and ROSALIND
",
"\t
", "\t
CELIA
",
"\t
", "\t
Why, cousin! why, Rosalind! Cupid have mercy! not a word?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Not one to throw at a dog.
",
"\t
", "\t
CELIA
",
"\t
", "\t
No, thy words are too precious to be cast away upon
",
"\t
curs; throw some of them at me; come, lame me with reasons.
",
"\t
", "\t
ROSALIND
",
"\t
CELIA
", "\t
",
"\t
But is all this for your father?
",
"\t
", "\t
", "\t
Then there were two cousins laid up; when the one
",
"\t
should be lamed with reasons and the other mad
",
"\t
without any.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
No, some of it is for my child's father. O, how
",
"\t
full of briers is this working-day world!
",
"\t
", "\t
CELIA
",
"\t
", "\t
They are but burs, cousin, thrown upon thee in
",
"\t
holiday foolery: if we walk not in the trodden
",
"\t
paths our very petticoats will catch them.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I could shake them off my coat: these burs are in my heart.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Hem them away.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I would try, if I could cry 'hem' and have him.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Come, come, wrestle with thy affections.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
O, they take the part of a better wrestler than myself!
",
"\t
", "\t
CELIA
",
"\t
", "\t
O, a good wish upon you! you will try in time, in
",
"\t
despite of a fall. But, turning these jests out of
",
"\t
service, let us talk in good earnest: is it
",
"\t
possible, on such a sudden, you should fall into so
",
"\t
strong a liking with old Sir Rowland's youngest son?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
The duke my father loved his father dearly.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Doth it therefore ensue that you should love his son
",
"\t
dearly? By this kind of chase, I should hate him,
",
"\t
for my father hated his father dearly; yet I hate
",
"\t
not Orlando.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
No, faith, hate him not, for my sake.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Why should I not? doth he not deserve well?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Let me love him for that, and do you love him
",
"\t
because I do. Look, here comes the duke.
",
"\t
", "\t
CELIA
",
"\t
", "\t
With his eyes full of anger.
",
"\t
Enter DUKE FREDERICK, with Lords
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Mistress, dispatch you with your safest haste
",
"\t
And get you from our court.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Me, uncle?
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
You, cousin
",
"\t
Within these ten days if that thou be'st found
",
"\t
So near our public court as twenty miles,
",
"\t
Thou diest for it.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
I do beseech your grace,
",
"\t
Let me the knowledge of my fault bear with me:
",
"\t
If with myself I hold intelligence
",
"\t
Or have acquaintance with mine own desires,
",
"\t
If that I do not dream or be not frantic,--
",
"\t
As I do trust I am not--then, dear uncle,
",
"\t
Never so much as in a thought unborn
",
"\t
Did I offend your highness.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Thus do all traitors:
",
"\t
If their purgation did consist in words,
",
"\t
They are as innocent as grace itself:
",
"\t
Let it suffice thee that I trust thee not.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Yet your mistrust cannot make me a traitor:
",
"\t
Tell me whereon the likelihood depends.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Thou art thy father's daughter; there's enough.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
So was I when your highness took his dukedom;
",
"\t
So was I when your highness banish'd him:
",
"\t
Treason is not inherited, my lord;
",
"\t
Or, if we did derive it from our friends,
",
"\t
What's that to me? my father was no traitor:
",
"\t
Then, good my liege, mistake me not so much
",
"\t
To think my poverty is treacherous.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Dear sovereign, hear me speak.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Ay, Celia; we stay'd her for your sake,
",
"\t
Else had she with her father ranged along.
",
"\t
", "\t
CELIA
",
"\t
", "\t
I did not then entreat to have her stay;
",
"\t
It was your pleasure and your own remorse:
",
"\t
I was too young that time to value her;
",
"\t
But now I know her: if she be a traitor,
",
"\t
Why so am I; we still have slept together,
",
"\t
Rose at an instant, learn'd, play'd, eat together,
",
"\t
And wheresoever we went, like Juno's swans,
",
"\t
Still we went coupled and inseparable.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
She is too subtle for thee; and her smoothness,
",
"\t
Her very silence and her patience
",
"\t
Speak to the people, and they pity her.
",
"\t
Thou art a fool: she robs thee of thy name;
",
"\t
And thou wilt show more bright and seem more virtuous
",
"\t
When she is gone. Then open not thy lips:
",
"\t
Firm and irrevocable is my doom
",
"\t
Which I have pass'd upon her; she is banish'd.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Pronounce that sentence then on me, my liege:
",
"\t
I cannot live out of her company.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
You are a fool. You, niece, provide yourself:
",
"\t
If you outstay the time, upon mine honour,
",
"\t
And in the greatness of my word, you die.
",
"\t
Exeunt DUKE FREDERICK and Lords
",
"\t
", "\t
CELIA
",
"\t
", "\t
O my poor Rosalind, whither wilt thou go?
",
"\t
Wilt thou change fathers? I will give thee mine.
",
"\t
I charge thee, be not thou more grieved than I am.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I have more cause.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Thou hast not, cousin;
",
"\t
Prithee be cheerful: know'st thou not, the duke
",
"\t
Hath banish'd me, his daughter?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
That he hath not.
",
"\t
", "\t
CELIA
",
"\t
", "\t
No, hath not? Rosalind lacks then the love
",
"\t
Which teacheth thee that thou and I am one:
",
"\t
Shall we be sunder'd? shall we part, sweet girl?
",
"\t
No: let my father seek another heir.
",
"\t
Therefore devise with me how we may fly,
",
"\t
Whither to go and what to bear with us;
",
"\t
And do not seek to take your change upon you,
",
"\t
To bear your griefs yourself and leave me out;
",
"\t
For, by this heaven, now at our sorrows pale,
",
"\t
Say what thou canst, I'll go along with thee.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Why, whither shall we go?
",
"\t
", "\t
CELIA
",
"\t
", "\t
To seek my uncle in the forest of Arden.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Alas, what danger will it be to us,
",
"\t
Maids as we are, to travel forth so far!
",
"\t
Beauty provoketh thieves sooner than gold.
",
"\t
", "\t
CELIA
",
"\t
", "\t
I'll put myself in poor and mean attire
",
"\t
And with a kind of umber smirch my face;
",
"\t
The like do you: so shall we pass along
",
"\t
And never stir assailants.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Were it not better,
",
"\t
Because that I am more than common tall,
",
"\t
That I did suit me all points like a man?
",
"\t
A gallant curtle-axe upon my thigh,
",
"\t
A boar-spear in my hand; and--in my heart
",
"\t
Lie there what hidden woman's fear there will--
",
"\t
We'll have a swashing and a martial outside,
",
"\t
As many other mannish cowards have
",
"\t
That do outface it with their semblances.
",
"\t
", "\t
CELIA
",
"\t
", "\t
What shall I call thee when thou art a man?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I'll have no worse a name than Jove's own page;
",
"\t
And therefore look you call me Ganymede.
",
"\t
But what will you be call'd?
",
"\t
", "\t
CELIA
",
"\t
", "\t
Something that hath a reference to my state
",
"\t
No longer Celia, but Aliena.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
But, cousin, what if we assay'd to steal
",
"\t
The clownish fool out of your father's court?
",
"\t
Would he not be a comfort to our travel?
",
"\t
", "\t
CELIA
",
"\t
", "\t
He'll go along o'er the wide world with me;
",
"\t
Leave me alone to woo him. Let's away,
",
"\t
And get our jewels and our wealth together,
",
"\t
Devise the fittest time and safest way
",
"\t
To hide us from pursuit that will be made
",
"\t
After my flight. Now go we in content
",
"\t
To liberty and not to banishment.
",
"\t
Exeunt
", "\t
", "\t
",
"\t
", "
", "", "", sep = "\n")
library(xml2)
document <- read_xml(HTML_SHAKESPEARE)
xml_ns_strip(document)
body <- xml_find_first(document, "//body")
gt <- GenericTranslator$new()
count <- function(selector) {
xpath <- gt$css_to_xpath(selector)
results <- xml_find_all(body, xpath)
length(results)
}
# Data borrowed from http://mootools.net/slickspeed/
## Changed from original; probably because I'm only
## searching the body.
#expect_that(count('*'), equals(252))
expect_that(count('*'), equals(246))
expect_that(count('div:contains(CELIA)'), equals(26))
expect_that(count('div:only-child'), equals(22)) # ?
expect_that(count('div:nth-child(even)'), equals(106))
expect_that(count('div:nth-child(2n)'), equals(106))
expect_that(count('div:nth-child(odd)'), equals(137))
expect_that(count('div:nth-child(2n+1)'), equals(137))
expect_that(count('div:nth-child(n)'), equals(243))
expect_that(count('div:last-child'), equals(53))
expect_that(count('div:first-child'), equals(51))
expect_that(count('div > div'), equals(242))
expect_that(count('div + div'), equals(190))
expect_that(count('div ~ div'), equals(190))
expect_that(count('body'), equals(1))
expect_that(count('body div'), equals(243))
expect_that(count('div'), equals(243))
expect_that(count('div div'), equals(242))
expect_that(count('div div div'), equals(241))
expect_that(count('div, div, div'), equals(243))
expect_that(count('div, a, span'), equals(243))
expect_that(count('.dialog'), equals(51))
expect_that(count('div.dialog'), equals(51))
expect_that(count('div .dialog'), equals(51))
expect_that(count('div.character, div.dialog'), equals(99))
expect_that(count('div.direction.dialog'), equals(0))
expect_that(count('div.dialog.direction'), equals(0))
expect_that(count('div.dialog.scene'), equals(1))
expect_that(count('div.scene.scene'), equals(1))
expect_that(count('div.scene .scene'), equals(0))
expect_that(count('div.direction .dialog '), equals(0))
expect_that(count('div .dialog .direction'), equals(4))
expect_that(count('div.dialog .dialog .direction'), equals(4))
expect_that(count('#speech5'), equals(1))
expect_that(count('div#speech5'), equals(1))
expect_that(count('div #speech5'), equals(1))
expect_that(count('div.scene div.dialog'), equals(49))
expect_that(count('div#scene1 div.dialog div'), equals(142))
expect_that(count('#scene1 #speech1'), equals(1))
expect_that(count('div[class]'), equals(103))
expect_that(count('div[class=dialog]'), equals(50))
expect_that(count('div[class^=dia]'), equals(51))
expect_that(count('div[class$=log]'), equals(50))
expect_that(count('div[class*=sce]'), equals(1))
expect_that(count('div[class|=dialog]'), equals(50)) # ? Seems right
expect_that(count('div[class!=madeup]'), equals(243)) # ? Seems right
expect_that(count('div[class~=dialog]'), equals(51)) # ? Seems right
})
selectr/tests/testthat/test-method-registration.R 0000755 0001762 0000144 00000000464 15107555231 022020 0 ustar ligges users context("methods")
test_that("method registration occurs correctly", {
library(XML)
xdoc <- xmlParse("")
library(xml2)
x2doc <- read_xml("")
results <- querySelector(xdoc, "circle")
results <- querySelector(x2doc, "circle")
})
selectr/tests/testthat/test-translation.R 0000755 0001762 0000144 00000017366 15120450063 020367 0 ustar ligges users context("translation")
test_that("translation from parsed objects to XPath works", {
gt <- GenericTranslator$new()
xpath <- function(css) {
gt$css_to_xpath(css, prefix = "")
}
expect_that(xpath("*"), equals("*"))
expect_that(xpath("e"), equals("e"))
expect_that(xpath("*|e"), equals("e"))
expect_that(xpath("e|f"), equals("e:f"))
expect_that(xpath("e[foo]"), equals("e[(@foo)]"))
expect_that(xpath("e[foo|bar]"), equals("e[(@foo:bar)]"))
expect_that(xpath('e[foo="bar"]'), equals("e[(@foo = 'bar')]"))
expect_that(xpath('e[foo!="bar"]'), equals("e[(not(@foo) or @foo != 'bar')]"))
expect_that(xpath("e[foo='(test)']"), equals("e[(@foo = '(test)')]"))
expect_that(xpath('e[foo="(test)"]'), equals("e[(@foo = '(test)')]"))
expect_that(xpath("e[foo='(abc)']"), equals("e[(@foo = '(abc)')]"))
expect_that(xpath("e[foo='(e2e)']"), equals("e[(@foo = '(e2e)')]"))
expect_that(xpath('e[foo="(e2e)"]'), equals("e[(@foo = '(e2e)')]"))
expect_that(xpath("e[foo='(123)']"), equals("e[(@foo = '(123)')]"))
expect_that(xpath("e[foo='(12345)']"), equals("e[(@foo = '(12345)')]"))
# Six hex digits (max for CSS unicode escape)
expect_that(xpath("e[foo='(abcdef)']"), equals("e[(@foo = '(abcdef)')]"))
expect_that(xpath("e[foo='(123456)']"), equals("e[(@foo = '(123456)')]"))
# Seven hex digits (exceeds max, so not unicode escape required)
expect_that(xpath("e[foo='(1234567)']"), equals("e[(@foo = '(1234567)')]"))
expect_that(xpath("e[foo='(AbCdEf)']"), equals("e[(@foo = '(AbCdEf)')]"))
expect_that(xpath("e[foo='(E2E)']"), equals("e[(@foo = '(E2E)')]"))
expect_that(xpath("e[foo='(o2o)']"), equals("e[(@foo = '(o2o)')]"))
expect_that(xpath('e[foo="(o2o)"]'), equals("e[(@foo = '(o2o)')]"))
expect_that(xpath("e[foo='(xyz)']"), equals("e[(@foo = '(xyz)')]"))
expect_that(xpath("e[foo='(test123)']"), equals("e[(@foo = '(test123)')]"))
expect_that(xpath("e[foo='(abc)(def)']"), equals("e[(@foo = '(abc)(def)')]"))
expect_that(xpath("e[foo='(abc )']"), equals("e[(@foo = '(abc )')]"))
expect_that(xpath('e[foo~="bar"]'),
equals("e[(@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar '))]"))
expect_that(xpath('e[foo^="bar"]'),
equals("e[(@foo and starts-with(@foo, 'bar'))]"))
expect_that(xpath('e[foo$="bar"]'),
equals("e[(@foo and substring(@foo, string-length(@foo)-2) = 'bar')]"))
expect_that(xpath('e[foo*="bar"]'),
equals("e[(@foo and contains(@foo, 'bar'))]"))
expect_that(xpath('e[hreflang|="en"]'),
equals("e[(@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-')))]"))
expect_that(xpath('e:nth-child(1)'),
equals("e[(count(preceding-sibling::*) = 0)]"))
expect_that(xpath('e:nth-child(3n+2)'),
equals("e[(count(preceding-sibling::*) >= 1 and (count(preceding-sibling::*) +2) mod 3 = 0)]"))
expect_that(xpath('e:nth-child(3n-2)'),
equals("e[(count(preceding-sibling::*) mod 3 = 0)]"))
expect_that(xpath('e:nth-child(-n+6)'),
equals("e[(count(preceding-sibling::*) <= 5)]"))
expect_that(xpath('e:nth-last-child(1)'),
equals("e[(count(following-sibling::*) = 0)]"))
expect_that(xpath('e:nth-last-child(2n)'),
equals("e[((count(following-sibling::*) +1) mod 2 = 0)]"))
expect_that(xpath('e:nth-last-child(2n+1)'),
equals("e[(count(following-sibling::*) mod 2 = 0)]"))
expect_that(xpath('e:nth-last-child(2n+2)'),
equals("e[(count(following-sibling::*) >= 1 and (count(following-sibling::*) +1) mod 2 = 0)]"))
expect_that(xpath('e:nth-last-child(3n+1)'),
equals("e[(count(following-sibling::*) mod 3 = 0)]"))
expect_that(xpath('e:nth-last-child(-n+2)'),
equals("e[(count(following-sibling::*) <= 1)]"))
expect_that(xpath('e:nth-of-type(1)'),
equals("e[(count(preceding-sibling::e) = 0)]"))
expect_that(xpath('e:nth-last-of-type(1)'),
equals("e[(count(following-sibling::e) = 0)]"))
expect_that(xpath('div e:nth-last-of-type(1) .aclass'),
equals("div//e[(count(following-sibling::e) = 0)]//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' aclass '))]"))
expect_that(xpath('e:first-child'),
equals("e[(count(preceding-sibling::*) = 0)]"))
expect_that(xpath('e:last-child'),
equals("e[(count(following-sibling::*) = 0)]"))
expect_that(xpath('e:first-of-type'),
equals("e[(count(preceding-sibling::e) = 0)]"))
expect_that(xpath('e:last-of-type'),
equals("e[(count(following-sibling::e) = 0)]"))
expect_that(xpath('e:only-child'),
equals("e[(count(parent::*/child::*) = 1)]"))
expect_that(xpath('e:only-of-type'),
equals("e[(count(parent::*/child::e) = 1)]"))
expect_that(xpath('e:empty'),
equals("e[(not(*) and not(string-length()))]"))
expect_that(xpath('e:EmPTY'),
equals("e[(not(*) and not(string-length()))]"))
expect_that(xpath('e:root'),
equals("e[(not(parent::*))]"))
expect_that(xpath('e:hover'),
equals("e[(0)]")) #never matches
expect_that(xpath('e:contains("foo")'),
equals("e[(contains(., 'foo'))]"))
expect_that(xpath('e:ConTains(foo)'),
equals("e[(contains(., 'foo'))]"))
expect_that(xpath('e.warning'),
equals("e[(@class and contains(concat(' ', normalize-space(@class), ' '), ' warning '))]"))
expect_that(xpath('e#myid'),
equals("e[(@id = 'myid')]"))
expect_that(xpath('e:not(:nth-child(odd))'),
equals("e[(not((count(preceding-sibling::*) mod 2 = 0)))]"))
expect_that(xpath('e:nOT(*)'),
equals("e[(0)]")) # never matches
expect_that(xpath('e f'),
equals("e//f"))
expect_that(xpath('e > f'),
equals("e/f"))
expect_that(xpath('e + f'),
equals("e/following-sibling::*[1][self::f]"))
expect_that(xpath('e ~ f'),
equals("e/following-sibling::f"))
expect_that(xpath('e ~ f:nth-child(3)'),
equals("e/following-sibling::f[(count(preceding-sibling::*) = 2)]"))
expect_that(xpath('div#container p'),
equals("div[(@id = 'container')]//p"))
# expect that the following do nothing for the generic translator
expect_that(xpath('a:any-link'), equals("a[(0)]"))
expect_that(xpath('a:link'), equals("a[(0)]"))
expect_that(xpath('a:visited'), equals("a[(0)]"))
expect_that(xpath('a:hover'), equals("a[(0)]"))
expect_that(xpath('a:active'), equals("a[(0)]"))
expect_that(xpath('a:focus'), equals("a[(0)]"))
expect_that(xpath('a:target'), equals("a[(0)]"))
expect_that(xpath('a:target-within'), equals("a[(0)]"))
expect_that(xpath('a:local-link'), equals("a[(0)]"))
expect_that(xpath('a:enabled'), equals("a[(0)]"))
expect_that(xpath('a:disabled'), equals("a[(0)]"))
expect_that(xpath('a:checked'), equals("a[(0)]"))
# Invalid characters in XPath element names
charsets <- localeToCharset()
if (!anyNA(charsets) && charsets[1] == "UTF-8") {
expect_that(xpath('di\ua0v'),
equals("*[(name() = 'di v')]")) # div\ua0v
expect_that(xpath('[h\ua0ref]'),
equals("*[(attribute::*[name() = 'h ref'])]")) # h\ua0ref
}
expect_that(xpath('di\\[v'),
equals("*[(name() = 'di[v')]"))
expect_that(xpath('[h\\]ref]'),
equals("*[(attribute::*[name() = 'h]ref'])]"))
})
selectr/tests/testthat/test-xmllang-XML.R 0000755 0001762 0000144 00000003416 15107555231 020130 0 ustar ligges users context("lang-XML")
test_that("xml lang function matches correct elements", {
xmlLangText <- paste0('',
'a',
'b',
'c',
'd',
'e',
'f',
'',
'')
library(XML)
xmldoc <- xmlRoot(xmlParse(xmlLangText))
gt <- GenericTranslator$new()
pid <- function(selector) {
xpath <- gt$css_to_xpath(selector)
items <- getNodeSet(xmldoc, xpath)
n <- length(items)
if (!n)
return(NULL)
result <- character(n)
for (i in seq_len(n)) {
element <- items[[i]]
tmp <- xmlAttrs(element)["id"]
if (is.null(tmp))
tmp <- "nil"
result[i] <- tmp
}
result
}
expect_that(pid(':lang("EN")'), equals(c('first', 'second', 'third', 'fourth')))
expect_that(pid(':lang("en-us")'), equals(c('second', 'fourth')))
expect_that(pid(':lang(en-nz)'), equals('third'))
expect_that(pid(':lang(fr)'), equals('fifth'))
expect_that(pid(':lang(ru)'), equals('sixth'))
expect_that(pid(":lang('ZH')"), equals('eighth'))
expect_that(pid(':lang(de) :lang(zh)'), equals('eighth'))
expect_that(pid(':lang(en), :lang(zh)'), equals(c('first', 'second', 'third', 'fourth', 'eighth')))
expect_that(pid(":lang(es)"), equals(NULL))
})
selectr/tests/testthat/test-where.R 0000755 0001762 0000144 00000024156 15107555231 017146 0 ustar ligges users context(":where() pseudo-class")
test_that(":where() generates correct XPath", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# Simple :where() with single selector
expect_that(xpath("div:where(p)"),
equals("div[((name() = 'p'))]"))
# :where() with class selector
expect_that(xpath("div:where(.foo)"),
equals("div[((@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')))]"))
# :where() with ID selector
expect_that(xpath("section:where(#main)"),
equals("section[((@id = 'main'))]"))
# :where() with attribute selector
expect_that(xpath("input:where([required])"),
equals("input[((@required))]"))
# :where() with multiple selectors (OR logic)
expect_that(xpath("div:where(p, span)"),
equals("div[((name() = 'p')) or ((name() = 'span'))]"))
# :where() with element and class (both conditions must match)
expect_that(xpath("*:where(div.content)"),
equals("*[((@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')) and (name() = 'div'))]"))
# Multiple :where() selectors - currently treated as OR, not AND
# Note: parser combines them into a single :where()
expect_that(xpath("div:where(p):where(span)"),
equals("div[((name() = 'p')) or ((name() = 'span'))]"))
# :where() on universal selector
expect_that(xpath("*:where(.highlight)"),
equals("*[((@class and contains(concat(' ', normalize-space(@class), ' '), ' highlight ')))]"))
# :where() with multiple classes
expect_that(xpath("div:where(.foo, .bar)"),
equals("div[((@class and contains(concat(' ', normalize-space(@class), ' '), ' foo '))) or ((@class and contains(concat(' ', normalize-space(@class), ' '), ' bar ')))]"))
# Complex: :where() with mix of selectors
expect_that(xpath("p:where(.highlight, #special, [data-key])"),
equals("p[((@class and contains(concat(' ', normalize-space(@class), ' '), ' highlight '))) or ((@id = 'special')) or ((@data-key))]"))
})
test_that(":where() works correctly with XML documents", {
library(XML)
html <- paste0(
'',
'
Div 1
',
'
Div 2
',
'
Para 1
',
'
Para 2
',
' Span 1',
' ',
' Article',
' ',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# Elements matching div OR p (via :where)
expect_that(get_ids("*:where(div, p)"),
equals(c("d1", "d2", "p1", "p2")))
# Elements with class content (any element type)
expect_that(get_ids("*:where(.content)"),
equals(c("d1", "p1", "s1")))
# Div elements that are either content or sidebar
expect_that(get_ids("div:where(.content, .sidebar)"),
equals(c("d1", "d2")))
# Elements matching specific ID
# Note: returns all ancestors in XML, so we check for inclusion
ids <- get_ids("*:where(#p1)")
expect_that("p1" %in% ids, equals(TRUE))
# :where() with element that has specific class
expect_that(get_ids("*:where(p.highlight)"),
equals("p2"))
# :where() matches nothing if conditions don't align
expect_that(length(querySelectorAll(doc, "div:where(p)")),
equals(0))
})
test_that(":where() works correctly with xml2 documents", {
library(xml2)
html <- paste0(
'',
'
Div 1
',
'
Div 2
',
'
Para 1
',
'
Para 2
',
' Span 1',
' ',
' Article',
' ',
''
)
doc <- read_xml(html)
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# Elements matching div OR p (via :where)
expect_that(get_ids("*:where(div, p)"),
equals(c("d1", "d2", "p1", "p2")))
# Elements with class content (any element type)
expect_that(get_ids("*:where(.content)"),
equals(c("d1", "p1", "s1")))
# Div elements that are either content or sidebar
expect_that(get_ids("div:where(.content, .sidebar)"),
equals(c("d1", "d2")))
# Elements matching specific ID
# Note: returns all ancestors, so we check for inclusion
ids <- get_ids("*:where(#p1)")
expect_that("p1" %in% ids, equals(TRUE))
# :where() with element that has specific class
expect_that(get_ids("*:where(p.highlight)"),
equals("p2"))
# :where() matches nothing if conditions don't align
expect_that(length(querySelectorAll(doc, "div:where(p)")),
equals(0))
})
test_that(":where() has zero specificity", {
library(XML)
html <- paste0(
'',
'
Content
',
''
)
doc <- xmlRoot(xmlParse(html))
# All of these should match the same element
# :where() doesn't add specificity regardless of what's inside
expect_that(length(querySelectorAll(doc, "div:where(#test)")), equals(1))
expect_that(length(querySelectorAll(doc, "div:where(.foo)")), equals(1))
expect_that(length(querySelectorAll(doc, ":where(div)")), equals(1))
expect_that(length(querySelectorAll(doc, ":where(#test, .foo, div)")), equals(1))
# Specificity is handled in parser/specificity calculation
# Here we just verify matching works
})
test_that(":where() handles edge cases correctly", {
library(XML)
# Empty document case
html1 <- ''
doc1 <- xmlRoot(xmlParse(html1))
expect_that(length(querySelectorAll(doc1, "*:where(div)")), equals(0))
# Multiple classes
html2 <- paste0(
'',
'
A
',
'
B
',
'
C
',
''
)
doc2 <- xmlRoot(xmlParse(html2))
# Divs with foo OR bar class
result <- querySelectorAll(doc2, "div:where(.foo, .bar)")
expect_that(length(result), equals(3))
# :where() with universal selector inside
html4 <- ''
doc4 <- xmlRoot(xmlParse(html4))
# This matches elements that are any type (essentially all elements plus root)
result3 <- querySelectorAll(doc4, "*:where(*)")
# Returns root plus all descendants
expect_that(length(result3) >= 3, equals(TRUE))
})
test_that(":where() works with querySelector (returns first match)", {
library(xml2)
html <- paste0(
'',
'
First
',
'
Second
',
' Third',
''
)
doc <- read_xml(html)
# Should return first element with class foo
result <- querySelector(doc, "*:where(.foo)")
expect_that(xml_attr(result, "id"), equals("d1"))
# Should return first div or p
result2 <- querySelector(doc, "*:where(div, p)")
expect_that(xml_attr(result2, "id"), equals("d1"))
# Should return NULL when no match
result_none <- querySelector(doc, "*:where(article)")
expect_that(result_none, equals(NULL))
})
test_that(":where() and :is() behave similarly in matching", {
library(XML)
html <- paste0(
'',
'
Div
',
'
Para
',
' Span',
''
)
doc <- xmlRoot(xmlParse(html))
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# :where() and :is() should match the same elements
# (only difference is specificity, which doesn't affect matching)
where_result <- get_ids("*:where(div, p)")
is_result <- get_ids("*:is(div, p)")
expect_that(where_result, equals(is_result))
where_result2 <- get_ids("*:where(.content)")
is_result2 <- get_ids("*:is(.content)")
expect_that(where_result2, equals(is_result2))
})
test_that(":where() can be combined with other selectors", {
library(xml2)
html <- paste0(
'',
' ',
'
Div 1
',
'
Para 1
',
' ',
' ',
''
)
doc <- read_xml(html)
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# Descendant combinator: section containing divs or ps
# Note: returns all matching descendants including ancestors
ids <- get_ids("section *:where(div, p)")
expect_that("d1" %in% ids && "p1" %in% ids, equals(TRUE))
# Class selector before :where()
# Elements with class content that are divs or ps
# (all 4 elements match: d1,p1 have .content, and :where checks div|p)
result <- get_ids(".content:where(div, p)")
expect_that("d1" %in% result && "p1" %in% result, equals(TRUE))
# Child combinator
expect_that(get_ids("section > *:where(.content)"),
equals(c("d1", "p1")))
})
selectr/tests/testthat/test-tokenizer.R 0000755 0001762 0000144 00000002027 15107555231 020037 0 ustar ligges users context("tokenizer")
test_that("tokenizer extracts correct representation", {
tokens <- tokenize('E > f [a~="y\\"x"]:nth(/* fu /]* */-3.7)')
tokens <- unlist(lapply(tokens, function(x) x$repr()))
expected_tokens <- c("",
"",
"' at 3>",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"",
"")
expect_that(tokens, equals(expected_tokens))
})
selectr/tests/testthat/test-select-XML.R 0000755 0001762 0000144 00000025705 15107555231 017752 0 ustar ligges users context("large-test")
test_that("selection works correctly on a large barrage of tests", {
HTML_IDS <- paste0(
c("", " ",
" ", "", "
Why, cousin! why, Rosalind! Cupid have mercy! not a word?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Not one to throw at a dog.
",
"\t
", "\t
CELIA
",
"\t
", "\t
No, thy words are too precious to be cast away upon
",
"\t
curs; throw some of them at me; come, lame me with reasons.
",
"\t
", "\t
ROSALIND
",
"\t
CELIA
", "\t
",
"\t
But is all this for your father?
",
"\t
", "\t
", "\t
Then there were two cousins laid up; when the one
",
"\t
should be lamed with reasons and the other mad
",
"\t
without any.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
No, some of it is for my child's father. O, how
",
"\t
full of briers is this working-day world!
",
"\t
", "\t
CELIA
",
"\t
", "\t
They are but burs, cousin, thrown upon thee in
",
"\t
holiday foolery: if we walk not in the trodden
",
"\t
paths our very petticoats will catch them.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I could shake them off my coat: these burs are in my heart.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Hem them away.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I would try, if I could cry 'hem' and have him.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Come, come, wrestle with thy affections.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
O, they take the part of a better wrestler than myself!
",
"\t
", "\t
CELIA
",
"\t
", "\t
O, a good wish upon you! you will try in time, in
",
"\t
despite of a fall. But, turning these jests out of
",
"\t
service, let us talk in good earnest: is it
",
"\t
possible, on such a sudden, you should fall into so
",
"\t
strong a liking with old Sir Rowland's youngest son?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
The duke my father loved his father dearly.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Doth it therefore ensue that you should love his son
",
"\t
dearly? By this kind of chase, I should hate him,
",
"\t
for my father hated his father dearly; yet I hate
",
"\t
not Orlando.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
No, faith, hate him not, for my sake.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Why should I not? doth he not deserve well?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Let me love him for that, and do you love him
",
"\t
because I do. Look, here comes the duke.
",
"\t
", "\t
CELIA
",
"\t
", "\t
With his eyes full of anger.
",
"\t
Enter DUKE FREDERICK, with Lords
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Mistress, dispatch you with your safest haste
",
"\t
And get you from our court.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Me, uncle?
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
You, cousin
",
"\t
Within these ten days if that thou be'st found
",
"\t
So near our public court as twenty miles,
",
"\t
Thou diest for it.
", "\t
",
"\t
ROSALIND
",
"\t
", "\t
I do beseech your grace,
",
"\t
Let me the knowledge of my fault bear with me:
",
"\t
If with myself I hold intelligence
",
"\t
Or have acquaintance with mine own desires,
",
"\t
If that I do not dream or be not frantic,--
",
"\t
As I do trust I am not--then, dear uncle,
",
"\t
Never so much as in a thought unborn
",
"\t
Did I offend your highness.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Thus do all traitors:
",
"\t
If their purgation did consist in words,
",
"\t
They are as innocent as grace itself:
",
"\t
Let it suffice thee that I trust thee not.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Yet your mistrust cannot make me a traitor:
",
"\t
Tell me whereon the likelihood depends.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Thou art thy father's daughter; there's enough.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
So was I when your highness took his dukedom;
",
"\t
So was I when your highness banish'd him:
",
"\t
Treason is not inherited, my lord;
",
"\t
Or, if we did derive it from our friends,
",
"\t
What's that to me? my father was no traitor:
",
"\t
Then, good my liege, mistake me not so much
",
"\t
To think my poverty is treacherous.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Dear sovereign, hear me speak.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
Ay, Celia; we stay'd her for your sake,
",
"\t
Else had she with her father ranged along.
",
"\t
", "\t
CELIA
",
"\t
", "\t
I did not then entreat to have her stay;
",
"\t
It was your pleasure and your own remorse:
",
"\t
I was too young that time to value her;
",
"\t
But now I know her: if she be a traitor,
",
"\t
Why so am I; we still have slept together,
",
"\t
Rose at an instant, learn'd, play'd, eat together,
",
"\t
And wheresoever we went, like Juno's swans,
",
"\t
Still we went coupled and inseparable.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
She is too subtle for thee; and her smoothness,
",
"\t
Her very silence and her patience
",
"\t
Speak to the people, and they pity her.
",
"\t
Thou art a fool: she robs thee of thy name;
",
"\t
And thou wilt show more bright and seem more virtuous
",
"\t
When she is gone. Then open not thy lips:
",
"\t
Firm and irrevocable is my doom
",
"\t
Which I have pass'd upon her; she is banish'd.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Pronounce that sentence then on me, my liege:
",
"\t
I cannot live out of her company.
",
"\t
", "\t
DUKE FREDERICK
",
"\t
", "\t
You are a fool. You, niece, provide yourself:
",
"\t
If you outstay the time, upon mine honour,
",
"\t
And in the greatness of my word, you die.
",
"\t
Exeunt DUKE FREDERICK and Lords
",
"\t
", "\t
CELIA
",
"\t
", "\t
O my poor Rosalind, whither wilt thou go?
",
"\t
Wilt thou change fathers? I will give thee mine.
",
"\t
I charge thee, be not thou more grieved than I am.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I have more cause.
",
"\t
", "\t
CELIA
",
"\t
", "\t
Thou hast not, cousin;
",
"\t
Prithee be cheerful: know'st thou not, the duke
",
"\t
Hath banish'd me, his daughter?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
That he hath not.
",
"\t
", "\t
CELIA
",
"\t
", "\t
No, hath not? Rosalind lacks then the love
",
"\t
Which teacheth thee that thou and I am one:
",
"\t
Shall we be sunder'd? shall we part, sweet girl?
",
"\t
No: let my father seek another heir.
",
"\t
Therefore devise with me how we may fly,
",
"\t
Whither to go and what to bear with us;
",
"\t
And do not seek to take your change upon you,
",
"\t
To bear your griefs yourself and leave me out;
",
"\t
For, by this heaven, now at our sorrows pale,
",
"\t
Say what thou canst, I'll go along with thee.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Why, whither shall we go?
",
"\t
", "\t
CELIA
",
"\t
", "\t
To seek my uncle in the forest of Arden.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Alas, what danger will it be to us,
",
"\t
Maids as we are, to travel forth so far!
",
"\t
Beauty provoketh thieves sooner than gold.
",
"\t
", "\t
CELIA
",
"\t
", "\t
I'll put myself in poor and mean attire
",
"\t
And with a kind of umber smirch my face;
",
"\t
The like do you: so shall we pass along
",
"\t
And never stir assailants.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
Were it not better,
",
"\t
Because that I am more than common tall,
",
"\t
That I did suit me all points like a man?
",
"\t
A gallant curtle-axe upon my thigh,
",
"\t
A boar-spear in my hand; and--in my heart
",
"\t
Lie there what hidden woman's fear there will--
",
"\t
We'll have a swashing and a martial outside,
",
"\t
As many other mannish cowards have
",
"\t
That do outface it with their semblances.
",
"\t
", "\t
CELIA
",
"\t
", "\t
What shall I call thee when thou art a man?
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
I'll have no worse a name than Jove's own page;
",
"\t
And therefore look you call me Ganymede.
",
"\t
But what will you be call'd?
",
"\t
", "\t
CELIA
",
"\t
", "\t
Something that hath a reference to my state
",
"\t
No longer Celia, but Aliena.
",
"\t
", "\t
ROSALIND
",
"\t
", "\t
But, cousin, what if we assay'd to steal
",
"\t
The clownish fool out of your father's court?
",
"\t
Would he not be a comfort to our travel?
",
"\t
", "\t
CELIA
",
"\t
", "\t
He'll go along o'er the wide world with me;
",
"\t
Leave me alone to woo him. Let's away,
",
"\t
And get our jewels and our wealth together,
",
"\t
Devise the fittest time and safest way
",
"\t
To hide us from pursuit that will be made
",
"\t
After my flight. Now go we in content
",
"\t
To liberty and not to banishment.
",
"\t
Exeunt
", "\t
", "\t
",
"\t
", "
", "", "", sep = "\n")
library(XML)
document <- xmlRoot(htmlParse(HTML_SHAKESPEARE))
body <- getNodeSet(document, "//body")[[1]]
gt <- GenericTranslator$new()
count <- function(selector) {
xpath <- gt$css_to_xpath(selector)
results <- getNodeSet(body, xpath)
length(results)
}
# Data borrowed from http://mootools.net/slickspeed/
## Changed from original; probably because I'm only
## searching the body.
#expect_that(count('*'), equals(252))
expect_that(count('*'), equals(246))
expect_that(count('div:contains(CELIA)'), equals(26))
expect_that(count('div:only-child'), equals(22)) # ?
expect_that(count('div:nth-child(even)'), equals(106))
expect_that(count('div:nth-child(2n)'), equals(106))
expect_that(count('div:nth-child(odd)'), equals(137))
expect_that(count('div:nth-child(2n+1)'), equals(137))
expect_that(count('div:nth-child(n)'), equals(243))
expect_that(count('div:last-child'), equals(53))
expect_that(count('div:first-child'), equals(51))
expect_that(count('div > div'), equals(242))
expect_that(count('div + div'), equals(190))
expect_that(count('div ~ div'), equals(190))
expect_that(count('body'), equals(1))
expect_that(count('body div'), equals(243))
expect_that(count('div'), equals(243))
expect_that(count('div div'), equals(242))
expect_that(count('div div div'), equals(241))
expect_that(count('div, div, div'), equals(243))
expect_that(count('div, a, span'), equals(243))
expect_that(count('.dialog'), equals(51))
expect_that(count('div.dialog'), equals(51))
expect_that(count('div .dialog'), equals(51))
expect_that(count('div.character, div.dialog'), equals(99))
expect_that(count('div.direction.dialog'), equals(0))
expect_that(count('div.dialog.direction'), equals(0))
expect_that(count('div.dialog.scene'), equals(1))
expect_that(count('div.scene.scene'), equals(1))
expect_that(count('div.scene .scene'), equals(0))
expect_that(count('div.direction .dialog '), equals(0))
expect_that(count('div .dialog .direction'), equals(4))
expect_that(count('div.dialog .dialog .direction'), equals(4))
expect_that(count('#speech5'), equals(1))
expect_that(count('div#speech5'), equals(1))
expect_that(count('div #speech5'), equals(1))
expect_that(count('div.scene div.dialog'), equals(49))
expect_that(count('div#scene1 div.dialog div'), equals(142))
expect_that(count('#scene1 #speech1'), equals(1))
expect_that(count('div[class]'), equals(103))
expect_that(count('div[class=dialog]'), equals(50))
expect_that(count('div[class^=dia]'), equals(51))
expect_that(count('div[class$=log]'), equals(50))
expect_that(count('div[class*=sce]'), equals(1))
expect_that(count('div[class|=dialog]'), equals(50)) # ? Seems right
expect_that(count('div[class!=madeup]'), equals(243)) # ? Seems right
expect_that(count('div[class~=dialog]'), equals(51)) # ? Seems right
})
selectr/tests/testthat/test-parse-errors.R 0000755 0001762 0000144 00000010220 15107555231 020443 0 ustar ligges users context("parse-errors")
test_that("useful errors are returned", {
get_error <- function(css) {
parse(css)
NULL
}
expect_that(get_error("attributes(href)/html/body/a"),
throws_error("Unexpected character"))
expect_that(get_error("attributes(href)"),
throws_error("Expected selector"))
expect_that(get_error("html/body/a"),
throws_error("Unexpected character"))
expect_that(get_error(" "),
throws_error("Expected selector, got "))
expect_that(get_error("div, "),
throws_error("Expected selector, got "))
expect_that(get_error(" , div"),
throws_error("Expected selector, got "))
expect_that(get_error("p, , div"),
throws_error("Expected selector, got "))
expect_that(get_error("div > "),
throws_error("Expected selector, got "))
expect_that(get_error(" > div"),
throws_error("Expected selector, got ' at 3>"))
expect_that(get_error("foo|#bar"),
throws_error("Expected ident or '\\*'"))
expect_that(get_error("#.foo"),
throws_error("Expected selector, got "))
expect_that(get_error(".#foo"),
throws_error("Expected ident, got "))
expect_that(get_error(":#foo"),
throws_error("Expected ident, got "))
expect_that(get_error("[*]"),
throws_error("Expected '|'"))
expect_that(get_error("[foo|]"),
throws_error("Expected ident, got "))
expect_that(get_error("[#]"),
throws_error("Expected ident or '\\*', got "))
expect_that(get_error("[foo=#]"),
throws_error("Expected string or ident, got "))
expect_that(get_error(":nth-child()"),
throws_error("Expected at least one argument, got "))
expect_that(get_error("[href]a"),
throws_error("Expected selector, got "))
expect_that(get_error("[rel=stylesheet]"),
equals(NULL))
expect_that(get_error("[rel:stylesheet]"),
throws_error("Operator expected, got "))
expect_that(get_error("[rel=stylesheet"),
throws_error("Expected ']', got "))
expect_that(get_error(":lang(fr)"),
equals(NULL))
expect_that(get_error(":lang(fr"),
throws_error("Expected an argument, got "))
expect_that(get_error(':contains("foo'),
throws_error("Unclosed string at 11"))
expect_that(get_error(':contains("foo\\"'),
throws_error("Unclosed string at 11"))
expect_that(get_error("foo!"),
throws_error("Unexpected character"))
expect_that(get_error("a:not(b;)"),
throws_error("Unexpected character"))
# Mis-placed pseudo-elements
expect_that(get_error("a:before:empty"),
throws_error("Got pseudo-element ::before not at the end of a selector"))
expect_that(get_error("li:before a"),
throws_error("Got pseudo-element ::before not at the end of a selector"))
expect_that(get_error(":not(:before)"),
throws_error("Got pseudo-element ::before inside :not\\(\\) at 13"))
expect_that(get_error(":not(a,)"),
throws_error("Expected ')', got .*"))
expect_that(get_error(":not(:not(a))"),
throws_error("Got nested :not()"))
expect_that(get_error(":is(:before)"),
throws_error("Got pseudo-element ::before inside :is\\(\\) at 12"))
expect_that(get_error(":is(a b)"),
throws_error("Expected an argument, got "))
expect_that(get_error(":matches(:before)"),
throws_error("Got pseudo-element ::before inside :matches\\(\\) at 17"))
expect_that(get_error(":matches(a b)"),
throws_error("Expected an argument, got "))
})
selectr/tests/testthat/test-has.R 0000755 0001762 0000144 00000017641 15107555231 016610 0 ustar ligges users context(":has() pseudo-class")
test_that(":has() generates correct XPath", {
xpath <- function(css) {
css_to_xpath(css, prefix = "")
}
# Simple :has() with element
expect_that(xpath("div:has(p)"),
equals("div[(.//*[(name() = 'p')])]"))
# :has() with class selector
expect_that(xpath("div:has(.foo)"),
equals("div[(.//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' foo '))])]"))
# :has() with ID selector
expect_that(xpath("section:has(#main)"),
equals("section[(.//*[(@id = 'main')])]"))
# :has() with attribute selector
expect_that(xpath("form:has([required])"),
equals("form[(.//*[(@required)])]"))
# :has() with multiple selectors (OR logic)
expect_that(xpath("div:has(p, span)"),
equals("div[(.//*[(name() = 'p')] | .//*[(name() = 'span')])]"))
# Multiple :has() selectors
expect_that(xpath("div:has(p):has(span)"),
equals("div[(.//*[(name() = 'p')]) and (.//*[(name() = 'span')])]"))
# :has() on universal selector
expect_that(xpath("*:has(img)"),
equals("*[(.//*[(name() = 'img')])]"))
# Complex: :has() with class on descendant
expect_that(xpath("section:has(div.content)"),
equals("section[(.//*[(@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')) and (name() = 'div')])]"))
})
test_that(":has() works correctly with XML documents", {
library(XML)
# Create test document
html <- paste0(
'',
' ',
'
',
'
Paragraph in section 1
',
'
',
' ',
' ',
'
',
' Span in section 2',
'
',
' ',
' ',
' ',
'
Title
',
' ',
' ',
' ',
'
Article paragraph
',
' ',
''
)
doc <- xmlRoot(xmlParse(html))
# Helper to get IDs
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
sapply(results, function(x) xmlGetAttr(x, "id"))
}
# Section containing a p element
expect_that(get_ids("section:has(p)"),
equals("s1"))
# Section containing a div
expect_that(get_ids("section:has(div)"),
equals(c("s1", "s2")))
# Section containing an h1
expect_that(get_ids("section:has(h1)"),
equals("s3"))
# Section with div.content
expect_that(get_ids("section:has(div.content)"),
equals("s1"))
# Section with div.sidebar
expect_that(get_ids("section:has(div.sidebar)"),
equals("s2"))
# Any element containing a p
# Note: XML returns root element too since it's also ancestor
ids <- get_ids(":has(p)")
expect_that("s1" %in% ids && "a1" %in% ids, equals(TRUE))
# Multiple selectors: section with p OR span
expect_that(get_ids("section:has(p, span)"),
equals(c("s1", "s2")))
# Chained :has() - section with both div and p
expect_that(get_ids("section:has(div):has(p)"),
equals("s1"))
# :has() should not match the element itself
expect_that(length(querySelectorAll(doc, "p:has(p)")),
equals(0))
})
test_that(":has() works correctly with xml2 documents", {
library(xml2)
# Create test document
html <- paste0(
'',
' ',
'
',
'
Paragraph in section 1
',
'
',
' ',
' ',
'
',
' Span in section 2',
'
',
' ',
' ',
' ',
'
Title
',
' ',
' ',
' ',
'
Article paragraph
',
' ',
''
)
doc <- read_xml(html)
# Helper to get IDs
get_ids <- function(css) {
results <- querySelectorAll(doc, css)
xml_attr(results, "id")
}
# Section containing a p element
expect_that(get_ids("section:has(p)"),
equals("s1"))
# Section containing a div
expect_that(get_ids("section:has(div)"),
equals(c("s1", "s2")))
# Section containing an h1
expect_that(get_ids("section:has(h1)"),
equals("s3"))
# Section with div.content
expect_that(get_ids("section:has(div.content)"),
equals("s1"))
# Section with div.sidebar
expect_that(get_ids("section:has(div.sidebar)"),
equals("s2"))
# Any element containing a p
# Note: returns all ancestors including root
ids <- get_ids(":has(p)")
expect_that("s1" %in% ids && "a1" %in% ids, equals(TRUE))
# Multiple selectors: section with p OR span
expect_that(get_ids("section:has(p, span)"),
equals(c("s1", "s2")))
# Chained :has() - section with both div and p
expect_that(get_ids("section:has(div):has(p)"),
equals("s1"))
# :has() should not match the element itself
expect_that(length(querySelectorAll(doc, "p:has(p)")),
equals(0))
})
test_that(":has() handles edge cases correctly", {
library(XML)
# Empty elements
html1 <- '
'
doc1 <- xmlRoot(xmlParse(html1))
# Only d2 has a p descendant
result1 <- querySelectorAll(doc1, "div:has(p)")
expect_that(length(result1), equals(1))
expect_that(xmlGetAttr(result1[[1]], "id"), equals("d2"))
# Nested :has()
html2 <- paste0(
'',
' ',
' ',
'
'
doc3 <- xmlRoot(xmlParse(html3))
# Div that has any descendant
result4 <- querySelectorAll(doc3, "div:has(*)")
expect_that(length(result4), equals(1))
expect_that(xmlGetAttr(result4[[1]], "id"), equals("d1"))
})
test_that(":has() works with querySelector (returns first match)", {
library(xml2)
html <- paste0(
'',
'
First
',
'
Second
',
' Third',
''
)
doc <- read_xml(html)
# Should return first section with p
result <- querySelector(doc, "section:has(p)")
expect_that(xml_attr(result, "id"), equals("s1"))
# Should return NULL when no match
result_none <- querySelector(doc, "section:has(article)")
expect_that(result_none, equals(NULL))
})
selectr/tests/testthat/test-nth-child-of.R 0000755 0001762 0000144 00000007522 15107555231 020306 0 ustar ligges users context("nth-child with 'of S' selector list (CSS Level 4)")
test_that(":nth-child(n of S) parses correctly", {
parsed <- selectr:::parse("div:nth-child(2 of .foo)")
expect_equal(length(parsed), 1)
fn_obj <- parsed[[1]]$parsed_tree
expect_equal(class(fn_obj)[1], "Function")
expect_equal(fn_obj$name, "nth-child")
expect_false(is.null(fn_obj$selector_list))
expect_equal(length(fn_obj$selector_list), 1)
})
test_that(":nth-last-child(n of S) parses correctly", {
parsed <- selectr:::parse("li:nth-last-child(3 of .important)")
expect_equal(length(parsed), 1)
fn_obj <- parsed[[1]]$parsed_tree
expect_equal(class(fn_obj)[1], "Function")
expect_equal(fn_obj$name, "nth-last-child")
expect_false(is.null(fn_obj$selector_list))
expect_equal(length(fn_obj$selector_list), 1)
})
test_that(":nth-child(n of S) with multiple selectors parses correctly", {
parsed <- selectr:::parse("div:nth-child(2 of .foo, .bar)")
expect_equal(length(parsed), 1)
fn_obj <- parsed[[1]]$parsed_tree
expect_equal(fn_obj$name, "nth-child")
expect_equal(length(fn_obj$selector_list), 2)
})
test_that(":nth-child(n of S) generates correct XPath", {
xpath <- css_to_xpath("div:nth-child(2 of .foo)")
# Should count siblings matching .foo
expect_true(grepl("count\\(preceding-sibling::\\*\\[", xpath))
expect_true(grepl("@class", xpath))
expect_true(grepl("foo", xpath))
# Should also check that current element matches .foo
expect_true(grepl("and.*@class", xpath))
})
test_that(":nth-last-child(n of S) generates correct XPath", {
xpath <- css_to_xpath("li:nth-last-child(3 of .important)")
# Should count following siblings matching .important
expect_true(grepl("count\\(following-sibling::\\*\\[", xpath))
expect_true(grepl("@class", xpath))
expect_true(grepl("important", xpath))
# Should also check that current element matches .important
expect_true(grepl("and.*@class", xpath))
})
test_that(":nth-child(An+B of S) with formula works", {
xpath <- css_to_xpath("p:nth-child(2n+1 of .highlight)")
# Should have modulo operation for 2n+1
expect_true(grepl("mod", xpath))
# Should filter by .highlight
expect_true(grepl("highlight", xpath))
})
test_that(":nth-child(n of S1, S2) with multiple selectors generates OR condition", {
xpath <- css_to_xpath("div:nth-child(1 of .foo, .bar)")
# Should have both selectors
expect_true(grepl("foo", xpath))
expect_true(grepl("bar", xpath))
# Should have OR condition
expect_true(grepl("or", xpath))
})
test_that("Regular :nth-child without 'of' still works", {
xpath1 <- css_to_xpath("div:nth-child(2)")
xpath2 <- css_to_xpath("div:nth-last-child(3)")
# Should not have class checks
expect_false(grepl("@class", xpath1))
expect_false(grepl("@class", xpath2))
# Should have simple counting
expect_true(grepl("count\\(preceding-sibling::\\*\\)", xpath1))
expect_true(grepl("count\\(following-sibling::\\*\\)", xpath2))
})
test_that(":nth-child(odd of S) works", {
xpath <- css_to_xpath("div:nth-child(odd of .item)")
# Should have modulo 2
expect_true(grepl("mod 2", xpath))
# Should filter by .item
expect_true(grepl("item", xpath))
})
test_that(":nth-child(even of S) works", {
xpath <- css_to_xpath("div:nth-child(even of .item)")
# Should have modulo 2
expect_true(grepl("mod 2", xpath))
# Should filter by .item
expect_true(grepl("item", xpath))
})
test_that(":nth-child with complex selector works", {
xpath <- css_to_xpath("div:nth-child(2 of div.foo)")
# Should check element name
expect_true(grepl("name\\(\\) = 'div'", xpath))
# Should check class
expect_true(grepl("foo", xpath))
})
selectr/tests/test-all.R 0000755 0001762 0000144 00000000072 15107010654 014726 0 ustar ligges users library(testthat)
library(selectr)
test_check("selectr")
selectr/.Rinstignore 0000755 0001762 0000144 00000000012 15106553322 014215 0 ustar ligges users Makefile
selectr/MD5 0000644 0001762 0000144 00000004423 15120470422 012223 0 ustar ligges users bafc12bd205aaeaf8c70ef489e183d5c *DESCRIPTION
3f1e4e2a111ed275ca25012cbd658b79 *LICENCE
307d573f78e96833a184da040a2b6d5e *NAMESPACE
530048c59d463df62909d02f460f15ba *R/main.R
9b2fbee07228b80976f856f508526f0f *R/parser.R
de444ce30fd94501e7db948553419192 *R/xpath.R
396b43f4014677c2ad7a6320ffebbf41 *README.md
2dfbd9071b69ea0bec93d05c2ad8ee8a *inst/CITATION
6d693ae639e4e06274d6331c58c6087d *inst/NEWS.Rd
944df9b3445e090bb4ffa83c7312a87c *inst/demos/svg-mathml.svg
c1681851d43f42c568c585e8a8a1cb52 *man/css_to_xpath.Rd
e3d86a86846ca363b6bba9e24e95aa40 *man/querySelectorAll.Rd
b697c111a02fec0b26c003fed8a53a96 *tests/test-all.R
cec73ba8dffbd2ae44fb886f8015b85d *tests/testthat/test-adjacent-sibling.R
0bebbd1c7d33477f327ef17d109716c1 *tests/testthat/test-has.R
8f6dac3236d7a3f671a17f11f906202f *tests/testthat/test-main.R
b27989f32bb62b10250558e7f0a1311d *tests/testthat/test-method-registration.R
7f9b1be32964dae08551c1d6879cf9e6 *tests/testthat/test-nth-child-of.R
48b57e0bdf1c5d82996042995d9b8d2c *tests/testthat/test-nth-child.R
b891ff66e9a9f31367ccb11cbbd912ef *tests/testthat/test-parse-errors.R
d288b47abadf6701be264072dfbda0b6 *tests/testthat/test-parser.R
3f891d698c3692a2e430642aa98aec40 *tests/testthat/test-pseudo.R
c8833210c9c207c915543f229053c910 *tests/testthat/test-querySelector-XML.R
4b7c0b8ba712fdb819f18c1dfefc49ff *tests/testthat/test-querySelector-default.R
ce2f6d685fec146b230c97ebccef235a *tests/testthat/test-querySelector-xml2.R
7dccc672e6ef4b611506f1d455c92ce1 *tests/testthat/test-quoting.R
b4d30af3437df638dde5a93afab3d931 *tests/testthat/test-select-XML.R
404f84077b9fd9200a2bac77c1f94a4f *tests/testthat/test-select-xml2.R
961c5d64cb805af05b5476d2810fba0d *tests/testthat/test-series.R
4b0cd1a924953d1877bce40a1d75714f *tests/testthat/test-shakespeare-XML.R
b31287d4d667b0052fc2d6b29e8e6ac2 *tests/testthat/test-shakespeare-xml2.R
ae39299f1f36ccde24f059f10e7f1a61 *tests/testthat/test-specificity.R
d1bca8335ea93159c1d4461f8573ebd3 *tests/testthat/test-tokenizer.R
fc52232c5bad2bc580289894652ba22b *tests/testthat/test-translation.R
f1c86041f6b35ad1d55789ef6437daac *tests/testthat/test-where.R
310eeb35bd29b394b5e89eabac5bb379 *tests/testthat/test-xmllang-XML.R
dbd3cfbd807024584632eb78d041ecda *tests/testthat/test-xmllang-xml2.R
ed31e3629ea9665d2a1a2924981d0b9b *tests/testthat/test-xpath.R
selectr/R/ 0000755 0001762 0000144 00000000000 15110034155 012107 5 ustar ligges users selectr/R/main.R 0000644 0001762 0000144 00000020502 15107276636 013177 0 ustar ligges users css_to_xpath <- function(selector, prefix = "descendant-or-self::", translator = "generic") {
if (missing(selector) || is.null(selector))
stop("A valid selector (character vector) must be provided.")
if (!is.character(selector))
stop("The 'selector' argument must be a character vector")
if (!is.character(prefix))
stop("The 'prefix' argument must be a character vector")
if (!is.character(translator))
stop("The 'translator' argument must be a character vector")
if (anyNA(selector)) {
warning("NA values were found in the 'selector' argument, they have been removed")
selector <- selector[!is.na(selector)]
}
if (anyNA(prefix)) {
warning("NA values were found in the 'prefix' argument, they have been removed")
prefix <- prefix[!is.na(prefix)]
}
if (anyNA(translator)) {
warning("NA values were found in the 'translator' argument, they have been removed")
translator <- translator[!is.na(translator)]
}
zeroLengthArgs <- character(0)
if (!length(selector))
zeroLengthArgs <- c(zeroLengthArgs, "selector")
if (!length(prefix))
zeroLengthArgs <- c(zeroLengthArgs, "prefix")
if (!length(translator))
zeroLengthArgs <- c(zeroLengthArgs, "translator")
if (length(zeroLengthArgs)) {
plural <- if (length(zeroLengthArgs) > 1) "s" else ""
stop("Zero length character vector found for the following argument",
plural,
": ",
paste0(zeroLengthArgs, collapse = ", "))
}
translator <- sapply(translator, function(tran) {
match.arg(tolower(tran), c("generic", "html", "xhtml"))
})
maxArgLength <- max(length(selector), length(prefix), length(translator))
selector <- rep(selector, length.out = maxArgLength)
prefix <- rep(prefix, length.out = maxArgLength)
translator <- rep(translator, length.out = maxArgLength)
results <- character(maxArgLength)
for (i in seq_len(maxArgLength)) {
sel <- selector[i]
pref <- prefix[i]
trans <- translator[i]
tran <- if (trans == "html") {
HTMLTranslator$new()
} else if (trans == "xhtml") {
HTMLTranslator$new(xhtml = TRUE)
} else {
GenericTranslator$new()
}
results[i] <- tran$css_to_xpath(sel, pref)
}
as.character(results)
}
querySelector <- function(doc, selector, ns = NULL, ...) {
UseMethod("querySelector", doc)
}
querySelectorAll <- function(doc, selector, ns = NULL, ...) {
UseMethod("querySelectorAll", doc)
}
querySelectorNS <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
UseMethod("querySelectorNS", doc)
}
querySelectorAllNS <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
UseMethod("querySelectorAllNS", doc)
}
querySelector.default <- function(doc, selector, ns = NULL, ...) {
stop("The object given to querySelector() is not an 'XML' or 'xml2' document or node.")
}
querySelectorAll.default <- function(doc, selector, ns = NULL, ...) {
stop("The object given to querySelectorAll() is not an 'XML' or 'xml2' document or node.")
}
querySelectorNS.default <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
stop("The object given to querySelectorNS() is not an 'XML' or 'xml2' document or node.")
}
querySelectorAllNS.default <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
stop("The object given to querySelectorAllNS() is not an 'XML' or 'xml2' document or node.")
}
querySelector.XMLInternalNode <-
querySelector.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
results <- querySelectorAll(doc, selector, ns, ...)
if (length(results))
results[[1]]
else
NULL
}
querySelectorAll.XMLInternalNode <- function(doc, selector, ns = NULL, ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
xpath <- css_to_xpath(selector, ...)
if (!is.null(ns)) {
ns <- formatNS(ns)
XML::getNodeSet(doc, xpath, ns)
} else {
XML::getNodeSet(doc, xpath)
}
}
querySelectorAll.XMLInternalDocument <- function(doc, selector, ns = NULL, ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
doc <- XML::xmlRoot(doc)
querySelectorAll(doc, selector, ns, ...)
}
querySelectorNS.XMLInternalNode <-
querySelectorNS.XMLInternalDocument <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (missing(ns) || !length(ns))
stop("A namespace must be provided.")
ns <- formatNS(ns)
prefix <- formatNSPrefix(ns, prefix)
querySelector(doc, selector, ns, prefix = prefix, ...)
}
querySelectorAllNS.XMLInternalNode <-
querySelectorAllNS.XMLInternalDocument <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (missing(ns) || !length(ns))
stop("A namespace must be provided.")
ns <- formatNS(ns)
prefix <- formatNSPrefix(ns, prefix)
querySelectorAll(doc, selector, ns, prefix = prefix, ...)
}
querySelector.xml_node <- function(doc, selector, ns = NULL, ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (is.null(ns))
ns <- xml2::xml_ns(doc)
validateNS(ns)
xpath <- css_to_xpath(selector, ...)
result <- xml2::xml_find_first(doc, xpath, ns)
if (length(result))
result
else
NULL
}
querySelectorAll.xml_node <- function(doc, selector, ns = NULL, ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (is.null(ns))
ns <- xml2::xml_ns(doc)
validateNS(ns)
xpath <- css_to_xpath(selector, ...)
xml2::xml_find_all(doc, xpath, ns)
}
querySelectorNS.xml_node <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (missing(ns) || is.null(ns) || !length(ns))
stop("A namespace must be provided.")
ns <- formatNS(ns)
prefix <- formatNSPrefix(ns, prefix)
querySelector(doc, selector, ns, prefix = prefix, ...)
}
querySelectorAllNS.xml_node <- function(doc, selector, ns,
prefix = "descendant-or-self::", ...) {
if (missing(selector))
stop("A valid selector (character vector) must be provided.")
if (missing(ns) || is.null(ns) || !length(ns))
stop("A namespace must be provided.")
ns <- formatNS(ns)
prefix <- formatNSPrefix(ns, prefix)
querySelectorAll(doc, selector, ns, prefix = prefix, ...)
}
# Takes a named vector or list and gives a named vector back
formatNS <- function(ns) {
if (is.null(ns))
return(NULL)
if (!is.list(ns) && !is.character(ns))
stop("A namespace object must be either a named list or a named character vector.")
nsNames <- names(ns)
if (is.null(nsNames) || anyNA(nsNames) || !all(nzchar(nsNames)))
stop("The namespace object either missing some or all names for each element in its collection.")
ns <- unlist(ns)
if (!is.character(ns))
stop("The values in the namespace object must be a character vector.")
names(ns) <- nsNames
ns
}
formatNSPrefix <- function(ns, prefix) {
filters <- paste0("//", names(ns), ":*", collapse = "|")
prefix <- paste0("(", filters, ")/", prefix)
prefix
}
# Checks whether a vector is a valid character vector for namespaces
validateNS <- function(ns) {
if (!is.character(ns))
stop("A namespace object must be comprised of characters")
nsNames <- names(ns)
if (is.null(nsNames) || anyNA(nsNames))
stop("The namespace object either missing some or all names for each element in its collection.")
}
selectr/R/parser.R 0000755 0001762 0000144 00000105310 15120447332 013537 0 ustar ligges users escape <- paste0("\\\\([0-9a-f]{1,6})(\r\n|[ \n\r\t\f])?", "|\\\\[^\n\r\f0-9a-f]")
nonascii <- "[^\1-\177]"
hash_re <- "([_a-z0-9-]|\\\\([0-9a-f]{1,6})(\r\n|[ \n\r\t\f])?|[^\1-\177])"
TokenMacros <- list(unicode_escape = "\\\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?",
escape = escape,
string_escape = paste0("\\\\(?:\n|\r\n|\r|\f)|", escape),
nonascii = nonascii,
nmchar = paste0("([_a-z0-9-]|", escape, "|", nonascii, ")"),
nmstart = paste0("[_a-z]|", escape, "|", nonascii))
Selector <- R6Class("Selector",
public = list(
parsed_tree = NULL,
pseudo_element = NULL,
initialize = function(tree, pseudo_element = NULL) {
self$parsed_tree <- tree
if (!is.null(pseudo_element))
self$pseudo_element <- tolower(pseudo_element)
},
repr = function() {
pseudo_el <-
if (is.null(self$pseudo_element)) ""
else paste0("::", self$pseudo_element)
paste0(self$parsed_tree$repr(), pseudo_el)
},
specificity = function() {
specs <- self$parsed_tree$specificity()
if (!is.null(self$pseudo_element))
specs[3] <- specs[3] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
ClassSelector <- R6Class("ClassSelector",
public = list(
selector = NULL,
class_name = NULL,
initialize = function(selector, class_name) {
self$selector <- selector
self$class_name <- class_name
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
".",
self$class_name,
"]")
},
specificity = function() {
specs <- self$selector$specificity()
specs[2] <- specs[2] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Function <- R6Class("Function",
public = list(
selector = NULL,
name = NULL,
arguments = NULL,
selector_list = NULL,
initialize = function(selector, name, arguments, selector_list = NULL) {
self$selector <- selector
self$name <- tolower(name)
self$arguments <- arguments
self$selector_list <- selector_list
},
repr = function() {
token_values <- lapply(self$arguments,
function(token) paste0("'", token$value, "'"))
token_values <- paste0(unlist(token_values), collapse = ", ")
token_values <- paste0("[", token_values, "]")
selector_list_repr <- ""
if (!is.null(self$selector_list)) {
selector_list_repr <- paste0(
" of ",
paste0(sapply(self$selector_list, function(s) s$repr()), collapse = ", ")
)
}
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":",
self$name,
"(",
token_values,
selector_list_repr,
")]")
},
argument_types = function() {
token_types <- lapply(self$arguments, function(token) token$type)
unlist(token_types)
},
specificity = function() {
specs <- self$selector$specificity()
specs[2] <- specs[2] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Pseudo <- R6Class("Pseudo",
public = list(
selector = NULL,
ident = NULL,
initialize = function(selector, ident) {
self$selector <- selector
self$ident <- tolower(ident)
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":",
self$ident,
"]")
},
specificity = function() {
specs <- self$selector$specificity()
specs[2] <- specs[2] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Negation <- R6Class("Negation",
public = list(
selector = NULL,
selector_list = NULL,
initialize = function(selector, selector_list) {
self$selector <- selector
self$selector_list <- selector_list
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":not(",
paste0(
sapply(self$selector_list, function(s) s$repr()),
collapse = ", "
),
")]")
},
specificity = function() {
specs <- self$selector$specificity()
# according to CSS Selectors Level 4, :not() takes the specificity of
# its most specific argument
sub_specs <- sapply(self$selector_list, function(s) s$specificity())
# sapply returns a matrix with each column being a selector's specificity
if (is.matrix(sub_specs)) {
# get rows as selectors
sub_specs <- t(sub_specs)
if (nrow(sub_specs) > 1) {
# sort by specificity (id, class, element) descending
sub_specs <- sub_specs[order(-sub_specs[, 1], -sub_specs[, 2], -sub_specs[, 3]), , drop = FALSE]
}
specs + sub_specs[1, ]
} else {
# single value case
specs + sub_specs
}
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Matching <- R6Class("Matching",
public = list(
selector = NULL,
selector_list = NULL,
initialize = function(selector, selector_list) {
self$selector <- selector
self$selector_list <- selector_list
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":is(",
paste0(
sapply(self$selector_list, function(s) s$repr()),
collapse = ", "
),
")]"
)
},
specificity = function() {
specs <- sapply(self$selector_list, function(s) s$specificity())
specs <- t(specs)
specs <- specs[order(-specs[, 1], -specs[, 2], -specs[, 3]), ]
specs[1, ]
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Where <- R6Class("Where",
public = list(
selector = NULL,
selector_list = NULL,
initialize = function(selector, selector_list) {
self$selector <- selector
self$selector_list <- selector_list
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":where(",
paste0(
sapply(self$selector_list, function(s) s$repr()),
collapse = ", "
),
")]"
)
},
specificity = function() {
# :where() always has zero specificity
self$selector$specificity()
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Has <- R6Class("Has",
public = list(
selector = NULL,
selector_list = NULL,
initialize = function(selector, selector_list) {
self$selector <- selector
self$selector_list <- selector_list
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
":has(",
paste0(
sapply(self$selector_list, function(s) s$repr()),
collapse = ", "
),
")]"
)
},
specificity = function() {
specs <- sapply(self$selector_list, function(s) s$specificity())
specs <- t(specs)
specs <- specs[order(-specs[, 1], -specs[, 2], -specs[, 3]), ]
# Add the maximum specificity from the selector list to the base selector
base_specs <- self$selector$specificity()
base_specs + specs[1, ]
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Attrib <- R6Class("Attrib",
public = list(
selector = NULL,
namespace = NULL,
attrib = NULL,
operator = NULL,
value = NULL,
initialize = function(selector, namespace, attrib, operator, value) {
self$selector <- selector
self$namespace <- namespace
self$attrib <- attrib
self$operator <- operator
self$value <- value
},
repr = function() {
attr <-
if (!is.null(self$namespace))
paste0(self$namespace, "|", self$attrib)
else
self$attrib
if (self$operator == "exists")
paste0(
first_class_name(self),
"[",
self$selector$repr(),
"[",
attr,
"]]")
else
paste0(
first_class_name(self),
"[",
self$selector$repr(),
"[",
attr,
" ",
self$operator,
" '",
self$value,
"']]")
},
specificity = function() {
specs <- self$selector$specificity()
specs[2] <- specs[2] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Element <- R6Class("Element",
public = list(
namespace = NULL,
element = NULL,
initialize = function(namespace = NULL, element = NULL) {
self$namespace <- namespace
self$element <- element
},
repr = function() {
el <-
if (!is.null(self$element)) self$element
else "*"
if (!is.null(self$namespace))
el <- paste0(self$namespace, "|", el)
paste0(first_class_name(self), "[", el, "]")
},
specificity = function() {
if (!is.null(self$element)) c(0, 0, 1)
else rep(0, 3)
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
Hash <- R6Class("Hash",
public = list(
selector = NULL,
id = NULL,
initialize = function(selector, id) {
self$selector <- selector
self$id <- id
},
repr = function() {
paste0(
first_class_name(self),
"[",
self$selector$repr(),
"#",
self$id,
"]")
},
specificity = function() {
specs <- self$selector$specificity()
specs[1] <- specs[1] + 1
specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
CombinedSelector <- R6Class("CombinedSelector",
public = list(
selector = NULL,
combinator = NULL,
subselector = NULL,
initialize = function(selector, combinator, subselector) {
if (is.null(selector))
stop("'selector' cannot be NULL")
self$selector <- selector
self$combinator <- combinator
self$subselector <- subselector
},
repr = function() {
comb <-
if (self$combinator == " ") ""
else self$combinator
paste0(
first_class_name(self),
"[",
self$selector$repr(),
" ",
comb,
" ",
self$subselector$repr(),
"]")
},
specificity = function() {
specs <- self$selector$specificity()
sub_specs <- self$subselector$specificity()
specs + sub_specs
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
#### Parser
# foo
el_re <- "^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$"
# foo#bar or #bar
id_re <- "^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$"
# foo.bar or .bar
class_re <- "^[ \t\r\n\f]*([a-zA-Z]*)\\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$"
parse <- function(css) {
el_match <- str_match(css, el_re)[1, 2]
if (!is.na(el_match))
return(list(Selector$new(Element$new(element = el_match))))
id_match <- str_match(css, id_re)[1, 2:3]
if (!is.na(id_match[2]))
return(list(Selector$new(
Hash$new(
Element$new(
element =
if (nzchar(id_match[1]) == 0) NULL
else id_match[1]),
id_match[2]))))
class_match <- str_match(css, class_re)[1, 2:3]
if (!is.na(class_match[3]))
return(list(Selector$new(
ClassSelector$new(
Element$new(
element =
if (is.null(class_match[2]) || is.na(class_match[2])) NULL
else class_match[2]),
class_match[3]))))
stream <- TokenStream$new(tokenize(css))
stream$source_text <- css
parse_selector_group(stream)
}
parse_selector_group <- function(stream) {
stream$skip_whitespace()
i <- 1
results <- list()
while (TRUE) {
parsed_selector <- parse_selector(stream)
results[[i]] <- Selector$new(parsed_selector$result,
parsed_selector$pseudo_element)
i <- i + 1
if (token_equality(stream$peek(), "DELIM", ",")) {
stream$nxt()
stream$skip_whitespace()
} else {
break
}
}
results
}
token_equality <- function(token, t, val) {
if (token$type != t)
return(FALSE)
# val can be NULL or (maybe) NA
if (is.null(val) && is.null(token$value))
return(TRUE)
if (is.na(val) && is.na(token$value))
return(TRUE)
# Should be OK with regular equality
token$value == val
}
parse_selector <- function(stream) {
results <- parse_simple_selector(stream)
result <- results$result
pseudo_element <- results$pseudo_element
while (TRUE) {
stream$skip_whitespace()
peek <- stream$peek()
if (token_equality(peek, "EOF", NULL) ||
token_equality(peek, "DELIM", ",")) {
break
}
if (!is.null(pseudo_element) && nzchar(pseudo_element)) {
stop("Got pseudo-element ::",
pseudo_element,
" not at the end of a selector")
}
if (peek$is_delim(c("+", ">", "~"))) {
# A combinator
combinator <- stream$nxt()$value
stream$skip_whitespace()
} else {
# By exclusion, the last parse_simple_selector() ended
# at peek == ' '
combinator <- " "
}
stuff <- parse_simple_selector(stream)
pseudo_element <- stuff$pseudo_element
result <- CombinedSelector$new(result, combinator, stuff$result)
}
list(result = result, pseudo_element = pseudo_element)
}
parse_simple_selector <- function(stream, inside_negation = FALSE) {
stream$skip_whitespace()
selector_start <- length(stream$used)
peek <- stream$peek()
if (peek$type == "IDENT" || token_equality(peek, "DELIM", "*")) {
if (peek$type == "IDENT") {
namespace <- stream$nxt()$value
} else {
stream$nxt()
namespace <- NULL
}
if (token_equality(stream$peek(), "DELIM", "|")) {
stream$nxt()
element <- stream$next_ident_or_star()
} else {
element <- namespace
namespace <- NULL
}
} else {
element <- namespace <- NULL
}
result <- Element$new(namespace, element)
pseudo_element <- NULL
while (TRUE) {
peek <- stream$peek()
if (any(peek$type == c("S", "EOF")) ||
peek$is_delim(c(",", "+", ">", "~")) ||
(inside_negation && token_equality(peek, "DELIM", ")"))) {
break
}
if (!is.null(pseudo_element)) {
stop("Got pseudo-element ::",
pseudo_element,
" not at the end of a selector")
}
if (peek$type == "HASH") {
result <- Hash$new(result, stream$nxt()$value)
} else if (token_equality(peek, "DELIM", ".")) {
stream$nxt()
result <- ClassSelector$new(result, stream$next_ident())
} else if (token_equality(peek, "DELIM", "|")) {
stream$nxt()
result <- Element$new(element = stream$next_ident())
} else if (token_equality(peek, "DELIM", "[")) {
stream$nxt()
result <- parse_attrib(result, stream)
} else if (token_equality(peek, "DELIM", ":") ||
token_equality(peek, "DELIM", "::")) {
if (token_equality(peek, "DELIM", "::")) {
stream$nxt()
pseudo_element <- stream$next_ident()
next
} else {
stream$nxt()
}
ident <- stream$next_ident()
if (tolower(ident) %in% c(
"first-line", "first-letter", "before", "after")) {
# Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two.
pseudo_element <- ident
next
}
if (!token_equality(stream$peek(), "DELIM", "(")) {
result <- Pseudo$new(result, ident)
next
}
stream$nxt()
stream$skip_whitespace()
if (tolower(ident) == "not") {
if (inside_negation) {
stop("Got nested :not()")
}
selectors <- parse_simple_selector_arguments(stream, "not")
result <- Negation$new(result, selectors)
} else if (any(tolower(ident) == c("matches", "is"))) {
selectors <- parse_simple_selector_arguments(stream, tolower(ident))
result <- Matching$new(result, selectors)
} else if (tolower(ident) == "where") {
selectors <- parse_simple_selector_arguments(stream, "where")
result <- Where$new(result, selectors)
} else if (tolower(ident) == "has") {
selectors <- parse_simple_selector_arguments(stream, "has")
result <- Has$new(result, selectors)
} else {
arguments <- list()
selector_list <- NULL
i <- 1
# Parse the function arguments (e.g., "2n+1" for nth-child)
# :lang() and :dir() can accept comma-separated lists
allow_commas <- tolower(ident) %in% c("lang", "dir")
while (TRUE) {
nt <- stream$nxt()
if (nt$type %in% c("IDENT", "STRING", "NUMBER") ||
(token_equality(nt, "DELIM", "+") ||
token_equality(nt, "DELIM", "-"))) {
arguments[[i]] <- nt
i <- i + 1
# Check if this is the 'of' keyword for nth-child/nth-last-child
if (nt$type == "IDENT" && tolower(nt$value) == "of" &&
any(tolower(ident) == c("nth-child", "nth-last-child"))) {
# Remove 'of' from arguments - it's a keyword, not an argument
arguments <- arguments[-length(arguments)]
# Parse the selector list that follows 'of'
stream$skip_whitespace()
selector_list <- parse_simple_selector_arguments(stream, ident)
break
}
} else if (token_equality(nt, "DELIM", "*") && allow_commas) {
# For :lang() and :dir(), allow * as a wildcard
arguments[[i]] <- nt
i <- i + 1
} else if (nt$type == "S") {
next
} else if (token_equality(nt, "DELIM", ",") && allow_commas) {
# For :lang() and :dir(), commas separate multiple values
stream$skip_whitespace()
next
} else if (token_equality(nt, "DELIM", ")")) {
break
} else {
stop("Expected an argument, got ", nt$repr())
}
}
if (length(arguments) == 0) {
stop("Expected at least one argument, got ", nt$repr())
}
result <- Function$new(result, ident, arguments, selector_list)
}
} else {
stop("Expected selector, got ", stream$peek()$repr())
}
}
if (length(stream$used) == selector_start) {
stop("Expected selector, got ", stream$peek()$repr())
}
list(result = result, pseudo_element = pseudo_element)
}
parse_simple_selector_arguments <- function(stream, function_name = NULL) { # nolint: object_length_linter.
index <- 1
arguments <- list()
while (TRUE) {
results <- parse_simple_selector(stream, inside_negation = TRUE)
result <- results$result
pseudo_element <- results$pseudo_element
if (!is.null(pseudo_element)) {
if (!is.null(function_name)) {
stop("Got pseudo-element ::", pseudo_element, " inside :", function_name, "() at ", stream$peeked$pos)
} else {
stop("Got pseudo-element ::", pseudo_element, " inside function")
}
}
arguments[[index]] <- result
index <- index + 1
stream$skip_whitespace()
nt <- stream$nxt()
if (token_equality(nt, "DELIM", ")")) {
break
} else if (token_equality(nt, "DELIM", ",")) {
stream$skip_whitespace()
# Check if there's actually a selector after the comma
peek <- stream$peek()
if (token_equality(peek, "DELIM", ")")) {
# Trailing comma before closing paren
stop("Expected ')', got ", nt$repr())
}
# Continue to parse next selector
} else {
stop("Expected an argument, got ", nt$repr())
}
}
arguments
}
parse_attrib <- function(selector, stream) {
stream$skip_whitespace()
attrib <- stream$next_ident_or_star()
if (is.null(attrib) && !token_equality(stream$peek(), "DELIM", "|"))
stop("Expected '|', got ", stream$peek()$repr())
if (token_equality(stream$peek(), "DELIM", "|")) {
stream$nxt()
namespace <- attrib
attrib <- stream$next_ident()
op <- NULL
} else if (token_equality(stream$peek(), "DELIM", "|=")) {
namespace <- NULL
stream$nxt()
op <- "|="
} else {
namespace <- op <- NULL
}
if (is.null(op)) {
stream$skip_whitespace()
nt <- stream$nxt()
if (token_equality(nt, "DELIM", "]")) {
return(Attrib$new(selector, namespace, attrib, "exists", NULL))
} else if (token_equality(nt, "DELIM", "=")) {
op <- "="
} else if (nt$is_delim(c("^=", "$=", "*=", "~=", "|=", "!="))) {
op <- nt$value
} else {
stop("Operator expected, got ", nt$repr())
}
}
stream$skip_whitespace()
value <- stream$nxt()
if (!value$type %in% c("IDENT", "STRING")) {
stop("Expected string or ident, got ", value$repr())
}
stream$skip_whitespace()
nt <- stream$nxt()
if (!token_equality(nt, "DELIM", "]")) {
stop("Expected ']', got ", nt$repr())
}
Attrib$new(selector, namespace, attrib, op, value$value)
}
str_int <- function(s) {
suppressWarnings(as.integer(s))
}
parse_series <- function(tokens) {
for (token in tokens) {
if (token$type == "STRING")
stop("String tokens not allowed in series.")
}
s <- paste0(sapply(tokens, function(x) x$value), collapse = "")
if (s == "odd")
return(2:1)
else if (s == "even")
return(c(2, 0))
else if (s == "n")
return(1:0)
if (is.na(str_locate(s, "n")[1, 1])) {
result <- str_int(s)
if (is.na(result)) {
return(NULL)
} else {
return(c(0, result))
}
}
ab <- str_split_fixed(s, "n", 2)[1, ]
a <- str_trim(ab[1])
b <- str_trim(ab[2])
intb <- str_int(b)
if (!nzchar(a) && is.na(intb))
return(NULL)
if (!nzchar(a))
a <- 1
else if (a == "-" || a == "+")
a <- str_int(paste0(a, "1"))
else
a <- str_int(a)
if (!nzchar(b))
b <- 0
else
b <- str_int(b)
c(a, b)
}
Token <- R6Class("Token",
public = list(
type = "",
value = NULL,
pos = 1,
initialize = function(type = "", value = NULL, pos = 1) {
self$type <- type
self$value <- value
self$pos <- pos
},
repr = function() {
paste0("<", self$type, " '", self$value, "' at ", self$pos, ">")
},
is_delim = function(values) {
self$type == "DELIM" && self$value %in% values
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
)
)
EOFToken <- R6Class("EOFToken",
inherit = Token,
public = list(
initialize = function(pos = 1, type = "EOF", value = NULL) {
super$initialize(type, value, pos)
},
repr = function() {
paste0("<", self$type, " at ", self$pos, ">")
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
))
compile_ <- function(pattern) {
function(x) {
str_locate(x, pattern)[1, ]
}
}
delims_2ch <- c("~=", "|=", "^=", "$=", "*=", "::", "!=")
delims_1ch <- c(">", "+", "~", ",", ".", "*", "=", "[", "]", "(", ")", "|", ":", "#")
delim_escapes <- paste0("\\", delims_1ch, collapse = "|")
match_whitespace <- compile_("[ \t\r\n\f]+")
match_number <- compile_("[+-]?(?:[0-9]*\\.[0-9]+|[0-9]+)")
match_hash <- compile_(paste0("^#([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+"))
match_ident <- compile_(paste0("^([_a-zA-Z0-9-]|", nonascii, "|\\\\(?:", delim_escapes, "))+"))
match_string_by_quote <- list("'" = compile_(paste0("([^\n\r\f\\']|", TokenMacros$string_escape, ")*")),
'"' = compile_(paste0('([^\n\r\f\\"]|', TokenMacros$string_escape, ")*")))
# Substitution for escaped chars
sub_simple_escape <- function(x) gsub("\\\\(.)", "\\1", x)
sub_unicode_escape <- function(x) gsub(TokenMacros$unicode_escape, "\\1", x, ignore.case = TRUE)
sub_newline_escape <- function(x) gsub("\\\\(?:\n|\r\n|\r|\f)", "", x)
tokenize <- function(s) {
pos <- 1
i <- 1
len_s <- nchar(s)
results <- list()
while (pos <= len_s) {
ss <- substring(s, pos, len_s)
match <- match_whitespace(ss)
if (!anyNA(match) && match[1] == 1) {
results[[i]] <- Token$new("S", " ", pos)
match_end <- match[2]
pos <- pos + match_end
i <- i + 1
next
}
match <- match_number(ss)
if (!anyNA(match) && match[1] == 1) {
match_start <- match[1]
match_end <- max(match[1], match[2])
value <- substring(ss, match_start, match_end)
results[[i]] <- Token$new("NUMBER", value, pos)
pos <- pos + match_end
i <- i + 1
next
}
match <- match_ident(ss)
if (!anyNA(match) && match[1] == 1) {
match_start <- match[1]
match_end <- max(match[1], match[2])
value <- substring(ss, match_start, match_end)
value <- sub_simple_escape(sub_unicode_escape(value))
results[[i]] <- Token$new("IDENT", value, pos)
pos <- pos + match_end
i <- i + 1
next
}
match <- match_hash(ss)
if (!anyNA(match) && match[1] == 1) {
match_start <- match[1]
match_end <- max(match[1], match[2])
value <- substring(ss, match_start, match_end)
value <- sub_simple_escape(sub_unicode_escape(value))
hash_id <- substring(value, 2)
results[[i]] <- Token$new("HASH", hash_id, pos)
pos <- pos + match_end
i <- i + 1
next
}
# Testing presence of two char delims
nc_inds <- seq_len(nchar(ss))
if (length(nc_inds) %% 2 == 1)
nc_inds <- c(nc_inds, length(nc_inds) + 1)
split_ss_2ch <- substring(ss, nc_inds[(nc_inds %% 2) == 1],
nc_inds[(nc_inds %% 2) == 0])
delim_inds_2ch <- which(split_ss_2ch %in% delims_2ch)
if (length(delim_inds_2ch) && delim_inds_2ch[1] == 1) {
# We have a 2ch delim
results[[i]] <- Token$new("DELIM", split_ss_2ch[1], pos)
pos <- pos + 2
i <- i + 1
next
}
# Testing presence of single char delims
split_ss_1ch <- substring(ss, nc_inds, nc_inds)
delim_inds_1ch <- which(split_ss_1ch %in% delims_1ch)
if (length(delim_inds_1ch) && delim_inds_1ch[1] == 1) {
# We have a single char delim
results[[i]] <- Token$new("DELIM", split_ss_1ch[1], pos)
pos <- pos + 1
i <- i + 1
next
}
quote <- substring(s, pos, pos)
if (quote %in% c("'", '"')) {
ncs <- nchar(s)
split_chars <- substring(s, (pos + 1):ncs, (pos + 1):ncs)
matching_quotes <- which(split_chars == quote)
is_escaped <- logical(length(matching_quotes))
if (length(matching_quotes)) {
for (j in seq_along(matching_quotes)) {
end_quote <- matching_quotes[j]
if (end_quote > 1) {
# Count consecutive backslashes before the quote
# If odd number of backslashes, the quote is escaped
backslash_count <- 0
check_pos <- end_quote - 1
while (check_pos >= 1 && split_chars[check_pos] == "\\") {
backslash_count <- backslash_count + 1
check_pos <- check_pos - 1
}
is_escaped[j] <- (backslash_count %% 2) == 1
}
}
if (all(is_escaped)) {
stop("Unclosed string at ", pos)
}
end_quote <- matching_quotes[min(which(!is_escaped))]
value <- substring(s, pos + 1, pos + end_quote - 1)
value <- sub_simple_escape(
sub_unicode_escape(
sub_newline_escape(value)))
results[[i]] <- Token$new("STRING", value, pos)
pos <- pos + end_quote + 1 # one for each quote char
i <- i + 1
} else {
stop("Unclosed string at ", pos)
}
}
# Remove comments
pos1 <- pos + 1
if (substring(s, pos, pos1) == "/*") {
rel_pos <- str_locate(ss, "\\*/")[1]
pos <-
if (is.na(rel_pos)) {
len_s + 1
} else {
pos + rel_pos + 1
}
next
}
# Because we always call 'next', if we're here there must have
# been an error
tmp <- substring(ss, 1, 1)
if (!tmp %in% c(delims_1ch, '"', "'")) {
stop("Unexpected character '",
tmp,
"' found at position ",
pos)
}
}
results[[i]] <- EOFToken$new(pos)
results
}
TokenStream <- R6Class("TokenStream",
public = list(
pos = 1,
tokens = NULL,
ntokens = 0,
used = list(),
source_text = NULL,
peeked = list(),
peeking = FALSE,
initialize = function(tokens, source_text = NULL) {
self$tokens <- tokens
self$ntokens <- length(tokens)
self$source_text <- source_text
},
nxt = function() {
if (self$peeking) {
self$peeking <- FALSE
self$used[[self$pos]] <- self$peeked
self$peeked
} else {
nt <- self$next_token()
self$used[[self$pos]] <- nt
nt
}
},
next_token = function() {
nt <- self$tokens[[self$pos]]
self$pos <- self$pos + 1
nt
},
peek = function() {
if (!self$peeking) {
self$peeked <- self$next_token()
self$peeking <- TRUE
}
self$peeked
},
next_ident = function() {
nt <- self$nxt()
if (nt$type != "IDENT")
stop("Expected ident, got ", nt$repr())
nt$value
},
next_ident_or_star = function() {
nt <- self$nxt()
if (nt$type == "IDENT")
nt$value
else if (token_equality(nt, "DELIM", "*"))
NULL
else
stop("Expected ident or '*', got ", nt$repr())
},
skip_whitespace = function() {
peek <- self$peek()
if (peek$type == "S")
self$nxt()
}
)
)
selectr/R/xpath.R 0000755 0001762 0000144 00000136036 15107555231 013403 0 ustar ligges users XPathExpr <- R6Class("XPathExpr",
public = list(
path = "",
element = "*",
condition = "",
star_prefix = FALSE,
initialize = function(
path = "", element = "*", condition = "", star_prefix = FALSE) {
self$path <- path
self$element <- element
self$condition <-
if (nzchar(condition)) paste0("(", condition, ")")
else condition
self$star_prefix <- star_prefix
},
str = function() {
p <- paste0(self$path, self$element)
if (nzchar(self$condition))
p <- paste0(p, "[", self$condition, "]")
p
},
repr = function() {
paste0(first_class_name(self), "[", self$str(), "]")
},
add_condition = function(condition, conjunction = "and") {
self$condition <-
if (nzchar(self$condition))
paste0(self$condition, " ", conjunction, " (", condition, ")")
else
paste0("(", condition, ")")
},
add_name_test = function() {
if (self$element == "*")
return()
self$add_condition(paste0("name() = ", xpath_literal(self$element)))
self$element <- "*"
},
join = function(combiner, other) {
p <- paste0(self$str(), combiner)
if (other$path != "*/")
p <- paste0(p, other$path)
self$path <- p
self$element <- other$element
self$condition <- other$condition
self
},
show = function() { # nocov start
cat(self$repr(), "\n")
} # nocov end
))
is_safe_name <- function(name) {
grepl("^[a-zA-Z_][a-zA-Z0-9_.-]*$", name)
}
first_class_name <- function(obj) {
result <- class(obj)[1]
# HACK!
# R.oo clashes with our preferred use of 'Class' for the name of the
# ClassSelector class, even though it is hidden in our package.
# Because the name of the class is used in places, perform a
# special case rename from ClassSelector to Class.
if (result == "ClassSelector") "Class" else result
}
xpath_literal <- function(literal) {
if (!is.character(literal) || length(literal) != 1) {
stop("literal must be a single character string")
}
lenseq <- seq_len(nchar(literal))
split_chars <- substring(literal, lenseq, lenseq)
if (!any(split_chars == "'")) {
literal <- paste0("'", literal, "'")
} else if (!any(split_chars == '"')) {
literal <- paste0('"', literal, '"')
} else {
dq_inds <- which(split_chars == "'")
sq_inds <- which(split_chars != "'")
split_chars[dq_inds] <- paste0('"', split_chars[dq_inds], '"')
split_chars[sq_inds] <- paste0("'", split_chars[sq_inds], "'")
literal <- paste(split_chars, collapse = ",")
literal <- paste0("concat(", literal, ")")
}
literal
}
GenericTranslator <- R6Class("GenericTranslator",
public = list(
combinator_mapping = c(" " = "descendant",
">" = "child",
"+" = "direct_adjacent",
"~" = "indirect_adjacent"),
# != is not in Selectors Level 3, but included anyway
attribute_operator_mapping = c("exists" = "exists",
"=" = "equals",
"~=" = "includes",
"|=" = "dashmatch",
"^=" = "prefixmatch",
"$=" = "suffixmatch",
"*=" = "substringmatch",
"!=" = "different"),
id_attribute = "id",
lang_attribute = "xml:lang",
lower_case_element_names = FALSE,
lower_case_attribute_names = FALSE,
lower_case_attribute_values = FALSE,
css_to_xpath = function(css, prefix = "descendant-or-self::") {
selectors <- parse(css)
selectors <-
if (is.null(selectors)) list()
else if (!is.list(selectors)) list(selectors)
else selectors
lapply(selectors, function(selector) {
if (first_class_name(selector) == "Selector" &&
!is.null(selector$pseudo_element))
stop("Pseudo-elements are not supported.")
})
char_selectors <-
sapply(selectors,
function(selector)
self$selector_to_xpath(selector, prefix))
paste0(char_selectors, collapse = " | ")
},
selector_to_xpath = function(selector, prefix = "descendant-or-self::") {
tree <- selector$parsed_tree
xpath <- self$xpath(tree)
if (!inherits(xpath, "XPathExpr"))
stop("'xpath' is not an instance of 'XPathExpr'")
paste0(if (!is.null(prefix)) prefix else "", xpath$str())
},
xpath = function(parsed_selector) {
type_name <- first_class_name(parsed_selector)
method_name <- paste0("xpath_", tolower(type_name))
if (method_name == "xpath_attrib")
self$xpath_attrib(parsed_selector)
else if (method_name == "xpath_class")
self$xpath_class(parsed_selector)
else if (method_name == "xpath_combinedselector")
self$xpath_combinedselector(parsed_selector)
else if (method_name == "xpath_element")
self$xpath_element(parsed_selector)
else if (method_name == "xpath_matching")
self$xpath_matching(parsed_selector)
else if (method_name == "xpath_where")
self$xpath_where(parsed_selector)
else if (method_name == "xpath_has")
self$xpath_has(parsed_selector)
else if (method_name == "xpath_function")
self$xpath_function(parsed_selector)
else if (method_name == "xpath_hash")
self$xpath_hash(parsed_selector)
else if (method_name == "xpath_negation")
self$xpath_negation(parsed_selector)
else if (method_name == "xpath_pseudo")
self$xpath_pseudo(parsed_selector)
else
stop("Unknown method name '", type_name, "'")
},
xpath_combinedselector = function(combined) {
combinator <- paste0(
"xpath_",
self$combinator_mapping[combined$combinator],
"_combinator")
left_xpath <- self$xpath(combined$selector)
right_xpath <- self$xpath(combined$subselector)
if (combinator == "xpath_descendant_combinator")
self$xpath_descendant_combinator(
left = left_xpath, right = right_xpath)
else if (combinator == "xpath_child_combinator")
self$xpath_child_combinator(
left = left_xpath, right = right_xpath)
else if (combinator == "xpath_direct_adjacent_combinator")
self$xpath_direct_adjacent_combinator(
left = left_xpath, right = right_xpath)
else if (combinator == "xpath_indirect_adjacent_combinator")
self$xpath_indirect_adjacent_combinator(
left = left_xpath, right = right_xpath)
else
stop("Unknown combinator '",
self$combinator_mapping[combined$combinator], "'")
},
xpath_negation = function(negation) {
xpath <- self$xpath(negation$selector)
# Collect all conditions from the selector list
conditions <- character(0)
for (subselector in negation$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
# Combine conditions with OR (any match means element is excluded)
if (length(conditions) > 0) {
combined_condition <- paste0(conditions, collapse = " or ")
xpath$add_condition(paste0("not(", combined_condition, ")"))
} else {
xpath$add_condition("0")
}
xpath
},
xpath_matching = function(matching) {
xpath <- self$xpath(matching$selector)
exprs <- sapply(matching$selector_list, function(s) self$xpath(s))
for (e in exprs) {
e$add_name_test()
if (nzchar(e$condition)) {
xpath$add_condition(e$condition, "or")
}
}
xpath
},
xpath_where = function(where) {
# :where() behaves exactly like :is() in terms of matching,
# but has zero specificity (handled in the Where class itself)
xpath <- self$xpath(where$selector)
exprs <- sapply(where$selector_list, function(s) self$xpath(s))
for (e in exprs) {
e$add_name_test()
if (nzchar(e$condition)) {
xpath$add_condition(e$condition, "or")
}
}
xpath
},
xpath_has = function(has) {
# :has() matches elements that have descendants matching the selector list
xpath <- self$xpath(has$selector)
# Build conditions that check for the existence of descendants
conditions <- character(0)
for (subselector in has$selector_list) {
sub_xpath <- self$xpath(subselector)
# Build the full descendant path
sub_xpath$add_name_test()
desc_test <- paste0(".//", sub_xpath$element)
if (nzchar(sub_xpath$condition)) {
desc_test <- paste0(desc_test, "[", sub_xpath$condition, "]")
}
conditions <- c(conditions, desc_test)
}
# Combine conditions with OR (any descendant match means the element matches)
if (length(conditions) > 0) {
combined_condition <- paste0(conditions, collapse = " | ")
xpath$add_condition(combined_condition)
}
xpath
},
xpath_function = function(fn) {
method_name <- paste0(
"xpath_",
gsub("-", "_", fn$name),
"_function")
xp <- self$xpath(fn$selector)
if (method_name == "xpath_contains_function")
self$xpath_contains_function(xp, fn)
else if (method_name == "xpath_lang_function")
self$xpath_lang_function(xp, fn)
else if (method_name == "xpath_dir_function")
self$xpath_dir_function(xp, fn)
else if (method_name == "xpath_nth_child_function")
self$xpath_nth_child_function(xp, fn)
else if (method_name == "xpath_nth_last_child_function")
self$xpath_nth_last_child_function(xp, fn)
else if (method_name == "xpath_nth_of_type_function")
self$xpath_nth_of_type_function(xp, fn)
else if (method_name == "xpath_nth_last_of_type_function")
self$xpath_nth_last_of_type_function(xp, fn)
else
stop("The pseudo-class :",
gsub("-", "_", fn$name),
"() is unknown")
},
xpath_pseudo = function(pseudo) {
method_name <- paste0(
"xpath_",
gsub("-", "_", pseudo$ident),
"_pseudo")
xp <- self$xpath(pseudo$selector)
if (method_name == "xpath_root_pseudo")
self$xpath_root_pseudo(xp)
else if (method_name == "xpath_first_child_pseudo")
self$xpath_first_child_pseudo(xp)
else if (method_name == "xpath_last_child_pseudo")
self$xpath_last_child_pseudo(xp)
else if (method_name == "xpath_first_of_type_pseudo")
self$xpath_first_of_type_pseudo(xp)
else if (method_name == "xpath_last_of_type_pseudo")
self$xpath_last_of_type_pseudo(xp)
else if (method_name == "xpath_only_child_pseudo")
self$xpath_only_child_pseudo(xp)
else if (method_name == "xpath_only_of_type_pseudo")
self$xpath_only_of_type_pseudo(xp)
else if (method_name == "xpath_empty_pseudo")
self$xpath_empty_pseudo(xp)
else if (method_name == "xpath_any_link_pseudo")
self$xpath_any_link_pseudo(xp)
else if (method_name == "xpath_link_pseudo")
self$xpath_link_pseudo(xp)
else if (method_name == "xpath_visited_pseudo")
self$xpath_visited_pseudo(xp)
else if (method_name == "xpath_hover_pseudo")
self$xpath_hover_pseudo(xp)
else if (method_name == "xpath_active_pseudo")
self$xpath_active_pseudo(xp)
else if (method_name == "xpath_focus_pseudo")
self$xpath_focus_pseudo(xp)
else if (method_name == "xpath_target_pseudo")
self$xpath_target_pseudo(xp)
else if (method_name == "xpath_target_within_pseudo")
self$xpath_target_within_pseudo(xp)
else if (method_name == "xpath_local_link_pseudo")
self$xpath_local_link_pseudo(xp)
else if (method_name == "xpath_enabled_pseudo")
self$xpath_enabled_pseudo(xp)
else if (method_name == "xpath_disabled_pseudo")
self$xpath_disabled_pseudo(xp)
else if (method_name == "xpath_checked_pseudo")
self$xpath_checked_pseudo(xp)
else
stop("The pseudo-class :", pseudo$ident, " is unknown")
},
xpath_attrib = function(selector) {
operator <- self$attribute_operator_mapping[selector$operator]
method_name <- paste0("xpath_attrib_", operator)
if (self$lower_case_attribute_names) {
name <- tolower(selector$attrib)
} else {
name <- selector$attrib
}
safe <- is_safe_name(name)
if (!is.null(selector$namespace)) {
name <- paste0(selector$namespace, ":", name)
}
if (safe) {
attrib <- paste0("@", name)
} else {
attrib <- paste0(
"attribute::*[name() = ", xpath_literal(name), "]")
}
if (self$lower_case_attribute_names) {
value <- tolower(selector$value)
} else {
value <- selector$value
}
xp <- self$xpath(selector$selector)
if (method_name == "xpath_attrib_dashmatch")
self$xpath_attrib_dashmatch(xp, attrib, value)
else if (method_name == "xpath_attrib_different")
self$xpath_attrib_different(xp, attrib, value)
else if (method_name == "xpath_attrib_equals")
self$xpath_attrib_equals(xp, attrib, value)
else if (method_name == "xpath_attrib_exists")
self$xpath_attrib_exists(xp, attrib, value)
else if (method_name == "xpath_attrib_includes")
self$xpath_attrib_includes(xp, attrib, value)
else if (method_name == "xpath_attrib_prefixmatch")
self$xpath_attrib_prefixmatch(xp, attrib, value)
else if (method_name == "xpath_attrib_substringmatch")
self$xpath_attrib_substringmatch(xp, attrib, value)
else if (method_name == "xpath_attrib_suffixmatch")
self$xpath_attrib_suffixmatch(xp, attrib, value)
else
stop("Unknown attribute operator '", operator, "'")
},
# .foo is defined as [class~=foo] in the spec
xpath_class = function(class_selector) {
xpath <- self$xpath(class_selector$selector)
self$xpath_attrib_includes(xpath, "@class",
class_selector$class_name)
xpath
},
xpath_hash = function(id_selector) {
xpath <- self$xpath(id_selector$selector)
self$xpath_attrib_equals(xpath, "@id", id_selector$id)
xpath
},
xpath_element = function(selector) {
element <- selector$element
if (is.null(element)) {
element <- "*"
safe <- TRUE
} else {
safe <- is_safe_name(element)
if (self$lower_case_element_names)
element <- tolower(element)
}
if (!is.null(selector$namespace)) {
# Namespace prefixes are case-sensitive.
# https://www.w3.org/TR/css-namespaces-3/#prefixes
element <- paste0(selector$namespace, ":", element)
safe <- safe && is_safe_name(selector$namespace)
}
xpath <- XPathExpr$new(element = element)
if (!safe)
xpath$add_name_test()
xpath
},
xpath_descendant_combinator = function(left, right) {
left$join("//", right)
},
xpath_child_combinator = function(left, right) {
left$join("/", right)
},
xpath_direct_adjacent_combinator = function(left, right) {
xpath <- left$join("/following-sibling::", right)
target_element <- xpath$element
existing_condition <- xpath$condition
xpath$add_name_test()
if (nzchar(existing_condition)) {
# Has existing conditions from right selector (e.g., attributes)
# Result: *[1][self::element][existing_condition]
xpath$condition <- paste0("1][self::", target_element, "][", existing_condition)
} else {
# No existing conditions, just position and element test
xpath$condition <- paste0("1][self::", target_element)
}
xpath
},
xpath_indirect_adjacent_combinator = function(left, right) {
left$join("/following-sibling::", right)
},
xpath_nth_child_function = function(xpath, fn, last = FALSE,
add_name_test = TRUE) {
ab <- parse_series(fn$arguments)
# Validate that parse_series returned valid results
if (is.null(ab) || length(ab) != 2) {
stop("Invalid nth-child expression")
}
a <- ab[1]
b <- ab[2]
# Validate that a and b are valid integers (not NA)
if (is.na(a) || is.na(b)) {
stop("Invalid nth-child expression: could not parse as valid integers")
}
# From https://www.w3.org/TR/selectors-4/#structural-pseudos:
#
# :nth-child(an+b)
# an+b-1 siblings before
#
# :nth-last-child(an+b)
# an+b-1 siblings after
#
# :nth-of-type(an+b)
# an+b-1 siblings with the same expanded element name before
#
# :nth-last-of-type(an+b)
# an+b-1 siblings with the same expanded element name after
#
# CSS Selectors Level 4 adds optional "of S" selector list:
# :nth-child(an+b of S) - count only siblings that match selector S
#
# So,
# for :nth-child and :nth-of-type
#
# count(preceding-sibling::) = an+b-1
#
# for :nth-last-child and :nth-last-of-type
#
# count(following-sibling::) = an+b-1
#
# therefore,
# count(...) - (b-1) = 0 (mod a)
#
# if a == 0:
# ~~~~~~~~~~
# count(...) = b-1
#
# if a < 0:
# ~~~~~~~~~
# count(...) - b +1 <= 0
# -> count(...) <= b-1
#
# if a > 0:
# ~~~~~~~~~
# count(...) - b +1 >= 0
# -> count(...) >= b-1
# work with b-1 instead
b_min_1 <- b - 1
# early-exit condition 1:
# ~~~~~~~~~~~~~~~~~~~~~~~
# for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
# and since n %in% {0, 1, 2, ...}, if b-1<=0,
# there is always an "n" matching any number of siblings (maybe none)
if (a == 1 && b_min_1 <= 0) {
# CSS Level 4: When selector list is provided, ensure current element matches
if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) {
conditions <- character(0)
for (subselector in fn$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
if (length(conditions) > 0) {
# Current element must match at least one selector (OR)
combined_condition <- paste0(conditions, collapse = " or ")
xpath$add_condition(combined_condition)
}
}
return(xpath)
}
# early-exit condition 2:
# ~~~~~~~~~~~~~~~~~~~~~~~
# an+b-1 siblings with a<0 and (b-1)<0 is not possible
if (a < 0 && b_min_1 < 0) {
xpath$add_condition("0")
# CSS Level 4: When selector list is provided, ensure current element matches
# Even though the condition is always false, we should still check the selector
if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) {
conditions <- character(0)
for (subselector in fn$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
if (length(conditions) > 0) {
# Current element must match at least one selector (OR)
combined_condition <- paste0(conditions, collapse = " or ")
xpath$add_condition(combined_condition)
}
}
return(xpath)
}
# `add_name_test` boolean is inverted and somewhat counter-intuitive:
#
# nth_of_type() calls nth_child(add_name_test=False)
if (add_name_test) {
nodetest <- "*"
} else {
nodetest <- xpath$element
}
# Build the predicate for selector list filtering (CSS Level 4)
selector_predicate <- ""
if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) {
# Generate XPath conditions for each selector in the list
conditions <- character(0)
for (subselector in fn$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
if (length(conditions) > 0) {
# Combine conditions with OR (any match counts the sibling)
combined_condition <- paste0(conditions, collapse = " or ")
selector_predicate <- paste0("[", combined_condition, "]")
}
}
# count siblings before or after the element
if (!last) {
siblings_count <- paste0("count(preceding-sibling::",
nodetest, selector_predicate, ")")
} else {
siblings_count <- paste0("count(following-sibling::",
nodetest, selector_predicate, ")")
}
# special case of fixed position: nth-*(0n+b)
# if a == 0:
# ~~~~~~~~~~
# count(***-sibling::***) = b-1
if (a == 0) {
xpath$add_condition(paste0(siblings_count, " = ", b_min_1))
# CSS Level 4: When selector list is provided, ensure current element matches
if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) {
conditions <- character(0)
for (subselector in fn$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
if (length(conditions) > 0) {
# Current element must match at least one selector (OR)
combined_condition <- paste0(conditions, collapse = " or ")
xpath$add_condition(combined_condition)
}
}
return(xpath)
}
expr <- character(0)
if (a > 0) {
# siblings count, an+b-1, is always >= 0,
# so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
# therefore, the predicate is only interesting if (b-1)>0
if (b_min_1 > 0) {
expr <- c(expr, paste0(siblings_count, " >= ", b_min_1))
}
} else {
# if a<0, and (b-1)<0, no "n" satisfies this,
# this is tested above as an early exist condition
# otherwise,
expr <- c(expr, paste0(siblings_count, " <= ", b_min_1))
}
# operations modulo 1 or -1 are simpler, one only needs to verify:
#
# - either:
# count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
# i.e. count(***-sibling::***) >= (b-1)
#
# - or:
# count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
# i.e. count(***-sibling::***) <= (b-1)
# we we just did above.
#
if (abs(a) != 1) {
# count(***-sibling::***) - (b-1) = 0 (mod a)
left <- siblings_count
# apply "modulo a" on 2nd term, -(b-1),
# to simplify things like "(... +6) % -3",
# and also make it positive with |a|
b_neg <- (-b_min_1) %% abs(a)
if (b_neg != 0) {
b_neg <- paste0("+", b_neg)
left <- paste0("(", left, " ", b_neg, ")")
}
expr <- c(expr, paste0(left, " mod ", a, " = 0"))
}
if (length(expr)) {
expr <- paste0(expr, collapse = " and ")
xpath$add_condition(expr)
}
# CSS Level 4: When selector list is provided, ensure current element matches
if (!is.null(fn$selector_list) && length(fn$selector_list) > 0) {
conditions <- character(0)
for (subselector in fn$selector_list) {
sub_xpath <- self$xpath(subselector)
sub_xpath$add_name_test()
if (!is.null(sub_xpath$condition) && nzchar(sub_xpath$condition)) {
conditions <- c(conditions, sub_xpath$condition)
}
}
if (length(conditions) > 0) {
# Current element must match at least one selector (OR)
combined_condition <- paste0(conditions, collapse = " or ")
xpath$add_condition(combined_condition)
}
}
xpath
},
xpath_nth_last_child_function = function(xpath, fn) {
self$xpath_nth_child_function(xpath, fn, last = TRUE)
},
xpath_nth_of_type_function = function(xpath, fn) {
if (xpath$element == "*") {
stop("*:nth-of-type() is not implemented")
}
self$xpath_nth_child_function(xpath, fn, add_name_test = FALSE)
},
xpath_nth_last_of_type_function = function(xpath, fn) {
if (xpath$element == "*") {
stop("*:nth-last-of-type() is not implemented")
}
self$xpath_nth_child_function(xpath, fn, last = TRUE,
add_name_test = FALSE)
},
xpath_contains_function = function(xpath, fn) {
if (!(fn$argument_types() %in% c("STRING", "IDENT"))) {
stop("Expected a single string or ident for :contains(), got (",
paste0(fn$argument_types(), collapse = ", "), ")")
}
value <- fn$arguments[[1]]$value
xpath$add_condition(paste0(
"contains(., ", xpath_literal(value), ")"))
xpath
},
xpath_lang_function = function(xpath, fn) {
# Validate all arguments are STRING, IDENT, or * (DELIM)
arg_types <- fn$argument_types()
valid_types <- arg_types %in% c("STRING", "IDENT") |
(arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*"))
if (!all(valid_types)) {
stop("Expected string, ident, or * arguments for :lang(), got ",
fn$arguments[[1]]$repr())
}
# Extract language values from arguments, combining IDENT-* patterns
lang_values <- character(0)
i <- 1
while (i <= length(fn$arguments)) {
arg <- fn$arguments[[i]]
# Check if this is an IDENT ending with '-' followed by a '*' DELIM
if (arg$type %in% c("IDENT", "STRING") &&
grepl("-$", arg$value) &&
i < length(fn$arguments) &&
fn$arguments[[i + 1]]$type == "DELIM" &&
fn$arguments[[i + 1]]$value == "*") {
# Combine them: "en-" + "*" = "en-*"
lang_values <- c(lang_values, paste0(arg$value, "*"))
i <- i + 2 # Skip the next token since we combined it
} else {
lang_values <- c(lang_values, arg$value)
i <- i + 1
}
}
# Build conditions for each language value
conditions <- character(0)
for (value in lang_values) {
if (value == "*") {
# Wildcard * matches everything - use a condition that's always true
conditions <- c(conditions, "true()")
} else if (grepl("\\*$", value)) {
# Wildcard suffix like "en-*" - match any language starting with prefix
# Use XPath's lang() function which does prefix matching
prefix <- sub("\\*$", "", value) # Remove trailing *
conditions <- c(conditions, paste0("lang(", xpath_literal(prefix), ")"))
} else {
# Regular language tag
conditions <- c(conditions, paste0("lang(", xpath_literal(value), ")"))
}
}
# Combine conditions with OR
if (length(conditions) == 1) {
xpath$add_condition(conditions[1])
} else if (length(conditions) > 1) {
combined <- paste0("(", paste(conditions, collapse = " or "), ")")
xpath$add_condition(combined)
}
xpath
},
xpath_dir_function = function(xpath, fn) {
# validate all arguments are STRING, IDENT, or * (DELIM)
arg_types <- fn$argument_types()
valid_types <- arg_types %in% c("STRING", "IDENT") |
(arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*"))
if (!all(valid_types)) {
stop("Expected string, ident, or * arguments for :dir(), got ",
fn$arguments[[1]]$repr())
}
# :dir() requires runtime directionality detection based on
# document language, inherited dir attributes, and text analysis.
# Not possible in static XPath, so we make it never match.
xpath$add_condition("0")
xpath
},
xpath_root_pseudo = function(xpath) {
xpath$add_condition("not(parent::*)")
xpath
},
xpath_first_child_pseudo = function(xpath) {
xpath$add_condition("count(preceding-sibling::*) = 0")
xpath
},
xpath_last_child_pseudo = function(xpath) {
xpath$add_condition("count(following-sibling::*) = 0")
xpath
},
xpath_first_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:first-of-type is not implemented")
}
xpath$add_condition(paste0(
"count(preceding-sibling::", xpath$element, ") = 0"))
xpath
},
xpath_last_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:last-of-type is not implemented")
}
xpath$add_condition(paste0(
"count(following-sibling::", xpath$element, ") = 0"))
xpath
},
xpath_only_child_pseudo = function(xpath) {
xpath$add_condition("count(parent::*/child::*) = 1")
xpath
},
xpath_only_of_type_pseudo = function(xpath) {
if (xpath$element == "*") {
stop("*:only-of-type is not implemented")
}
xpath$add_condition(paste0(
"count(parent::*/child::", xpath$element, ") = 1"))
xpath
},
xpath_empty_pseudo = function(xpath) {
xpath$add_condition("not(*) and not(string-length())")
xpath
},
#pseudo_never_matches = function(xpath) {
# xpath$add_condition("0")
# xpath
#},
# All are pseudo_never_matches()
xpath_any_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_visited_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_hover_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_active_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_focus_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_target_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_target_within_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_local_link_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_enabled_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_disabled_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_checked_pseudo = function(xpath) { xpath$add_condition("0") ; xpath },
xpath_attrib_exists = function(xpath, name, value) {
xpath$add_condition(name)
xpath
},
xpath_attrib_equals = function(xpath, name, value) {
xpath$add_condition(paste0(name, " = ", xpath_literal(value)))
xpath
},
xpath_attrib_different = function(xpath, name, value) {
xpath$add_condition(paste0("not(", name, ") or ", name, " != ",
xpath_literal(value)))
xpath
},
xpath_attrib_includes = function(xpath, name, value) {
if (!is.null(value) && nzchar(value) &&
grepl("^[^ \t\r\n\f]+$", value)) {
xpath$add_condition(paste0(
name,
" and contains(concat(' ', normalize-space(",
name,
"), ' '), ",
xpath_literal(paste0(" ", value, " ")),
")"))
} else {
xpath$add_condition("0")
}
xpath
},
xpath_attrib_dashmatch = function(xpath, name, value) {
xpath$add_condition(paste0(
name,
" and (",
name,
" = ",
xpath_literal(value),
" or starts-with(",
name,
", ",
xpath_literal(paste0(value, "-")),
"))"))
xpath
},
xpath_attrib_prefixmatch = function(xpath, name, value) {
if (!is.null(value) && nzchar(value)) {
xpath$add_condition(paste0(
name,
" and starts-with(",
name,
", ",
xpath_literal(value),
")"))
} else {
xpath$add_condition("0")
}
xpath
},
# In XPath there is starts-with but not ends-with, hence the oddness
xpath_attrib_suffixmatch = function(xpath, name, value) {
if (!is.null(value) && nzchar(value)) {
xpath$add_condition(paste0(
name,
" and substring(",
name,
", string-length(",
name,
")-",
nchar(value) - 1,
") = ",
xpath_literal(value)))
} else {
xpath$add_condition("0")
}
xpath
},
xpath_attrib_substringmatch = function(xpath, name, value) {
if (!is.null(value) && nzchar(value)) {
xpath$add_condition(paste0(
name,
" and contains(",
name,
", ",
xpath_literal(value),
")"))
} else {
xpath$add_condition("0")
}
xpath
}
)
)
HTMLTranslator <- R6Class("HTMLTranslator",
inherit = GenericTranslator,
public = list(
xhtml = FALSE,
initialize = function(xhtml = FALSE, ...) {
self$xhtml <- xhtml
if (!xhtml) {
self$lower_case_element_names <- TRUE
self$lower_case_attribute_names <- TRUE
}
self$lang_attribute <- "lang"
},
xpath_checked_pseudo = function(xpath) {
xpath$add_condition(
paste0("(@selected and name(.) = 'option') or ",
"(@checked ",
"and (name(.) = 'input' or name(.) = 'command')",
"and (@type = 'checkbox' or @type = 'radio'))"))
xpath
},
xpath_lang_function = function(xpath, fn) {
# Validate all arguments are STRING, IDENT, or * (DELIM)
arg_types <- fn$argument_types()
valid_types <- arg_types %in% c("STRING", "IDENT") |
(arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*"))
if (!all(valid_types)) {
stop("Expected string, ident, or * arguments for :lang(), got ",
fn$arguments[[1]]$repr())
}
# Extract language values from arguments, combining IDENT-* patterns
lang_values <- character(0)
i <- 1
while (i <= length(fn$arguments)) {
arg <- fn$arguments[[i]]
# Check if this is an IDENT ending with '-' followed by a '*' DELIM
if (arg$type %in% c("IDENT", "STRING") &&
grepl("-$", arg$value) &&
i < length(fn$arguments) &&
fn$arguments[[i + 1]]$type == "DELIM" &&
fn$arguments[[i + 1]]$value == "*") {
# Combine them: "en-" + "*" = "en-*"
lang_values <- c(lang_values, paste0(arg$value, "*"))
i <- i + 2 # Skip the next token since we combined it
} else {
lang_values <- c(lang_values, arg$value)
i <- i + 1
}
}
# Build conditions for each language value
conditions <- character(0)
for (value in lang_values) {
if (value == "*") {
# Wildcard * matches any element with a lang attribute
# Check for any ancestor-or-self with @lang attribute
conditions <- c(conditions,
paste0("ancestor-or-self::*[@", self$lang_attribute, "]"))
} else if (grepl("\\*$", value)) {
# Wildcard suffix like "en-*" - match any language starting with prefix
prefix <- sub("\\*$", "", value) # Remove trailing *
# Don't add '-' if prefix already ends with it
search_prefix <- if (grepl("-$", prefix)) tolower(prefix) else paste0(tolower(prefix), "-")
conditions <- c(conditions, paste0(
"ancestor-or-self::*[@", self$lang_attribute, "][1][starts-with(concat(",
"translate(@",
self$lang_attribute,
", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ",
"'abcdefghijklmnopqrstuvwxyz'), '-'), ",
xpath_literal(search_prefix),
")]"))
} else {
# Regular language tag
conditions <- c(conditions, paste0(
"ancestor-or-self::*[@", self$lang_attribute, "][1][starts-with(concat(",
"translate(@",
self$lang_attribute,
", 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ",
"'abcdefghijklmnopqrstuvwxyz'), '-'), ",
xpath_literal(paste0(tolower(value), "-")),
")]"))
}
}
# Combine conditions with OR
if (length(conditions) == 1) {
xpath$add_condition(conditions[1])
} else if (length(conditions) > 1) {
combined <- paste0("(", paste(conditions, collapse = " or "), ")")
xpath$add_condition(combined)
}
xpath
},
xpath_dir_function = function(xpath, fn) {
# Validate all arguments are STRING, IDENT, or * (DELIM)
arg_types <- fn$argument_types()
valid_types <- arg_types %in% c("STRING", "IDENT") |
(arg_types == "DELIM" & sapply(fn$arguments, function(a) a$value == "*"))
if (!all(valid_types)) {
stop("Expected string, ident, or * arguments for :dir(), got ",
fn$arguments[[1]]$repr())
}
# :dir() requires runtime directionality detection based on
# document language, inherited dir attributes, and text analysis.
# Not possible in static XPath, so we make it never match.
xpath$add_condition("0")
xpath
},
xpath_link_pseudo = function(xpath) {
xpath$add_condition("@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
xpath
},
xpath_disabled_pseudo = function(xpath) {
xpath$add_condition(
paste("(",
"@disabled and",
"(",
"(name(.) = 'input' and @type != 'hidden') or",
"name(.) = 'button' or",
"name(.) = 'select' or",
"name(.) = 'textarea' or",
"name(.) = 'command' or",
"name(.) = 'fieldset' or",
"name(.) = 'optgroup' or",
"name(.) = 'option'",
")",
") or (",
"(",
"(name(.) = 'input' and @type != 'hidden') or",
"name(.) = 'button' or",
"name(.) = 'select' or",
"name(.) = 'textarea'",
")",
"and ancestor::fieldset[@disabled]",
")"))
xpath
},
xpath_enabled_pseudo = function(xpath) {
xpath$add_condition(
paste("(@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area'))",
"or",
"((name(.) = 'command' or name(.) = 'fieldset' or name(.) = 'optgroup') and not(@disabled))",
"or",
"(((name(.) = 'input' and @type != 'hidden')",
"or name(.) = 'button'",
"or name(.) = 'select'",
"or name(.) = 'textarea'",
"or name(.) = 'keygen')",
"and not (@disabled or ancestor::fieldset[@disabled]))",
"or (name(.) = 'option' and not(@disabled or ancestor::optgroup[@disabled]))"))
xpath
}
)
)
selectr/NAMESPACE 0000755 0001762 0000144 00000001700 15110033126 013123 0 ustar ligges users import(methods)
importFrom(stringr, str_locate, str_match, str_split_fixed, str_trim)
importFrom(R6, R6Class)
export(css_to_xpath)
export(querySelector)
export(querySelectorAll)
export(querySelectorNS)
export(querySelectorAllNS)
S3method(querySelector, default)
S3method(querySelectorAll, default)
S3method(querySelectorNS, default)
S3method(querySelectorAllNS, default)
# XML package methods
S3method(querySelector, XMLInternalNode)
S3method(querySelector, XMLInternalDocument)
S3method(querySelectorAll, XMLInternalNode)
S3method(querySelectorAll, XMLInternalDocument)
S3method(querySelectorNS, XMLInternalNode)
S3method(querySelectorNS, XMLInternalDocument)
S3method(querySelectorAllNS, XMLInternalNode)
S3method(querySelectorAllNS, XMLInternalDocument)
# xml2 package methods
S3method(querySelector, xml_node)
S3method(querySelectorAll, xml_node)
S3method(querySelectorNS, xml_node)
S3method(querySelectorAllNS, xml_node)
selectr/inst/ 0000755 0001762 0000144 00000000000 15107410634 012671 5 ustar ligges users selectr/inst/CITATION 0000755 0001762 0000144 00000000536 15110016442 014026 0 ustar ligges users bibentry(
bibtype = "techreport",
title = "Introducing the selectr Package",
author = person("Simon", "Potter"),
institution = "The University of Auckland",
address = "Auckland, New Zealand",
year = 2012,
url = "https://stattech.wordpress.fos.auckland.ac.nz/2012/11/27/2012-10-introducing-the-selectr-package/"
)
selectr/inst/demos/ 0000755 0001762 0000144 00000000000 15107001643 013774 5 ustar ligges users selectr/inst/demos/svg-mathml.svg 0000644 0001762 0000144 00000003646 15107001643 016605 0 ustar ligges users