")
children <- xml_children(x)
x <- xml_find_first(children, ".//b|.//i")
expect_equal(xml_name(x[[1]]), "b")
expect_equal(xml_name(x[[2]]), "i")
expect_equal(xml_name(x[[3]]), NA_character_)
expect_equal(xml_name(x), c("b", "i", NA_character_))
})
test_that("qualified names returned when ns given", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
expect_equal(xml_name(bars), c("bar", "bar"))
expect_equal(xml_name(bars, ns), c("d1:bar", "d2:bar"))
})
test_that("error if missing ns spec", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)[1]
bars <- xml_children(xml_children(x))
expect_snapshot(error = TRUE, xml_name(bars, ns))
})
test_that("xml_name<- modifies the name", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
bar <- bars[[1]]
xml_name(bar) <- "foo"
expect_equal(xml_name(bar), "foo")
expect_equal(xml_name(bar, ns), "d1:foo")
# ns is ignored
xml_name(bar, ns) <- "bar"
expect_equal(xml_name(bar), "bar")
expect_equal(xml_name(bar, ns), "d1:bar")
xml_name(bars) <- "foo"
expect_equal(xml_name(bars), c("foo", "foo"))
old_mss <- mss <- xml_missing()
xml_name(mss) <- "foo"
expect_identical(old_mss, mss)
})
test_that("xml_set_name modifies the name", {
x <- read_xml(test_path("ns-multiple-default.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
bar <- bars[[1]]
xml_set_name(bar, "foo")
expect_equal(xml_name(bar), "foo")
expect_equal(xml_name(bar, ns), "d1:foo")
# ns is ignored
xml_set_name(bar, "bar", ns)
expect_equal(xml_name(bar), "bar")
expect_equal(xml_name(bar, ns), "d1:bar")
xml_set_name(bars, "foo")
expect_equal(xml_name(bars), c("foo", "foo"))
old_mss <- mss <- xml_missing()
xml_set_name(mss, "foo")
expect_identical(old_mss, mss)
})
xml2/tests/testthat/test-format.R 0000644 0001762 0000144 00000001167 14727652467 016557 0 ustar ligges users test_that("format.xml_node prints attributes for root nodes", {
x <- read_xml("")
expect_equal(format(x), "")
})
test_that("format.xml_node prints namespaces for root nodes", {
x <- read_xml("")
expect_equal(format(x), "")
y <- read_xml("")
expect_equal(format(y), "")
z <- read_xml("")
expect_equal(format(z), "")
})
xml2/tests/testthat/records.xml 0000644 0001762 0000144 00000000217 14727652467 016345 0 ustar ligges users
&hoqc;
xml2/tests/testthat/test-xml_attr.R 0000644 0001762 0000144 00000016275 14727652467 017127 0 ustar ligges users test_that("missing attributes returned as NA by default", {
x <- read_xml("")
expect_equal(xml_attr(x, "id"), NA_character_)
})
test_that("missing attributes returned as NA", {
x <- read_xml("")
expect_equal(xml_attr(x, "id", default = 1), "1")
})
test_that("attributes are correctly found", {
x <- read_xml("")
expect_true(xml_has_attr(x, "id"))
expect_false(xml_has_attr(x, "id2"))
})
test_that("returning an attribute node prints properly", {
x <- read_xml("")
t1 <- xml_find_first(x, "//@c")
expect_equal(format(t1), "")
})
# Namespaces -------------------------------------------------------------------
# Default namespace doesn't apply to attributes
test_that("qualified names returned when ns given", {
x <- read_xml(test_path("ns-multiple.xml"))
ns <- xml_ns(x)
bars <- xml_children(xml_children(x))
attr <- xml_attrs(bars, ns)
expect_named(attr[[1]], "f:id")
expect_named(attr[[2]], "g:id")
})
x <- read_xml('
')
doc <- xml_children(x)[[1]]
docs <- xml_find_all(x, "//doc")
ns <- xml_ns(x)
test_that("qualified attributes get own values", {
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f", "id" = ""))
})
test_that("unqualified name gets unnamespace attribute", {
expect_equal(xml_attr(doc, "id", ns), "")
})
test_that("namespace names gets namespaced attribute", {
expect_equal(xml_attr(doc, "b:id", ns), "b")
expect_equal(xml_attr(doc, "f:id", ns), "f")
})
test_that("xml_attr<- modifies properties", {
xml_attr(doc, "id", ns) <- "test"
expect_equal(xml_attr(doc, "id", ns), "test")
xml_attr(doc, "b:id", ns) <- "b_test"
expect_equal(xml_attr(doc, "b:id", ns), "b_test")
xml_attr(doc, "f:id", ns) <- "f_test"
expect_equal(xml_attr(doc, "f:id", ns), "f_test")
xml_attr(docs, "f:id", ns) <- "f_test2"
expect_equal(xml_attr(docs, "f:id", ns), c("f_test2", "f_test2"))
xml_attr(docs, "f:id", ns) <- NULL
expect_equal(xml_attr(docs, "f:id", ns), c(NA_character_, NA_character_))
})
test_that("xml_attr<- recycles values", {
x <- read_xml("")
a <- xml_find_all(x, "a")
xml_attr(a, "b") <- c("e", "f")
expect_equal(xml_attr(a, "b"), c("e", "f"))
})
test_that("xml_attrs<- modifies all attributes", {
expect_error(xml_attrs(doc) <- 1, "`value` must be a named character vector or `NULL`")
expect_error(xml_attrs(doc) <- "test", "`value` must be a named character vector or `NULL`")
xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f", "id" = "test")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test", "f:id" = "f"))
xml_attrs(doc, ns) <- c("b:id" = "b", "f:id" = "f")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "f:id" = "f"))
xml_attrs(doc, ns) <- c("b:id" = "b", "id" = "test")
expect_equal(xml_attrs(doc, ns), c("b:id" = "b", "id" = "test"))
expect_snapshot(error = TRUE, xml_attrs(docs) <- "test")
xml_attrs(docs, ns) <- c("b:id" = "b", "id" = "test")
expect_equal(
xml_attrs(docs, ns),
list(
c("b:id" = "b", "id" = "test"),
c("b:id" = "b", "id" = "test")
)
)
xml_attrs(docs, ns) <- NULL
expect_equal(xml_attrs(docs, ns), list(setNames(character(0), character()), setNames(character(0), character())))
})
test_that("xml_attr<- accepts non-character values", {
x <- read_xml("")
svg <- xml_root(x)
xml_attr(svg, "width") <- 8L
expect_equal(xml_attr(svg, "width"), "8")
xml_attr(svg, "height") <- 12.5
expect_equal(xml_attr(svg, "height"), "12.5")
expect_equal(xml_attrs(svg), c(width = "8", height = "12.5"))
xml_attrs(svg) <- c(width = 14L, height = 23.45)
expect_equal(xml_attrs(svg), c(width = "14", height = "23.45"))
})
test_that("xml_attr<- can set empty strings, and removes attributes with NULL", {
x <- read_xml("")
xml_attr(x, "test") <- ""
expect_equal(xml_attr(x, "test"), "")
xml_attr(x, "test") <- NULL
expect_equal(xml_attr(x, "test"), NA_character_)
})
test_that("xml_attr<- removes namespaces if desired", {
xml_attr(x, "xmlns:b") <- NULL
expect_equal(xml_attrs(x), c("xmlns:f" = "http://foo.com"))
})
test_that("xml_attr<- removes namespaces if desired", {
x <- read_xml("")
# cannot find //b with a default namespace
expect_length(xml_find_all(x, "//b"), 0)
# unless we specify it explicitly
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//d1:b", xml_ns(x)), 1)
# but can find it once we remove the namespace
xml_attr(x, "xmlns") <- NULL
expect_length(xml_find_all(x, "//b"), 1)
# and add the old namespace back
xml_attr(x, "xmlns") <- "tag:foo"
expect_equal(xml_attr(x, "xmlns"), "tag:foo")
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//d1:b", xml_ns(x)), 1)
expect_equal(xml_attr(x, "xmlns"), "tag:foo")
})
test_that("xml_attr<- removes prefixed namespaces if desired", {
x <- read_xml("")
# cannot find //b with a prefixed namespace
expect_length(xml_find_all(x, "//b"), 0)
# unless we specify it explicitly
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//pre:b", xml_ns(x)), 1)
# but can find it once we remove the namespace
xml_attr(x, "xmlns:pre") <- NULL
expect_length(xml_find_all(x, "//b"), 1)
# and add the old namespace back
xml_attr(x, "xmlns:pre") <- "tag:foo"
xml_set_namespace(xml_children(x)[[1]], "pre")
expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
expect_length(xml_find_all(x, "//b"), 0)
expect_length(xml_find_all(x, "//pre:b", xml_ns(x)), 1)
expect_equal(xml_attr(x, "xmlns:pre"), "tag:foo")
})
test_that("xml_set_attr works identically to xml_attr<-", {
content <- ""
x <- read_xml(content)
y <- read_xml(content)
xml_attr(x, "a") <- "test"
xml_set_attr(y, "a", "test")
expect_equal(as.character(x), as.character(y))
bx <- xml_find_all(x, "//b")
by <- xml_find_all(y, "//b")
xml_attr(bx, "b") <- "test2"
xml_set_attr(by, "b", "test2")
expect_equal(as.character(x), as.character(y))
# No errors for xml_missing
mss <- xml_find_first(bx, "./c")
expect_no_error(xml_attr(mss[[2]], "b") <- "blah")
expect_no_error(xml_set_attr(mss[[2]], "b", "blah"))
})
test_that("xml_set_attrs works identically to xml_attrs<-", {
content <- ""
x <- read_xml(content)
y <- read_xml(content)
xml_attrs(x) <- c(a = "test")
xml_set_attrs(y, c(a = "test"))
expect_equal(as.character(x), as.character(y))
bx <- xml_find_all(x, "//b")
by <- xml_find_all(y, "//b")
xml_attrs(bx) <- c(b = "test2")
xml_set_attrs(by, c(b = "test2"))
expect_equal(as.character(x), as.character(y))
# No errors for xml_missing
mss <- xml_find_first(bx, "./c")
expect_no_error(xml_attrs(mss[[2]]) <- c("b" = "blah"))
expect_no_error(xml_set_attrs(mss[[2]], c("b" = "blah")))
})
test_that("xml_set_attr can set the same namespace multiple times", {
doc <- xml_new_root("foo")
xml_set_attr(doc, "xmlns:bar", "http://a/namespace")
xml_set_attr(doc, "xmlns:bar", "http://b/namespace")
expect_equal(xml_attr(doc, "xmlns:bar"), "http://b/namespace")
})
xml2/tests/testthat/records.dtd 0000644 0001762 0000144 00000000242 14727652467 016316 0 ustar ligges users
xml2/tests/testthat/test-xml_text.R 0000644 0001762 0000144 00000004014 14727652467 017125 0 ustar ligges users test_that("xml_text returns only text without markup", {
x <- read_xml("
This is some text. This is bold!
")
expect_identical(xml_text(x), "This is some text. This is bold!")
expect_identical(xml_text(xml_children(x)), "bold!")
})
test_that("xml_text works properly with xml_nodeset objects", {
x <- read_xml("
Some text.
Some other.
No bold text
")
children <- xml_children(x)
x <- xml_find_first(children, ".//b|.//i")
expect_identical(
xml_text(x),
c("text", "other", NA)
)
})
test_that("xml_text<- and xml_set_text work properly with xml_nodeset objects", {
x <- read_xml("This is some text. This is some nested text.")
expect_identical(xml_text(x), "This is some text. This is some nested text.")
xml_text(x) <- "test"
expect_identical(xml_text(x), "testThis is some nested text.")
xml_set_text(x, "test2")
expect_identical(xml_text(x), "test2This is some nested text.")
})
test_that("xml_text trims whitespace if requested, including non-breaking spaces", {
x <- read_html("
Some text €
")
expect_identical(
xml_text(x),
" Some text \u20ac \u00a0"
)
expect_identical(
xml_text(x, trim = TRUE),
"Some text \u20ac"
)
x2 <- read_html("
Some text €
and more € text ")
expect_identical(
xml_text(xml_find_all(x2, ".//p"), trim = TRUE),
c("Some text \u20ac", "and more \u20ac text")
)
})
test_that("xml_integer() returns an integer vector", {
x <- read_xml("")
expect_identical(
xml_integer(xml_find_all(x, "//@x")),
c(1L, 2L)
)
})
test_that("xml_double() returns a numeric vector", {
x <- read_xml("")
expect_identical(xml_double(xml_find_all(x, "//@latitude")), c(42.3466456, -36.8523378))
})
xml2/tests/testthat/test-xml_nodeset.R 0000644 0001762 0000144 00000005320 14727652467 017603 0 ustar ligges users test_that("methods work on empty nodesets", {
x <- read_xml("")
empty <- xml_find_all(x, "//c")
expect_error(empty[[1]], "subscript out of bounds")
expect_identical(empty[1], empty)
test <- empty
xml_attr(test, "test") <- 1
expect_identical(test, empty)
xml_attrs(test) <- c("test" = 1)
expect_identical(test, empty)
xml_name(test) <- "test"
expect_identical(test, empty)
xml_text(test) <- "test"
expect_identical(test, empty)
expect_identical(as.character(empty), character(0))
expect_identical(as_list(empty), list())
expect_identical(nodeset_apply(empty, identical), empty)
expect_output(print(empty), "\\{xml_nodeset \\(0\\)\\}")
expect_silent(tree_structure(empty))
xml_add_child(test, "test")
expect_identical(test, empty)
xml_add_sibling(test, "test")
expect_identical(test, empty)
expect_identical(xml_attr(empty, "test"), character())
expect_identical(xml_attrs(empty), list())
expect_identical(xml_double(empty), numeric())
expect_identical(xml_find_all(empty), empty)
expect_identical(xml_find_chr(empty), character())
expect_identical(xml_find_first(empty), empty)
expect_identical(xml_find_lgl(empty), logical())
expect_identical(xml_find_num(empty), numeric())
expect_identical(xml_integer(empty), integer())
expect_identical(xml_length(empty), 0L)
expect_identical(xml_name(empty), character())
expect_identical(xml_ns(empty), character())
expect_identical(xml_parent(empty), empty)
expect_identical(xml_path(empty), character())
xml_remove(test)
expect_identical(test, empty)
xml_replace(test)
expect_identical(test, empty)
xml_set_attr(test, "test", 1)
expect_identical(test, empty)
xml_set_attrs(test, c("test" = 1))
expect_identical(test, empty)
xml_set_name(test, "test")
expect_identical(test, empty)
xml_set_text(test, "test")
expect_identical(test, empty)
expect_identical(xml_siblings(empty), empty)
expect_silent(xml_structure(empty))
expect_identical(xml_text(empty), character())
expect_identical(xml_url(empty), character())
})
test_that("print method is correct", {
skip_if(getOption("width") < 20L, "Screen too narrow")
x <- read_html(test_path("lego.html.bz2"))
body <- xml_find_first(x, "//body")
divs <- xml_find_all(body, ".//div")[1:10]
expect_snapshot(print(divs))
# double-substring() logic
s <- c(
"123456789\\", # always too wide, '\' never encoded
"12345", # always fits
"12\\45" # doesn't fit when '\' is encoded
)
# embed as text on nodes ,,
s <- sprintf("<%1$s>%2$s%1$s>", letters[1:3], s)
x <- read_xml(sprintf("%s", paste(s, collapse="")))
expect_snapshot({
print(x, width = 13L)
print(x, width = 14L)
})
})
xml2/tests/testthat/test-xml_missing.R 0000644 0001762 0000144 00000002766 14727652467 017626 0 ustar ligges users x <- read_xml("
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
{text}
xml2/tests/testthat/_snaps/xml_attr.md 0000644 0001762 0000144 00000000267 15051322363 017602 0 ustar ligges users # xml_attrs<- modifies all attributes
Code
xml_attrs(docs) <- "test"
Condition
Error in `xml_attrs<-`:
! `test` must be a list of named character vectors.
xml2/tests/testthat/_snaps/xml_find.md 0000644 0001762 0000144 00000005043 15051322363 017545 0 ustar ligges users # xml_find_num errors with non numeric results
Code
xml_find_num(x, "//z")
Condition
Error in `xml_find_num()`:
! Element at path `//z` must be a number, not a object.
Code
xml_find_num(x, "//y")
Condition
Error in `xml_find_num()`:
! Element at path `//y` must be a number, not a list.
Code
xml_find_num(x, "1=1")
Condition
Error in `xml_find_num()`:
! Element at path `1=1` must be a number, not `TRUE`.
Code
xml_find_num(x, "string(5)")
Condition
Error in `xml_find_num()`:
! Element at path `string(5)` must be a number, not the string "5".
# xml_find_int errors with non integer results
Code
xml_find_int(x, "//z")
Condition
Error in `xml_find_int()`:
! Element at path `//z` must be a whole number, not a object.
Code
xml_find_int(x, "//y")
Condition
Error in `xml_find_int()`:
! Element at path `//y` must be a whole number, not a list.
Code
xml_find_int(x, "number(1.1)")
Condition
Error in `xml_find_int()`:
! Element at path `number(1.1)` must be a whole number, not the number 1.1.
# xml_find_chr errors with non character results
Code
xml_find_chr(x, "//z")
Condition
Error in `xml_find_chr()`:
! Element at path `//z` must be a single string, not a object.
Code
xml_find_chr(x, "//y")
Condition
Error in `xml_find_chr()`:
! Element at path `//y` must be a single string, not a list.
Code
xml_find_chr(x, "1=1")
Condition
Error in `xml_find_chr()`:
! Element at path `1=1` must be a single string, not `TRUE`.
Code
xml_find_chr(x, "1+1")
Condition
Error in `xml_find_chr()`:
! Element at path `1+1` must be a single string, not the number 2.
# xml_find_lgl errors with non logical results
Code
xml_find_lgl(x, "//z")
Condition
Error in `xml_find_lgl()`:
! Element at path `//z` must be `TRUE` or `FALSE`, not a object.
Code
xml_find_lgl(x, "//y")
Condition
Error in `xml_find_lgl()`:
! Element at path `//y` must be `TRUE` or `FALSE`, not a list.
Code
xml_find_lgl(x, "string(5)")
Condition
Error in `xml_find_lgl()`:
! Element at path `string(5)` must be `TRUE` or `FALSE`, not the string "5".
Code
xml_find_lgl(x, "1+1")
Condition
Error in `xml_find_lgl()`:
! Element at path `1+1` must be `TRUE` or `FALSE`, not the number 2.
xml2/tests/testthat/_snaps/xml_nodeset.md 0000644 0001762 0000144 00000002246 15051322364 020271 0 ustar ligges users # print method is correct
Code
print(divs)
Output
{xml_nodeset (10)}
[1]
\n
\n\n
Node Modification
Jim Hester
2025-08-20
Modifying Existing XML
Modifying existing XML can be done in xml2 by using the replacement
functions of the accessors. They all have methods for both individual
xml_node objects as well as xml_nodeset
objects. If a vector of values is provided it is applied piecewise over
the nodeset, otherwise the value is recycled.
Text Modification
Text modification only happens on text nodes. If a given node has
more than one text node only the first will be affected. If you want to
modify additional text nodes you need to select them explicitly with
/text().
x <-read_xml("<p>This is some <b>text</b>. This is more.</p>")xml_text(x)#> [1] "This is some text. This is more."xml_text(x) <-"This is some other text."xml_text(x)#> [1] "This is some other text.text. This is more."# You can avoid this by explicitly selecting the text node.x <-read_xml("<p>This is some text. This is <b>bold!</b></p>")text_only <-xml_find_all(x, "//text()")xml_text(text_only) <-c("This is some other text. ", "Still bold!")xml_text(x)#> [1] "This is some other text. Still bold!"xml_structure(x)#> <p>#> {text}#> <b>#> {text}
Attribute and Namespace Definition Modification
Attributes and namespace definitions are modified one at a time with
xml_attr() or all at once with xml_attrs(). In
both cases using NULL as the value will remove the
attribute completely.
x <-read_xml("<a href='invalid!'>xml2</a>")xml_attr(x, "href")#> [1] "invalid!"xml_attr(x, "href") <-"https://github.com/r-lib/xml2"xml_attr(x, "href")#> [1] "https://github.com/r-lib/xml2"xml_attrs(x) <-c(id ="xml2", href ="https://github.com/r-lib/xml2")xml_attrs(x)#> href id #> "https://github.com/r-lib/xml2" "xml2"x#> {xml_document}#> <a href="https://github.com/r-lib/xml2" id="xml2">xml_attrs(x) <-NULLx#> {xml_document}#> <a># Namespaces are added with as a xmlns or xmlns:prefix attributexml_attr(x, "xmlns") <-"http://foo"x#> {xml_document}#> <a xmlns="http://foo">xml_attr(x, "xmlns:bar") <-"http://bar"x#> {xml_document}#> <a xmlns="http://foo" xmlns:bar="http://bar">
All of these functions have a .copy argument. If this is
set to FALSE they will remove the new node from its
location before inserting it into the new location. Otherwise they make
a copy of the node before insertion.
The xml_remove() can be used to remove a node (and its
children) from a tree. The default behavior is to unlink the node from
the tree, but does not free the memory for the node, so R
objects pointing to the node are still valid.
This allows code like the following to work without crashing R
x <-read_xml("<foo><bar><baz/></bar></foo>")x1 <- x %>%xml_children() %>% .[[1]]x2 <- x1 %>%xml_children() %>% .[[1]]xml_remove(x1)rm(x1)gc()#> used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)#> Ncells 625925 33.5 1419331 75.9 NA 1186593 63.4#> Vcells 1181080 9.1 8388608 64.0 16384 2402021 18.4x2#> {xml_node}#> <baz>
If you are not planning on referencing these nodes again this memory
is wasted. Calling xml_remove(free = TRUE) will remove the
nodes and free the memory used to store them.
Note In this case any node which previously
pointed to the node or its children will instead be pointing to free
memory and may cause R to crash. xml2 can’t figure this out for you, so
it’s your responsibility to remove any objects which are no longer
valid.
In particular xml_find_*() results are easy to overlook,
for example
x <-read_xml("<a><b /><b><b /></b></a>")bees <-xml_find_all(x, "//b")xml_remove(xml_child(x), free =TRUE)# bees[[1]] is no longer valid!!!rm(bees)gc()#> used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)#> Ncells 625975 33.5 1419331 75.9 NA 1186593 63.4#> Vcells 1181291 9.1 8388608 64.0 16384 2402021 18.4
d <-xml_new_root("sld","xmlns"="http://www.opengis.net/sld","xmlns:ogc"="http://www.opengis.net/ogc","xmlns:se"="http://www.opengis.net//se",version ="1.1.0") %>%xml_add_child("layer") %>%xml_add_child("se:Name", "My Layer") %>%xml_root()d#> {xml_document}#> <sld version="1.1.0" xmlns="http://www.opengis.net/sld" xmlns:ogc="http://www.opengis.net/ogc" xmlns:se="http://www.opengis.net//se">#> [1] <layer>\n <se:Name>My Layer</se:Name>\n</layer>
xml2/inst/doc/modification.Rmd 0000644 0001762 0000144 00000012031 14727652467 016010 0 ustar ligges users ---
title: "Node Modification"
author: "Jim Hester"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Node Modification}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r, echo = FALSE, message = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(xml2)
library(magrittr)
```
# Modifying Existing XML
Modifying existing XML can be done in xml2 by using the replacement functions
of the accessors. They all have methods for both individual `xml_node` objects
as well as `xml_nodeset` objects. If a vector of values is provided it is
applied piecewise over the nodeset, otherwise the value is recycled.
## Text Modification ##
Text modification only happens on text nodes. If a given node has more than one
text node only the first will be affected. If you want to modify additional
text nodes you need to select them explicitly with `/text()`.
```{r}
x <- read_xml("
This is some text. This is more.
")
xml_text(x)
xml_text(x) <- "This is some other text."
xml_text(x)
# You can avoid this by explicitly selecting the text node.
x <- read_xml("
This is some text. This is bold!
")
text_only <- xml_find_all(x, "//text()")
xml_text(text_only) <- c("This is some other text. ", "Still bold!")
xml_text(x)
xml_structure(x)
```
## Attribute and Namespace Definition Modification ##
Attributes and namespace definitions are modified one at a time with
`xml_attr()` or all at once with `xml_attrs()`. In both cases using `NULL` as
the value will remove the attribute completely.
```{r}
x <- read_xml("xml2")
xml_attr(x, "href")
xml_attr(x, "href") <- "https://github.com/r-lib/xml2"
xml_attr(x, "href")
xml_attrs(x) <- c(id = "xml2", href = "https://github.com/r-lib/xml2")
xml_attrs(x)
x
xml_attrs(x) <- NULL
x
# Namespaces are added with as a xmlns or xmlns:prefix attribute
xml_attr(x, "xmlns") <- "http://foo"
x
xml_attr(x, "xmlns:bar") <- "http://bar"
x
```
## Name Modification ##
Node names are modified with `xml_name()`.
```{r}
x <- read_xml("")
x
xml_name(x)
xml_name(x) <- "c"
x
```
# Node modification #
All of these functions have a `.copy` argument. If this is set to `FALSE` they
will remove the new node from its location before inserting it into the new
location. Otherwise they make a copy of the node before insertion.
## Replacing existing nodes ##
```{r}
x <- read_xml("123")
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]
xml_replace(t1, t3)
x
```
## Add a sibling ##
```{r}
x <- read_xml("123")
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]
xml_add_sibling(t1, t3)
x
xml_add_sibling(t3, t1, where = "before")
x
```
## Add a child ##
```{r}
x <- read_xml("123")
children <- xml_children(x)
t1 <- children[[1]]
t2 <- children[[2]]
t3 <- xml_children(children[[2]])[[1]]
xml_add_child(t1, t3)
x
xml_add_child(t1, read_xml(""))
x
```
## Removing nodes ##
The `xml_remove()` can be used to remove a node (and its children) from a
tree. The default behavior is to unlink the node from the tree, but does _not_
free the memory for the node, so R objects pointing to the node are still
valid.
This allows code like the following to work without crashing R
```{r}
x <- read_xml("")
x1 <- x %>%
xml_children() %>%
.[[1]]
x2 <- x1 %>%
xml_children() %>%
.[[1]]
xml_remove(x1)
rm(x1)
gc()
x2
```
If you are not planning on referencing these nodes again this memory is wasted.
Calling `xml_remove(free = TRUE)` will remove the nodes _and_ free the memory
used to store them. **Note** In this case _any_ node which previously pointed
to the node or its children will instead be pointing to free memory and may
cause R to crash. xml2 can't figure this out for you, so it's your
responsibility to remove any objects which are no longer valid.
In particular `xml_find_*()` results are easy to overlook, for example
```{r}
x <- read_xml("")
bees <- xml_find_all(x, "//b")
xml_remove(xml_child(x), free = TRUE)
# bees[[1]] is no longer valid!!!
rm(bees)
gc()
```
## Namespaces ##
We want to construct a document with the following namespace layout. (From
).
```xml
My Layer
```
```{r}
d <- xml_new_root("sld",
"xmlns" = "http://www.opengis.net/sld",
"xmlns:ogc" = "http://www.opengis.net/ogc",
"xmlns:se" = "http://www.opengis.net//se",
version = "1.1.0"
) %>%
xml_add_child("layer") %>%
xml_add_child("se:Name", "My Layer") %>%
xml_root()
d
```
xml2/inst/extdata/ 0000755 0001762 0000144 00000000000 14727652467 013567 5 ustar ligges users xml2/inst/extdata/r-project.html 0000644 0001762 0000144 00000011601 14727652467 016361 0 ustar ligges users
R: The R Project for Statistical Computing
R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. To download R, please choose your preferred CRAN mirror.
If you have questions about R like how to download and install the software, or what the license terms are, please read our answers to frequently asked questions before you send an email.
R version 3.1.2 (Pumpkin Helmet) has been released on 2014-10-31.
useR! 2015, will take place at the University of Aalborg, Denmark, June 30 - July 3, 2015.
useR! 2014, took place at the University of California, Los Angeles, USA June 30 - July 3, 2014.
xml2/inst/extdata/cd_catalog.xml 0000644 0001762 0000144 00000011730 14727652467 016373 0 ustar ligges users
Empire BurlesqueBob DylanUSAColumbia10.901985Hide your heartBonnie TylorUKCBS Records9.901988Greatest HitsDolly PartonUSARCA9.901982Still got the bluesGary MoreUKVirgin redords10.201990ErosEros RamazzottiEUBMG9.901997One night onlyBee GeesUKPolydor10.901998Sylvias MotherDr.HookUKCBS8.101973Maggie MayRod StewartUKPickwick8.501990RomanzaAndrea BocelliEUPolydor10.801996When a man loves a womanPercy SledgeUSAAtlantic8.701987Black angelSavage RoseEUMega10.9019951999 Grammy NomineesManyUSAGrammy10.201999For the good timesKenny RogersUKMucik Master8.701995Big Willie styleWill SmithUSAColumbia9.901997Tupelo HoneyVan MorrisonUKPolydor8.201971SoulsvilleJorn HoelNorwayWEA7.901996The very best ofCat StevensUKIsland8.901990StopSam BrownUKA and M8.901988Bridge of SpiesT`PauUKSiren7.901987Private DancerTina TurnerUKCapitol8.901983Midt om nattenKim LarsenEUMedley7.801983Pavarotti Gala ConcertLuciano PavarottiUKDECCA9.901991The dock of the bayOtis ReddingUSAAtlantic7.901987Picture bookSimply RedEUElektra7.201985RedThe CommunardsUKLondon7.801987Unchain my heartJoe CockerUSAEMI8.201987
xml2/inst/extdata/order-doc.xml 0000644 0001762 0000144 00000002000 14727652467 016157 0 ustar ligges users
Alice Smith123 Maple StreetMill ValleyCA90952Robert Smith8 Oak AvenueOld TownPA95819Hurry, my lawn is going wild!Lawnmower1148.95Confirm this is electricBaby Monitor139.981999-05-21
xml2/inst/extdata/order-schema.xml 0000644 0001762 0000144 00000004616 14727652467 016671 0 ustar ligges users
Purchase order schema for Example.com.
Copyright 2000 Example.com. All rights reserved.
Purchase order schema for Example.Microsoft.com.
Copyright 2001 Example.Microsoft.com. All rights reserved.
Application info.
xml2/tools/ 0000755 0001762 0000144 00000000000 14727652467 012320 5 ustar ligges users xml2/tools/winlibs.R 0000644 0001762 0000144 00000001616 14760656721 014110 0 ustar ligges users if(!file.exists("../windows/libxml2/include/libxml2/libxml")){
unlink("../windows", recursive = TRUE)
url <- if(grepl("aarch", R.version$platform)){
"https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-aarch64.tar.xz"
} else if(grepl("clang", Sys.getenv('R_COMPILED_BY'))){
"https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-clang-x86_64.tar.xz"
} else if(getRversion() >= "4.2") {
"https://github.com/r-windows/bundles/releases/download/libxml2-2.11.5/libxml2-2.11.5-ucrt-x86_64.tar.xz"
} else {
"https://github.com/rwinlib/libxml2/archive/v2.10.3.tar.gz"
}
download.file(url, basename(url), quiet = TRUE)
dir.create("../windows", showWarnings = FALSE)
untar(basename(url), exdir = "../windows", tar = 'internal')
unlink(basename(url))
setwd("../windows")
file.rename(list.files(), 'libxml2')
}
xml2/README.md 0000644 0001762 0000144 00000004505 14727652467 012443 0 ustar ligges users
# xml2
[](https://cran.r-project.org/package=xml2)
[](https://app.codecov.io/gh/r-lib/xml2?branch=main)
[](https://github.com/r-lib/xml2/actions)
[](https://github.com/r-lib/xml2/actions/workflows/R-CMD-check.yaml)
The xml2 package is a binding to [libxml2](http://xmlsoft.org), making
it easy to work with HTML and XML from R. The API is somewhat inspired
by [jQuery](https://jquery.com).
## Installation
You can install xml2 from CRAN,
``` r
install.packages("xml2")
```
or you can install the development version from github, using
`pak`:
``` r
# install.packages("pak")
pak::pak("r-lib/xml2")
```
## Usage
``` r
library(xml2)
x <- read_xml(" text ")
x
xml_name(x)
xml_children(x)
xml_text(x)
xml_find_all(x, ".//baz")
h <- read_html("