quick-xml-0.38.4/.cargo_vcs_info.json0000644000000001360000000000100130420ustar { "git": { "sha1": "595033e6d1b8078c15da89ed6acf2ae6b45b1918" }, "path_in_vcs": "" }quick-xml-0.38.4/Cargo.lock0000644000000441700000000000100110230ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" dependencies = [ "derive_arbitrary", ] [[package]] name = "async-stream" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" dependencies = [ "async-stream-impl", "futures-core", "pin-project-lite", ] [[package]] name = "async-stream-impl" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytes" version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = 
"criterion" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "itertools 0.13.0", "num-traits", "oorandom", "plotters", "rayon", "regex", "serde", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools 0.10.5", ] [[package]] name = "crossbeam-deque" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "derive_arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "diff" version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "document-features" version = "0.2.12" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" dependencies = [ "litrs", ] [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encoding_rs" version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] [[package]] name = "futures-core" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" [[package]] name = "half" version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "zerocopy", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] [[package]] name = "itertools" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", ] [[package]] name = "litrs" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "ordered-float" version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" dependencies = [ "num-traits", ] [[package]] name = "pin-project-lite" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "plotters" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies 
= [ "plotters-backend", ] [[package]] name = "pretty_assertions" version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", ] [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quick-xml" version = "0.38.4" dependencies = [ "arbitrary", "criterion", "document-features", "encoding_rs", "memchr", "pretty_assertions", "regex", "serde", "serde-value", "serde_derive", "tokio", "tokio-test", ] [[package]] name = "quote" version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "rayon" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" 
version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", "serde_derive", ] [[package]] name = "serde-value" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" dependencies = [ "ordered-float", "serde", ] [[package]] name = "serde_core" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", "serde_core", ] [[package]] 
name = "syn" version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "tokio" version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ "bytes", "pin-project-lite", "tokio-macros", ] [[package]] name = "tokio-macros" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "tokio-stream" version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", "tokio", ] [[package]] name = "tokio-test" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" dependencies = [ "async-stream", "bytes", "futures-core", "tokio", "tokio-stream", ] [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasm-bindgen" version = "0.2.105" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ "bumpalo", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi-util" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ "windows-sys", ] [[package]] name = "windows-link" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ "windows-link", ] [[package]] name = "yansi" version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", "syn", ] quick-xml-0.38.4/Cargo.toml0000644000000041750000000000100110470ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" rust-version = "1.56" name = "quick-xml" version = "0.38.4" build = false include = [ "src/*", "LICENSE-MIT.md", "README.md", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "High performance xml reader and writer" documentation = "https://docs.rs/quick-xml" readme = "README.md" keywords = [ "xml", "serde", "parser", "writer", "html", ] categories = [ "asynchronous", "encoding", "parsing", "parser-implementations", ] license = "MIT" repository = "https://github.com/tafia/quick-xml" [package.metadata.docs.rs] all-features = true [features] async-tokio = ["tokio"] default = [] encoding = ["encoding_rs"] escape-html = [] overlapped-lists = [] serde-types = ["serde/derive"] serialize = ["serde"] [lib] name = "quick_xml" path = "src/lib.rs" bench = false [dependencies.arbitrary] version = "1" features = ["derive"] optional = true [dependencies.document-features] version = "0.2" optional = true [dependencies.encoding_rs] version = "0.8" optional = true [dependencies.memchr] version = "2.1" [dependencies.serde] version = ">=1.0.139" optional = true [dependencies.tokio] version = "1.10" features = ["io-util"] optional = true default-features = false [dev-dependencies.criterion] version = ">=0.4,<0.8" [dev-dependencies.pretty_assertions] version = "1.4" [dev-dependencies.regex] version = "1" [dev-dependencies.serde-value] version = "0.7" [dev-dependencies.serde_derive] version = "1.0.206" [dev-dependencies.tokio] version = "1.21" features = [ "macros", "rt", ] default-features = false [dev-dependencies.tokio-test] version = "0.4" quick-xml-0.38.4/Cargo.toml.orig000064400000000000000000000211561046102023000145260ustar 00000000000000[package] name = "quick-xml" version = "0.38.4" description = "High performance xml reader and writer" edition = "2021" documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" keywords = ["xml", "serde", "parser", "writer", 
"html"] categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] license = "MIT" rust-version = "1.56" # We exclude tests & examples & benches to reduce the size of a package. # Unfortunately, this is source of warnings in latest cargo when packaging: # > warning: ignoring {context} `{name}` as `{path}` is not included in the published package # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 # will be resolved include = ["src/*", "LICENSE-MIT.md", "README.md"] [dependencies] arbitrary = { version = "1", features = ["derive"], optional = true } document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = ">=1.0.139", optional = true } tokio = { version = "1.10", optional = true, default-features = false, features = ["io-util"] } memchr = "2.1" [dev-dependencies] # msrv workflow uses `cargo check` which tries to resolve all dependencies, even # not used (for example, when calling `cargo check --lib` only `dependencies` is # required, `dev-dependencies` are not used). `criterion` 0.6 has msrv = 1.80, so # we cannot check minimal versions with it. 
We allow using `criterion` 0.4 for that # See https://github.com/rust-lang/cargo/issues/10958 criterion = ">=0.4,<0.8" pretty_assertions = "1.4" regex = "1" # https://github.com/serde-rs/serde/issues/1904 is fixed since 1.0.206 # serde does not follow semver in numbering and their dependencies, so we specify the patch version here serde_derive = { version = "1.0.206" } serde-value = "0.7" tokio = { version = "1.21", default-features = false, features = ["macros", "rt"] } tokio-test = "0.4" [lib] bench = false [[bench]] name = "microbenches" harness = false path = "benches/microbenches.rs" [[bench]] name = "macrobenches" harness = false path = "benches/macrobenches.rs" [features] default = [] ## Enables support for asynchronous reading and writing from `tokio`'s IO-Traits by enabling ## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. ## ## [reading events]: crate::reader::Reader::read_event_into_async async-tokio = ["tokio"] ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from ## the XML declaration if it is found, otherwise UTF-8 is assumed. ## ## Currently, only ASCII-compatible encodings are supported. For example, ## UTF-16 will not work (therefore, `quick-xml` is not [standard compliant]). ## ## Thus, quick-xml supports all encodings of [`encoding_rs`] except these: ## - [UTF-16BE] ## - [UTF-16LE] ## - [ISO-2022-JP] ## ## You should stop processing a document when one of these encodings is detected, ## because generated events can be wrong and do not reflect a real document structure! 
## ## Because these are the only supported encodings that are not ASCII compatible, you can ## check for them: ## ## ``` ## use quick_xml::events::Event; ## use quick_xml::reader::Reader; ## ## # fn to_utf16le_with_bom(string: &str) -> Vec<u8> { ## # let mut bytes = Vec::new(); ## # bytes.extend_from_slice(&[0xFF, 0xFE]); // UTF-16 LE BOM ## # for ch in string.encode_utf16() { ## # bytes.extend_from_slice(&ch.to_le_bytes()); ## # } ## # bytes ## # } ## let xml = to_utf16le_with_bom(r#""#); ## let mut reader = Reader::from_reader(xml.as_ref()); ## reader.config_mut().trim_text(true); ## ## let mut buf = Vec::new(); ## let mut unsupported = false; ## loop { ## if !reader.decoder().encoding().is_ascii_compatible() { ## unsupported = true; ## break; ## } ## buf.clear(); ## match reader.read_event_into(&mut buf).unwrap() { ## Event::Eof => break, ## _ => {} ## } ## } ## assert_eq!(unsupported, true); ## ``` ## This restriction will be eliminated once issue [#158] is resolved. ## ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding ## [UTF-16BE]: encoding_rs::UTF_16BE ## [UTF-16LE]: encoding_rs::UTF_16LE ## [ISO-2022-JP]: encoding_rs::ISO_2022_JP ## [#158]: https://github.com/tafia/quick-xml/issues/158 encoding = ["encoding_rs"] ## Enables support for recognizing all [HTML 5 entities] in [`unescape`] ## function. The full list of entities can also be found in ## <https://html.spec.whatwg.org/entities.json>. ## ## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref ## [`unescape`]: crate::escape::unescape escape-html = [] ## This feature is for the Serde deserializer that enables support for deserializing ## lists where tags are overlapped with tags that do not correspond to the list. 
## ## When this feature is enabled, the XML: ## ```xml ## <any-name> ## <item/> ## <another-item/> ## <item/> ## <item/> ## </any-name> ## ``` ## could be deserialized to a struct: ## ```no_run ## # use serde::Deserialize; ## #[derive(Deserialize)] ## #[serde(rename_all = "kebab-case")] ## struct AnyName { ## item: Vec<()>, ## another_item: (), ## } ## ``` ## ## When this feature is not enabled (default), only the first element will be ## associated with the field, and the deserialized type will report an error ## (duplicated field) when the deserializer encounters a second `<item/>`. ## ## Note, that enabling this feature can lead to high and even unlimited memory ## consumption, because deserializer needs to check all events up to the end of a ## container tag (`</any-name>` in this example) to figure out that there are no ## more items for a field. If `</any-name>` or even EOF is not encountered, the ## parsing will never end which can lead to a denial-of-service (DoS) scenario. ## ## Having several lists and overlapped elements for them in XML could also lead ## to quadratic parsing time, because the deserializer must check the list of ## events as many times as the number of sequence fields present in the schema. ## ## To reduce negative consequences, always [limit] the maximum number of events ## that [`Deserializer`] will buffer. ## ## This feature works only with `serialize` feature and has no effect if `serialize` ## is not enabled. ## ## [limit]: crate::de::Deserializer::event_buffer_size ## [`Deserializer`]: crate::de::Deserializer overlapped-lists = [] ## Enables serialization of some quick-xml types using [`serde`]. This feature ## is rarely needed. ## ## This feature does NOT provide XML serializer or deserializer. You should use ## the `serialize` feature for that instead. # Cannot name "serde" to avoid clash with dependency. # "dep:" prefix only available from Rust 1.60 serde-types = ["serde/derive"] ## Enables support for [`serde`] serialization and deserialization. 
When this ## feature is enabled, quick-xml provides serializer and deserializer for XML. ## ## This feature does NOT enable serialization of the types inside quick-xml. ## If you need that, use the `serde-types` feature. serialize = ["serde"] # "dep:" prefix only available from Rust 1.60 [package.metadata.docs.rs] # document all features all-features = true # Tests, benchmarks and examples are not included in the package on crates.io, # so we need to specify a path, otherwise `cargo package` complains # That may become unnecessary once https://github.com/rust-lang/cargo/issues/13491 # will be resolved [[test]] name = "async-tokio" required-features = ["async-tokio"] path = "tests/async-tokio.rs" [[test]] name = "encodings" required-features = ["encoding"] path = "tests/encodings.rs" [[test]] name = "html" required-features = ["escape-html"] path = "tests/html.rs" [[test]] name = "serde-de" required-features = ["serialize"] path = "tests/serde-de.rs" [[test]] name = "serde-de-enum" required-features = ["serialize"] path = "tests/serde-de-enum.rs" [[test]] name = "serde-de-seq" required-features = ["serialize"] path = "tests/serde-de-seq.rs" [[test]] name = "serde-de-xsi" required-features = ["serialize"] path = "tests/serde-de-xsi.rs" [[test]] name = "serde-se" required-features = ["serialize"] path = "tests/serde-se.rs" [[test]] name = "serde-issues" required-features = ["serialize"] path = "tests/serde-issues.rs" [[example]] name = "read_nodes_serde" required-features = ["serialize"] path = "examples/read_nodes_serde.rs" [[example]] name = "flattened_enum" required-features = ["serialize"] path = "examples/flattened_enum.rs" quick-xml-0.38.4/LICENSE-MIT.md000064400000000000000000000021211046102023000136610ustar 00000000000000The MIT License (MIT) Copyright (c) 2016 Johann Tuffe Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, 
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. quick-xml-0.38.4/README.md000064400000000000000000000147541046102023000131240ustar 00000000000000# quick-xml ![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg) [![Crate](https://img.shields.io/crates/v/quick-xml.svg)](https://crates.io/crates/quick-xml) [![docs.rs](https://docs.rs/quick-xml/badge.svg)](https://docs.rs/quick-xml) [![codecov](https://img.shields.io/codecov/c/github/tafia/quick-xml)](https://codecov.io/gh/tafia/quick-xml) [![MSRV](https://img.shields.io/badge/rustc-1.56.0+-ab6000.svg)](https://blog.rust-lang.org/2021/10/21/Rust-1.56.0.html) High performance xml pull reader/writer. The reader: - is almost zero-copy (use of `Cow` whenever possible) - is easy on memory allocation (the API provides a way to reuse buffers) - supports various encodings (with the `encoding` feature), namespace resolution, and special characters. Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs). 
## Example ### Reader ```rust use quick_xml::events::Event; use quick_xml::reader::Reader; let xml = r#"<tag1 att1 = "test"> <tag2><!--Test comment-->Test</tag2> <tag2>Test 2</tag2> </tag1>"#; let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); let mut count = 0; let mut txt = Vec::new(); let mut buf = Vec::new(); // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) loop { // NOTE: this is the generic case when we don't know about the input BufRead. // when the input is a &str or a &[u8], we don't actually need to use another // buffer, we could directly call `reader.read_event()` match reader.read_event_into(&mut buf) { Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), // exits the loop when reaching end of file Ok(Event::Eof) => break, Ok(Event::Start(e)) => { match e.name().as_ref() { b"tag1" => println!("attributes values: {:?}", e.attributes().map(|a| a.unwrap().value) .collect::<Vec<_>>()), b"tag2" => count += 1, _ => (), } } Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()), // There are several other `Event`s we do not consider here _ => (), } // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low buf.clear(); } ``` ### Writer ```rust use quick_xml::events::{Event, BytesEnd, BytesStart}; use quick_xml::reader::Reader; use quick_xml::writer::Writer; use std::io::Cursor; let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#; let mut reader = Reader::from_str(xml); reader.config_mut().trim_text(true); let mut writer = Writer::new(Cursor::new(Vec::new())); loop { match reader.read_event() { Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { // creates a new element ... 
alternatively we could reuse `e` by calling // `e.into_owned()` let mut elem = BytesStart::new("my_elem"); // collect existing attributes elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); // copy existing attributes, adds a new my-key="some value" attribute elem.push_attribute(("my-key", "some value")); // writes the event to the writer assert!(writer.write_event(Event::Start(elem)).is_ok()); }, Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); }, Ok(Event::Eof) => break, // we can either move or borrow the event to write, depending on your use-case Ok(e) => assert!(writer.write_event(e).is_ok()), Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), } } let result = writer.into_inner().into_inner(); let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#; assert_eq!(result, expected.as_bytes()); ``` ## Serde When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits. The mapping between XML and Rust types, and in particular the syntax that allows you to specify the distinction between *elements* and *attributes*, is described in detail in the documentation for [deserialization](https://docs.rs/quick-xml/latest/quick_xml/de/). ### Credits This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). quick-xml follows its convention for deserialization, including the [`$value`](https://github.com/RReverser/serde-xml-rs#parsing-the-value-of-a-tag) special name. 
### Parsing the "value" of a tag If you have an input of the form `<foo abc="xyz">bar</foo>`, and you want to get at the `bar`, you can use either the special name `$text`, or the special name `$value`: ```rust,ignore struct Foo { #[serde(rename = "@abc")] pub abc: String, #[serde(rename = "$text")] pub body: String, } ``` Read about the difference in the [documentation](https://docs.rs/quick-xml/latest/quick_xml/de/index.html#difference-between-text-and-value-special-names). ### Performance Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs. # Features - `encoding`: support non utf8 xmls - `serialize`: support serde `Serialize`/`Deserialize` ## Performance Benchmarking is hard and the results depend on your input file and your machine. Here on my particular file, quick-xml is around **50 times faster** than [xml-rs](https://crates.io/crates/xml-rs) crate. ``` // quick-xml benches test bench_quick_xml ... bench: 198,866 ns/iter (+/- 9,663) test bench_quick_xml_escaped ... bench: 282,740 ns/iter (+/- 61,625) test bench_quick_xml_namespaced ... bench: 389,977 ns/iter (+/- 32,045) // same bench with xml-rs test bench_xml_rs ... bench: 14,468,930 ns/iter (+/- 321,171) // serde-xml-rs vs serialize feature test bench_serde_quick_xml ... bench: 1,181,198 ns/iter (+/- 138,290) test bench_serde_xml_rs ... bench: 15,039,564 ns/iter (+/- 783,485) ``` For a feature and performance comparison, you can also have a look at RazrFalcon's [parser comparison table](https://github.com/RazrFalcon/roxmltree#parsing). ## Contribute Any PR is welcomed! ## License MIT quick-xml-0.38.4/src/de/attributes.rs000064400000000000000000000132301046102023000155440ustar 00000000000000//! 
Implementation of the deserializer from attributes use std::borrow::Cow; use serde::de::{DeserializeSeed, Deserializer, Error, IntoDeserializer, MapAccess, Visitor}; use serde::forward_to_deserialize_any; use crate::de::key::QNameDeserializer; use crate::de::SimpleTypeDeserializer; use crate::errors::serialize::DeError; use crate::events::attributes::Attributes; impl<'i> Attributes<'i> { /// Converts this iterator into a serde's [`MapAccess`] trait to use with serde. /// The returned object also implements the [`Deserializer`] trait. /// /// # Parameters /// - `prefix`: a prefix of the field names in structs that should be stripped /// to get the local attribute name. The [`crate::de::Deserializer`] uses `"@"` /// as a prefix, but [`Self::into_deserializer()`] uses empy string, which mean /// that we do not strip anything. /// /// # Example /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::BytesStart; /// use serde::Deserialize; /// use serde::de::IntoDeserializer; /// /// #[derive(Debug, PartialEq, Deserialize)] /// struct MyData<'i> { /// question: &'i str, /// answer: u32, /// } /// /// #[derive(Debug, PartialEq, Deserialize)] /// struct MyDataPrefixed<'i> { /// #[serde(rename = "@question")] question: &'i str, /// #[serde(rename = "@answer")] answer: u32, /// } /// /// let tag = BytesStart::from_content( /// "tag /// question = 'The Ultimate Question of Life, the Universe, and Everything' /// answer = '42'", /// 3 /// ); /// // Strip nothing from the field names /// let de = tag.attributes().clone().into_deserializer(); /// assert_eq!( /// MyData::deserialize(de).unwrap(), /// MyData { /// question: "The Ultimate Question of Life, the Universe, and Everything", /// answer: 42, /// } /// ); /// /// // Strip "@" from the field name /// let de = tag.attributes().into_map_access("@"); /// assert_eq!( /// MyDataPrefixed::deserialize(de).unwrap(), /// MyDataPrefixed { /// question: "The Ultimate Question of Life, the Universe, and 
Everything", /// answer: 42, /// } /// ); /// ``` #[inline] pub const fn into_map_access(self, prefix: &'static str) -> AttributesDeserializer<'i> { AttributesDeserializer { iter: self, value: None, prefix, key_buf: String::new(), } } } impl<'de> IntoDeserializer<'de, DeError> for Attributes<'de> { type Deserializer = AttributesDeserializer<'de>; #[inline] fn into_deserializer(self) -> Self::Deserializer { self.into_map_access("") } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer used to make possible to pack all attributes into a struct. /// It is created by [`Attributes::into_map_access`] or [`Attributes::into_deserializer`] /// methods. /// /// This deserializer always call [`Visitor::visit_map`] with self as [`MapAccess`]. /// /// # Lifetime /// /// `'i` is a lifetime of the original buffer from which attributes were parsed. /// In particular, when reader was created from a string, this is lifetime of the /// string. #[derive(Debug, Clone)] pub struct AttributesDeserializer<'i> { iter: Attributes<'i>, /// The value of the attribute, read in last call to `next_key_seed`. value: Option>, /// This prefix will be stripped from struct fields before match against attribute name. prefix: &'static str, /// Buffer to store attribute name as a field name exposed to serde consumers. /// Keeped in the serializer to avoid many small allocations key_buf: String, } impl<'de> Deserializer<'de> for AttributesDeserializer<'de> { type Error = DeError; #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_map(self) } forward_to_deserialize_any! 
{ bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf option unit unit_struct newtype_struct seq tuple tuple_struct map struct enum identifier ignored_any } } impl<'de> MapAccess<'de> for AttributesDeserializer<'de> { type Error = DeError; fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> where K: DeserializeSeed<'de>, { debug_assert_eq!(self.value, None); match self.iter.next() { None => Ok(None), Some(Ok(attr)) => { self.value = Some(attr.value); self.key_buf.clear(); self.key_buf.push_str(self.prefix); let de = QNameDeserializer::from_attr(attr.key, self.iter.decoder(), &mut self.key_buf)?; seed.deserialize(de).map(Some) } Some(Err(err)) => Err(Error::custom(err)), } } fn next_value_seed(&mut self, seed: V) -> Result where V: DeserializeSeed<'de>, { match self.value.take() { Some(value) => { let de = SimpleTypeDeserializer::from_part(&value, 0..value.len(), self.iter.decoder()); seed.deserialize(de) } None => Err(DeError::KeyNotRead), } } } quick-xml-0.38.4/src/de/key.rs000064400000000000000000000366061046102023000141620ustar 00000000000000use crate::de::simple_type::UnitOnly; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::events::BytesStart; use crate::name::QName; use crate::utils::CowRef; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, Visitor}; use serde::{forward_to_deserialize_any, serde_if_integer128}; use std::borrow::Cow; macro_rules! deserialize_num { ($method:ident, $visit:ident) => { fn $method(self, visitor: V) -> Result where V: Visitor<'de>, { match self.name.parse() { Ok(number) => visitor.$visit(number), Err(_) => self.name.deserialize_str(visitor), } } }; } /// Decodes raw bytes using the deserializer encoding. /// The method will borrow if encoding is UTF-8 compatible and `name` contains /// only UTF-8 compatible characters (usually only ASCII characters). 
#[inline] fn decode_name<'n>(name: QName<'n>, decoder: Decoder) -> Result, DeError> { let local = name.local_name(); Ok(decoder.decode(local.into_inner())?) } /// A deserializer for xml names of elements and attributes. /// /// Used for deserializing values from: /// - attribute names (`<... name="..." ...>`) /// - element names (`...`) /// /// Converts a name to an identifier string using the following rules: /// /// - if it is an [`attribute`] name, put `@` in front of the identifier /// - if it is a namespace binding (`xmlns` or `xmlns:xxx`) put the decoded name /// to the identifier /// - if it is an attribute in the `xml` namespace, put the decoded name /// to the identifier /// - put the decoded [`local_name()`] of a name to the identifier /// /// The final identifier looks like `[@]local_name`, or `@xmlns`, or `@xmlns:binding` or /// `xml:attribute` (where `[]` means optional element). /// /// The deserializer also supports deserializing names as other primitive types: /// - numbers /// - booleans /// - unit (`()`) and unit structs /// - unit variants of the enumerations /// /// Because `serde` does not define on which side type conversion should be /// performed, and because [`Deserialize`] implementation for that primitives /// in serde does not accept strings, the deserializer will perform conversion /// by itself. /// /// The deserializer is able to deserialize unit and unit structs, but any name /// will be converted to the same unit instance. This is asymmetry with a serializer, /// which not able to serialize those types, because empty names are impossible /// in XML. /// /// `deserialize_any()` returns the same result as `deserialize_identifier()`. 
/// /// # Lifetimes /// /// - `'i`: lifetime of the data that the deserializer borrows from the parsed input /// - `'d`: lifetime of a deserializer that holds a buffer with content of events /// /// [`attribute`]: Self::from_attr /// [`local_name()`]: QName::local_name /// [`Deserialize`]: serde::Deserialize pub struct QNameDeserializer<'i, 'd> { name: CowRef<'i, 'd, str>, } impl<'i, 'd> QNameDeserializer<'i, 'd> { /// Creates deserializer from name of an attribute pub fn from_attr( name: QName<'d>, decoder: Decoder, key_buf: &'d mut String, ) -> Result { // https://github.com/tafia/quick-xml/issues/537 // Namespace bindings (xmlns:xxx) map to `@xmlns:xxx` instead of `@xxx` if name.as_namespace_binding().is_some() { decoder.decode_into(name.into_inner(), key_buf)?; } else { // https://github.com/tafia/quick-xml/issues/841 // we also want to map to the full name for `xml:xxx`, because `xml:xxx` attributes // can apper only in this literal form, as `xml` prefix cannot be redeclared or unbound let (local, prefix_opt) = name.decompose(); if prefix_opt.map_or(false, |prefix| prefix.is_xml()) { decoder.decode_into(name.into_inner(), key_buf)?; } else { decoder.decode_into(local.into_inner(), key_buf)?; } }; Ok(Self { name: CowRef::Slice(key_buf), }) } /// Creates deserializer from name of an element pub fn from_elem(start: &'d BytesStart<'i>) -> Result { let local = match start.buf { Cow::Borrowed(b) => match decode_name(QName(&b[..start.name_len]), start.decoder())? { Cow::Borrowed(borrowed) => CowRef::Input(borrowed), Cow::Owned(owned) => CowRef::Owned(owned), }, Cow::Owned(ref o) => match decode_name(QName(&o[..start.name_len]), start.decoder())? { Cow::Borrowed(borrowed) => CowRef::Slice(borrowed), Cow::Owned(owned) => CowRef::Owned(owned), }, }; Ok(Self { name: local }) } } impl<'de, 'd> Deserializer<'de> for QNameDeserializer<'de, 'd> { type Error = DeError; forward_to_deserialize_any! 
{ char str string bytes byte_buf seq tuple tuple_struct map struct ignored_any } /// According to the , /// valid boolean representations are only `"true"`, `"false"`, `"1"`, /// and `"0"`. fn deserialize_bool(self, visitor: V) -> Result where V: Visitor<'de>, { self.name.deserialize_bool(visitor) } deserialize_num!(deserialize_i8, visit_i8); deserialize_num!(deserialize_i16, visit_i16); deserialize_num!(deserialize_i32, visit_i32); deserialize_num!(deserialize_i64, visit_i64); deserialize_num!(deserialize_u8, visit_u8); deserialize_num!(deserialize_u16, visit_u16); deserialize_num!(deserialize_u32, visit_u32); deserialize_num!(deserialize_u64, visit_u64); serde_if_integer128! { deserialize_num!(deserialize_i128, visit_i128); deserialize_num!(deserialize_u128, visit_u128); } deserialize_num!(deserialize_f32, visit_f32); deserialize_num!(deserialize_f64, visit_f64); /// Calls [`Visitor::visit_unit`] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } /// Forwards deserialization to the [`Self::deserialize_identifier`] #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_identifier(visitor) } /// If `name` is an empty string then calls [`Visitor::visit_none`], /// otherwise calls [`Visitor::visit_some`] with itself fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.name.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// Calls a [`Visitor::visit_str`] if [`name`] contains only UTF-8 /// compatible encoded characters and represents an element name and /// a [`Visitor::visit_string`] in 
all other cases. /// /// [`name`]: Self::name fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { match self.name { CowRef::Input(name) => visitor.visit_borrowed_str(name), CowRef::Slice(name) => visitor.visit_str(name), CowRef::Owned(name) => visitor.visit_string(name), } } fn deserialize_enum( self, _name: &str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } } impl<'de, 'd> EnumAccess<'de> for QNameDeserializer<'de, 'd> { type Error = DeError; type Variant = UnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, UnitOnly)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::key::QNameSerializer; use crate::utils::{ByteBuf, Bytes}; use pretty_assertions::assert_eq; use serde::de::IgnoredAny; use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Tuple((), ()); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "@Attr")] Attr, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize, }, } #[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! 
deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(QNameSerializer { writer: String::new() }) .unwrap(), $input ); } }; } /// Checks that attempt to deserialize given `$input` as a `$type` results to a /// deserialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = QNameDeserializer { name: CowRef::Input($input), }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! 
{ deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); err!(char_escaped: char = "<" => Custom("invalid value: string \"<\", expected a character")); deserialized_to!(string: String = "<escaped string" => "<escaped string"); deserialized_to!(borrowed_str: &str = "name" => "name"); err!(byte_buf: ByteBuf = "<escaped string" => Custom("invalid type: string \"<escaped string\", expected byte data")); err!(borrowed_bytes: Bytes = "name" => Custom("invalid type: string \"name\", expected borrowed bytes")); deserialized_to!(option_none: Option = "" => None); deserialized_to!(option_some: Option = "name" => Some("name".into())); // Unit structs cannot be represented in some meaningful way, but it meaningful // to use them as a placeholder when we want to deserialize _something_ deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype: Newtype = "<escaped string" => Newtype("<escaped string".into())); err!(seq: Vec<()> = "name" => Custom("invalid type: string \"name\", expected a sequence")); err!(tuple: ((), ()) = "name" => Custom("invalid type: string \"name\", expected a tuple of size 2")); err!(tuple_struct: Tuple = "name" => Custom("invalid type: string \"name\", expected tuple struct Tuple")); err!(map: HashMap<(), ()> = "name" => Custom("invalid type: string \"name\", expected a map")); err!(struct_: Struct = "name" => Custom("invalid type: string \"name\", expected struct Struct")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); deserialized_to!(enum_unit_for_attr: Enum = "@Attr" => Enum::Attr); err!(enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); 
err!(enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); // Field identifiers cannot be serialized, and IgnoredAny represented _something_ // which is not concrete deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any-name" => Any(IgnoredAny)); } quick-xml-0.38.4/src/de/map.rs000064400000000000000000001354041046102023000141430ustar 00000000000000//! Serde `Deserializer` module use crate::{ de::key::QNameDeserializer, de::resolver::EntityResolver, de::simple_type::SimpleTypeDeserializer, de::text::TextDeserializer, de::{DeEvent, Deserializer, XmlRead, TEXT_KEY, VALUE_KEY}, errors::serialize::DeError, errors::Error, events::attributes::IterState, events::BytesStart, name::QName, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, MapAccess, SeqAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; /// Defines a source that should be used to deserialize a value in the next call /// to [`next_value_seed()`](MapAccess::next_value_seed) #[derive(Debug, PartialEq)] enum ValueSource { /// Source are not specified, because [`next_key_seed()`] not yet called. /// This is an initial state and state after deserializing value /// (after call of [`next_value_seed()`]). /// /// Attempt to call [`next_value_seed()`] while accessor in this state would /// return a [`DeError::KeyNotRead`] error. /// /// [`next_key_seed()`]: MapAccess::next_key_seed /// [`next_value_seed()`]: MapAccess::next_value_seed Unknown, /// Next value should be deserialized from an attribute value; value is located /// at specified span. 
Attribute(Range), /// Value should be deserialized from the text content of the XML node, which /// represented or by an ordinary text node, or by a CDATA node: /// /// ```xml /// /// text content /// /// /// ``` /// ```xml /// /// /// /// /// ``` Text, /// Next value should be deserialized from an element with an any name, except /// elements with a name matching one of the struct fields. Corresponding tag /// name will always be associated with a field with name [`VALUE_KEY`]. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] is not listed in the [list of known fields] (which for a struct /// is a list of field names, and for a map that is an empty list), _and_ /// struct has a field with a special name [`VALUE_KEY`]. /// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`ElementMapAccess`] do not consume any events in that state. /// /// Because in that state any encountered `` is mapped to the [`VALUE_KEY`] /// field, it is possible to use tag name as an enum discriminator, so `enum`s /// can be deserialized from that XMLs: /// /// ```xml /// /// ... /// /// /// /// ``` /// ```xml /// /// ... /// /// /// /// ``` /// /// both can be deserialized into /// /// ```ignore /// enum Enum { /// variant1, /// variant2, /// } /// struct AnyName { /// #[serde(rename = "$value")] /// field: Enum, /// } /// ``` /// /// That is possible, because value deserializer have access to the full content /// of a `...` or `...` node, including /// the tag name. 
/// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Text`]: Self::Text /// [list of known fields]: ElementMapAccess::fields Content, /// Next value should be deserialized from an element with a dedicated name. /// If deserialized type is a sequence, then that sequence will collect all /// elements with the same name until it will be filled. If not all elements /// would be consumed, the rest will be ignored. /// /// That state is set when call to [`peek()`] returns a [`Start`] event, which /// [`name()`] represents a field name. That name will be deserialized as a key. /// /// When in this state, next event, returned by [`next()`], will be a [`Start`], /// which represents both a key, and a value. Value would be deserialized from /// the whole element and how is will be done determined by the value deserializer. /// The [`ElementMapAccess`] do not consume any events in that state. /// /// An illustration below shows, what data is used to deserialize key and value: /// ```xml /// /// ... /// /// /// /// ``` /// /// Although value deserializer will have access to the full content of a `` /// node (including the tag name), it will not get much benefits from that, /// because tag name will always be fixed for a given map field (equal to a /// field name). So, if the field type is an `enum`, it cannot select its /// variant based on the tag name. If that is needed, then [`Content`] variant /// of this enum should be used. Such usage is enabled by annotating a struct /// field as "content" field, which implemented as given the field a special /// [`VALUE_KEY`] name. 
/// /// [`Start`]: DeEvent::Start /// [`peek()`]: Deserializer::peek() /// [`next()`]: Deserializer::next() /// [`name()`]: BytesStart::name() /// [`Content`]: Self::Content Nested, } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer that extracts map-like structures from an XML. This deserializer /// represents a one XML tag: /// /// ```xml /// ... /// ``` /// /// Name of this tag is stored in a [`Self::start`] property. /// /// # Lifetimes /// /// - `'de` lifetime represents a buffer, from which deserialized values can /// borrow their data. Depending on the underlying reader, there can be an /// internal buffer of deserializer (i.e. deserializer itself) or an input /// (in that case it is possible to approach zero-copy deserialization). /// /// - `'d` lifetime represents a parent deserializer, which could own the data /// buffer. pub(crate) struct ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Tag -- owner of attributes start: BytesStart<'de>, de: &'d mut Deserializer<'de, R, E>, /// State of the iterator over attributes. Contains the next position in the /// inner `start` slice, from which next attribute should be parsed. iter: IterState, /// Current state of the accessor that determines what next call to API /// methods should return. source: ValueSource, /// List of field names of the struct. It is empty for maps fields: &'static [&'static str], /// If `true`, then the deserialized struct has a field with a special name: /// [`VALUE_KEY`]. That field should be deserialized from the whole content /// of an XML node, including tag name: /// /// ```xml /// value for VALUE_KEY field /// ``` has_value_field: bool, /// If `true`, then the deserialized struct has a field with a special name: /// [`TEXT_KEY`]. 
has_text_field: bool, } impl<'de, 'd, R, E> ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Create a new ElementMapAccess pub fn new( de: &'d mut Deserializer<'de, R, E>, start: BytesStart<'de>, fields: &'static [&'static str], ) -> Self { Self { de, iter: IterState::new(start.name().as_ref().len(), false), start, source: ValueSource::Unknown, fields, has_value_field: fields.contains(&VALUE_KEY), has_text_field: fields.contains(&TEXT_KEY), } } /// Determines if subtree started with the specified event shoould be skipped. /// /// Used to map elements with `xsi:nil` attribute set to true to `None` in optional contexts. /// /// We need to handle two attributes: /// - on parent element: `` /// - on this element: `` /// /// We check parent element too because `xsi:nil` affects only nested elements of the /// tag where it is defined. We can map structure with fields mapped to attributes to /// the `` element and set to `None` all its optional elements. fn should_skip_subtree(&self, start: &BytesStart) -> bool { self.de.reader.reader.has_nil_attr(&self.start) || self.de.reader.reader.has_nil_attr(start) } /// Skips whitespaces when they are not preserved #[inline] fn skip_whitespaces(&mut self) -> Result<(), DeError> { // TODO: respect the `xml:space` attribute and probably some deserialized type sign self.de.skip_whitespaces() } } impl<'de, 'd, R, E> MapAccess<'de> for ElementMapAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn next_key_seed>( &mut self, seed: K, ) -> Result, Self::Error> { debug_assert_eq!(self.source, ValueSource::Unknown); // FIXME: There error positions counted from the start of tag name - need global position let slice = &self.start.buf; let decoder = self.start.decoder(); if let Some(a) = self.iter.next(slice).transpose()? 
{ // try getting map from attributes (key= "value") let (key, value) = a.into(); self.source = ValueSource::Attribute(value.unwrap_or_default()); // Attributes in mapping starts from @ prefix // TODO: Customization point - may customize prefix self.de.key_buf.clear(); self.de.key_buf.push('@'); let de = QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?; seed.deserialize(de).map(Some) } else { self.skip_whitespaces()?; // try getting from events (value) match self.de.peek()? { // If we have dedicated "$text" field, it will not be passed to "$value" field DeEvent::Text(_) if self.has_value_field && !self.has_text_field => { self.source = ValueSource::Content; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node let de = BorrowedStrDeserializer::::new(VALUE_KEY); seed.deserialize(de).map(Some) } DeEvent::Text(_) => { self.source = ValueSource::Text; // Deserialize `key` from special attribute name which means // that value should be taken from the text content of the // XML node let de = BorrowedStrDeserializer::::new(TEXT_KEY); seed.deserialize(de).map(Some) } // Used to deserialize collections of enums, like: // // // // // // // into // // enum Enum { A, B, С } // struct Root { // #[serde(rename = "$value")] // items: Vec, // } // TODO: This should be handled by #[serde(flatten)] // See https://github.com/serde-rs/serde/issues/1905 DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e)? 
=> { self.source = ValueSource::Content; let de = BorrowedStrDeserializer::::new(VALUE_KEY); seed.deserialize(de).map(Some) } DeEvent::Start(e) => { self.source = ValueSource::Nested; let de = QNameDeserializer::from_elem(e)?; seed.deserialize(de).map(Some) } // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader if our // deserializer implementation is correct DeEvent::End(e) => { debug_assert_eq!(self.start.name(), e.name()); // Consume End self.de.next()?; Ok(None) } // We cannot get `Eof` legally, because we always inside of the // opened tag `self.start` DeEvent::Eof => { Err(Error::missed_end(self.start.name(), self.start.decoder()).into()) } } } } fn next_value_seed>( &mut self, seed: K, ) -> Result { match std::mem::replace(&mut self.source, ValueSource::Unknown) { ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part( &self.start.buf, value, self.start.decoder(), )), // This arm processes the following XML shape: // // text value // // The whole map represented by an `` element, the map key // is implicit and equals to the `TEXT_KEY` constant, and the value // is a `Text` event (the value deserializer will see that event) // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs ValueSource::Text => match self.de.next()? { DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: We set `Text` only when we seen `Text` _ => unreachable!(), }, // This arm processes the following XML shape: // // ... // // The whole map represented by an `` element, the map key // is implicit and equals to the `VALUE_KEY` constant, and the value // is a `Start` event (the value deserializer will see that event) ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self, fixed_name: false, }), // This arm processes the following XML shape: // // ... 
// // The whole map represented by an `` element, the map key // is a `tag`, and the value is a `Start` event (the value deserializer // will see that event) ValueSource::Nested => seed.deserialize(MapValueDeserializer { map: self, fixed_name: true, }), ValueSource::Unknown => Err(DeError::KeyNotRead), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for a value of map or struct. That deserializer slightly /// differently processes events for a primitive types and sequences than /// a [`Deserializer`]. /// /// This deserializer used to deserialize two kinds of fields: /// - usual fields with a dedicated name, such as `field_one` or `field_two`, in /// that case field [`Self::fixed_name`] is `true`; /// - the special `$value` field which represents any tag or a textual content /// in the XML which would be found in the document, in that case field /// [`Self::fixed_name`] is `false`. /// /// This deserializer can see two kind of events at the start: /// - [`DeEvent::Text`] /// - [`DeEvent::Start`] /// /// which represents two possible variants of items: /// ```xml /// A tag item /// A text item /// /// ``` /// /// This deserializer are very similar to a [`ElementDeserializer`]. The only difference /// in the `deserialize_seq` method. This deserializer will act as an iterator /// over tags / text within it's parent tag, whereas the [`ElementDeserializer`] /// will represent sequences as an `xs:list`. /// /// This deserializer processes items as following: /// - primitives (numbers, booleans, strings, characters) are deserialized either /// from a text content, or unwrapped from a one level of a tag. 
So, `123` and /// `123` both can be deserialized into an `u32`; /// - `Option`: /// - empty text of [`DeEvent::Text`] is deserialized as `None`; /// - everything else are deserialized as `Some` using the same deserializer, /// including `` or ``; /// - units (`()`) and unit structs consumes the whole text or element subtree; /// - newtype structs are deserialized by forwarding deserialization of inner type /// with the same deserializer; /// - sequences, tuples and tuple structs are deserialized by iterating within the /// parent tag and deserializing each tag or text content using [`ElementDeserializer`]; /// - structs and maps are deserialized using new instance of [`ElementMapAccess`]; /// - enums: /// - in case of [`DeEvent::Text`] event the text content is deserialized as /// a `$text` variant. Enum content is deserialized from the text using /// [`SimpleTypeDeserializer`]; /// - in case of [`DeEvent::Start`] event the tag name is deserialized as /// an enum tag, and the content inside are deserialized as an enum content. /// Depending on a variant kind deserialization is performed as: /// - unit variants: consuming text content or a subtree; /// - newtype variants: forward deserialization to the inner type using /// this deserializer; /// - tuple variants: call [`deserialize_tuple`] of this deserializer; /// - struct variants: call [`deserialize_struct`] of this deserializer. /// /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct struct MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this deserializer. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Whether this deserializer was created for deserialization from an element /// with fixed name, or the elements with different names or even text are allowed. 
/// /// If this field is `true`, we process `` element in the following XML shape: /// /// ```xml /// /// ... /// /// ``` /// /// The whole map represented by an `` element, the map key is a `tag`, /// and the value starts with is a `Start("tag")` (the value deserializer will /// see that event first) and extended to the matching `End("tag")` event. /// In order to deserialize primitives (such as `usize`) we need to allow to /// look inside the one levels of tags, so the /// /// ```xml /// 42 /// ``` /// /// could be deserialized into `42usize` without problems, and at the same time /// /// ```xml /// /// /// /// /// /// ``` /// could be deserialized to a struct. /// /// If this field is `false`, we processes the one of following XML shapes: /// /// ```xml /// /// text value /// /// ``` /// ```xml /// /// /// /// ``` /// ```xml /// /// ... /// /// ``` /// /// The whole map represented by an `` element, the map key is /// implicit and equals to the [`VALUE_KEY`] constant, and the value is /// a [`Text`], or a [`Start`] event (the value deserializer will see one of /// those events). In the first two cases the value of this field do not matter /// (because we already see the textual event and there no reasons to look /// "inside" something), but in the last case the primitives should raise /// a deserialization error, because that means that you trying to deserialize /// the following struct: /// /// ```ignore /// struct AnyName { /// #[serde(rename = "$value")] /// any_name: String, /// } /// ``` /// which means that `any_name` should get a content of the `` element. /// /// Changing this can be valuable for , /// but those fields should be explicitly marked that they want to get any /// possible markup as a `String` and that mark is different from marking them /// as accepting "text content" which the currently `$text` means. 
/// /// [`Text`]: DeEvent::Text /// [`Start`]: DeEvent::Start fixed_name: bool, } impl<'de, 'd, 'm, R, E> MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(&mut self) -> Result, DeError> { // TODO: Read the whole content to fix https://github.com/tafia/quick-xml/issues/483 self.map.de.read_string_impl(self.fixed_name) } } impl<'de, 'd, 'm, R, E> de::Deserializer<'de> for MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; deserialize_primitives!(mut); #[inline] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { self.map.de.deserialize_unit(visitor) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { // We cannot use result of `peek()` directly because of borrow checker let _ = self.map.de.peek()?; match self.map.de.last_peeked() { DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), DeEvent::Start(start) if self.map.should_skip_subtree(start) => { self.map.de.skip_next_tree()?; visitor.visit_none() } _ => visitor.visit_some(self), } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with the same deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// Deserializes each `` in /// ```xml /// /// ... /// ... /// ... /// /// ``` /// as a sequence item, where `` represents a Map in a [`Self::map`], /// and a `` is a sequential field of that map. fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { let filter = if self.fixed_name { match self.map.de.peek()? 
{ // Clone is cheap if event borrows from the input DeEvent::Start(e) => TagFilter::Include(e.clone()), // SAFETY: we use that deserializer with `fixed_name == true` // only from the `ElementMapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } } else { TagFilter::Exclude(self.map.fields, self.map.has_text_field) }; visitor.visit_seq(MapValueSeqAccess { #[cfg(feature = "overlapped-lists")] checkpoint: self.map.de.skip_checkpoint(), map: self.map, filter, }) } #[inline] fn deserialize_struct( self, name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.map.de.deserialize_struct(name, fields, visitor) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { if self.fixed_name { match self.map.de.next()? { // Handles UnitEnumVariant DeEvent::Start(e) => { // skip , read text after it and ensure that it is ended by let text = self.map.de.read_text(e.name())?; if text.is_empty() { // Map empty text () to a special `$text` variant visitor.visit_enum(SimpleTypeDeserializer::from_text(TEXT_KEY.into())) } else { visitor.visit_enum(SimpleTypeDeserializer::from_text(text)) } } // SAFETY: we use that deserializer with `fixed_name == true` // only from the `MapAccess::next_value_seed` and only when we // peeked `Start` event _ => unreachable!(), } } else { visitor.visit_enum(self) } } fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { match self.map.de.peek()? 
{ DeEvent::Text(_) => self.deserialize_str(visitor), _ => self.deserialize_map(visitor), } } } impl<'de, 'd, 'm, R, E> de::EnumAccess<'de> for MapValueDeserializer<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = MapValueVariantAccess<'de, 'd, 'm, R, E>; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let (name, is_text) = match self.map.de.peek()? { DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false), DeEvent::Text(_) => ( seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?, true, ), // SAFETY: we use that deserializer only when we peeked `Start` or `Text` event _ => unreachable!(), }; Ok(( name, MapValueVariantAccess { map: self.map, is_text, }, )) } } struct MapValueVariantAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Access to the map that created this enum accessor. Gives access to the /// context, such as list of fields, that current map known about. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// `true` if variant should be deserialized from a textual content /// and `false` if from tag is_text: bool, } impl<'de, 'd, 'm, R, E> de::VariantAccess<'de> for MapValueVariantAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { match self.map.de.next()? { // Consume subtree DeEvent::Start(e) => self.map.de.read_to_end(e.name()), // Does not needed to deserialize using SimpleTypeDeserializer, because // it returns `()` when `deserialize_unit()` is requested DeEvent::Text(_) => Ok(()), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { if self.is_text { match self.map.de.next()? 
{ DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { seed.deserialize(MapValueDeserializer { map: self.map, // Because element name already was either mapped to a field name, // or to a variant name, we should not treat it as variable fixed_name: true, }) } } fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { if self.is_text { match self.map.de.next()? { DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { MapValueDeserializer { map: self.map, // Because element name already was either mapped to a field name, // or to a variant name, we should not treat it as variable fixed_name: true, } .deserialize_tuple(len, visitor) } } fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { match self.map.de.next()? { DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.map.de, e, fields)), DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Check if tag `start` is included in the `fields` list. `decoder` is used to /// get a string representation of a tag. /// /// Returns `true`, if `start` is not in the `fields` list and `false` otherwise. 
fn not_in(fields: &'static [&'static str], start: &BytesStart) -> Result { let tag = start.decoder().decode(start.local_name().into_inner())?; Ok(fields.iter().all(|&field| field != tag.as_ref())) } /// A filter that determines, what tags should form a sequence. /// /// There are two types of sequences: /// - sequence where each element represented by tags with the same name /// - sequence where each element can have a different tag /// /// The first variant could represent a collection of structs, the second -- /// a collection of enum variants. /// /// In the second case we don't know what tag name should be expected as a /// sequence element, so we accept any element. Since the sequence are flattened /// into maps, we skip elements which have dedicated fields in a struct by using an /// `Exclude` filter that filters out elements with names matching field names /// from the struct. /// /// # Lifetimes /// /// `'de` represents a lifetime of the XML input, when filter stores the /// dedicated tag name #[derive(Debug)] enum TagFilter<'de> { /// A `SeqAccess` interested only in tags with specified name to deserialize /// an XML like this: /// /// ```xml /// <...> /// /// /// /// ... /// /// ``` /// /// The tag name is stored inside (`b"tag"` for that example) Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag /// A `SeqAccess` interested in tags with any name, except explicitly listed. /// Excluded tags are used as struct field names and therefore should not /// fall into a `$value` category. /// /// The `bool` represents the having of a `$text` special field in fields array. /// It is used to exclude text events when `$text` fields is defined together with /// `$value` fieldб and `$value` accepts sequence. 
Exclude(&'static [&'static str], bool), } impl<'de> TagFilter<'de> { fn is_suitable(&self, start: &BytesStart) -> Result { match self { Self::Include(n) => Ok(n.name() == start.name()), Self::Exclude(fields, _) => not_in(fields, start), } } const fn need_skip_text(&self) -> bool { match self { // If we look only for tags, we should skip any $text keys Self::Include(_) => true, // If we look fo any data, we should exclude $text keys if it in the list Self::Exclude(_, has_text_field) => *has_text_field, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// An accessor to sequence elements forming a value for struct field. /// Technically, this sequence is flattened out into structure and sequence /// elements are overlapped with other fields of a structure. Each call to /// [`Self::next_element_seed`] consumes a next sub-tree or consequent list /// of [`Text`] and [`CData`] events. /// /// ```xml /// <> /// ... /// The is the one item /// This is item it splitted by comments /// ...and that is the third! /// ... /// /// ``` /// /// Depending on [`Self::filter`], only some of that possible constructs would be /// an element. /// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData struct MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Accessor to a map that creates this accessor and to a deserializer for /// a sequence items. map: &'m mut ElementMapAccess<'de, 'd, R, E>, /// Filter that determines whether a tag is a part of this sequence. /// /// When feature [`overlapped-lists`] is not activated, iteration will stop /// when found a tag that does not pass this filter. /// /// When feature [`overlapped-lists`] is activated, all tags, that not pass /// this check, will be skipped. /// /// [`overlapped-lists`]: ../../index.html#overlapped-lists filter: TagFilter<'de>, /// Checkpoint after which all skipped events should be returned. 
All events, /// that was skipped before creating this checkpoint, will still stay buffered /// and will not be returned #[cfg(feature = "overlapped-lists")] checkpoint: usize, } #[cfg(feature = "overlapped-lists")] impl<'de, 'd, 'm, R, E> Drop for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { fn drop(&mut self) { self.map.de.start_replay(self.checkpoint); } } impl<'de, 'd, 'm, R, E> SeqAccess<'de> for MapValueSeqAccess<'de, 'd, 'm, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { loop { self.map.skip_whitespaces()?; break match self.map.de.peek()? { // If we see a tag that we not interested, skip it #[cfg(feature = "overlapped-lists")] DeEvent::Start(e) if !self.filter.is_suitable(e)? => { self.map.de.skip()?; continue; } // Skip any text events if sequence expects only specific tag names #[cfg(feature = "overlapped-lists")] DeEvent::Text(_) if self.filter.need_skip_text() => { self.map.de.skip()?; continue; } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] DeEvent::Start(e) if !self.filter.is_suitable(e)? => Ok(None), #[cfg(not(feature = "overlapped-lists"))] DeEvent::Text(_) if self.filter.need_skip_text() => Ok(None), // Stop iteration after reaching a closing tag // The matching tag name is guaranteed by the reader DeEvent::End(e) => { debug_assert_eq!(self.map.start.name(), e.name()); Ok(None) } // We cannot get `Eof` legally, because we always inside of the // opened tag `self.map.start` DeEvent::Eof => { Err(Error::missed_end(self.map.start.name(), self.map.start.decoder()).into()) } DeEvent::Text(_) => match self.map.de.next()? { DeEvent::Text(e) => seed.deserialize(TextDeserializer(e)).map(Some), // SAFETY: we just checked that the next event is Text _ => unreachable!(), }, DeEvent::Start(_) => match self.map.de.next()? 
{ DeEvent::Start(start) => seed .deserialize(ElementDeserializer { start, de: self.map.de, }) .map(Some), // SAFETY: we just checked that the next event is Start _ => unreachable!(), }, }; } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for a single tag item of a mixed sequence of tags and text. /// /// This deserializer are very similar to a [`MapValueDeserializer`] (when it /// processes the [`DeEvent::Start`] event). The only difference in the /// [`deserialize_seq`] method. This deserializer will perform deserialization /// from the textual content between start and end events, whereas the /// [`MapValueDeserializer`] will iterate over tags / text within it's parent tag. /// /// This deserializer processes items as following: /// - numbers are parsed from a text content between tags using [`FromStr`]. So, /// `123` can be deserialized into an `u32`; /// - booleans converted from a text content between tags according to the XML /// [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; /// - strings returned as a text content between tags; /// - characters also returned as strings. 
If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option` are always deserialized as `Some` using the same deserializer, /// including `` or ``; /// - units (`()`) and unit structs consumes the whole element subtree; /// - newtype structs forwards deserialization to the inner type using /// [`SimpleTypeDeserializer`]; /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] /// (this is the difference): text content between tags is passed to /// [`SimpleTypeDeserializer`]; /// - structs and maps are deserialized using new instance of [`ElementMapAccess`]; /// - enums: /// - the variant name is deserialized using [`QNameDeserializer`] from the element name; /// - the content is deserialized using the same deserializer: /// - unit variants: consuming a subtree and return `()`; /// - newtype variants forwards deserialization to the inner type using /// this deserializer; /// - tuple variants: call [`deserialize_tuple`] of this deserializer; /// - struct variants: call [`deserialize_struct`] of this deserializer. /// /// [`deserialize_seq`]: #method.deserialize_seq /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean /// [`deserialize_tuple`]: #method.deserialize_tuple /// [`deserialize_struct`]: #method.deserialize_struct struct ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { start: BytesStart<'de>, de: &'d mut Deserializer<'de, R, E>, } impl<'de, 'd, R, E> ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. 
/// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(&mut self) -> Result, DeError> { self.de.read_text(self.start.name()) } } impl<'de, 'd, R, E> de::Deserializer<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; deserialize_primitives!(mut); fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { // Consume subtree self.de.read_to_end(self.start.name())?; visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_some(self) } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with this deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// /// ```xml /// <> /// ... /// inner sequence /// inner sequence /// inner sequence /// ... 
/// /// ``` fn deserialize_seq(mut self, visitor: V) -> Result where V: Visitor<'de>, { let text = self.read_string()?; SimpleTypeDeserializer::from_text(text).deserialize_seq(visitor) } fn deserialize_struct( self, _name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_map(ElementMapAccess::new(self.de, self.start, fields)) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_map(visitor) } } impl<'de, 'd, R, E> de::EnumAccess<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = Self; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(QNameDeserializer::from_elem(&self.start)?)?; Ok((name, self)) } } impl<'de, 'd, R, E> de::VariantAccess<'de> for ElementDeserializer<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { // Consume subtree self.de.read_to_end(self.start.name()) } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(self) } #[inline] fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } #[inline] fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_struct("", fields, visitor) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[test] fn test_not_in() { use pretty_assertions::assert_eq; let tag = BytesStart::new("tag"); assert_eq!(not_in(&[], &tag).unwrap(), true); assert_eq!(not_in(&["no", "such", "tags"], 
&tag).unwrap(), true); assert_eq!(not_in(&["some", "tag", "included"], &tag).unwrap(), false); let tag_ns = BytesStart::new("ns1:tag"); assert_eq!(not_in(&["no", "such", "tags"], &tag_ns).unwrap(), true); assert_eq!( not_in(&["some", "tag", "included"], &tag_ns).unwrap(), false ); assert_eq!( not_in(&["some", "namespace", "ns1:tag"], &tag_ns).unwrap(), true ); } quick-xml-0.38.4/src/de/mod.rs000064400000000000000000005500471046102023000141510ustar 00000000000000//! Serde `Deserializer` module. //! //! Due to the complexity of the XML standard and the fact that Serde was developed //! with JSON in mind, not all Serde concepts apply smoothly to XML. This leads to //! that fact that some XML concepts are inexpressible in terms of Serde derives //! and may require manual deserialization. //! //! The most notable restriction is the ability to distinguish between _elements_ //! and _attributes_, as no other format used by serde has such a conception. //! //! Due to that the mapping is performed in a best effort manner. //! //! //! //! Table of Contents //! ================= //! - [Mapping XML to Rust types](#mapping-xml-to-rust-types) //! - [Basics](#basics) //! - [Optional attributes and elements](#optional-attributes-and-elements) //! - [Choices (`xs:choice` XML Schema type)](#choices-xschoice-xml-schema-type) //! - [Sequences (`xs:all` and `xs:sequence` XML Schema types)](#sequences-xsall-and-xssequence-xml-schema-types) //! - [Mapping of `xsi:nil`](#mapping-of-xsinil) //! - [Generate Rust types from XML](#generate-rust-types-from-xml) //! - [Composition Rules](#composition-rules) //! - [Enum Representations](#enum-representations) //! - [Normal enum variant](#normal-enum-variant) //! - [`$text` enum variant](#text-enum-variant) //! - [`$text` and `$value` special names](#text-and-value-special-names) //! - [`$text`](#text) //! - [`$value`](#value) //! - [Primitives and sequences of primitives](#primitives-and-sequences-of-primitives) //! 
- [Structs and sequences of structs](#structs-and-sequences-of-structs) //! - [Enums and sequences of enums](#enums-and-sequences-of-enums) //! - [Frequently Used Patterns](#frequently-used-patterns) //! - [`` lists](#element-lists) //! - [Overlapped (Out-of-Order) Elements](#overlapped-out-of-order-elements) //! - [Internally Tagged Enums](#internally-tagged-enums) //! //! //! //! Mapping XML to Rust types //! ========================= //! //! Type names are never considered when deserializing, so you can name your //! types as you wish. Other general rules: //! - `struct` field name could be represented in XML only as an attribute name //! or an element name; //! - `enum` variant name could be represented in XML only as an attribute name //! or an element name; //! - the unit struct, unit type `()` and unit enum variant can be deserialized //! from any valid XML content: //! - attribute and element names; //! - attribute and element values; //! - text or CDATA content (including mixed text and CDATA content). //! //!
//! //! NOTE: All tests are marked with an `ignore` option, even though they do //! compile. This is because rustdoc marks such blocks with an information //! icon unlike `no_run` blocks. //! //!
//! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //! //!
//! //! ## Basics //! //!
To parse all these XML's......use these Rust type(s)
//! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content): //! //! ```xml //! <... ...="content" /> //! ``` //! ```xml //! <...>content //! ``` //! ```xml //! <...> //! ``` //! ```xml //! <...>texttext //! ``` //! Mixed text / CDATA content represents one logical string, `"textcdatatext"` in that case. //! //! //! You can use any type that can be deserialized from an `&str`, for example: //! - [`String`] and [`&str`] //! - [`Cow`] //! - [`u32`], [`f32`] and other numeric types //! - `enum`s, like //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! enum Language { //! Rust, //! Cpp, //! #[serde(other)] //! Other, //! } //! # #[derive(Debug, PartialEq, Deserialize)] //! # struct X { #[serde(rename = "$text")] x: Language } //! # assert_eq!(X { x: Language::Rust }, quick_xml::de::from_str("Rust").unwrap()); //! # assert_eq!(X { x: Language::Cpp }, quick_xml::de::from_str("Cp").unwrap()); //! # assert_eq!(X { x: Language::Other }, quick_xml::de::from_str("").unwrap()); //! ``` //! //!
//! //! NOTE: deserialization to non-owned types (i.e. types that borrow from the input), //! such as `&str`, is possible only if you parse a document in the UTF-8 //! encoding, the content does not contain entity references such as `&amp;` //! or character references such as `&#xD;`, and the text content is represented //! by a single piece of [text] or [CDATA] element. //!
//! //! //! [text]: Event::Text //! [CDATA]: Event::CData //!
//! //! Content of attributes and text / CDATA content of elements (including mixed //! text and CDATA content), which represents a space-delimited lists, as //! specified in the XML Schema specification for [`xs:list`] `simpleType`: //! //! ```xml //! <... ...="element1 element2 ..." /> //! ``` //! ```xml //! <...> //! element1 //! element2 //! ... //! //! ``` //! ```xml //! <...> //! ``` //! //! [`xs:list`]: https://www.w3.org/TR/xmlschema11-2/#list-datatypes //! //! //! Use any type that deserialized using [`deserialize_seq()`] call, for example: //! //! ``` //! type List = Vec; //! ``` //! //! See the next row to learn where in your struct definition you should //! use that type. //! //! According to the XML Schema specification, delimiters for elements is one //! or more space (`' '`, `'\r'`, `'\n'`, and `'\t'`) character(s). //! //!
//! //! NOTE: according to the XML Schema restrictions, you cannot escape those //! white-space characters, so list elements will _never_ contain them. //! In practice you will usually use `xs:list`s for lists of numbers or enumerated //! values which look like identifiers in many languages, for example, `item`, //! `some_item` or `some-item`, so that shouldn't be a problem. //! //! NOTE: according to the XML Schema specification, list elements can be //! delimited only by white-space characters. Other delimiters (for example, commas) //! are not allowed. //! //!
//! //! [`deserialize_seq()`]: de::Deserializer::deserialize_seq //!
//! A typical XML with attributes. The root tag name does not matter: //! //! ```xml //! //! ``` //! //! //! A structure where each XML attribute is mapped to a field with a name //! starting with `@`. Because Rust identifiers do not permit the `@` character, //! you should use the `#[serde(rename = "@...")]` attribute to rename it. //! The name of the struct itself does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both attributes //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! //! #[serde(rename = "@two")] //! two: U, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one attribute, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@one")] //! one: T, //! } //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all attributes //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with elements extractor structs (see below). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the same element. quick-xml deals with that by prepending an `@` prefix //! to the name of attributes. //!
//!
//! A typical XML with child elements. The root tag name does not matter: //! //! ```xml //! //! ... //! ... //! //! ``` //! //! A structure where each XML child element is mapped to the field. //! Each element name becomes a name of field. The name of the struct itself //! does not matter: //! //! ``` //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! // Get both elements //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! two: U, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # //! # quick_xml::de::from_str::(r#""#).unwrap_err(); //! # quick_xml::de::from_str::(r#"..."#).unwrap_err(); //! ``` //! ``` //! # use serde::Deserialize; //! # type T = (); //! // Get only the one element, ignore the other //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! one: T, //! } //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! ``` //! # use serde::Deserialize; //! // Ignore all elements //! // You can also use the `()` type (unit type) //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName; //! # quick_xml::de::from_str::(r#""#).unwrap(); //! # quick_xml::de::from_str::(r#"......"#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! # quick_xml::de::from_str::(r#"..."#).unwrap(); //! ``` //! //! All these structs can be used to deserialize from an XML on the //! left side depending on amount of information that you want to get. //! Of course, you can combine them with attributes extractor structs (see above). //! //!
//! //! NOTE: XML allows you to have an attribute and an element with the same name //! inside the same element. quick-xml deals with that by prepending an `@` prefix //! to the name of attributes. //!
//!
//! An XML with an attribute and a child element named equally: //! //! ```xml //! //! ... //! //! ``` //! //! //! You MUST specify `#[serde(rename = "@field")]` on a field that will be used //! for an attribute: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! attribute: T, //! field: U, //! } //! # assert_eq!( //! # AnyName { attribute: (), field: () }, //! # quick_xml::de::from_str(r#" //! # //! # ... //! # //! # "#).unwrap(), //! # ); //! ``` //!
//! //! ## Optional attributes and elements //! //!
To parse all these XML's......use these Rust type(s)
//! An optional XML attribute that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field, renamed according to the requirements //! for attributes: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@optional")] //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#""#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML attribute is present, type `T` will be deserialized from //! an attribute value (which is a string). Note that if `T = String` or another //! string type, the empty attribute is mapped to `Some("")`, whereas `None` //! represents the missing attribute: //! ```xml //! //! //! //! ``` //!
//! //! NOTE: The behaviour is not symmetric by default. `None` will be serialized as //! `optional=""`. This behaviour is consistent across serde crates. You should add //! `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to //! skip `None`s. //!
//!
//! An optional XML element that you want to capture. //! The root tag name does not matter: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! ``` //! //! //! A structure with an optional field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! optional: Option, //! } //! # assert_eq!(AnyName { optional: Some(()) }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName { optional: None }, quick_xml::de::from_str(r#""#).unwrap()); //! ``` //! When the XML element is present, type `T` will be deserialized from an //! element (which is a string or a multi-mapping -- i.e. a mapping which can have //! duplicate keys). //!
//! //! NOTE: The behaviour is not symmetric by default. `None` will be serialized as //! `<optional/>`. This behaviour is consistent across serde crates. You should add //! the `#[serde(skip_serializing_if = "Option::is_none")]` attribute to the field to //! skip `None`s. //! //! NOTE: The deserializer will automatically handle an [`xsi:nil`] attribute and set the field to `None`. //! For more info see [Mapping of `xsi:nil`](#mapping-of-xsinil). //!
//!
//! //! ## Choices (`xs:choice` XML Schema type) //! //!
To parse all these XML's......use these Rust type(s)
//! An XML with different root tag names, as well as text / CDATA content: //! //! ```xml //! ... //! ``` //! ```xml //! //! ... //! //! ``` //! ```xml //! Text content //! ``` //! //! //! An enum where each variant has the name of a possible root tag. The name of //! the enum itself does not matter. //! //! If you need to get the textual content, mark a variant with `#[serde(rename = "$text")]`. //! //! All these structs can be used to deserialize from any XML on the //! left side depending on amount of information that you want to get: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # type U = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One { #[serde(rename = "@field1")] field1: T }, //! Two { field2: U }, //! //! /// Use unit variant, if you do not care of a content. //! /// You can use tuple variant if you want to parse //! /// textual content as an xs:list. //! /// Struct variants are will pass a string to the //! /// struct enum variant visitor, which typically //! /// returns Err(Custom) //! #[serde(rename = "$text")] //! Text(String), //! } //! # assert_eq!(AnyName::One { field1: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Two { field2: () }, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Text("text cdata ".into()), quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Two { //! field2: T, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! // `field1` content discarded //! One, //! Two(Two), //! #[serde(rename = "$text")] //! Text, //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! 
# assert_eq!(AnyName::Two(Two { field2: () }), quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Text, quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum AnyName { //! One, //! // the and textual content will be mapped to this //! #[serde(other)] //! Other, //! } //! # assert_eq!(AnyName::One, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"..."#).unwrap()); //! # assert_eq!(AnyName::Other, quick_xml::de::from_str(r#"text "#).unwrap()); //! ``` //!
//! //! NOTE: You should have variants for all possible tag names in your enum //! or have an `#[serde(other)]` variant. //! //!
//!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other attributes that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! //! ``` //! ```xml //! //! ... //! //! ``` //! ```xml //! //! Text content //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! If you need to get a textual content, mark a variant with `#[serde(rename = "$text")]`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! //! /// Use unit variant, if you do not care of a content. //! /// You can use tuple variant if you want to parse //! /// textual content as an xs:list. //! /// Struct variants are will pass a string to the //! /// struct enum variant visitor, which typically //! /// returns Err(Custom) //! #[serde(rename = "$text")] //! Text(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Text("text cdata ".into()) }, //! # quick_xml::de::from_str(r#"text "#).unwrap(), //! # ); //! ``` //!
//! //! `` embedded in the other element, and at the same time you want //! to get access to other elements that can appear in the same container //! (``). Also this case can be described, as if you want to choose //! Rust enum variant based on a tag name: //! //! ```xml //! //! ... //! ... //! //! ``` //! ```xml //! //! ... //! ... //! //! ``` //! //! //! A structure with a field which type is an `enum`. //! //! Names of the enum, struct, and struct field with `Choice` type does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::One }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), any_name: Choice::Two }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //! //!
//! //! NOTE: if your `Choice` enum contains an `#[serde(other)]` //! variant, element `<field>` will be mapped to the `field` and not to the enum //! variant. //!
//! //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@field")] //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"..."#).unwrap(), //! # ); //! ``` //!
//! //! `` encapsulated in other element with a fixed name: //! //! ```xml //! //! ... //! //! ... //! //! //! ``` //! ```xml //! //! //! ... //! //! ... //! //! ``` //! //! //! A structure with a field of an intermediate type with one field of `enum` type. //! Actually, this example is not necessary, because you can construct it by yourself //! using the composition rules that were described above. However the XML construction //! described here is very common, so it is shown explicitly. //! //! Names of the enum and struct does not matter: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type T = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct Holder { //! #[serde(rename = "$value")] //! any_name: Choice, //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! field: T, //! //! choice: Holder, //! } //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::One } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { field: (), choice: Holder { any_name: Choice::Two } }, //! # quick_xml::de::from_str(r#"......"#).unwrap(), //! # ); //! ``` //!
//! //! ## Sequences (`xs:all` and `xs:sequence` XML Schema types) //! //!
To parse all these XML's......use these Rust type(s)
//! A sequence inside of a tag without a dedicated name: //! //! ```xml //! //! ``` //! ```xml //! //! //! //! ``` //! ```xml //! //! //! //! //! //! ``` //! //! //! A structure with a field which is a sequence type, for example, [`Vec`]. //! Because XML syntax does not distinguish between empty sequences and missed //! elements, we should indicate that on the Rust side, because serde will require //! that field `item` exists. You can do that in two possible ways: //! //! Use the `#[serde(default)]` attribute for a [field] or the entire [struct]: //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(default)] //! item: Vec, //! } //! # assert_eq!( //! # AnyName { item: vec![] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: vec![(), (), ()] }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //! //! Use the [`Option`]. In that case inner array will always contains at least one //! element after deserialization: //! ```ignore //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type Item = (); //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! item: Option>, //! } //! # assert_eq!( //! # AnyName { item: None }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! # assert_eq!( //! # AnyName { item: Some(vec![(), (), ()]) }, //! # quick_xml::de::from_str(r#""#).unwrap(), //! # ); //! ``` //! //! See also [Frequently Used Patterns](#element-lists). //! //! [field]: https://serde.rs/field-attrs.html#default //! [struct]: https://serde.rs/container-attrs.html#default //!
//! A sequence with a strict order, probably with mixed content //! (text / CDATA and tags): //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! //! All elements mapped to the heterogeneous sequential type: tuple or named tuple. //! Each element of the tuple should be able to be deserialized from the nested //! element content (`...`), except the enum types which would be deserialized //! from the full element (`...`), so they could use the element name //! to choose the right variant: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName(One, String, Two, One); //! # assert_eq!( //! # AnyName((), "text cdata".into(), (), ()), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! } //! # type Two = (); //! # /* //! type Two = ...; //! # */ //! type AnyName = (Choice, String, Two, Choice); //! # assert_eq!( //! # (Choice::One, "text cdata".to_string(), (), Choice::One), //! # quick_xml::de::from_str(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into a single text node, //! so you cannot have two adjacent string types in your sequence. //! //! NOTE: If the list might contain tags that overlap with //! tags that do not belong to the list, you should enable the [`overlapped-lists`] feature. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags). //! //! ```xml //! ... //! text //! //! ... //! ... //! ``` //!
//! //! NOTE: this is just an example for showing mapping. XML does not allow //! multiple root tags -- you should wrap the sequence into a tag. //!
//!
//! A homogeneous sequence of elements with a fixed or dynamic size: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(other)] //! Other, //! } //! type AnyName = [Choice; 4]; //! # assert_eq!( //! # [Choice::One, Choice::Other, Choice::Two, Choice::One], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! type AnyName = Vec; //! # assert_eq!( //! # vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ], //! # quick_xml::de::from_str::(r#"...text ......"#).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into a single text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! A sequence with a strict order, probably with a mixed content, //! (text and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a heterogeneous sequential type: tuple or named tuple. Each element of the //! tuple should be able to be deserialized from the full element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: (One, String, Two, One), //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: ((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # type One = (); //! # type Two = (); //! # /* //! type One = ...; //! type Two = ...; //! # */ //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct NamedTuple(One, String, Two, One); //! //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: NamedTuple, //! } //! # assert_eq!( //! 
# AnyName { attribute: (), any_name: NamedTuple((), "text cdata".into(), (), ()) }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into a single text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! A sequence with a non-strict order, probably with a mixed content //! (text / CDATA and tags) inside of the other element: //! //! ```xml //! //! ... //! text //! //! ... //! ... //! //! ``` //! //! //! A structure where all child elements mapped to the one field which have //! a homogeneous sequential type: array-like container. A container type `T` //! should be able to be deserialized from the nested element content (`...`), //! except if it is an enum type which would be deserialized from the full //! element (`...`). //! //! You MUST specify `#[serde(rename = "$value")]` on that field: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! # */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: [Choice; 4], //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: [ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! #[serde(rename_all = "snake_case")] //! enum Choice { //! One, //! Two, //! #[serde(rename = "$text")] //! Other(String), //! } //! # #[derive(Debug, PartialEq)] //! #[derive(Deserialize)] //! struct AnyName { //! #[serde(rename = "@attribute")] //! # attribute: (), //! # /* //! attribute: ..., //! 
# */ //! // Does not (yet?) supported by the serde //! // https://github.com/serde-rs/serde/issues/1905 //! // #[serde(flatten)] //! #[serde(rename = "$value")] //! any_name: Vec, //! } //! # assert_eq!( //! # AnyName { attribute: (), any_name: vec![ //! # Choice::One, //! # Choice::Other("text cdata".into()), //! # Choice::Two, //! # Choice::One, //! # ] }, //! # quick_xml::de::from_str("\ //! # \ //! # ...\ //! # text \ //! # \ //! # ...\ //! # ...\ //! # " //! # ).unwrap(), //! # ); //! ``` //!
//! //! NOTE: consecutive text and CDATA nodes are merged into a single text node, //! so you cannot have two adjacent string types in your sequence. //!
//!
//! //! //! Mapping of `xsi:nil` //! ==================== //! //! quick-xml supports handling of [`xsi:nil`] special attribute. When field of optional //! type is mapped to the XML element which have `xsi:nil="true"` set, or if that attribute //! is placed on parent XML element, the deserializer will call [`Visitor::visit_none`] //! and skip XML element corresponding to a field. //! //! Examples: //! //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! #[derive(Deserialize, Debug, PartialEq)] //! struct TypeWithOptionalField { //! element: Option, //! } //! //! assert_eq!( //! TypeWithOptionalField { //! element: None, //! }, //! quick_xml::de::from_str(" //! //! Content is skiped because of xsi:nil='true' //! //! ").unwrap(), //! ); //! ``` //! //! You can capture attributes from the optional type, because ` xsi:nil="true"` elements can have //! attributes: //! ``` //! # use pretty_assertions::assert_eq; //! # use serde::Deserialize; //! #[derive(Deserialize, Debug, PartialEq)] //! struct TypeWithOptionalField { //! #[serde(rename = "@attribute")] //! attribute: usize, //! //! element: Option, //! non_optional: String, //! } //! //! assert_eq!( //! TypeWithOptionalField { //! attribute: 42, //! element: None, //! non_optional: "Note, that non-optional fields will be deserialized as usual".to_string(), //! }, //! quick_xml::de::from_str(" //! //! Content is skiped because of xsi:nil='true' //! Note, that non-optional fields will be deserialized as usual //! //! ").unwrap(), //! ); //! ``` //! //! Generate Rust types from XML //! ============================ //! //! To speed up the creation of Rust types that represent a given XML file you can //! use the [xml_schema_generator](https://github.com/Thomblin/xml_schema_generator). //! It provides a standalone binary and a Rust library that parses one or more XML files //! and generates a collection of structs that are compatible with quick_xml::de. //! //! //! //! Composition Rules //! 
================= //! //! The XML format is very different from other formats supported by `serde`. //! One such difference is how data in the serialized form is related to //! the Rust type. Usually each byte in the data can be associated only with //! one field in the data structure. However, XML is an exception. //! //! For example, take this XML: //! //! ```xml //! //! //! //! ``` //! //! and try to deserialize it to the struct `AnyName`: //! //! ```no_run //! # use serde::Deserialize; //! #[derive(Deserialize)] //! struct AnyName { // AnyName calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^^^^^^^^ //! key: Inner, // Inner calls `deserialize_struct` on `` //! // Used data: ^^^^^^^^^^^^ //! } //! #[derive(Deserialize)] //! struct Inner { //! #[serde(rename = "@attr")] //! attr: String, // String calls `deserialize_string` on `value` //! // Used data: ^^^^^ //! } //! ``` //! //! The comments show which methods of a [`Deserializer`] are called by each struct's //! `deserialize` method and which input they see. **Used data** shows what //! content is actually used for deserializing. As you can see, the name of the inner //! `<key>` tag is used both as a map key / outer struct field name and as part //! of the inner struct (although the _value_ of the tag, i.e. `key`, is not used //! by it). //! //! //! //! Enum Representations //! ==================== //! //! `quick-xml` represents enums differently in normal fields, `$text` fields and //! `$value` fields. A normal representation is compatible with serde's adjacent //! and internal tags feature -- tags for adjacently and internally tagged enums //! are serialized using [`Serializer::serialize_unit_variant`] and deserialized //! using [`Deserializer::deserialize_enum`]. //! //! Use these simple rules to remember how an enum is represented in XML: //! - In `$value` field the representation is always the same as top-level representation; //! - In `$text` field the representation is always the same as in normal field, //!
but surrounding tags with field name are removed; //! - In normal field the representation always contains a tag with a field name. //! //! Normal enum variant //! ------------------- //! //! To model an `xs:choice` XML construct use a `$value` field. //! To model a top-level `xs:choice` just use the enum type. //! //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | //! |-------|-----------------------------------------|---------------------|---------------------| //! |Unit |`` |`Unit`|`Unit` | //! |Newtype|`42` |Err(Custom) [^0] |Err(Custom) [^0] | //! |Tuple |`42answer` |Err(Custom) [^0] |Err(Custom) [^0] | //! |Struct |`42
answer`|Err(Custom) [^0] |Err(Custom) [^0] | //! //! `$text` enum variant //! -------------------- //! //! |Kind |Top-level and in `$value` field |In normal field |In `$text` field | //! |-------|-----------------------------------------|---------------------|---------------------| //! |Unit |_(empty)_ |`` |_(empty)_ | //! |Newtype|`42` |Err(Custom) [^0] [^1]|Err(Custom) [^0] [^2]| //! |Tuple |`42 answer` |Err(Custom) [^0] [^3]|Err(Custom) [^0] [^4]| //! |Struct |Err(Custom) [^0] |Err(Custom) [^0] |Err(Custom) [^0] | //! //! [^0]: Error is returned by the deserialized type. In case of derived implementation a `Custom` //! error will be returned, but custom deserialize implementation can successfully deserialize //! value from a string which will be passed to it. //! //! [^1]: If this serialize as `42` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in normal field. //! //! [^2]: If this serialize as `42` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in `$text` field. //! //! [^3]: If this serialize as `42 answer` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in normal field. //! //! [^4]: If this serialize as `42 answer` then it will be ambiguity during deserialization, //! because it clash with `Unit` representation in `$text` field. //! //! //! //! `$text` and `$value` special names //! ================================== //! //! quick-xml supports two special names for fields -- `$text` and `$value`. //! Although they may seem the same, there is a distinction. Two different //! names is required mostly for serialization, because quick-xml should know //! how you want to serialize certain constructs, which could be represented //! through XML in multiple different ways. //! //! The only difference is in how complex types and sequences are serialized. //! 
If you are in doubt which one you should select, begin with [`$value`](#value). //! //! If you have both `$text` and `$value` in your struct, then text events will be //! mapped to the `$text` field: //! //! ``` //! # use serde::Deserialize; //! # use quick_xml::de::from_str; //! #[derive(Deserialize, PartialEq, Debug)] //! struct TextAndValue { //! #[serde(rename = "$text")] //! text: Option, //! //! #[serde(rename = "$value")] //! value: Option, //! } //! //! let object: TextAndValue = from_str("text ").unwrap(); //! assert_eq!(object, TextAndValue { //! text: Some("text and CDATA".to_string()), //! value: None, //! }); //! ``` //! //! ## `$text` //! `$text` is used when you want to write your XML as text or CDATA content. //! More formally, a field with that name represents a simple type definition with //! `{variety} = atomic` or `{variety} = union` whose basic members are all atomic, //! as described in the [specification]. //! //! As a result, not all types of such fields can be serialized. Serialization //! of only the following types is supported: //! - all primitive types (strings, numbers, booleans) //! - unit variants of enumerations (serializes to a name of a variant) //! - newtypes (delegates serialization to inner type) //! - [`Option`] of above (`None` serializes to nothing) //! - sequences (including tuples and tuple variants of enumerations) of above, //! excluding `None` and empty string elements (because it will not be possible //! to deserialize them back). The elements are separated by space(s) //! - unit type `()` and unit structs (serializes to nothing) //! //! Complex types, such as structs and maps, are not supported in this field. //! If you want them, you should use `$value`. //! //! Sequences are serialized to a space-delimited string, which is why only certain //! types are allowed in this mode: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //!
#[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$text")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "1 2 3"); //! //! let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! ## `$value` //!
//!
//! NOTE: a name `#content` would better explain the purpose of that field,
//! but `$value` is used for compatibility with other XML serde crates, which
//! use that name. This will allow you to switch XML crates more smoothly if required.
//!
//! //! The representation of primitive types in `$value` does not differ from their //! representation in `$text` fields. The difference is how sequences are serialized //! and deserialized. `$value` serializes each sequence item as a separate XML element. //! How the name of the XML element is chosen depends on the field's type. For //! `enum`s, the variant name is used. For `struct`s, the name of the `struct` //! is used. //! //! During deserialization, if the `$value` field is an enum, then the variant's //! name is matched against. That's **not** the case with structs, however, since //! `serde` does not expose type names of nested fields. This does mean that **any** //! type could be deserialized into a `$value` struct-type field, so long as the //! struct's fields have compatible types (or are captured as text by `String` //! or similar-behaving types). This can be handy when using generic types in fields //! where one knows in advance what to expect. If you do not know what to expect, //! however, prefer an enum with all possible variants. //! //! Unit structs and unit type `()` serialize to nothing and can be deserialized //! from any content. //! //! Serialization and deserialization of `$value` field performed as usual, except //! that name for an XML element will be given by the serialized type, instead of //! field. The latter allow to serialize enumerated types, where variant is encoded //! as a tag name, and, so, represent an XSD `xs:choice` schema by the Rust `enum`. //! //! In the example below, field will be serialized as ``, because elements //! get their names from the field name. It cannot be deserialized, because `Enum` //! expects elements ``, `` or ``, but `AnyName` looked only for ``: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # #[derive(PartialEq, Debug)] //! #[derive(Deserialize, Serialize)] //! enum Enum { A, B, C } //! //! # #[derive(PartialEq, Debug)] //! 
#[derive(Deserialize, Serialize)] //! struct AnyName { //! // A, B, or C //! field: Enum, //! } //! # assert_eq!( //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), //! # "A", //! # ); //! # assert_eq!( //! # AnyName { field: Enum::B }, //! # quick_xml::de::from_str("B").unwrap(), //! # ); //! ``` //! //! If you rename field to `$value`, then `field` would be serialized as ``, //! `` or ``, depending on the its content. It is also possible to //! deserialize it from the same elements: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # #[derive(Deserialize, Serialize, PartialEq, Debug)] //! # enum Enum { A, B, C } //! # //! # #[derive(PartialEq, Debug)] //! #[derive(Deserialize, Serialize)] //! struct AnyName { //! // , or //! #[serde(rename = "$value")] //! field: Enum, //! } //! # assert_eq!( //! # quick_xml::se::to_string(&AnyName { field: Enum::A }).unwrap(), //! # "", //! # ); //! # assert_eq!( //! # AnyName { field: Enum::B }, //! # quick_xml::de::from_str("").unwrap(), //! # ); //! ``` //! //! The next example demonstrates how generic types can be used in conjunction //! with `$value`-named fields to allow the reuse of wrapping structs. A common //! example use case for this feature is SOAP messages, which can be commmonly //! found wrapped around ` ... `. //! //! ```rust //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! # use serde::{Deserialize, Serialize}; //! # //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct Envelope { //! body: Body, //! } //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct Body { //! #[serde(rename = "$value")] //! inner: T, //! } //! //! #[derive(Serialize, PartialEq, Debug)] //! struct Example { //! a: i32, //! } //! //! assert_eq!( //! to_string(&Envelope { body: Body { inner: Example { a: 42 } } }).unwrap(), //! // Notice how `inner` is not present in the XML //! 
"42", //! ); //! //! #[derive(Deserialize, PartialEq, Debug)] //! struct AnotherExample { //! a: i32, //! } //! //! assert_eq!( //! // Notice that tag the name does nothing for struct in `$value` field //! Envelope { body: Body { inner: AnotherExample { a: 42 } } }, //! from_str("42").unwrap(), //! ); //! ``` //! //! ### Primitives and sequences of primitives //! //! Sequences serialized to a list of elements. Note, that types that does not //! produce their own tag (i. e. primitives) will produce [`SeError::Unsupported`] //! if they contains more that one element, because such sequence cannot be //! deserialized to the same value: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![1, 2, 3] }; //! // If this object were serialized, it would be represented as "123" //! to_string(&obj).unwrap_err(); //! //! let object: AnyName = from_str("123").unwrap(); //! assert_eq!(object, AnyName { field: vec![123] }); //! //! // `1 2 3` is mapped to a single `usize` element //! // It is impossible to deserialize list of primitives to such field //! from_str::("1 2 3").unwrap_err(); //! ``` //! //! A particular case of that example is a string `$value` field, which probably //! would be a most used example of that attribute: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: String, //! } //! //! let obj = AnyName { field: "content".to_string() }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, "content"); //! ``` //! //! 
### Structs and sequences of structs //! //! Note, that structures do not have a serializable name as well (name of the //! type is never used), so it is impossible to serialize non-unit struct or //! sequence of non-unit structs in `$value` field. (sequences of) unit structs //! are serialized as empty string, because units itself serializing //! to nothing: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct Unit; //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! // #[serde(default)] is required to deserialization of empty lists //! // This is a general note, not related to $value //! #[serde(rename = "$value", default)] //! field: Vec, //! } //! //! let obj = AnyName { field: vec![Unit, Unit, Unit] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!(xml, ""); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![] }); //! //! let object: AnyName = from_str("").unwrap(); //! assert_eq!(object, AnyName { field: vec![Unit, Unit, Unit] }); //! ``` //! //! ### Enums and sequences of enums //! //! Enumerations uses the variant name as an element name: //! //! ``` //! # use serde::{Deserialize, Serialize}; //! # use pretty_assertions::assert_eq; //! # use quick_xml::de::from_str; //! # use quick_xml::se::to_string; //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! struct AnyName { //! #[serde(rename = "$value")] //! field: Vec, //! } //! //! #[derive(Deserialize, Serialize, PartialEq, Debug)] //! enum Enum { A, B, C } //! //! let obj = AnyName { field: vec![Enum::A, Enum::B, Enum::C] }; //! let xml = to_string(&obj).unwrap(); //! assert_eq!( //! xml, //! "\ //! \ //! \ //! \ //! " //! ); //! //! 
let object: AnyName = from_str(&xml).unwrap(); //! assert_eq!(object, obj); //! ``` //! //! //! //! Frequently Used Patterns //! ======================== //! //! Some XML constructs used so frequent, that it is worth to document the recommended //! way to represent them in the Rust. The sections below describes them. //! //! `` lists //! ----------------- //! Many XML formats wrap lists of elements in the additional container, //! although this is not required by the XML rules: //! //! ```xml //! //! //! //! //! //! //! //! //! //! //! ``` //! In this case, there is a great desire to describe this XML in this way: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Incorrect //! list: Vec, //! } //! ``` //! This will not work, because potentially `` element can have attributes //! and other elements inside. You should define the struct for the `` //! explicitly, as you do that in the XSD for that XML: //! ``` //! /// Represents //! type Element = (); //! //! /// Represents ... //! struct AnyName { //! // Correct //! list: List, //! } //! /// Represents ... //! struct List { //! element: Vec, //! } //! ``` //! //! If you want to simplify your API, you could write a simple function for unwrapping //! inner list and apply it via [`deserialize_with`]: //! //! ``` //! # use pretty_assertions::assert_eq; //! use quick_xml::de::from_str; //! use serde::{Deserialize, Deserializer}; //! //! /// Represents //! type Element = (); //! //! /// Represents ... //! #[derive(Deserialize, Debug, PartialEq)] //! struct AnyName { //! #[serde(deserialize_with = "unwrap_list")] //! list: Vec, //! } //! //! fn unwrap_list<'de, D>(deserializer: D) -> Result, D::Error> //! where //! D: Deserializer<'de>, //! { //! /// Represents ... //! #[derive(Deserialize)] //! struct List { //! // default allows empty list //! #[serde(default)] //! element: Vec, //! } //! Ok(List::deserialize(deserializer)?.element) //! } //! //! assert_eq!( //! 
AnyName { list: vec![(), (), ()] }, //! from_str(" //! //! //! //! //! //! //! //! ").unwrap(), //! ); //! ``` //! //! Instead of writing such functions manually, you also could try . //! //! Overlapped (Out-of-Order) Elements //! ---------------------------------- //! In the case that the list might contain tags that are overlapped with //! tags that do not correspond to the list (this is a usual case in XML //! documents) like this: //! ```xml //! //! //! //! //! //! //! ``` //! you should enable the [`overlapped-lists`] feature to make it possible //! to deserialize this to: //! ```no_run //! # use serde::Deserialize; //! #[derive(Deserialize)] //! #[serde(rename_all = "kebab-case")] //! struct AnyName { //! item: Vec<()>, //! another_item: (), //! } //! ``` //! //! //! Internally Tagged Enums //! ----------------------- //! [Tagged enums] are currently not supported because of an issue in the Serde //! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in //! Serde which could be useful for XML parsing ([serde#1495]). This can be worked //! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]` //! or implementing [`Deserialize`], but this can get very tedious very fast for //! files with large amounts of tagged enums. To help with this issue quick-xml //! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the //! macro documentation for details. //! //! //! [`overlapped-lists`]: ../index.html#overlapped-lists //! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with //! [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil //! [`Serializer::serialize_unit_variant`]: serde::Serializer::serialize_unit_variant //! [`Deserializer::deserialize_enum`]: serde::Deserializer::deserialize_enum //! [`SeError::Unsupported`]: crate::errors::serialize::SeError::Unsupported //! 
/// Generates a `serde::Deserializer` method named `$deserialize` that reads
/// the current content as a string (via `self.read_string()`) and forwards the
/// call to the method with the same name of [`SimpleTypeDeserializer`].
///
/// The optional `$mut` token is placed in front of `self`, which allows
/// to generate both `fn name(self, ..)` and `fn name(mut self, ..)` forms.
macro_rules! forward_to_simple_type {
    ($deserialize:ident, $($mut:tt)?) => {
        #[inline]
        fn $deserialize<V>($($mut)? self, visitor: V) -> Result<V::Value, DeError>
        where
            V: Visitor<'de>,
        {
            SimpleTypeDeserializer::from_text(self.read_string()?).$deserialize(visitor)
        }
    };
}
#[inline] fn deserialize_bytes(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_any(visitor) } /// Forwards deserialization to the [`deserialize_bytes`](#method.deserialize_bytes). #[inline] fn deserialize_byte_buf(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_bytes(visitor) } /// Representation of the named units the same as [unnamed units](#method.deserialize_unit). #[inline] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } /// Representation of tuples the same as [sequences](#method.deserialize_seq). #[inline] fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple). #[inline] fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } /// Forwards deserialization to the [`deserialize_struct`](#method.deserialize_struct) /// with empty name and fields. #[inline] fn deserialize_map(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_struct("", &[], visitor) } /// Identifiers represented as [strings](#method.deserialize_str). #[inline] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } /// Forwards deserialization to the [`deserialize_unit`](#method.deserialize_unit). 
/// Returns `true` if the character is **not** a whitespace character
/// (blank, new line, carriage return or tab).
///
/// The function is written in the negated form, because it is used as a
/// pattern for `str::find` / reverse searches that look for the first and the
/// last meaningful character of a text.
#[inline]
const fn is_non_whitespace(ch: char) -> bool {
    !matches!(ch, ' ' | '\r' | '\n' | '\t')
}

/// Decoded and concatenated content of consequent [`Text`] and [`CData`]
/// events. _Consequent_ means that events should follow each other or be
/// delimited only by (any count of) [`Comment`] or [`PI`] events.
///
/// Internally text is stored in `Cow<str>`. Cloning of text is cheap while it
/// is borrowed and makes copies of data when it is owned.
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
/// [`Comment`]: Event::Comment
/// [`PI`]: Event::PI
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Text<'a> {
    /// Untrimmed text after concatenating content of all
    /// [`Text`] and [`CData`] events
    ///
    /// [`Text`]: Event::Text
    /// [`CData`]: Event::CData
    text: Cow<'a, str>,
    /// A byte range into `text` which contains data after trimming.
    /// Both bounds always lie on character boundaries; the range is `0..0`
    /// when the text is empty or consists only of whitespace.
    content: Range<usize>,
}

impl<'a> Text<'a> {
    /// Wraps the `text` and pre-computes the range of its trimmed content.
    fn new(text: Cow<'a, str>) -> Self {
        // Byte offset of the first non-whitespace character
        let start = text.find(is_non_whitespace).unwrap_or(0);
        // Byte offset just *after* the last non-whitespace character. The
        // character may occupy several bytes in UTF-8, so its real width must
        // be added to the offset at which it starts; adding `1` would cut a
        // multi-byte character in the middle and make `trimmed()` panic.
        let end = text
            .char_indices()
            .rev()
            .find(|&(_, ch)| is_non_whitespace(ch))
            .map_or(0, |(offset, ch)| offset + ch.len_utf8());

        let content = if start >= end { 0..0 } else { start..end };

        Self { text, content }
    }

    /// Returns text without leading and trailing whitespaces as [defined] by XML specification.
    ///
    /// If you want to only check if text contains only whitespaces, use [`is_blank`](Self::is_blank),
    /// which will not allocate.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.trimmed(), "");
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.trimmed(), "some useful text");
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn trimmed(&self) -> Cow<'a, str> {
        match self.text {
            // Borrowed input can be sub-sliced without copying
            Cow::Borrowed(text) => Cow::Borrowed(&text[self.content.clone()]),
            // Owned data cannot be borrowed for `'a`, so the trimmed part is copied
            Cow::Owned(ref text) => Cow::Owned(text[self.content.clone()].to_string()),
        }
    }

    /// Returns `true` if text is empty or contains only whitespaces as [defined] by XML specification.
    ///
    /// # Example
    ///
    /// ```
    /// # use quick_xml::de::Text;
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let text = Text::from("");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from(" \r\n\t ");
    /// assert_eq!(text.is_blank(), true);
    ///
    /// let text = Text::from("  some useful text  ");
    /// assert_eq!(text.is_blank(), false);
    /// ```
    ///
    /// [defined]: https://www.w3.org/TR/xml11/#NT-S
    pub fn is_blank(&self) -> bool {
        self.content.is_empty()
    }
}

/// Gives access to the whole (untrimmed) text.
impl<'a> Deref for Text<'a> {
    type Target = str;

    #[inline]
    fn deref(&self) -> &Self::Target {
        self.text.deref()
    }
}

impl<'a> From<&'a str> for Text<'a> {
    #[inline]
    fn from(text: &'a str) -> Self {
        Self::new(Cow::Borrowed(text))
    }
}

impl<'a> From<String> for Text<'a> {
    #[inline]
    fn from(text: String) -> Self {
        Self::new(Cow::Owned(text))
    }
}

impl<'a> From<Cow<'a, str>> for Text<'a> {
    #[inline]
    fn from(text: Cow<'a, str>) -> Self {
        Self::new(text)
    }
}
This event contains intermediate state of [`Text`] /// event, where they are trimmed from the start, but not from the end. To trim /// end spaces we should lookahead by one deserializer event (i. e. skip all /// comments and processing instructions). /// /// [`Text`]: Event::Text /// [`CData`]: Event::CData #[derive(Clone, Debug, PartialEq, Eq)] pub enum PayloadEvent<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), /// End tag ``. End(BytesEnd<'a>), /// Escaped character data between tags. Text(BytesText<'a>), /// Unescaped character data stored in ``. CData(BytesCData<'a>), /// Document type definition data (DTD) stored in ``. DocType(BytesText<'a>), /// Reference `&ref;` in the textual data. GeneralRef(BytesRef<'a>), /// End of XML document. Eof, } impl<'a> PayloadEvent<'a> { /// Ensures that all data is owned to extend the object's lifetime if necessary. #[inline] fn into_owned(self) -> PayloadEvent<'static> { match self { PayloadEvent::Start(e) => PayloadEvent::Start(e.into_owned()), PayloadEvent::End(e) => PayloadEvent::End(e.into_owned()), PayloadEvent::Text(e) => PayloadEvent::Text(e.into_owned()), PayloadEvent::CData(e) => PayloadEvent::CData(e.into_owned()), PayloadEvent::DocType(e) => PayloadEvent::DocType(e.into_owned()), PayloadEvent::GeneralRef(e) => PayloadEvent::GeneralRef(e.into_owned()), PayloadEvent::Eof => PayloadEvent::Eof, } } } /// An intermediate reader that consumes [`PayloadEvent`]s and produces final [`DeEvent`]s. /// [`PayloadEvent::Text`] events, that followed by any event except /// [`PayloadEvent::Text`] or [`PayloadEvent::CData`], are trimmed from the end. struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolver> { /// A source of low-level XML events reader: R, /// Intermediate event, that could be returned by the next call to `next()`. /// If that is the `Text` event then leading spaces already trimmed, but /// trailing spaces is not. 
Before the event will be returned, trimming of /// the spaces could be necessary lookahead: Result, DeError>, /// Used to resolve unknown entities that would otherwise cause the parser /// to return an [`EscapeError::UnrecognizedEntity`] error. /// /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity entity_resolver: E, } impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> { fn new(mut reader: R, entity_resolver: E) -> Self { // Lookahead by one event immediately, so we do not need to check in the // loop if we need lookahead or not let lookahead = reader.next(); Self { reader, lookahead, entity_resolver, } } /// Returns `true` if all events was consumed const fn is_empty(&self) -> bool { matches!(self.lookahead, Ok(PayloadEvent::Eof)) } /// Read next event and put it in lookahead, return the current lookahead #[inline(always)] fn next_impl(&mut self) -> Result, DeError> { replace(&mut self.lookahead, self.reader.next()) } /// Returns `true` when next event is not a text event in any form. #[inline(always)] const fn current_event_is_last_text(&self) -> bool { // If next event is a text or CDATA, we should not trim trailing spaces !matches!( self.lookahead, Ok(PayloadEvent::Text(_)) | Ok(PayloadEvent::CData(_) | PayloadEvent::GeneralRef(_)) ) } /// Read all consequent [`Text`] and [`CData`] events until non-text event /// occurs. Content of all events would be appended to `result` and returned /// as [`DeEvent::Text`]. /// /// [`Text`]: PayloadEvent::Text /// [`CData`]: PayloadEvent::CData fn drain_text(&mut self, mut result: Cow<'i, str>) -> Result, DeError> { loop { if self.current_event_is_last_text() { break; } match self.next_impl()? 
{ PayloadEvent::Text(e) => result.to_mut().push_str(&e.xml_content()?), PayloadEvent::CData(e) => result.to_mut().push_str(&e.xml_content()?), PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?, // SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef _ => unreachable!("Only `Text`, `CData` or `GeneralRef` events can come here"), } } Ok(DeEvent::Text(Text::new(result))) } /// Return an input-borrowing event. fn next(&mut self) -> Result, DeError> { loop { return match self.next_impl()? { PayloadEvent::Start(e) => Ok(DeEvent::Start(e)), PayloadEvent::End(e) => Ok(DeEvent::End(e)), PayloadEvent::Text(e) => self.drain_text(e.xml_content()?), PayloadEvent::CData(e) => self.drain_text(e.xml_content()?), PayloadEvent::DocType(e) => { self.entity_resolver .capture(e) .map_err(|err| DeError::Custom(format!("cannot parse DTD: {}", err)))?; continue; } PayloadEvent::GeneralRef(e) => { let mut text = String::new(); self.resolve_reference(&mut text, e)?; self.drain_text(text.into()) } PayloadEvent::Eof => Ok(DeEvent::Eof), }; } } fn resolve_reference(&mut self, result: &mut String, event: BytesRef) -> Result<(), DeError> { let len = event.len(); let reference = self.decoder().decode(&event)?; if let Some(num) = reference.strip_prefix('#') { let codepoint = parse_number(num).map_err(EscapeError::InvalidCharRef)?; result.push_str(codepoint.encode_utf8(&mut [0u8; 4])); return Ok(()); } if let Some(value) = self.entity_resolver.resolve(reference.as_ref()) { result.push_str(value); return Ok(()); } Err(EscapeError::UnrecognizedEntity(0..len, reference.to_string()).into()) } #[inline] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.lookahead { // We pre-read event with the same name that is required to be skipped. 
// First call of `read_to_end` will end out pre-read event, the second // will consume other events Ok(PayloadEvent::Start(ref e)) if e.name() == name => { let result1 = self.reader.read_to_end(name); let result2 = self.reader.read_to_end(name); // In case of error `next_impl` returns `Eof` let _ = self.next_impl(); result1?; result2?; } // We pre-read event with the same name that is required to be skipped. // Because this is end event, we already consume the whole tree, so // nothing to do, just update lookahead Ok(PayloadEvent::End(ref e)) if e.name() == name => { let _ = self.next_impl(); } Ok(_) => { let result = self.reader.read_to_end(name); // In case of error `next_impl` returns `Eof` let _ = self.next_impl(); result?; } // Read next lookahead event, unpack error from the current lookahead Err(_) => { self.next_impl()?; } } Ok(()) } #[inline] fn decoder(&self) -> Decoder { self.reader.decoder() } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Deserialize an instance of type `T` from a string of XML text. pub fn from_str<'de, T>(s: &'de str) -> Result where T: Deserialize<'de>, { let mut de = Deserializer::from_str(s); T::deserialize(&mut de) } /// Deserialize from a reader. This method will do internal copies of data /// read from `reader`. If you want have a `&str` input and want to borrow /// as much as possible, use [`from_str`]. pub fn from_reader(reader: R) -> Result where R: BufRead, T: DeserializeOwned, { let mut de = Deserializer::from_reader(reader); T::deserialize(&mut de) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A structure that deserializes XML into Rust values. 
pub struct Deserializer<'de, R, E: EntityResolver = PredefinedEntityResolver> where R: XmlRead<'de>, { /// An XML reader that streams events into this deserializer reader: XmlReader<'de, R, E>, /// When deserializing sequences sometimes we have to skip unwanted events. /// That events should be stored and then replayed. This is a replay buffer, /// that streams events while not empty. When it exhausted, events will /// requested from [`Self::reader`]. #[cfg(feature = "overlapped-lists")] read: VecDeque>, /// When deserializing sequences sometimes we have to skip events, because XML /// is tolerant to elements order and even if in the XSD order is strictly /// specified (using `xs:sequence`) most of XML parsers allows order violations. /// That means, that elements, forming a sequence, could be overlapped with /// other elements, do not related to that sequence. /// /// In order to support this, deserializer will scan events and skip unwanted /// events, store them here. After call [`Self::start_replay()`] all events /// moved from this to [`Self::read`]. #[cfg(feature = "overlapped-lists")] write: VecDeque>, /// Maximum number of events that can be skipped when processing sequences /// that occur out-of-order. This field is used to prevent potential /// denial-of-service (DoS) attacks which could cause infinite memory /// consumption when parsing a very large amount of XML into a sequence field. #[cfg(feature = "overlapped-lists")] limit: Option, #[cfg(not(feature = "overlapped-lists"))] peek: Option>, /// Buffer to store attribute name as a field name exposed to serde consumers key_buf: String, } impl<'de, R, E> Deserializer<'de, R, E> where R: XmlRead<'de>, E: EntityResolver, { /// Create an XML deserializer from one of the possible quick_xml input sources. 
/// /// Typically it is more convenient to use one of these methods instead: /// /// - [`Deserializer::from_str`] /// - [`Deserializer::from_reader`] fn new(reader: R, entity_resolver: E) -> Self { Self { reader: XmlReader::new(reader, entity_resolver), #[cfg(feature = "overlapped-lists")] read: VecDeque::new(), #[cfg(feature = "overlapped-lists")] write: VecDeque::new(), #[cfg(feature = "overlapped-lists")] limit: None, #[cfg(not(feature = "overlapped-lists"))] peek: None, key_buf: String::new(), } } /// Returns `true` if all events was consumed. pub fn is_empty(&self) -> bool { #[cfg(feature = "overlapped-lists")] let event = self.read.front(); #[cfg(not(feature = "overlapped-lists"))] let event = self.peek.as_ref(); match event { None | Some(DeEvent::Eof) => self.reader.is_empty(), _ => false, } } /// Returns the underlying XML reader. /// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use quick_xml::de::Deserializer; /// use quick_xml::NsReader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_str( /// "" /// // 0 ^= 28 ^= 41 /// ); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &NsReader<_> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &R { &self.reader.reader } /// Set the maximum number of events that could be skipped during deserialization /// of sequences. /// /// If `` contains more than specified nested elements, `$text` or /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during /// deserialization of sequence field (any type that uses [`deserialize_seq`] /// for the deserialization, for example, `Vec`). 
///
/// This method can be used to prevent a [DoS] attack and infinite memory
/// consumption when parsing a very large XML to a sequence field.
///
/// It is strongly recommended to set limit to some value when you parse data
/// from untrusted sources. You should choose a value that your typical XMLs
/// can have _between_ different elements that corresponds to the same sequence.
///
/// # Examples
///
/// Let's imagine, that we deserialize such structure:
/// ```
/// struct List {
///   item: Vec<()>,
/// }
/// ```
///
/// The XML that we try to parse look like this:
/// ```xml
/// <any-name>
///   <item/>
///   <!-- Bufferization starts at this point -->
///   <another-item>
///     <some-element>with text</some-element>
///     <yet-another-element/>
///   </another-item>
///   <!-- Buffer will be emptied at this point; 7 events were buffered -->
///   <item/>
///   <!-- There is nothing to buffer, because elements follows each other -->
///   <item/>
/// </any-name>
/// ```
///
/// There, when we deserialize the `item` field, we need to buffer 7 events,
/// before we can deserialize the second `<item/>`:
///
/// - `<another-item>`
/// - `<some-element>`
/// - `$text(with text)`
/// - `</some-element>`
/// - `<yet-another-element/>` (virtual start event)
/// - `<yet-another-element/>` (virtual end event)
/// - `</another-item>`
///
/// Note, that `<yet-another-element/>` internally represented as 2 events:
/// one for the start tag and one for the end tag. In the future this can be
/// eliminated, but for now we use [auto-expanding feature] of a reader,
/// because this simplifies deserializer code.
///
/// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
/// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
/// [auto-expanding feature]: crate::reader::Config::expand_empty_elements
#[cfg(feature = "overlapped-lists")]
pub fn event_buffer_size(&mut self, limit: Option<NonZeroUsize>) -> &mut Self {
    self.limit = limit;
    self
}

/// Returns a reference to the next event without consuming it.
///
/// If the replay buffer is empty, one event is pulled from the underlying
/// reader and stored at the front of the buffer.
#[cfg(feature = "overlapped-lists")]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
    if self.read.is_empty() {
        self.read.push_front(self.reader.next()?);
    }
    if let Some(event) = self.read.front() {
        return Ok(event);
    }
    // SAFETY: `self.read` was filled in the code above.
    // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
    // if unsafe code will be allowed
    unreachable!()
}

/// Returns a reference to the next event without consuming it.
///
/// If nothing is peeked yet, one event is pulled from the underlying reader
/// and stored in the `peek` slot.
#[cfg(not(feature = "overlapped-lists"))]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
    match &mut self.peek {
        Some(event) => Ok(event),
        empty_peek @ None => Ok(empty_peek.insert(self.reader.next()?)),
    }
}

/// Returns the event stored by the last call to [`Self::peek()`].
///
/// # Panics
///
/// Panics if there is no peeked event, i.e. `peek()` was not called before.
#[inline]
fn last_peeked(&self) -> &DeEvent<'de> {
    #[cfg(feature = "overlapped-lists")]
    {
        self.read
            .front()
            .expect("`Deserializer::peek()` should be called")
    }
    #[cfg(not(feature = "overlapped-lists"))]
    {
        self.peek
            .as_ref()
            .expect("`Deserializer::peek()` should be called")
    }
}

/// Consumes and returns the next event: a buffered (replayed or peeked) one
/// if there is any, otherwise a fresh event from the underlying reader.
fn next(&mut self) -> Result<DeEvent<'de>, DeError> {
    // Replay skipped or peeked events
    #[cfg(feature = "overlapped-lists")]
    if let Some(event) = self.read.pop_front() {
        return Ok(event);
    }
    #[cfg(not(feature = "overlapped-lists"))]
    if let Some(e) = self.peek.take() {
        return Ok(e);
    }
    self.reader.next()
}

/// Consumes text events for which `is_blank()` is true and stops at the
/// first event of any other kind, which is left unconsumed.
fn skip_whitespaces(&mut self) -> Result<(), DeError> {
    loop {
        match self.peek()? {
            DeEvent::Text(e) if e.is_blank() => {
                self.next()?;
            }
            _ => break,
        }
    }
    Ok(())
}

/// Returns the mark after which all events, skipped by [`Self::skip()`] call,
/// should be replayed after calling [`Self::start_replay()`].
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
    self.write.len()
}

/// Extracts XML tree of events from and stores them in the skipped events
/// buffer from which they can be retrieved later. You MUST call
/// [`Self::start_replay()`] after calling this to give access to the skipped
/// events and release internal buffers.
#[cfg(feature = "overlapped-lists")] fn skip(&mut self) -> Result<(), DeError> { let event = self.next()?; self.skip_event(event)?; // Skip all subtree, if we skip a start event if let Some(DeEvent::Start(e)) = self.write.back() { let end = e.name().as_ref().to_owned(); let mut depth = 0; loop { let event = self.next()?; match event { DeEvent::Start(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; depth += 1; } DeEvent::End(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; if depth == 0 { break; } depth -= 1; } DeEvent::Eof => { self.skip_event(event)?; break; } _ => self.skip_event(event)?, } } } Ok(()) } #[cfg(feature = "overlapped-lists")] #[inline] fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> { if let Some(max) = self.limit { if self.write.len() >= max.get() { return Err(DeError::TooManyEvents(max)); } } self.write.push_back(event); Ok(()) } /// Moves buffered events, skipped after given `checkpoint` from [`Self::write`] /// skip buffer to [`Self::read`] buffer. /// /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts /// return events that was skipped previously by calling [`Self::skip()`], /// and only when all that events will be consumed, the deserializer starts /// to drain events from underlying reader. /// /// This method MUST be called if any number of [`Self::skip()`] was called /// after [`Self::new()`] or `start_replay()` or you'll lost events. #[cfg(feature = "overlapped-lists")] fn start_replay(&mut self, checkpoint: usize) { if checkpoint == 0 { self.write.append(&mut self.read); std::mem::swap(&mut self.read, &mut self.write); } else { let mut read = self.write.split_off(checkpoint); read.append(&mut self.read); self.read = read; } } #[inline] fn read_string(&mut self) -> Result, DeError> { self.read_string_impl(true) } /// Consumes consequent [`Text`] and [`CData`] (both a referred below as a _text_) /// events, merge them into one string. 
If there are no such events, returns /// an empty string. /// /// If `allow_start` is `false`, then only text events are consumed, for other /// events an error is returned (see table below). /// /// If `allow_start` is `true`, then two or three events are expected: /// - [`DeEvent::Start`]; /// - _(optional)_ [`DeEvent::Text`] which content is returned; /// - [`DeEvent::End`]. If text event was missed, an empty string is returned. /// /// Corresponding events are consumed. /// /// # Handling events /// /// The table below shows how events is handled by this method: /// /// |Event |XML |Handling /// |------------------|---------------------------|---------------------------------------- /// |[`DeEvent::Start`]|`...` |if `allow_start == true`, result determined by the second table, otherwise emits [`UnexpectedStart("tag")`](DeError::UnexpectedStart) /// |[`DeEvent::End`] |`` |This is impossible situation, the method will panic if it happens /// |[`DeEvent::Text`] |`text content` or `` (probably mixed)|Returns event content unchanged /// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof) /// /// Second event, consumed if [`DeEvent::Start`] was received and `allow_start == true`: /// /// |Event |XML |Handling /// |------------------|---------------------------|---------------------------------------------------------------------------------- /// |[`DeEvent::Start`]|`...` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart) /// |[`DeEvent::End`] |`
` |Returns an empty slice. The reader guarantee that tag will match the open one /// |[`DeEvent::Text`] |`text content` or `` (probably mixed)|Returns event content unchanged, expects the `
` after that /// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml) /// /// [`Text`]: Event::Text /// [`CData`]: Event::CData fn read_string_impl(&mut self, allow_start: bool) -> Result, DeError> { match self.next()? { // Reached by doc tests only: this file, lines 979 and 996 DeEvent::Text(e) => Ok(e.text), // allow one nested level // Reached by trivial::{...}::{field, field_nested, field_tag_after, field_tag_before, nested, tag_after, tag_before, wrapped} DeEvent::Start(e) if allow_start => self.read_text(e.name()), // TODO: not reached by any tests DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), // SAFETY: The reader is guaranteed that we don't have unmatched tags // If we here, then our deserializer has a bug DeEvent::End(e) => unreachable!("{:?}", e), // Reached by trivial::{empty_doc, only_comment} DeEvent::Eof => Err(DeError::UnexpectedEof), } } /// Consumes one [`DeEvent::Text`] event and ensures that it is followed by the /// [`DeEvent::End`] event. /// /// # Parameters /// - `name`: name of a tag opened before reading text. The corresponding end tag /// should present in input just after the text fn read_text(&mut self, name: QName) -> Result, DeError> { match self.next()? { DeEvent::Text(e) => match self.next()? 
{ // The matching tag name is guaranteed by the reader // Reached by trivial::{...}::{field, wrapped} DeEvent::End(_) => Ok(e.text), // SAFETY: Cannot be two consequent Text events, they would be merged into one DeEvent::Text(_) => unreachable!(), // Reached by trivial::{...}::{field_tag_after, tag_after} DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())), // Reached by struct_::non_closed::elements_child DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), }, // We can get End event in case of `` or `` input // Return empty text in that case // The matching tag name is guaranteed by the reader // Reached by {...}::xs_list::empty DeEvent::End(_) => Ok("".into()), // Reached by trivial::{...}::{field_nested, field_tag_before, nested, tag_before} DeEvent::Start(s) => Err(DeError::UnexpectedStart(s.name().as_ref().to_owned())), // Reached by struct_::non_closed::elements_child DeEvent::Eof => Err(Error::missed_end(name, self.reader.decoder()).into()), } } /// Drops all events until event with [name](BytesEnd::name()) `name` won't be /// dropped. This method should be called after [`Self::next()`] #[cfg(feature = "overlapped-lists")] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { let mut depth = 0; loop { match self.read.pop_front() { Some(DeEvent::Start(e)) if e.name() == name => { depth += 1; } Some(DeEvent::End(e)) if e.name() == name => { if depth == 0 { break; } depth -= 1; } // Drop all other skipped events Some(_) => continue, // If we do not have skipped events, use effective reading that will // not allocate memory for events None => { // We should close all opened tags, because we could buffer // Start events, but not the corresponding End events. So we // keep reading events until we exit all nested tags. // `read_to_end()` will return an error if an Eof was encountered // preliminary (in case of malformed XML). 
// // // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 // ^^^^^^ - read by the first call of `self.reader.read_to_end()` // ^^^^^^ - read by the second call of `self.reader.read_to_end()` loop { self.reader.read_to_end(name)?; if depth == 0 { break; } depth -= 1; } break; } } } Ok(()) } #[cfg(not(feature = "overlapped-lists"))] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { // First one might be in self.peek match self.next()? { DeEvent::Start(e) => self.reader.read_to_end(e.name())?, DeEvent::End(e) if e.name() == name => return Ok(()), _ => (), } self.reader.read_to_end(name) } fn skip_next_tree(&mut self) -> Result<(), DeError> { let DeEvent::Start(start) = self.next()? else { unreachable!("Only call this if the next event is a start event") }; let name = start.name(); self.read_to_end(name) } /// Method for testing Deserializer implementation. Checks that all events was consumed during /// deserialization. Panics if the next event will not be [`DeEvent::Eof`]. #[doc(hidden)] #[track_caller] pub fn check_eof_reached(&mut self) { // Deserializer may not consume trailing spaces, that is normal self.skip_whitespaces().expect("cannot skip whitespaces"); let event = self.peek().expect("cannot peek event"); assert_eq!( *event, DeEvent::Eof, "the whole XML document should be consumed, expected `Eof`", ); } } impl<'de> Deserializer<'de, SliceReader<'de>> { /// Create a new deserializer that will borrow data from the specified string. /// /// Deserializer created with this method will not resolve custom entities. #[allow(clippy::should_implement_trait)] pub fn from_str(source: &'de str) -> Self { Self::from_str_with_resolver(source, PredefinedEntityResolver) } /// Create a new deserializer that will borrow data from the specified preconfigured /// reader. /// /// Deserializer created with this method will not resolve custom entities. 
/// /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// # use quick_xml::de::Deserializer; /// # use quick_xml::NsReader; /// # use serde::Deserialize; /// # /// #[derive(Deserialize, PartialEq, Debug)] /// struct Object<'a> { /// tag: &'a str, /// } /// /// let mut reader = NsReader::from_str(" test "); /// /// let mut de = Deserializer::borrowing(reader.clone()); /// let obj = Object::deserialize(&mut de).unwrap(); /// assert_eq!(obj, Object { tag: " test " }); /// /// reader.config_mut().trim_text(true); /// /// let mut de = Deserializer::borrowing(reader); /// let obj = Object::deserialize(&mut de).unwrap(); /// assert_eq!(obj, Object { tag: "test" }); /// ``` /// /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements #[inline] pub fn borrowing(reader: NsReader<&'de [u8]>) -> Self { Self::borrowing_with_resolver(reader, PredefinedEntityResolver) } } impl<'de, E> Deserializer<'de, SliceReader<'de>, E> where E: EntityResolver, { /// Create a new deserializer that will borrow data from the specified string /// and use the specified entity resolver. pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self { Self::borrowing_with_resolver(NsReader::from_str(source), entity_resolver) } /// Create a new deserializer that will borrow data from the specified preconfigured /// reader and use the specified entity resolver. /// /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. 
/// /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements pub fn borrowing_with_resolver(mut reader: NsReader<&'de [u8]>, entity_resolver: E) -> Self { let config = reader.config_mut(); config.expand_empty_elements = true; Self::new(SliceReader { reader }, entity_resolver) } } impl<'de, R> Deserializer<'de, IoReader> where R: BufRead, { /// Create a new deserializer that will copy data from the specified reader /// into internal buffer. /// /// If you already have a string use [`Self::from_str`] instead, because it /// will borrow instead of copy. If you have `&[u8]` which is known to represent /// UTF-8, you can decode it first before using [`from_str`]. /// /// Deserializer created with this method will not resolve custom entities. pub fn from_reader(reader: R) -> Self { Self::with_resolver(reader, PredefinedEntityResolver) } /// Create a new deserializer that will copy data from the specified preconfigured /// reader into internal buffer. /// /// Deserializer created with this method will not resolve custom entities. /// /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. 
/// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// # use quick_xml::de::Deserializer; /// # use quick_xml::NsReader; /// # use serde::Deserialize; /// # /// #[derive(Deserialize, PartialEq, Debug)] /// struct Object { /// tag: String, /// } /// /// let mut reader = NsReader::from_str(" test "); /// /// let mut de = Deserializer::buffering(reader.clone()); /// let obj = Object::deserialize(&mut de).unwrap(); /// assert_eq!(obj, Object { tag: " test ".to_string() }); /// /// reader.config_mut().trim_text(true); /// /// let mut de = Deserializer::buffering(reader); /// let obj = Object::deserialize(&mut de).unwrap(); /// assert_eq!(obj, Object { tag: "test".to_string() }); /// ``` /// /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements #[inline] pub fn buffering(reader: NsReader) -> Self { Self::buffering_with_resolver(reader, PredefinedEntityResolver) } } impl<'de, R, E> Deserializer<'de, IoReader, E> where R: BufRead, E: EntityResolver, { /// Create a new deserializer that will copy data from the specified reader /// into internal buffer and use the specified entity resolver. /// /// If you already have a string use [`Self::from_str`] instead, because it /// will borrow instead of copy. If you have `&[u8]` which is known to represent /// UTF-8, you can decode it first before using [`from_str`]. pub fn with_resolver(reader: R, entity_resolver: E) -> Self { let mut reader = NsReader::from_reader(reader); let config = reader.config_mut(); config.expand_empty_elements = true; Self::new( IoReader { reader, buf: Vec::new(), }, entity_resolver, ) } /// Create new deserializer that will copy data from the specified preconfigured reader /// into internal buffer and use the specified entity resolver. /// /// Note, that config option [`Config::expand_empty_elements`] will be set to `true`. 
/// /// [`Config::expand_empty_elements`]: crate::reader::Config::expand_empty_elements pub fn buffering_with_resolver(mut reader: NsReader, entity_resolver: E) -> Self { let config = reader.config_mut(); config.expand_empty_elements = true; Self::new( IoReader { reader, buf: Vec::new(), }, entity_resolver, ) } } impl<'de, R, E> de::Deserializer<'de> for &mut Deserializer<'de, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; deserialize_primitives!(); fn deserialize_struct( self, _name: &'static str, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { // When document is pretty-printed there could be whitespaces before the root element self.skip_whitespaces()?; match self.next()? { DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self, e, fields)), // SAFETY: The reader is guaranteed that we don't have unmatched tags // If we here, then our deserializer has a bug DeEvent::End(e) => unreachable!("{:?}", e), // Deserializer methods are only hints, if deserializer could not satisfy // request, it should return the data that it has. It is responsibility // of a Visitor to return an error if it does not understand the data DeEvent::Text(e) => match e.text { Cow::Borrowed(s) => visitor.visit_borrowed_str(s), Cow::Owned(s) => visitor.visit_string(s), }, DeEvent::Eof => Err(DeError::UnexpectedEof), } } /// Unit represented in XML as a `xs:element` or text/CDATA content. /// Any content inside `xs:element` is ignored and skipped. 
///
/// Produces unit struct from any of following inputs:
/// - any `<tag ...>...</tag>`
/// - any `<tag .../>`
/// - any consequent text / CDATA content (can consist of several parts
///   delimited by comments and processing instructions)
///
/// # Events handling
///
/// |Event             |XML                        |Handling
/// |------------------|---------------------------|-------------------------------------------
/// |[`DeEvent::Start`]|`<tag>...</tag>`           |Calls `visitor.visit_unit()`, consumes all events up to and including corresponding `End` event
/// |[`DeEvent::End`]  |`</tag>`                   |This is impossible situation, the method will panic if it happens
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Calls `visitor.visit_unit()`. The content is ignored
/// |[`DeEvent::Eof`]  |                           |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, DeError>
where
    V: Visitor<'de>,
{
    match self.next()? {
        DeEvent::Start(s) => {
            // The element content is irrelevant for a unit, drop it entirely
            self.read_to_end(s.name())?;
            visitor.visit_unit()
        }
        DeEvent::Text(_) => visitor.visit_unit(),
        // SAFETY: The reader is guaranteed that we don't have unmatched tags
        // If we here, then our deserializer has a bug
        DeEvent::End(e) => unreachable!("{:?}", e),
        DeEvent::Eof => Err(DeError::UnexpectedEof),
    }
}

/// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`]
/// with the same deserializer.
fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { // When document is pretty-printed there could be whitespaces before the root element // which represents the enum variant // Checked by `top_level::list_of_enum` test in serde-de-seq self.skip_whitespaces()?; visitor.visit_enum(var::EnumAccess::new(self)) } fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_seq(self) } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { // We cannot use result of `peek()` directly because of borrow checker let _ = self.peek()?; match self.last_peeked() { DeEvent::Text(t) if t.is_empty() => visitor.visit_none(), DeEvent::Eof => visitor.visit_none(), // if the `xsi:nil` attribute is set to true we got a none value DeEvent::Start(start) if self.reader.reader.has_nil_attr(start) => { self.skip_next_tree()?; visitor.visit_none() } _ => visitor.visit_some(self), } } fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { match self.peek()? { DeEvent::Text(_) => self.deserialize_str(visitor), _ => self.deserialize_map(visitor), } } } /// An accessor to sequence elements forming a value for top-level sequence of XML /// elements. /// /// Technically, multiple top-level elements violates XML rule of only one top-level /// element, but we consider this as several concatenated XML documents. impl<'de, R, E> SeqAccess<'de> for &mut Deserializer<'de, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> where T: DeserializeSeed<'de>, { // When document is pretty-printed there could be whitespaces before, between // and after root elements. 
We cannot defer decision if we need to skip spaces // or not: if we have a sequence of type that does not accept blank text, it // will need to return something and it can return only error. For example, // it can be enum without `$text` variant // Checked by `top_level::list_of_enum` test in serde-de-seq self.skip_whitespaces()?; match self.peek()? { DeEvent::Eof => Ok(None), // Start(tag), End(tag), Text _ => seed.deserialize(&mut **self).map(Some), } } } impl<'de, R, E> IntoDeserializer<'de, DeError> for &mut Deserializer<'de, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Deserializer = Self; #[inline] fn into_deserializer(self) -> Self { self } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Converts raw reader's event into a payload event. /// Returns `None`, if event should be skipped. #[inline(always)] fn skip_uninterested<'a>(event: Event<'a>) -> Option> { let event = match event { Event::DocType(e) => PayloadEvent::DocType(e), Event::Start(e) => PayloadEvent::Start(e), Event::End(e) => PayloadEvent::End(e), Event::Eof => PayloadEvent::Eof, // Do not trim next text event after Text, CDATA or reference event Event::CData(e) => PayloadEvent::CData(e), Event::Text(e) => PayloadEvent::Text(e), Event::GeneralRef(e) => PayloadEvent::GeneralRef(e), _ => return None, }; Some(event) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Trait used by the deserializer for iterating over input. This is manually /// "specialized" for iterating over `&[u8]`. /// /// You do not need to implement this trait, it is needed to abstract from /// [borrowing](SliceReader) and [copying](IoReader) data sources and reuse code in /// deserializer pub trait XmlRead<'i> { /// Return an input-borrowing event. fn next(&mut self) -> Result, DeError>; /// Skips until end element is found. Unlike `next()` it will not allocate /// when it cannot satisfy the lifetime. 
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>; /// A copy of the reader's decoder used to decode strings. fn decoder(&self) -> Decoder; /// Checks if the `start` tag has a [`xsi:nil`] attribute. This method ignores /// any errors in attributes. /// /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil fn has_nil_attr(&self, start: &BytesStart) -> bool; } /// XML input source that reads from a std::io input stream. /// /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_reader`] pub struct IoReader { reader: NsReader, buf: Vec, } impl IoReader { /// Returns the underlying XML reader. /// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use std::io::Cursor; /// use quick_xml::de::Deserializer; /// use quick_xml::NsReader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_reader(Cursor::new( /// "" /// // 0 ^= 28 ^= 41 /// )); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &NsReader> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &NsReader { &self.reader } } impl<'i, R: BufRead> XmlRead<'i> for IoReader { fn next(&mut self) -> Result, DeError> { loop { self.buf.clear(); let event = self.reader.read_event_into(&mut self.buf)?; if let Some(event) = skip_uninterested(event) { return Ok(event.into_owned()); } } } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.reader.read_to_end_into(name, &mut self.buf) { Err(e) => Err(e.into()), Ok(_) => Ok(()), } } fn decoder(&self) -> Decoder { self.reader.decoder() } fn has_nil_attr(&self, start: &BytesStart) -> bool { start.attributes().has_nil(&self.reader) } } /// XML input source that reads from a slice of bytes and 
can borrow from it. /// /// You cannot create it, it is created automatically when you call /// [`Deserializer::from_str`]. pub struct SliceReader<'de> { reader: NsReader<&'de [u8]>, } impl<'de> SliceReader<'de> { /// Returns the underlying XML reader. /// /// ``` /// # use pretty_assertions::assert_eq; /// use serde::Deserialize; /// use quick_xml::de::Deserializer; /// use quick_xml::NsReader; /// /// #[derive(Deserialize)] /// struct SomeStruct { /// field1: String, /// field2: String, /// } /// /// // Try to deserialize from broken XML /// let mut de = Deserializer::from_str( /// "" /// // 0 ^= 28 ^= 41 /// ); /// /// let err = SomeStruct::deserialize(&mut de); /// assert!(err.is_err()); /// /// let reader: &NsReader<&[u8]> = de.get_ref().get_ref(); /// /// assert_eq!(reader.error_position(), 28); /// assert_eq!(reader.buffer_position(), 41); /// ``` pub const fn get_ref(&self) -> &NsReader<&'de [u8]> { &self.reader } } impl<'de> XmlRead<'de> for SliceReader<'de> { fn next(&mut self) -> Result, DeError> { loop { let event = self.reader.read_event()?; if let Some(event) = skip_uninterested(event) { return Ok(event); } } } fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { match self.reader.read_to_end(name) { Err(e) => Err(e.into()), Ok(_) => Ok(()), } } fn decoder(&self) -> Decoder { self.reader.decoder() } fn has_nil_attr(&self, start: &BytesStart) -> bool { start.attributes().has_nil(&self.reader) } } #[cfg(test)] mod tests { use super::*; use crate::errors::IllFormedError; use pretty_assertions::assert_eq; fn make_de<'de>(source: &'de str) -> Deserializer<'de, SliceReader<'de>> { dbg!(source); Deserializer::from_str(source) } #[cfg(feature = "overlapped-lists")] mod skip { use super::*; use crate::de::DeEvent::*; use crate::events::BytesEnd; use pretty_assertions::assert_eq; /// Checks that `peek()` and `read()` behaves correctly after `skip()` #[test] fn read_and_peek() { let mut de = make_de( "\ \ \ text\ \ \ \ \ \ ", ); // Initial conditions 
- both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Should skip first tree de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("inner")), Text("text".into()), Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); // Consume . Now unconsumed XML looks like: // // // text // // // // assert_eq!(de.next().unwrap(), Start(BytesStart::new("next"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("next"))); // We finish writing. Next call to `next()` should start replay that messages: // // // text // // // // and after that stream that messages: // // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ Start(BytesStart::new("inner")), Text("text".into()), Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Skip `$text` node and consume after it de.skip().unwrap(); assert_eq!( de.read, vec![ Start(BytesStart::new("inner")), End(BytesEnd::new("inner")), End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it Text("text".into()), ] ); assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); // We finish writing. 
Next call to `next()` should start replay messages: // // text // // // and after that stream that messages: // // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ // This comment here to keep the same formatting as others // otherwise rustfmt suggest one-line it Text("text".into()), End(BytesEnd::new("inner")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Text("text".into())); assert_eq!(de.next().unwrap(), End(BytesEnd::new("inner"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that `read_to_end()` behaves correctly after `skip()` #[test] fn read_to_end() { let mut de = make_de( "\ \ \ text\ \ \ \ \ \ \ ", ); // Initial conditions - both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); // Mark that start_replay() should begin replay from this point let checkpoint = de.skip_checkpoint(); assert_eq!(checkpoint, 0); // Skip the tree de.skip().unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); // Drop all events that represents tree. Now unconsumed XML looks like: // // // text // // // assert_eq!(de.next().unwrap(), Start(BytesStart::new("target"))); de.read_to_end(QName(b"target")).unwrap(); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); // We finish writing. 
Next call to `next()` should start replay that messages: // // // text // // // // and after that stream that messages: // // de.start_replay(checkpoint); assert_eq!( de.read, vec![ Start(BytesStart::new("skip")), Text("text".into()), Start(BytesStart::new("skip")), End(BytesEnd::new("skip")), End(BytesEnd::new("skip")), ] ); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skip"))); de.read_to_end(QName(b"skip")).unwrap(); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that replay replayes only part of events /// Test for https://github.com/tafia/quick-xml/issues/435 #[test] fn partial_replay() { let mut de = make_de( "\ \ \ \ \ \ \ \ \ \ \ ", ); // Initial conditions - both are empty assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); // start_replay() should start replay from this point let checkpoint1 = de.skip_checkpoint(); assert_eq!(checkpoint1, 0); // Should skip first and second elements de.skip().unwrap(); // skipped-1 de.skip().unwrap(); // skipped-2 assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); //////////////////////////////////////////////////////////////////////////////////////// assert_eq!(de.next().unwrap(), Start(BytesStart::new("inner"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("skipped-3"))); assert_eq!( de.read, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it Start(BytesStart::new("skipped-3")), ] ); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // start_replay() should start replay from this point let checkpoint2 = 
de.skip_checkpoint(); assert_eq!(checkpoint2, 4); // Should skip third and forth elements de.skip().unwrap(); // skipped-3 de.skip().unwrap(); // skipped-4 assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), // checkpoint 2 Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), ] ); assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-2"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-2"))); assert_eq!(de.peek().unwrap(), &End(BytesEnd::new("inner"))); assert_eq!( de.read, vec![ // This comment here to keep the same formatting of both arrays // otherwise rustfmt suggest one-line it End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ // checkpoint 1 Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), // checkpoint 2 Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), ] ); // Start replay events from checkpoint 2 de.start_replay(checkpoint2); assert_eq!( de.read, vec![ Start(BytesStart::new("skipped-3")), End(BytesEnd::new("skipped-3")), Start(BytesStart::new("skipped-4")), End(BytesEnd::new("skipped-4")), End(BytesEnd::new("inner")), ] ); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // Replayed events assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-3"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-3"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-4"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-4"))); assert_eq!(de.next().unwrap(), 
End(BytesEnd::new("inner"))); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); //////////////////////////////////////////////////////////////////////////////////////// // New events assert_eq!(de.next().unwrap(), Start(BytesStart::new("target-1"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("target-1"))); assert_eq!(de.read, vec![]); assert_eq!( de.write, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); // Start replay events from checkpoint 1 de.start_replay(checkpoint1); assert_eq!( de.read, vec![ Start(BytesStart::new("skipped-1")), End(BytesEnd::new("skipped-1")), Start(BytesStart::new("skipped-2")), End(BytesEnd::new("skipped-2")), ] ); assert_eq!(de.write, vec![]); // Replayed events assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-1"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-1"))); assert_eq!(de.next().unwrap(), Start(BytesStart::new("skipped-2"))); assert_eq!(de.next().unwrap(), End(BytesEnd::new("skipped-2"))); assert_eq!(de.read, vec![]); assert_eq!(de.write, vec![]); // New events assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Eof); } /// Checks that limiting buffer size works correctly #[test] fn limit() { use serde::Deserialize; #[derive(Debug, Deserialize)] #[allow(unused)] struct List { item: Vec<()>, } let mut de = make_de( "\ \ \ \ with text\ \ \ \ \ \ ", ); de.event_buffer_size(NonZeroUsize::new(3)); match List::deserialize(&mut de) { Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3), e => panic!("Expected `Err(TooManyEvents(3))`, but got `{:?}`", e), } } /// Without handling Eof in `skip` this test failed with memory allocation #[test] fn invalid_xml() { use crate::de::DeEvent::*; let mut de = 
make_de(""); // Cache all events let checkpoint = de.skip_checkpoint(); de.skip().unwrap(); de.start_replay(checkpoint); assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]); } } mod read_to_end { use super::*; use crate::de::DeEvent::*; use pretty_assertions::assert_eq; #[test] fn complex() { let mut de = make_de( r#" textcontent "#, ); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!( de.next().unwrap(), Start(BytesStart::from_content(r#"tag a="1""#, 3)) ); assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!( de.next().unwrap(), Start(BytesStart::from_content(r#"tag a="2""#, 3)) ); assert_eq!(de.next().unwrap(), Text("cdata content".into())); assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); assert_eq!(de.next().unwrap(), Text("\n ".into())); assert_eq!(de.next().unwrap(), Eof); } #[test] fn invalid_xml1() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); match de.read_to_end(QName(b"tag")) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::MissingEndTag("tag".into())) } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), Eof); } #[test] fn invalid_xml2() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); assert_eq!(de.peek().unwrap(), &Text("".into())); match de.read_to_end(QName(b"tag")) { 
Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::MissingEndTag("tag".into())) } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), Eof); } } #[test] fn borrowing_reader_parity() { let s = r#" Some text "#; let mut reader1 = IoReader { reader: NsReader::from_reader(s.as_bytes()), buf: Vec::new(), }; let mut reader2 = SliceReader { reader: NsReader::from_str(s), }; loop { let event1 = reader1.next().unwrap(); let event2 = reader2.next().unwrap(); if let (PayloadEvent::Eof, PayloadEvent::Eof) = (&event1, &event2) { break; } assert_eq!(event1, event2); } } #[test] fn borrowing_reader_events() { let s = r#" Some text "#; let mut reader = SliceReader { reader: NsReader::from_str(s), }; let config = reader.reader.config_mut(); config.expand_empty_elements = true; let mut events = Vec::new(); loop { let event = reader.next().unwrap(); if let PayloadEvent::Eof = event { break; } events.push(event); } use crate::de::PayloadEvent::*; assert_eq!( events, vec![ Text(BytesText::from_escaped("\n ")), Start(BytesStart::from_content( r#"item name="hello" source="world.rs""#, 4 )), Text(BytesText::from_escaped("Some text")), End(BytesEnd::new("item")), Text(BytesText::from_escaped("\n ")), Start(BytesStart::from_content("item2", 5)), End(BytesEnd::new("item2")), Text(BytesText::from_escaped("\n ")), Start(BytesStart::from_content("item3", 5)), End(BytesEnd::new("item3")), Text(BytesText::from_escaped("\n ")), Start(BytesStart::from_content(r#"item4 value="world" "#, 5)), End(BytesEnd::new("item4")), Text(BytesText::from_escaped("\n ")), ] ) } /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event, /// because parser reports error early #[test] fn read_string() { match from_str::(r#""#) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into())); } x => panic!( "Expected 
`Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } let s: String = from_str(r#""#).unwrap(); assert_eq!(s, ""); match from_str::(r#""#) { Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!( cause, IllFormedError::MismatchedEndTag { expected: "root".into(), found: "other".into(), } ), x => panic!("Expected `Err(InvalidXml(IllFormed(_))`, but got `{:?}`", x), } } /// Tests for https://github.com/tafia/quick-xml/issues/474. /// /// That tests ensures that comments and processed instructions is ignored /// and can split one logical string in pieces. mod merge_text { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de("text"); assert_eq!(de.next().unwrap(), DeEvent::Text("text".into())); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de("text and "); assert_eq!(de.next().unwrap(), DeEvent::Text("text and ".into())); } #[test] fn cdata_and_text() { let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata and text".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de(" and text"); assert_eq!(de.next().unwrap(), DeEvent::Text(" and text".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } mod comment_between { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de( "\ text \ \ text\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); } #[test] fn cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de( "\ text \ \ \ ", ); 
assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); } #[test] fn cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } } mod pi_between { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { let mut de = make_de( "\ text \ \ text\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text text".into())); } #[test] fn cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } #[test] fn text_and_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata".into())); } #[test] fn text_and_empty_cdata() { let mut de = make_de( "\ text \ \ \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("text ".into())); } #[test] fn cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata text ".into())); } #[test] fn empty_cdata_and_text() { let mut de = make_de( "\ \ \ text \ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); } #[test] fn cdata_and_cdata() { let mut de = make_de( "\ \ \ cdata]]>\ ", ); assert_eq!(de.next().unwrap(), DeEvent::Text("cdata]]>cdata".into())); } } } /// Tests for https://github.com/tafia/quick-xml/issues/474. /// /// This tests ensures that any combination of payload data is processed /// as expected. mod triples { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; /// ... 
// The same name is intentional #[allow(clippy::module_inception)] mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag3"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result to error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// ... 
mod end { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// text ... 
mod text { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } // start::text::text has no difference from start::text #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// ... 
mod cdata { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } /// Start from End event will always generate an error #[test] fn end() { let mut de = make_de(""); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } mod text { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), 
DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result in error #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text text2 "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// End event without corresponding start event will always generate an error #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } // text::text::something is equivalent to text::something mod cdata { 
use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text text2 "); assert_eq!( de.next().unwrap(), DeEvent::Text(" text cdata text2 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!( de.next().unwrap(), DeEvent::Text(" text cdata cdata2 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" text cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } mod cdata { use super::*; use pretty_assertions::assert_eq; mod start { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1"))); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } /// Not matching end tag will result in error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); 
assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } /// End event without corresponding start event will always generate an error #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } mod text { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected 
`Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } // cdata::text::text is equivalent to cdata::text #[test] fn cdata() { let mut de = make_de(" text "); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata text cdata2 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(" text "); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } // The same name is intentional #[allow(clippy::module_inception)] mod cdata { use super::*; use pretty_assertions::assert_eq; #[test] fn start() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn end() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); match de.next() { Err(DeError::InvalidXml(Error::IllFormed(cause))) => { assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into())); } x => panic!( "Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`", x ), } assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn text() { let mut de = make_de(" text "); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata cdata2 text ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn cdata() { let mut de = make_de(""); assert_eq!( de.next().unwrap(), DeEvent::Text(" cdata cdata2 cdata3 ".into()) ); assert_eq!(de.next().unwrap(), DeEvent::Eof); } #[test] fn eof() { let mut de = make_de(""); assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into())); assert_eq!(de.next().unwrap(), DeEvent::Eof); assert_eq!(de.next().unwrap(), DeEvent::Eof); } } } } } quick-xml-0.38.4/src/de/resolver.rs000064400000000000000000000067071046102023000152320ustar 00000000000000//! 
Entity resolver module use std::convert::Infallible; use std::error::Error; use crate::escape::resolve_predefined_entity; use crate::events::BytesText; /// Used to resolve unknown entities while parsing /// /// # Example /// /// ``` /// # use serde::Deserialize; /// # use pretty_assertions::assert_eq; /// use regex::bytes::Regex; /// use std::collections::BTreeMap; /// use std::string::FromUtf8Error; /// use quick_xml::de::{Deserializer, EntityResolver}; /// use quick_xml::events::BytesText; /// /// struct DocTypeEntityResolver { /// re: Regex, /// map: BTreeMap, /// } /// /// impl Default for DocTypeEntityResolver { /// fn default() -> Self { /// Self { /// // We do not focus on true parsing in this example /// // You should use special libraries to parse DTD /// re: Regex::new(r#""#).unwrap(), /// map: BTreeMap::new(), /// } /// } /// } /// /// impl EntityResolver for DocTypeEntityResolver { /// type Error = FromUtf8Error; /// /// fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error> { /// for cap in self.re.captures_iter(&doctype) { /// self.map.insert( /// String::from_utf8(cap[1].to_vec())?, /// String::from_utf8(cap[2].to_vec())?, /// ); /// } /// Ok(()) /// } /// /// fn resolve(&self, entity: &str) -> Option<&str> { /// self.map.get(entity).map(|s| s.as_str()) /// } /// } /// /// let xml_reader = br#" /// ]> /// /// &e1; /// /// "#.as_ref(); /// /// let mut de = Deserializer::with_resolver( /// xml_reader, /// DocTypeEntityResolver::default(), /// ); /// let data: BTreeMap = BTreeMap::deserialize(&mut de).unwrap(); /// /// assert_eq!(data.get("entity_one"), Some(&"entity 1".to_string())); /// ``` pub trait EntityResolver { /// The error type that represents DTD parse error type Error: Error; /// Called on contents of [`Event::DocType`] to capture declared entities. /// Can be called multiple times, for each parsed `` declaration. 
/// /// [`Event::DocType`]: crate::events::Event::DocType fn capture(&mut self, doctype: BytesText) -> Result<(), Self::Error>; /// Called when an entity needs to be resolved. /// /// `None` is returned if a suitable value can not be found. /// In that case an [`EscapeError::UnrecognizedEntity`] will be returned by /// a deserializer. /// /// [`EscapeError::UnrecognizedEntity`]: crate::escape::EscapeError::UnrecognizedEntity fn resolve(&self, entity: &str) -> Option<&str>; } /// An [`EntityResolver`] that resolves only predefined entities: /// /// | Entity | Resolution /// |--------|------------ /// |`<` | `<` /// |`>` | `>` /// |`&` | `&` /// |`'`| `'` /// |`"`| `"` #[derive(Default, Copy, Clone)] pub struct PredefinedEntityResolver; impl EntityResolver for PredefinedEntityResolver { type Error = Infallible; #[inline] fn capture(&mut self, _doctype: BytesText) -> Result<(), Self::Error> { Ok(()) } #[inline] fn resolve(&self, entity: &str) -> Option<&str> { resolve_predefined_entity(entity) } } quick-xml-0.38.4/src/de/simple_type.rs000064400000000000000000001426071046102023000157230ustar 00000000000000//! Contains Serde `Deserializer` for XML [simple types] [as defined] in the XML Schema. //! //! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp //! [as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition use crate::de::Text; use crate::encoding::Decoder; use crate::errors::serialize::DeError; use crate::escape::unescape; use crate::utils::{trim_xml_spaces, CowRef}; use memchr::memchr; use serde::de::value::UnitDeserializer; use serde::de::{ DeserializeSeed, Deserializer, EnumAccess, IntoDeserializer, SeqAccess, VariantAccess, Visitor, }; use serde::serde_if_integer128; use std::borrow::Cow; use std::ops::Range; macro_rules! 
deserialize_num {
    // Generates a `Deserializer` method which parses the content (with leading
    // and trailing XML whitespace removed) as a number and passes it to the
    // corresponding `Visitor` method.
    ($method:ident => $visit:ident) => {
        #[inline]
        fn $method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
        where
            V: Visitor<'de>,
        {
            let text: &str = self.content.as_ref();
            match trim_xml_spaces(text).parse() {
                Ok(number) => visitor.$visit(number),
                // Not a number: hand the string to the visitor and let it
                // decide whether that is acceptable
                Err(_) => self.deserialize_str(visitor),
            }
        }
    };
}

macro_rules! deserialize_primitive {
    // Generates a `Deserializer` method which decodes the content of `self`
    // and delegates to the method with the same name of `AtomicDeserializer`.
    ($method:ident) => {
        fn $method<V>(self, visitor: V) -> Result<V::Value, Self::Error>
        where
            V: Visitor<'de>,
        {
            let de = AtomicDeserializer {
                content: self.decode()?,
                escaped: self.escaped,
            };
            de.$method(visitor)
        }
    };
}

macro_rules! unsupported {
    // Generates a `Deserializer` method for a type that cannot be represented
    // by a simple type. The optional parenthesized list contains the types of
    // additional (ignored) arguments that precede the visitor.
    (
        $deserialize:ident
        $(
            ($($type:ty),*)
        )?
    ) => {
        #[inline]
        fn $deserialize<V: Visitor<'de>>(
            self,
            $($(_: $type,)*)?
            visitor: V
        ) -> Result<V::Value, Self::Error> {
            // Deserializer methods are only hints, if deserializer could not satisfy
            // request, it should return the data that it has. It is responsibility
            // of a Visitor to return an error if it does not understand the data
            self.deserialize_str(visitor)
        }
    };
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A version of [`Cow`] that can borrow from two different buffers, one of them
/// is a deserializer input, and conceptually contains only part of owned data.
///
/// # Lifetimes
/// - `'de` -- lifetime of the data that the deserializer borrows from the parsed input
/// - `'a` -- lifetime of the data that is owned by a deserializer
enum Content<'de, 'a> {
    /// An input borrowed from the parsed data
    Input(&'de str),
    /// An input borrowed from the buffer owned by another deserializer
    Slice(&'a str),
    /// An input taken from an external deserializer, owned by that deserializer.
    /// Only the part of this data located after the offset represented by `usize`
    /// is used to deserialize data; the rest is garbage that is not dropped
    /// because we do not want to make reallocations if they are not required.
    Owned(String, usize),
}
impl<'de, 'a> Content<'de, 'a> {
    /// Returns string representation of the content.
    ///
    /// For the `Owned` variant only the part of the string that starts at the
    /// stored offset is returned.
    fn as_str(&self) -> &str {
        match self {
            Content::Input(s) => s,
            Content::Slice(s) => s,
            Content::Owned(s, offset) => s.split_at(*offset).1,
        }
    }
}

/// A deserializer that handles ordinary [simple type definition][item] with
/// `{variety} = atomic`, or an ordinary [simple type] definition with
/// `{variety} = union` whose basic members are all atomic.
///
/// This deserializer can deserialize only primitive types:
/// - numbers
/// - booleans
/// - strings
/// - units
/// - options
/// - unit variants of enums
///
/// Identifiers are represented as strings and deserialized accordingly.
///
/// Deserialization of all other types will provide a string and in most cases
/// the deserialization will fail because the visitor does not expect that.
///
/// The `Owned` variant of the content acts as a storage for data, allocated by
/// an external deserializer that passes it via [`ListIter`].
///
/// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition
/// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
struct AtomicDeserializer<'de, 'a> {
    /// Content of the attribute value, text content or CDATA content
    content: CowRef<'de, 'a, str>,
    /// If `true`, `content` is in an escaped form and should be unescaped before use
    escaped: bool,
}

impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> {
    type Error = DeError;

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// According to the <https://www.w3.org/TR/xmlschema11-2/#boolean>,
    /// valid boolean representations are only `"true"`, `"false"`, `"1"`,
    /// and `"0"`.
    ///
    /// The content is unescaped first (if required) and surrounding XML
    /// whitespace is ignored. Any other value is passed to the visitor as
    /// a string.
    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        let text = self.content.as_ref();
        let text = if self.escaped {
            unescape(text)?
        } else {
            Cow::Borrowed(text)
        };
        match trim_xml_spaces(&text) {
            "1" | "true" => visitor.visit_bool(true),
            "0" | "false" => visitor.visit_bool(false),
            _ => match text {
                // Nothing was changed by unescaping, so the original content
                // can be given to the visitor as is
                Cow::Borrowed(_) => self.content.deserialize_str(visitor),
                Cow::Owned(s) => visitor.visit_string(s),
            },
        }
    }

    deserialize_num!(deserialize_i8 => visit_i8);
    deserialize_num!(deserialize_i16 => visit_i16);
    deserialize_num!(deserialize_i32 => visit_i32);
    deserialize_num!(deserialize_i64 => visit_i64);

    deserialize_num!(deserialize_u8 => visit_u8);
    deserialize_num!(deserialize_u16 => visit_u16);
    deserialize_num!(deserialize_u32 => visit_u32);
    deserialize_num!(deserialize_u64 => visit_u64);

    serde_if_integer128! {
        deserialize_num!(deserialize_i128 => visit_i128);
        deserialize_num!(deserialize_u128 => visit_u128);
    }

    deserialize_num!(deserialize_f32 => visit_f32);
    deserialize_num!(deserialize_f64 => visit_f64);

    /// Unescapes the content (if required) and supplies it to the visitor as
    /// a string with leading and trailing XML whitespace removed, unless
    /// nothing would remain after trimming: in that case the untrimmed string
    /// is supplied.
    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        let text: &str = self.content.as_ref();
        let text = if self.escaped {
            unescape(text)?
        } else {
            Cow::Borrowed(text)
        };
        let trimmed = trim_xml_spaces(&text);
        // If string is empty or contains only XML space characters (probably only one),
        // deserialize as usual string and allow visitor to accept or reject it.
        // Otherwise trim spaces and allow visitor to accept or reject the rest.
        if trimmed.is_empty() {
            match text {
                Cow::Borrowed(_) => self.content.deserialize_str(visitor),
                Cow::Owned(s) => visitor.visit_string(s),
            }
        } else {
            visitor.visit_str(trimmed)
        }
    }

    /// Supply to the visitor borrowed string, string slice, or owned string
    /// depending on the kind of input and presence of the escaped data.
    ///
    /// If string requires unescaping, then calls [`Visitor::visit_string`] with
    /// new allocated buffer with unescaped data.
    ///
    /// Otherwise calls
    /// - [`Visitor::visit_borrowed_str`] if data borrowed from the input
    /// - [`Visitor::visit_str`] if data borrowed from other deserializer
    /// - [`Visitor::visit_string`] if data owned by this deserializer
    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        if self.escaped {
            match unescape(self.content.as_ref())? {
                // Unescaping changed nothing, reuse the original content
                Cow::Borrowed(_) => self.content.deserialize_str(visitor),
                Cow::Owned(s) => visitor.visit_string(s),
            }
        } else {
            self.content.deserialize_str(visitor)
        }
    }

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// If `content` is an empty string then calls [`Visitor::visit_none`],
    /// otherwise calls [`Visitor::visit_some`] with itself
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        let text: &str = self.content.as_ref();
        if text.is_empty() {
            visitor.visit_none()
        } else {
            visitor.visit_some(self)
        }
    }

    /// Calls [`Visitor::visit_unit`] regardless of the content
    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }

    /// Forwards deserialization to the [`Self::deserialize_unit`]
    fn deserialize_unit_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_unit(visitor)
    }

    /// Calls [`Visitor::visit_newtype_struct`] with itself as a deserializer
    /// of the wrapped value
    fn deserialize_newtype_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_newtype_struct(self)
    }

    /// Calls [`Visitor::visit_enum`] with itself as an [`EnumAccess`]
    fn deserialize_enum<V>(
        self,
        _name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_enum(self)
    }

    /// Forwards deserialization to the [`Self::deserialize_str`]
    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        self.deserialize_str(visitor)
    }

    /// Calls [`Visitor::visit_unit`] without looking at the content
    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }

    unsupported!(deserialize_bytes);
    unsupported!(deserialize_byte_buf);
    unsupported!(deserialize_seq);
    unsupported!(deserialize_tuple(usize));
    unsupported!(deserialize_tuple_struct(&'static str, usize));
    unsupported!(deserialize_map);
    unsupported!(deserialize_struct(&'static str, &'static [&'static str]));
}

impl<'de, 'a> EnumAccess<'de> for AtomicDeserializer<'de, 'a> {
    type Error = DeError;
    type Variant = UnitOnly;

    /// Deserializes the variant name from the content of this deserializer.
    /// The returned variant accessor supports only unit variants.
    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), DeError>
    where
        V: DeserializeSeed<'de>,
    {
        let name = seed.deserialize(self)?;
        Ok((name, UnitOnly))
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Deserializer of variant data, that supports only unit variants.
/// Attempt to deserialize newtype will provide [`UnitDeserializer`].
/// Attempt to deserialize tuple or struct variant will result to call of
/// [`Visitor::visit_unit`].
pub struct UnitOnly;
impl<'de> VariantAccess<'de> for UnitOnly {
    type Error = DeError;

    #[inline]
    fn unit_variant(self) -> Result<(), Self::Error> {
        Ok(())
    }

    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        seed.deserialize(UnitDeserializer::<Self::Error>::new())
    }

    #[inline]
    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }

    #[inline]
    fn struct_variant<V>(
        self,
        _fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_unit()
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Iterator over string sub-slices delimited by one or several spaces.
/// Contains decoded value of the `simpleType`.
/// Iteration ends when list contains `None`.
struct ListIter<'de, 'a> { /// If `Some`, contains unconsumed data of the list content: Option>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, } impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> { type Error = DeError; fn next_element_seed(&mut self, seed: T) -> Result, DeError> where T: DeserializeSeed<'de>, { if let Some(mut content) = self.content.take() { const DELIMITER: u8 = b' '; loop { let string = content.as_str(); if string.is_empty() { return Ok(None); } return match memchr(DELIMITER, string.as_bytes()) { // No delimiters in the `content`, deserialize it as a whole atomic None => match content { Content::Input(s) => seed.deserialize(AtomicDeserializer { content: CowRef::Input(s), escaped: self.escaped, }), Content::Slice(s) => seed.deserialize(AtomicDeserializer { content: CowRef::Slice(s), escaped: self.escaped, }), Content::Owned(s, 0) => seed.deserialize(AtomicDeserializer { content: CowRef::Owned(s), escaped: self.escaped, }), Content::Owned(s, offset) => seed.deserialize(AtomicDeserializer { content: CowRef::Slice(s.split_at(offset).1), escaped: self.escaped, }), }, // `content` started with a space, skip them all Some(0) => { // Skip all spaces let start = string.as_bytes().iter().position(|ch| *ch != DELIMITER); content = match (start, content) { // We cannot find any non-space character, so string contains only spaces (None, _) => return Ok(None), // Borrow result from input or deserializer depending on the initial borrowing (Some(start), Content::Input(s)) => Content::Input(s.split_at(start).1), (Some(start), Content::Slice(s)) => Content::Slice(s.split_at(start).1), // Skip additional bytes if we own data (Some(start), Content::Owned(s, skip)) => { Content::Owned(s, skip + start) } }; continue; } // `content` started from an atomic Some(end) => match content { // Borrow for the next iteration from input or deserializer depending on // the initial borrowing Content::Input(s) => { let (item, rest) = 
s.split_at(end); self.content = Some(Content::Input(rest)); seed.deserialize(AtomicDeserializer { content: CowRef::Input(item), escaped: self.escaped, }) } Content::Slice(s) => { let (item, rest) = s.split_at(end); self.content = Some(Content::Slice(rest)); seed.deserialize(AtomicDeserializer { content: CowRef::Slice(item), escaped: self.escaped, }) } // Skip additional bytes if we own data for next iteration, but deserialize from // the borrowed data from our buffer Content::Owned(s, skip) => { let rest = s.split_at(skip).1; let item = rest.split_at(end).0; let result = seed.deserialize(AtomicDeserializer { content: CowRef::Slice(item), escaped: self.escaped, }); self.content = Some(Content::Owned(s, skip + end)); result } }, } .map(Some); } } Ok(None) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A deserializer for an xml probably escaped and encoded value of XSD [simple types]. /// This deserializer will borrow from the input as much as possible. /// /// `deserialize_any()` returns the whole string that deserializer contains. /// /// Escaping the value is actually not always necessary, for instance when /// converting to a float, we don't expect any escapable character anyway. /// In that cases deserializer skips unescaping step. /// /// Used for deserialize values from: /// - attribute values (`<... 
...="value" ...>`) /// - mixed text / CDATA content (`<...>text`) /// /// This deserializer processes items as following: /// - numbers are parsed from a text content using [`FromStr`]; in case of error /// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`] /// is called; it is responsibility of the type to return an error if it does /// not able to process passed data; /// - booleans converted from the text according to the XML [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; /// - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], /// or [`Visitor::visit_string`]; it is responsibility of the type to return /// an error if it does not able to process passed data; /// - strings returned as is; /// - characters also returned as strings. If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option` always deserialized as `Some` using the same deserializer. /// If attribute or text content is missed, then the deserializer even wouldn't /// be used, so if it is used, then the value should be; /// - units (`()`) and unit structs always deserialized successfully, the content is ignored; /// - newtype structs forwards deserialization to the inner type using the same /// deserializer; /// - sequences, tuples and tuple structs are deserialized as `xs:list`s. 
Only /// sequences of primitive types is possible to deserialize this way and they /// should be delimited by a space (` `, `\t`, `\r`, or `\n`); /// - structs and maps delegates to [`Self::deserialize_str`] which calls /// [`Visitor::visit_borrowed_str`] or [`Visitor::visit_string`]; it is responsibility /// of the type to return an error if it does not able to process passed data; /// - enums: /// - the variant name is deserialized using the same deserializer; /// - the content is deserialized using the deserializer that always returns unit (`()`): /// - unit variants: just return `()`; /// - newtype variants: deserialize from [`UnitDeserializer`]; /// - tuple and struct variants: call [`Visitor::visit_unit`]; /// - identifiers are deserialized as strings. /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean pub struct SimpleTypeDeserializer<'de, 'a> { /// - In case of attribute contains escaped attribute value /// - In case of text contains unescaped text value content: CowRef<'de, 'a, [u8]>, /// If `true`, `content` in escaped form and should be unescaped before use escaped: bool, /// Decoder used to deserialize string data, numeric and boolean data. /// Not used for deserializing raw byte buffers decoder: Decoder, } impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Creates a deserializer from a value, that possible borrowed from input. /// /// It is assumed that `text` does not have entities. pub fn from_text(text: Cow<'de, str>) -> Self { let content = match text { Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()), Cow::Owned(content) => CowRef::Owned(content.into_bytes()), }; Self::new(content, false, Decoder::utf8()) } /// Creates a deserializer from an XML text node, that possible borrowed from input. /// /// It is assumed that `text` does not have entities. 
/// /// This constructor used internally to deserialize from text nodes. pub fn from_text_content(value: Text<'de>) -> Self { Self::from_text(value.text) } /// Creates a deserializer from a part of value at specified range. /// /// This constructor used internally to deserialize from attribute values. #[allow(clippy::ptr_arg)] pub(crate) fn from_part( value: &'a Cow<'de, [u8]>, range: Range, decoder: Decoder, ) -> Self { let content = match value { Cow::Borrowed(slice) => CowRef::Input(&slice[range]), Cow::Owned(slice) => CowRef::Slice(&slice[range]), }; Self::new(content, true, decoder) } /// Constructor for tests #[inline] const fn new(content: CowRef<'de, 'a, [u8]>, escaped: bool, decoder: Decoder) -> Self { Self { content, escaped, decoder, } } /// Decodes raw bytes using the encoding specified. /// The method will borrow if has the UTF-8 compatible representation. #[inline] fn decode<'b>(&'b self) -> Result, DeError> { Ok(match self.content { CowRef::Input(content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => CowRef::Input(content), Cow::Owned(content) => CowRef::Owned(content), }, CowRef::Slice(content) => match self.decoder.decode(content)? { Cow::Borrowed(content) => CowRef::Slice(content), Cow::Owned(content) => CowRef::Owned(content), }, CowRef::Owned(ref content) => match self.decoder.decode(content)? 
{ Cow::Borrowed(content) => CowRef::Slice(content), Cow::Owned(content) => CowRef::Owned(content), }, }) } } impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; /// Forwards deserialization to the [`Self::deserialize_str`] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } deserialize_primitive!(deserialize_bool); deserialize_primitive!(deserialize_i8); deserialize_primitive!(deserialize_i16); deserialize_primitive!(deserialize_i32); deserialize_primitive!(deserialize_i64); deserialize_primitive!(deserialize_u8); deserialize_primitive!(deserialize_u16); deserialize_primitive!(deserialize_u32); deserialize_primitive!(deserialize_u64); serde_if_integer128! { deserialize_primitive!(deserialize_i128); deserialize_primitive!(deserialize_u128); } deserialize_primitive!(deserialize_f32); deserialize_primitive!(deserialize_f64); deserialize_primitive!(deserialize_char); deserialize_primitive!(deserialize_str); deserialize_primitive!(deserialize_string); deserialize_primitive!(deserialize_bytes); deserialize_primitive!(deserialize_byte_buf); fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_some(self) } #[inline] fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } /// Forwards deserialization to the [`Self::deserialize_unit`] #[inline] fn deserialize_unit_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_unit(visitor) } fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { let content = match self.decode()? 
{ CowRef::Input(s) => Content::Input(s), CowRef::Slice(s) => Content::Slice(s), CowRef::Owned(s) => Content::Owned(s, 0), }; visitor.visit_seq(ListIter { content: Some(content), escaped: self.escaped, }) } /// Representation of tuples the same as [sequences][Self::deserialize_seq]. #[inline] fn deserialize_tuple(self, _len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_seq(visitor) } /// Representation of named tuples the same as [unnamed tuples][Self::deserialize_tuple]. #[inline] fn deserialize_tuple_struct( self, _name: &'static str, len: usize, visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } unsupported!(deserialize_map); unsupported!(deserialize_struct(&'static str, &'static [&'static str])); fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } /// Forwards deserialization to the [`Self::deserialize_str`] #[inline] fn deserialize_identifier(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } #[inline] fn deserialize_ignored_any(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } } impl<'de, 'a> EnumAccess<'de> for SimpleTypeDeserializer<'de, 'a> { type Error = DeError; type Variant = UnitOnly; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> where V: DeserializeSeed<'de>, { let name = seed.deserialize(self)?; Ok((name, UnitOnly)) } } impl<'de, 'a> IntoDeserializer<'de, DeError> for SimpleTypeDeserializer<'de, 'a> { type Deserializer = Self; #[inline] fn into_deserializer(self) -> Self { self } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::QuoteLevel; use crate::utils::{ByteBuf, Bytes}; use serde::de::IgnoredAny; use 
serde::{Deserialize, Serialize}; use std::collections::HashMap; macro_rules! simple_only { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } macro_rules! simple { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer assert_eq!( data.serialize(SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, }) .unwrap(), xml ); } }; } macro_rules! err { ($encoding:ident, $name:ident: $type:ty = $xml:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let decoder = Decoder::$encoding(); let xml = $xml; let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Unit; #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Newtype(String); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Tuple((), ()); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct BorrowedNewtype<'a>(&'a str); #[derive(Debug, Deserialize, Serialize, PartialEq)] struct Struct { key: String, val: usize, } #[derive(Debug, Deserialize, Serialize, PartialEq)] enum Enum { Unit, Newtype(String), Tuple(String, usize), Struct { key: String, val: usize }, } 
#[derive(Debug, Deserialize, PartialEq)] #[serde(field_identifier)] enum Id { Field, } #[derive(Debug, Deserialize)] #[serde(transparent)] struct Any(IgnoredAny); impl PartialEq for Any { fn eq(&self, _other: &Any) -> bool { true } } /// Tests for deserialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use crate::se::simple_type::AtomicSerializer; use pretty_assertions::assert_eq; use std::ops::Deref; /// Checks that given `$input` successfully deserializing into given `$result` macro_rules! deserialized_to_only { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: CowRef::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); } }; } /// Checks that given `$input` successfully deserializing into given `$result` /// and the result is serialized back to the `$input` macro_rules! deserialized_to { ($name:ident: $type:ty = $input:literal => $result:expr) => { #[test] fn $name() { let de = AtomicDeserializer { content: CowRef::Input($input), escaped: true, }; let data: $type = Deserialize::deserialize(de).unwrap(); assert_eq!(data, $result); // Roundtrip to ensure that serializer corresponds to deserializer let mut buffer = String::new(); let has_written = data .serialize(AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, write_delimiter: false, }) .unwrap(); assert_eq!(buffer, $input); assert_eq!(has_written, !buffer.is_empty()); } }; } /// Checks that attempt to deserialize given `$input` as a `$type` results to a /// deserialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { #[test] fn $name() { let de = AtomicDeserializer { content: CowRef::Input($input), escaped: true, }; let err = <$type as Deserialize>::deserialize(de).unwrap_err(); match err { DeError::$kind(e) => assert_eq!(e, $reason), _ => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, err ), } } }; } deserialized_to!(false_: bool = "false" => false); deserialized_to!(true_: bool = "true" => true); deserialized_to!(i8_: i8 = "-2" => -2); deserialized_to!(i16_: i16 = "-2" => -2); deserialized_to!(i32_: i32 = "-2" => -2); deserialized_to!(i64_: i64 = "-2" => -2); deserialized_to!(u8_: u8 = "3" => 3); deserialized_to!(u16_: u16 = "3" => 3); deserialized_to!(u32_: u32 = "3" => 3); deserialized_to!(u64_: u64 = "3" => 3); serde_if_integer128! { deserialized_to!(i128_: i128 = "-2" => -2); deserialized_to!(u128_: u128 = "2" => 2); } deserialized_to!(f32_: f32 = "1.23" => 1.23); deserialized_to!(f64_: f64 = "1.23" => 1.23); deserialized_to!(char_unescaped: char = "h" => 'h'); deserialized_to!(char_escaped: char = "<" => '<'); deserialized_to!(string: String = "<escaped string" => " "non-escaped string"); err!(escaped_str: &str = "escaped string" => Custom("invalid type: string \"escaped string\", expected a borrowed string")); err!(byte_buf: ByteBuf = "<escaped string" => Custom("invalid type: string \" Custom("invalid type: string \"non-escaped string\", expected borrowed bytes")); deserialized_to!(option_none: Option<&str> = "" => None); deserialized_to!(option_some: Option<&str> = "non-escaped-string" => Some("non-escaped-string")); deserialized_to_only!(unit: () = "anything" => ()); deserialized_to_only!(unit_struct: Unit = "anything" => Unit); deserialized_to!(newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(seq: Vec<()> = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a 
sequence")); err!(tuple: ((), ()) = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a tuple of size 2")); err!(tuple_struct: Tuple = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected tuple struct Tuple")); err!(map: HashMap<(), ()> = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected a map")); err!(struct_: Struct = "non-escaped string" => Custom("invalid type: string \"non-escaped string\", expected struct Struct")); deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); err!(enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); err!(enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); err!(enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); deserialized_to_only!(identifier: Id = "Field" => Id::Field); deserialized_to_only!(ignored_any: Any = "any data" => Any(IgnoredAny)); /// Checks that deserialization from an owned content is working #[test] #[cfg(feature = "encoding")] fn owned_data() { let de = AtomicDeserializer { content: CowRef::Owned("string slice".into()), escaped: true, }; assert_eq!(de.content.deref(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "string slice"); } /// Checks that deserialization from a content borrowed from some /// buffer other that input is working #[test] fn borrowed_from_deserializer() { let de = AtomicDeserializer { content: CowRef::Slice("string slice"), escaped: true, }; assert_eq!(de.content.deref(), "string slice"); let data: String = Deserialize::deserialize(de).unwrap(); assert_eq!(data, "string slice"); } } /// Module for testing list accessor mod list { use super::*; use 
pretty_assertions::assert_eq; #[test] fn empty() { let mut seq = ListIter { content: Some(Content::Input("")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn only_spaces() { let mut seq = ListIter { content: Some(Content::Input(" ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn one_item() { let mut seq = ListIter { content: Some(Content::Input("abc")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn two_items() { let mut seq = ListIter { content: Some(Content::Input("abc def")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn leading_spaces() { let mut seq = ListIter { content: Some(Content::Input(" def")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn trailing_spaces() { let mut seq = ListIter { content: Some(Content::Input("abc ")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); assert_eq!(seq.next_element::<&str>().unwrap(), None); assert_eq!(seq.next_element::<&str>().unwrap(), None); } #[test] fn mixed_types() { let mut seq = ListIter { content: Some(Content::Input("string 1.23 42 true false h Unit")), escaped: true, }; assert_eq!(seq.next_element::<&str>().unwrap(), Some("string")); assert_eq!(seq.next_element::().unwrap(), Some(1.23)); assert_eq!(seq.next_element::().unwrap(), Some(42)); 
assert_eq!(seq.next_element::().unwrap(), Some(true)); assert_eq!(seq.next_element::().unwrap(), Some(false)); assert_eq!(seq.next_element::().unwrap(), Some('h')); assert_eq!(seq.next_element::().unwrap(), Some(Enum::Unit)); assert_eq!(seq.next_element::<()>().unwrap(), None); assert_eq!(seq.next_element::<()>().unwrap(), None); } } mod utf8 { use super::*; use pretty_assertions::assert_eq; simple!(utf8, i8_: i8 = "-2" => -2); simple!(utf8, i16_: i16 = "-2" => -2); simple!(utf8, i32_: i32 = "-2" => -2); simple!(utf8, i64_: i64 = "-2" => -2); simple!(utf8, u8_: u8 = "3" => 3); simple!(utf8, u16_: u16 = "3" => 3); simple!(utf8, u32_: u32 = "3" => 3); simple!(utf8, u64_: u64 = "3" => 3); serde_if_integer128! { simple!(utf8, i128_: i128 = "-2" => -2); simple!(utf8, u128_: u128 = "2" => 2); } simple!(utf8, f32_: f32 = "1.23" => 1.23); simple!(utf8, f64_: f64 = "1.23" => 1.23); simple!(utf8, false_: bool = "false" => false); simple!(utf8, true_: bool = "true" => true); simple!(utf8, char_unescaped: char = "h" => 'h'); simple!(utf8, char_escaped: char = "<" => '<'); simple!(utf8, string: String = "<escaped string" => " Custom("invalid type: string \" "non-escaped string"); err!(utf8, borrowed_bytes: Bytes = "<escaped string" => Custom("invalid type: string \" = "" => Some("")); simple!(utf8, option_some: Option<&str> = "non-escaped string" => Some("non-escaped string")); simple_only!(utf8, unit: () = "any data" => ()); simple_only!(utf8, unit_struct: Unit = "any data" => Unit); // Serializer will not escape space because this is unnecessary. 
// Because borrowing has meaning only for deserializer, no need to test // roundtrip here, it is already tested for strings where compatible list // of escaped characters is used simple_only!(utf8, newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); err!(utf8, map: HashMap<(), ()> = "any data" => Custom("invalid type: string \"any data\", expected a map")); err!(utf8, struct_: Struct = "any data" => Custom("invalid type: string \"any data\", expected struct Struct")); simple!(utf8, enum_unit: Enum = "Unit" => Enum::Unit); err!(utf8, enum_newtype: Enum = "Newtype" => Custom("invalid type: unit value, expected a string")); err!(utf8, enum_tuple: Enum = "Tuple" => Custom("invalid type: unit value, expected tuple variant Enum::Tuple")); err!(utf8, enum_struct: Enum = "Struct" => Custom("invalid type: unit value, expected struct variant Enum::Struct")); err!(utf8, enum_other: Enum = "any data" => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); simple_only!(utf8, identifier: Id = "Field" => Id::Field); simple_only!(utf8, ignored_any: Any = "any data" => Any(IgnoredAny)); } #[cfg(feature = "encoding")] mod utf16 { use super::*; use pretty_assertions::assert_eq; fn to_utf16(string: &str) -> Vec { let mut bytes = Vec::new(); for ch in string.encode_utf16() { bytes.extend_from_slice(&ch.to_le_bytes()); } bytes } macro_rules! utf16 { ($name:ident: $type:ty = $xml:literal => $result:expr) => { simple_only!(utf16, $name: $type = to_utf16($xml) => $result); }; } macro_rules! unsupported { ($name:ident: $type:ty = $xml:literal => $err:literal) => { err!(utf16, $name: $type = to_utf16($xml) => Custom($err)); }; } utf16!(i8_: i8 = "-2" => -2); utf16!(i16_: i16 = "-2" => -2); utf16!(i32_: i32 = "-2" => -2); utf16!(i64_: i64 = "-2" => -2); utf16!(u8_: u8 = "3" => 3); utf16!(u16_: u16 = "3" => 3); utf16!(u32_: u32 = "3" => 3); utf16!(u64_: u64 = "3" => 3); serde_if_integer128! 
{ utf16!(i128_: i128 = "-2" => -2); utf16!(u128_: u128 = "2" => 2); } utf16!(f32_: f32 = "1.23" => 1.23); utf16!(f64_: f64 = "1.23" => 1.23); utf16!(false_: bool = "false" => false); utf16!(true_: bool = "true" => true); utf16!(char_unescaped: char = "h" => 'h'); utf16!(char_escaped: char = "<" => '<'); utf16!(string: String = "<escaped string" => " "invalid type: string \" = "" => Some(())); utf16!(option_some: Option<()> = "any data" => Some(())); utf16!(unit: () = "any data" => ()); utf16!(unit_struct: Unit = "any data" => Unit); utf16!(newtype_owned: Newtype = "<escaped string" => Newtype(" "invalid type: string \"non-escaped string\", expected a borrowed string"); unsupported!(map: HashMap<(), ()> = "any data" => "invalid type: string \"any data\", expected a map"); unsupported!(struct_: Struct = "any data" => "invalid type: string \"any data\", expected struct Struct"); utf16!(enum_unit: Enum = "Unit" => Enum::Unit); unsupported!(enum_newtype: Enum = "Newtype" => "invalid type: unit value, expected a string"); unsupported!(enum_tuple: Enum = "Tuple" => "invalid type: unit value, expected tuple variant Enum::Tuple"); unsupported!(enum_struct: Enum = "Struct" => "invalid type: unit value, expected struct variant Enum::Struct"); unsupported!(enum_other: Enum = "any data" => "unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`"); utf16!(identifier: Id = "Field" => Id::Field); utf16!(ignored_any: Any = "any data" => Any(IgnoredAny)); } } quick-xml-0.38.4/src/de/text.rs000064400000000000000000000156061046102023000143530ustar 00000000000000use crate::{ de::simple_type::SimpleTypeDeserializer, de::{Text, TEXT_KEY}, errors::serialize::DeError, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{DeserializeSeed, Deserializer, EnumAccess, VariantAccess, Visitor}; use serde::serde_if_integer128; use std::borrow::Cow; /// A deserializer for a single text node of a mixed sequence of tags and text. 
/// /// This deserializer are very similar to a [`MapValueDeserializer`] (when it /// processes the [`DeEvent::Text`] event). The only difference in the /// `deserialize_seq` method. This deserializer will perform deserialization /// from a textual content, whereas the [`MapValueDeserializer`] will iterate /// over tags / text within it's parent tag. /// /// This deserializer processes items as following: /// - numbers are parsed from a text content using [`FromStr`]; in case of error /// [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], or [`Visitor::visit_string`] /// is called; it is responsibility of the type to return an error if it does /// not able to process passed data; /// - booleans converted from the text according to the XML [specification]: /// - `"true"` and `"1"` converted to `true`; /// - `"false"` and `"0"` converted to `false`; /// - everything else calls [`Visitor::visit_borrowed_str`], [`Visitor::visit_str`], /// or [`Visitor::visit_string`]; it is responsibility of the type to return /// an error if it does not able to process passed data; /// - strings returned as is; /// - characters also returned as strings. 
If string contain more than one character /// or empty, it is responsibility of a type to return an error; /// - `Option`: /// - empty text is deserialized as `None`; /// - everything else is deserialized as `Some` using the same deserializer; /// - units (`()`) and unit structs always deserialized successfully, the content is ignored; /// - newtype structs forwards deserialization to the inner type using the same /// deserializer; /// - sequences, tuples and tuple structs are deserialized using [`SimpleTypeDeserializer`] /// (this is the difference): text content passed to the deserializer directly; /// - structs and maps calls [`Visitor::visit_borrowed_str`] or [`Visitor::visit_string`], /// it is responsibility of the type to return an error if it do not able to process /// this data; /// - enums: /// - the variant name is deserialized as `$text`; /// - the content is deserialized using the same deserializer: /// - unit variants: just return `()`; /// - newtype variants forwards deserialization to the inner type using the /// same deserializer; /// - tuple and struct variants are deserialized using [`SimpleTypeDeserializer`]. /// /// [`MapValueDeserializer`]: ../map/struct.MapValueDeserializer.html /// [`DeEvent::Text`]: crate::de::DeEvent::Text /// [`FromStr`]: std::str::FromStr /// [specification]: https://www.w3.org/TR/xmlschema11-2/#boolean pub struct TextDeserializer<'de>(pub Text<'de>); impl<'de> TextDeserializer<'de> { /// Returns a next string as concatenated content of consequent [`Text`] and /// [`CData`] events, used inside [`deserialize_primitives!()`]. 
/// /// [`Text`]: crate::events::Event::Text /// [`CData`]: crate::events::Event::CData #[inline] fn read_string(self) -> Result, DeError> { Ok(self.0.text) } } impl<'de> Deserializer<'de> for TextDeserializer<'de> { type Error = DeError; deserialize_primitives!(); fn deserialize_unit(self, visitor: V) -> Result where V: Visitor<'de>, { visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result where V: Visitor<'de>, { if self.0.is_empty() { visitor.visit_none() } else { visitor.visit_some(self) } } /// Forwards deserialization of the inner type. Always calls [`Visitor::visit_newtype_struct`] /// with this deserializer. fn deserialize_newtype_struct( self, _name: &'static str, visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_newtype_struct(self) } /// This method deserializes a sequence inside of element that itself is a /// sequence element: /// /// ```xml /// <> /// ... /// inner sequence as xs:list /// ... /// /// ``` fn deserialize_seq(self, visitor: V) -> Result where V: Visitor<'de>, { SimpleTypeDeserializer::from_text_content(self.0).deserialize_seq(visitor) } #[inline] fn deserialize_struct( self, _name: &'static str, _fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { // Deserializer methods are only hints, if deserializer could not satisfy // request, it should return the data that it has. 
It is responsibility // of a Visitor to return an error if it does not understand the data self.deserialize_str(visitor) } fn deserialize_enum( self, _name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { visitor.visit_enum(self) } #[inline] fn deserialize_any(self, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_str(visitor) } } impl<'de> EnumAccess<'de> for TextDeserializer<'de> { type Error = DeError; type Variant = Self; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let name = seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?; Ok((name, self)) } } impl<'de> VariantAccess<'de> for TextDeserializer<'de> { type Error = DeError; #[inline] fn unit_variant(self) -> Result<(), Self::Error> { Ok(()) } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { seed.deserialize(self) } #[inline] fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { self.deserialize_tuple(len, visitor) } #[inline] fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { self.deserialize_struct("", fields, visitor) } } quick-xml-0.38.4/src/de/var.rs000064400000000000000000000110201046102023000141410ustar 00000000000000use crate::{ de::key::QNameDeserializer, de::map::ElementMapAccess, de::resolver::EntityResolver, de::simple_type::SimpleTypeDeserializer, de::{DeEvent, Deserializer, XmlRead, TEXT_KEY}, errors::serialize::DeError, }; use serde::de::value::BorrowedStrDeserializer; use serde::de::{self, DeserializeSeed, Deserializer as _, Visitor}; /// An enum access pub struct EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { de: &'d mut Deserializer<'de, R, E>, } impl<'de, 'd, R, E> EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { pub fn new(de: &'d mut Deserializer<'de, R, E>) -> Self { EnumAccess { de } } } 
impl<'de, 'd, R, E> de::EnumAccess<'de> for EnumAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; type Variant = VariantAccess<'de, 'd, R, E>; fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> where V: DeserializeSeed<'de>, { let (name, is_text) = match self.de.peek()? { DeEvent::Start(e) => (seed.deserialize(QNameDeserializer::from_elem(e)?)?, false), DeEvent::Text(_) => ( seed.deserialize(BorrowedStrDeserializer::::new(TEXT_KEY))?, true, ), // SAFETY: The reader is guaranteed that we don't have unmatched tags // If we here, then our deserializer has a bug DeEvent::End(e) => unreachable!("{:?}", e), DeEvent::Eof => return Err(DeError::UnexpectedEof), }; Ok(( name, VariantAccess { de: self.de, is_text, }, )) } } pub struct VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { de: &'d mut Deserializer<'de, R, E>, /// `true` if variant should be deserialized from a textual content /// and `false` if from tag is_text: bool, } impl<'de, 'd, R, E> de::VariantAccess<'de> for VariantAccess<'de, 'd, R, E> where R: XmlRead<'de>, E: EntityResolver, { type Error = DeError; fn unit_variant(self) -> Result<(), Self::Error> { match self.de.next()? { // Consume subtree DeEvent::Start(e) => self.de.read_to_end(e.name()), // Does not needed to deserialize using SimpleTypeDeserializer, because // it returns `()` when `deserialize_unit()` is requested DeEvent::Text(_) => Ok(()), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } fn newtype_variant_seed(self, seed: T) -> Result where T: DeserializeSeed<'de>, { if self.is_text { match self.de.next()? 
{ DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(e)), // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { seed.deserialize(self.de) } } fn tuple_variant(self, len: usize, visitor: V) -> Result where V: Visitor<'de>, { if self.is_text { match self.de.next()? { DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_tuple(len, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Text` events are possible here"), } } else { self.de.deserialize_tuple(len, visitor) } } fn struct_variant( self, fields: &'static [&'static str], visitor: V, ) -> Result where V: Visitor<'de>, { match self.de.next()? { DeEvent::Start(e) => visitor.visit_map(ElementMapAccess::new(self.de, e, fields)), DeEvent::Text(e) => { SimpleTypeDeserializer::from_text_content(e).deserialize_struct("", fields, visitor) } // SAFETY: the other events are filtered in `variant_seed()` _ => unreachable!("Only `Start` or `Text` events are possible here"), } } } quick-xml-0.38.4/src/encoding.rs000064400000000000000000000240211046102023000145540ustar 00000000000000//! A module for wrappers that encode / decode data. use std::borrow::Cow; use std::str::Utf8Error; #[cfg(feature = "encoding")] use encoding_rs::{DecoderResult, Encoding, UTF_16BE, UTF_16LE, UTF_8}; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-8. /// See pub(crate) const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with little-endian byte order. /// See #[cfg(feature = "encoding")] pub(crate) const UTF16_LE_BOM: &[u8] = &[0xFF, 0xFE]; /// Unicode "byte order mark" (\u{FEFF}) encoded as UTF-16 with big-endian byte order. 
/// See the Unicode FAQ on byte order marks.
#[cfg(feature = "encoding")]
pub(crate) const UTF16_BE_BOM: &[u8] = &[0xFE, 0xFF];

/// An error when decoding or encoding
///
/// If feature [`encoding`] is disabled, the [`EncodingError`] is always [`EncodingError::Utf8`]
///
/// [`encoding`]: ../index.html#encoding
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum EncodingError {
    /// Input was not valid UTF-8
    Utf8(Utf8Error),
    /// Input did not adhere to the given encoding
    #[cfg(feature = "encoding")]
    Other(&'static Encoding),
}

impl From<Utf8Error> for EncodingError {
    /// Wraps the UTF-8 validation error into [`EncodingError::Utf8`].
    #[inline]
    fn from(e: Utf8Error) -> Self {
        Self::Utf8(e)
    }
}

impl std::error::Error for EncodingError {
    /// Returns the wrapped [`Utf8Error`] for [`EncodingError::Utf8`];
    /// the `Other` variant carries no underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Utf8(e) => Some(e),
            #[cfg(feature = "encoding")]
            Self::Other(_) => None,
        }
    }
}

impl std::fmt::Display for EncodingError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Utf8(e) => write!(f, "cannot decode input using UTF-8: {}", e),
            #[cfg(feature = "encoding")]
            Self::Other(encoding) => write!(f, "cannot decode input using {}", encoding.name()),
        }
    }
}

/// Decoder of byte slices into strings.
///
/// If feature [`encoding`] is enabled, this encoding taken from the `"encoding"`
/// XML declaration or assumes UTF-8, if XML has no <?xml ?> declaration, encoding
/// key is not defined or contains unknown encoding.
///
/// The library supports any UTF-8 compatible encodings that crate `encoding_rs`
/// is supported. [*UTF-16 and ISO-2022-JP are not supported at the present*][utf16].
///
/// If feature [`encoding`] is disabled, the decoder is always UTF-8 decoder:
/// any XML declarations are ignored.
///
/// [utf16]: https://github.com/tafia/quick-xml/issues/158
/// [`encoding`]: ../index.html#encoding
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Decoder {
    #[cfg(feature = "encoding")]
    pub(crate) encoding: &'static Encoding,
}

impl Decoder {
    /// Creates a decoder that treats its input as UTF-8.
    pub(crate) const fn utf8() -> Self {
        Decoder {
            #[cfg(feature = "encoding")]
            encoding: UTF_8,
        }
    }

    /// Creates a decoder that treats its input as UTF-16 LE (test helper).
    #[cfg(all(test, feature = "encoding", feature = "serialize"))]
    pub(crate) const fn utf16() -> Self {
        Decoder { encoding: UTF_16LE }
    }
}

impl Decoder {
    /// Returns the `Reader`s encoding.
    ///
    /// This encoding will be used by [`decode`].
    ///
    /// [`decode`]: Self::decode
    #[cfg(feature = "encoding")]
    pub const fn encoding(&self) -> &'static Encoding {
        self.encoding
    }

    /// ## Without `encoding` feature
    ///
    /// Decodes an UTF-8 slice regardless of XML declaration and ignoring BOM
    /// if it is present in the `bytes`.
    ///
    /// ## With `encoding` feature
    ///
    /// Decodes specified bytes using encoding, declared in the XML, if it was
    /// declared there, or UTF-8 otherwise, and ignoring BOM if it is present
    /// in the `bytes`.
    ///
    /// ----
    /// Returns an error in case of malformed sequences in the `bytes`.
    pub fn decode<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>, EncodingError> {
        // Exactly one of the two bindings survives `cfg` expansion
        #[cfg(not(feature = "encoding"))]
        let decoded = Ok(Cow::Borrowed(std::str::from_utf8(bytes)?));

        #[cfg(feature = "encoding")]
        let decoded = decode(bytes, self.encoding);

        decoded
    }

    /// Like [`decode`][Self::decode] but using a pre-allocated buffer.
    ///
    /// The decoded text is appended to `buf`; on error `buf` is left as it
    /// was in the UTF-8 path, because validation happens before appending.
    pub fn decode_into(&self, bytes: &[u8], buf: &mut String) -> Result<(), EncodingError> {
        #[cfg(not(feature = "encoding"))]
        buf.push_str(std::str::from_utf8(bytes)?);

        #[cfg(feature = "encoding")]
        decode_into(bytes, self.encoding, buf)?;

        Ok(())
    }

    /// Decodes the `Cow` buffer, preserves the lifetime
    pub(crate) fn decode_cow<'b>(
        &self,
        bytes: &Cow<'b, [u8]>,
    ) -> Result<Cow<'b, str>, EncodingError> {
        match bytes {
            Cow::Borrowed(bytes) => self.decode(bytes),
            // Convert to owned, because otherwise Cow will be bound with wrong lifetime
            Cow::Owned(bytes) => Ok(self.decode(bytes)?.into_owned().into()),
        }
    }

    /// Decodes the `Cow` buffer, normalizes XML EOLs, preserves the lifetime
    pub(crate) fn content<'b>(
        &self,
        bytes: &Cow<'b, [u8]>,
        normalize_eol: impl Fn(&str) -> Cow<str>,
    ) -> Result<Cow<'b, str>, EncodingError> {
        match bytes {
            Cow::Borrowed(bytes) => {
                let text = self.decode(bytes)?;
                match normalize_eol(&text) {
                    // If text borrowed after normalization that means that it's not changed
                    Cow::Borrowed(_) => Ok(text),
                    Cow::Owned(s) => Ok(Cow::Owned(s)),
                }
            }
            Cow::Owned(bytes) => {
                let text = self.decode(bytes)?;
                let text = normalize_eol(&text);
                // Convert to owned, because otherwise Cow will be bound with wrong lifetime
                Ok(text.into_owned().into())
            }
        }
    }
}

/// Decodes the provided bytes using the specified encoding.
///
/// Returns an error in case of malformed or non-representable sequences in the `bytes`.
#[cfg(feature = "encoding")]
pub fn decode<'b>(
    bytes: &'b [u8],
    encoding: &'static Encoding,
) -> Result<Cow<'b, str>, EncodingError> {
    encoding
        .decode_without_bom_handling_and_without_replacement(bytes)
        .ok_or(EncodingError::Other(encoding))
}

/// Like [`decode`] but using a pre-allocated buffer.
#[cfg(feature = "encoding")] pub fn decode_into( bytes: &[u8], encoding: &'static Encoding, buf: &mut String, ) -> Result<(), EncodingError> { if encoding == UTF_8 { buf.push_str(std::str::from_utf8(bytes)?); return Ok(()); } let mut decoder = encoding.new_decoder_without_bom_handling(); buf.reserve( decoder .max_utf8_buffer_length_without_replacement(bytes.len()) // SAFETY: None can be returned only if required size will overflow usize, // but in that case String::reserve also panics .unwrap(), ); let (result, read) = decoder.decode_to_string_without_replacement(bytes, buf, true); match result { DecoderResult::InputEmpty => { debug_assert_eq!(read, bytes.len()); Ok(()) } DecoderResult::Malformed(_, _) => Err(EncodingError::Other(encoding)), // SAFETY: We allocate enough space above DecoderResult::OutputFull => unreachable!(), } } /// Automatic encoding detection of XML files based using the /// [recommended algorithm](https://www.w3.org/TR/xml11/#sec-guessing). /// /// If encoding is detected, `Some` is returned with an encoding and size of BOM /// in bytes, if detection was performed using BOM, or zero, if detection was /// performed without BOM. /// /// IF encoding was not recognized, `None` is returned. /// /// Because the [`encoding_rs`] crate supports only subset of those encodings, only /// the supported subset are detected, which is UTF-8, UTF-16 BE and UTF-16 LE. 
/// /// The algorithm suggests examine up to the first 4 bytes to determine encoding /// according to the following table: /// /// | Bytes |Detected encoding /// |-------------|------------------------------------------ /// | **BOM** /// |`FE_FF_##_##`|UTF-16, big-endian /// |`FF FE ## ##`|UTF-16, little-endian /// |`EF BB BF` |UTF-8 /// | **No BOM** /// |`00 3C 00 3F`|UTF-16 BE or ISO-10646-UCS-2 BE or similar 16-bit BE (use declared encoding to find the exact one) /// |`3C 00 3F 00`|UTF-16 LE or ISO-10646-UCS-2 LE or similar 16-bit LE (use declared encoding to find the exact one) /// |`3C 3F 78 6D`|UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the relevant ASCII characters, the encoding declaration itself may be read reliably #[cfg(feature = "encoding")] pub fn detect_encoding(bytes: &[u8]) -> Option<(&'static Encoding, usize)> { // Prevent suggesting " Some((UTF_16BE, 2)), _ if bytes.starts_with(UTF16_LE_BOM) => Some((UTF_16LE, 2)), _ if bytes.starts_with(UTF8_BOM) => Some((UTF_8, 3)), // without BOM _ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some((UTF_16BE, 0)), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', 0x00, b'?', 0x00]) => Some((UTF_16LE, 0)), // Some LE encoding, for example, UTF-16 or ISO-10646-UCS-2 _ if bytes.starts_with(&[b'<', b'?', b'x', b'm']) => Some((UTF_8, 0)), // Some ASCII compatible _ => None, } } quick-xml-0.38.4/src/errors.rs000064400000000000000000000407271046102023000143150ustar 00000000000000//! 
Error management module use crate::encoding::{Decoder, EncodingError}; use crate::escape::EscapeError; use crate::events::attributes::AttrError; use crate::name::{NamespaceError, QName}; use std::fmt; use std::io::Error as IoError; use std::sync::Arc; /// An error returned if parsed document does not correspond to the XML grammar, /// for example, a tag opened by `<` not closed with `>`. This error does not /// represent invalid XML constructs, for example, tags `<>` and `` a well-formed /// from syntax point-of-view. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum SyntaxError { /// The parser started to parse `` sequence was found. UnclosedPIOrXmlDecl, /// The parser started to parse comment (`` sequence was found. UnclosedComment, /// The parser started to parse DTD (`` character was found. UnclosedDoctype, /// The parser started to parse `` sequence was found. UnclosedCData, /// The parser started to parse tag content, but the input ended /// before the closing `>` character was found. 
UnclosedTag, } impl fmt::Display for SyntaxError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::InvalidBangMarkup => f.write_str("unknown or missed symbol in markup"), Self::UnclosedPIOrXmlDecl => { f.write_str("processing instruction or xml declaration not closed: `?>` not found before end of input") } Self::UnclosedComment => { f.write_str("comment not closed: `-->` not found before end of input") } Self::UnclosedDoctype => { f.write_str("DOCTYPE not closed: `>` not found before end of input") } Self::UnclosedCData => { f.write_str("CDATA not closed: `]]>` not found before end of input") } Self::UnclosedTag => f.write_str("tag not closed: `>` not found before end of input"), } } } impl std::error::Error for SyntaxError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// An error returned if parsed document is not [well-formed], for example, /// an opened tag is not closed before end of input. /// /// Those errors are not fatal: after encountering an error you can continue /// parsing the document. /// /// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed #[derive(Clone, Debug, PartialEq, Eq)] pub enum IllFormedError { /// A `version` attribute was not found in an XML declaration or is not the /// first attribute. /// /// According to the [specification], the XML declaration (``) MUST contain /// a `version` attribute and it MUST be the first attribute. This error indicates, /// that the declaration does not contain attributes at all (if contains `None`) /// or either `version` attribute is not present or not the first attribute in /// the declaration. In the last case it contains the name of the found attribute. /// /// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd MissingDeclVersion(Option), /// A document type definition (DTD) does not contain a name of a root element. 
/// /// According to the [specification], document type definition (``) /// MUST contain a name which defines a document type (`foo`). If that name /// is missed, this error is returned. /// /// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl MissingDoctypeName, /// The end tag was not found during reading of a sub-tree of elements due to /// encountering an EOF from the underlying reader. This error is returned from /// [`Reader::read_to_end`]. /// /// [`Reader::read_to_end`]: crate::reader::Reader::read_to_end MissingEndTag(String), /// The specified end tag was encountered without corresponding open tag at the /// same level of hierarchy UnmatchedEndTag(String), /// The specified end tag does not match the start tag at that nesting level. MismatchedEndTag { /// Name of open tag, that is expected to be closed expected: String, /// Name of actually closed tag found: String, }, /// A comment contains forbidden double-hyphen (`--`) sequence inside. /// /// According to the [specification], for compatibility, comments MUST NOT contain /// double-hyphen (`--`) sequence, in particular, they cannot end by `--->`. /// /// The quick-xml by default does not check that, because this restriction is /// mostly artificial, but you can enable it in the [configuration]. /// /// [specification]: https://www.w3.org/TR/xml11/#sec-comments /// [configuration]: crate::reader::Config::check_comments DoubleHyphenInComment, /// The parser started to parse entity or character reference (`&...;`) in text, /// but the input ended before the closing `;` character was found. 
UnclosedReference, } impl fmt::Display for IllFormedError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::MissingDeclVersion(None) => { f.write_str("an XML declaration does not contain `version` attribute") } Self::MissingDeclVersion(Some(attr)) => { write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr) } Self::MissingDoctypeName => { f.write_str("`` declaration does not contain a name of a document type") } Self::MissingEndTag(tag) => write!( f, "start tag not closed: `` not found before end of input", tag, ), Self::UnmatchedEndTag(tag) => { write!(f, "close tag `` does not match any open tag", tag) } Self::MismatchedEndTag { expected, found } => write!( f, "expected ``, but `` was found", expected, found, ), Self::DoubleHyphenInComment => { f.write_str("forbidden string `--` was found in a comment") } Self::UnclosedReference => f.write_str( "entity or character reference not closed: `;` not found before end of input", ), } } } impl std::error::Error for IllFormedError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// The error type used by this crate. #[derive(Clone, Debug)] pub enum Error { /// XML document cannot be read from underlying source. /// /// Contains the reference-counted I/O error to make the error type `Clone`able. Io(Arc), /// The document does not corresponds to the XML grammar. Syntax(SyntaxError), /// The document is not [well-formed](https://www.w3.org/TR/xml11/#dt-wellformed). 
IllFormed(IllFormedError), /// Attribute parsing error InvalidAttr(AttrError), /// Encoding error Encoding(EncodingError), /// Escape error Escape(EscapeError), /// Parsed XML has some namespace-related problems Namespace(NamespaceError), } impl Error { pub(crate) fn missed_end(name: QName, decoder: Decoder) -> Self { match decoder.decode(name.as_ref()) { Ok(name) => IllFormedError::MissingEndTag(name.into()).into(), Err(err) => err.into(), } } } impl From for Error { /// Creates a new `Error::Io` from the given error #[inline] fn from(error: IoError) -> Error { Self::Io(Arc::new(error)) } } impl From for Error { /// Creates a new `Error::Syntax` from the given error #[inline] fn from(error: SyntaxError) -> Self { Self::Syntax(error) } } impl From for Error { /// Creates a new `Error::IllFormed` from the given error #[inline] fn from(error: IllFormedError) -> Self { Self::IllFormed(error) } } impl From for Error { /// Creates a new `Error::EncodingError` from the given error #[inline] fn from(error: EncodingError) -> Error { Self::Encoding(error) } } impl From for Error { /// Creates a new `Error::EscapeError` from the given error #[inline] fn from(error: EscapeError) -> Error { Self::Escape(error) } } impl From for Error { #[inline] fn from(error: AttrError) -> Self { Self::InvalidAttr(error) } } impl From for Error { #[inline] fn from(error: NamespaceError) -> Self { Self::Namespace(error) } } /// A specialized `Result` type where the error is hard-wired to [`Error`]. 
pub type Result = std::result::Result; impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Io(e) => write!(f, "I/O error: {}", e), Self::Syntax(e) => write!(f, "syntax error: {}", e), Self::IllFormed(e) => write!(f, "ill-formed document: {}", e), Self::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e), Self::Encoding(e) => e.fmt(f), Self::Escape(e) => e.fmt(f), Self::Namespace(e) => e.fmt(f), } } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::Io(e) => Some(e), Self::Syntax(e) => Some(e), Self::IllFormed(e) => Some(e), Self::InvalidAttr(e) => Some(e), Self::Encoding(e) => Some(e), Self::Escape(e) => Some(e), Self::Namespace(e) => Some(e), } } } #[cfg(feature = "serialize")] pub mod serialize { //! A module to handle serde (de)serialization errors use super::*; use crate::utils::write_byte_string; use std::borrow::Cow; #[cfg(feature = "overlapped-lists")] use std::num::NonZeroUsize; use std::str::Utf8Error; /// (De)serialization error #[derive(Clone, Debug)] pub enum DeError { /// Serde custom error Custom(String), /// Xml parsing error InvalidXml(Error), /// This error indicates an error in the [`Deserialize`](serde::Deserialize) /// implementation when read a map or a struct: `MapAccess::next_value[_seed]` /// was called before `MapAccess::next_key[_seed]`. /// /// You should check your types, that implements corresponding trait. KeyNotRead, /// Deserializer encounter a start tag with a specified name when it is /// not expecting. This happens when you try to deserialize a primitive /// value (numbers, strings, booleans) from an XML element. UnexpectedStart(Vec), /// The [`Reader`] produced [`Event::Eof`] when it is not expecting, /// for example, after producing [`Event::Start`] but before corresponding /// [`Event::End`]. 
/// /// [`Reader`]: crate::reader::Reader /// [`Event::Eof`]: crate::events::Event::Eof /// [`Event::Start`]: crate::events::Event::Start /// [`Event::End`]: crate::events::Event::End UnexpectedEof, /// Too many events were skipped while deserializing a sequence, event limit /// exceeded. The limit was provided as an argument #[cfg(feature = "overlapped-lists")] TooManyEvents(NonZeroUsize), } impl fmt::Display for DeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Custom(s) => f.write_str(s), Self::InvalidXml(e) => e.fmt(f), Self::KeyNotRead => f.write_str("invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), Self::UnexpectedStart(e) => { f.write_str("unexpected `Event::Start(")?; write_byte_string(f, e)?; f.write_str(")`") } Self::UnexpectedEof => f.write_str("unexpected `Event::Eof`"), #[cfg(feature = "overlapped-lists")] Self::TooManyEvents(s) => write!(f, "deserializer buffered {} events, limit exceeded", s), } } } impl std::error::Error for DeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::InvalidXml(e) => Some(e), _ => None, } } } impl serde::de::Error for DeError { fn custom(msg: T) -> Self { Self::Custom(msg.to_string()) } } impl From for DeError { #[inline] fn from(e: Error) -> Self { Self::InvalidXml(e) } } impl From for DeError { #[inline] fn from(e: EscapeError) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: EncodingError) -> Self { Self::InvalidXml(e.into()) } } impl From for DeError { #[inline] fn from(e: AttrError) -> Self { Self::InvalidXml(e.into()) } } /// Serialization error #[derive(Clone, Debug)] pub enum SeError { /// Serde custom error Custom(String), /// XML document cannot be written to underlying source. /// /// Contains the reference-counted I/O error to make the error type `Clone`able. 
Io(Arc), /// Some value could not be formatted Fmt(std::fmt::Error), /// Serialized type cannot be represented in an XML due to violation of the /// XML rules in the final XML document. For example, attempt to serialize /// a `HashMap<{integer}, ...>` would cause this error because [XML name] /// cannot start from a digit or a hyphen (minus sign). The same result /// would occur if map key is a complex type that cannot be serialized as /// a primitive type (i.e. string, char, bool, unit struct or unit variant). /// /// [XML name]: https://www.w3.org/TR/xml11/#sec-common-syn Unsupported(Cow<'static, str>), /// Some value could not be turned to UTF-8 NonEncodable(Utf8Error), } impl fmt::Display for SeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Custom(s) => f.write_str(s), Self::Io(e) => write!(f, "I/O error: {}", e), Self::Fmt(e) => write!(f, "formatting error: {}", e), Self::Unsupported(s) => write!(f, "unsupported value: {}", s), Self::NonEncodable(e) => write!(f, "malformed UTF-8: {}", e), } } } impl ::std::error::Error for SeError { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::Io(e) => Some(e), _ => None, } } } impl serde::ser::Error for SeError { fn custom(msg: T) -> Self { Self::Custom(msg.to_string()) } } impl From for SeError { #[inline] fn from(e: IoError) -> Self { Self::Io(Arc::new(e)) } } impl From for SeError { #[inline] fn from(e: Utf8Error) -> Self { Self::NonEncodable(e) } } impl From for SeError { #[inline] fn from(e: fmt::Error) -> Self { Self::Fmt(e) } } } quick-xml-0.38.4/src/escape.rs000064400000000000000000002542261046102023000142420ustar 00000000000000//! Manage xml character escapes use memchr::{memchr, memchr2_iter, memchr3}; use std::borrow::Cow; use std::fmt::{self, Write}; use std::num::ParseIntError; use std::ops::Range; /// Error of parsing character reference (`&#;` or `&#x;`). 
#[derive(Clone, Debug, PartialEq)]
pub enum ParseCharRefError {
    /// Number contains sign character (`+` or `-`) which is not allowed.
    UnexpectedSign,
    /// Number cannot be parsed due to non-number characters or a numeric overflow.
    InvalidNumber(ParseIntError),
    /// Character reference represents not a valid unicode codepoint.
    InvalidCodepoint(u32),
    /// Character reference expanded to a not permitted character for an XML.
    ///
    /// Currently, only `0x0` character produces this error.
    IllegalCharacter(u32),
}

impl std::fmt::Display for ParseCharRefError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::UnexpectedSign => f.write_str("unexpected number sign"),
            Self::InvalidNumber(e) => e.fmt(f),
            Self::InvalidCodepoint(n) => write!(f, "`{}` is not a valid codepoint", n),
            Self::IllegalCharacter(n) => write!(f, "0x{:x} character is not permitted in XML", n),
        }
    }
}

impl std::error::Error for ParseCharRefError {
    /// Only [`ParseCharRefError::InvalidNumber`] wraps an underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InvalidNumber(e) => Some(e),
            _ => None,
        }
    }
}

/// Error for XML escape / unescape.
#[derive(Clone, Debug, PartialEq)]
pub enum EscapeError {
    /// Referenced entity is unknown to the parser.
    ///
    /// Contains the byte range of the entity name in the input and the name itself.
    UnrecognizedEntity(Range<usize>, String),
    /// Cannot find `;` after `&`
    UnterminatedEntity(Range<usize>),
    /// Attempt to parse character reference (`&#<dec-number>;` or `&#x<hex-number>;`)
    /// was unsuccessful, not all characters are decimal or hexadecimal numbers.
    InvalidCharRef(ParseCharRefError),
}

impl std::fmt::Display for EscapeError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::UnrecognizedEntity(rge, res) => {
                write!(f, "at {:?}: unrecognized entity `{}`", rge, res)
            }
            Self::UnterminatedEntity(e) => write!(
                f,
                "Error while escaping character at range {:?}: Cannot find ';' after '&'",
                e
            ),
            Self::InvalidCharRef(e) => {
                write!(f, "invalid character reference: {}", e)
            }
        }
    }
}

impl std::error::Error for EscapeError {
    /// Only [`EscapeError::InvalidCharRef`] wraps an underlying error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InvalidCharRef(e) => Some(e),
            _ => None,
        }
    }
}

/// Escapes an `&str` and replaces all xml special characters (`<`, `>`, `&`, `'`, `"`)
/// with their corresponding xml escaped value.
///
/// This function performs following replacements:
///
/// | Character | Replacement
/// |-----------|------------
/// | `<`       | `&lt;`
/// | `>`       | `&gt;`
/// | `&`       | `&amp;`
/// | `'`       | `&apos;`
/// | `"`       | `&quot;`
pub fn escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> {
    _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
}

/// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`)
/// with their corresponding xml escaped value.
///
/// Should only be used for escaping text content. In XML text content, it is allowed
/// (though not recommended) to leave the quote special characters `"` and `'` unescaped.
///
/// This function performs following replacements:
///
/// | Character | Replacement
/// |-----------|------------
/// | `<`       | `&lt;`
/// | `>`       | `&gt;`
/// | `&`       | `&amp;`
pub fn partial_escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> {
    _escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
}

/// XML standard [requires] that only `<` and `&` are escaped in text content or
/// attribute value. All other characters do not need to be escaped, although
/// for compatibility with SGML they also should be escaped. Practically, escaping
/// only those characters is enough.
///
/// This function performs following replacements:
///
/// | Character | Replacement
/// |-----------|------------
/// | `<`       | `&lt;`
/// | `&`       | `&amp;`
///
/// [requires]: https://www.w3.org/TR/xml11/#syntax
pub fn minimal_escape<'a>(raw: impl Into<Cow<'a, str>>) -> Cow<'a, str> {
    _escape(raw, |ch| matches!(ch, b'<' | b'&'))
}

/// Writes `value[from..to]` to `writer` unchanged, followed by the escaped
/// form of the single byte at `value[to]`.
///
/// # Panics
///
/// Panics if the byte at `to` is not one of the characters handled below,
/// or if `from..to` / `to` are out of bounds for `value`.
pub(crate) fn escape_char<W>(writer: &mut W, value: &str, from: usize, to: usize) -> fmt::Result
where
    W: fmt::Write,
{
    // Copy the part that does not need escaping as is
    writer.write_str(&value[from..to])?;
    match value.as_bytes()[to] {
        b'<' => writer.write_str("&lt;")?,
        b'>' => writer.write_str("&gt;")?,
        b'\'' => writer.write_str("&apos;")?,
        b'&' => writer.write_str("&amp;")?,
        b'"' => writer.write_str("&quot;")?,

        // This set of escapes handles characters that should be escaped
        // in elements of xs:lists, because those characters works as
        // delimiters of list elements
        b'\t' => writer.write_str("&#9;")?,
        b'\n' => writer.write_str("&#10;")?,
        b'\r' => writer.write_str("&#13;")?,
        b' ' => writer.write_str("&#32;")?,
        _ => unreachable!(
            "Only '<', '>', '\\'', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped"
        ),
    }
    Ok(())
}

/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
/// `&`, `'`, `"`) with their corresponding xml escaped value.
fn _escape<'a, F: Fn(u8) -> bool>(raw: impl Into>, escape_chars: F) -> Cow<'a, str> { let raw = raw.into(); let bytes = raw.as_bytes(); let mut escaped = None; let mut iter = bytes.iter(); let mut pos = 0; while let Some(i) = iter.position(|&b| escape_chars(b)) { if escaped.is_none() { escaped = Some(String::with_capacity(raw.len())); } let escaped = escaped.as_mut().expect("initialized"); let new_pos = pos + i; // SAFETY: It should fail only on OOM escape_char(escaped, &raw, pos, new_pos).unwrap(); pos = new_pos + 1; } if let Some(mut escaped) = escaped { if let Some(raw) = raw.get(pos..) { // SAFETY: It should fail only on OOM escaped.write_str(raw).unwrap(); } Cow::Owned(escaped) } else { raw } } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value. /// /// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes]. /// /// [`escape-html`]: ../index.html#escape-html /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape(raw: &str) -> Result, EscapeError> { unescape_with(raw, resolve_predefined_entity) } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value, using a resolver function for custom entities. /// /// If feature [`escape-html`] is enabled, then recognizes all [HTML5 escapes]. /// /// Predefined entities will be resolved _after_ trying to resolve with `resolve_entity`, /// which allows you to override default behavior which required in some XML dialects. /// /// Character references (`&#hh;`) cannot be overridden, they are resolved before /// calling `resolve_entity`. /// /// Note, that entities will not be resolved recursively. In order to satisfy the /// XML [requirements] you should unescape nested entities by yourself. 
/// /// # Example /// /// ``` /// use quick_xml::escape::resolve_xml_entity; /// # use quick_xml::escape::unescape_with; /// # use pretty_assertions::assert_eq; /// let override_named_entities = |entity: &str| match entity { /// // Override standard entities /// "lt" => Some("FOO"), /// "gt" => Some("BAR"), /// // Resolve custom entities /// "baz" => Some("<"), /// // Delegate other entities to the default implementation /// _ => resolve_xml_entity(entity), /// }; /// /// assert_eq!( /// unescape_with("&<test>&baz;", override_named_entities).unwrap(), /// "&FOOtestBAR<" /// ); /// ``` /// /// [`escape-html`]: ../index.html#escape-html /// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref /// [requirements]: https://www.w3.org/TR/xml11/#intern-replacement pub fn unescape_with<'input, 'entity, F>( raw: &'input str, mut resolve_entity: F, ) -> Result, EscapeError> where // the lifetime of the output comes from a capture or is `'static` F: FnMut(&str) -> Option<&'entity str>, { let bytes = raw.as_bytes(); let mut unescaped = None; let mut last_end = 0; let mut iter = memchr2_iter(b'&', b';', bytes); while let Some(start) = iter.by_ref().find(|p| bytes[*p] == b'&') { match iter.next() { Some(end) if bytes[end] == b';' => { // append valid data if unescaped.is_none() { unescaped = Some(String::with_capacity(raw.len())); } let unescaped = unescaped.as_mut().expect("initialized"); unescaped.push_str(&raw[last_end..start]); // search for character correctness let pat = &raw[start + 1..end]; if let Some(entity) = pat.strip_prefix('#') { let codepoint = parse_number(entity).map_err(EscapeError::InvalidCharRef)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); } else if let Some(value) = resolve_entity(pat) { unescaped.push_str(value); } else { return Err(EscapeError::UnrecognizedEntity( start + 1..end, pat.to_string(), )); } last_end = end + 1; } _ => return Err(EscapeError::UnterminatedEntity(start..raw.len())), } } if let Some(mut unescaped) = 
unescaped { if let Some(raw) = raw.get(last_end..) { unescaped.push_str(raw); } Ok(Cow::Owned(unescaped)) } else { Ok(Cow::Borrowed(raw)) } } //////////////////////////////////////////////////////////////////////////////////////////////////// // TODO: It would be better to reuse buffer after decoding if possible pub(crate) fn normalize_xml11_eols<'input>(text: &'input str) -> Cow<'input, str> { let bytes = text.as_bytes(); // The following sequences of UTF-8 encoded input should be translated into // a single `\n` (U+000a) character to normalize EOLs: // // |UTF-8 |String| // |--------|------| // |0d 0a |\r\n | // |0d c2 85|\r\x85| // |0d |\r | // |c2 85 |\x85 | // |e2 80 a8|\u2028| if let Some(i) = memchr3(b'\r', 0xC2, 0xE2, bytes) { // We found a character that requires normalization, so create new normalized // string, put the prefix as is and then put normalized character let mut normalized = String::with_capacity(text.len()); // NOTE: unsafe { text.get_unchecked(0..i) } could be used because // we are sure that index within string normalized.push_str(&text[0..i]); let mut pos = normalize_xml11_eol_step(&mut normalized, text, i, '\n'); while let Some(i) = memchr3(b'\r', 0xC2, 0xE2, &bytes[pos..]) { let index = pos + i; // NOTE: unsafe { text.get_unchecked(pos..index) } could be used because // we are sure that index within string normalized.push_str(&text[pos..index]); pos = normalize_xml11_eol_step(&mut normalized, text, index, '\n'); } if let Some(rest) = text.get(pos..) { normalized.push_str(rest); } return normalized.into(); } Cow::Borrowed(text) } /// All line breaks MUST have been normalized on input to #xA as described /// in [2.11 End-of-Line Handling][eof], so the rest of this algorithm operates /// on text normalized in this way. 
///
/// To simplify the tasks of applications, the XML processor MUST behave
/// as if it normalized all line breaks in external parsed entities
/// (including the document entity) on input, before parsing, by translating
/// all of the following to a single #xA character (_which attribute normalization
/// routine will replace by #x20 character_):
///
/// 1. the two-character sequence #xD #xA
/// 2. the two-character sequence #xD #x85
/// 3. the single character #x85
/// 4. the single character #x2028
/// 5. any #xD character that is not immediately followed by #xA or #x85.
///
/// The characters #x85 and #x2028 cannot be reliably recognized and translated
/// until an entity's encoding declaration (if present) has been read.
/// Therefore, it is a fatal error to use them within the XML declaration or text declaration.
///
/// Note, that this function cannot be used to normalize HTML values. The text in HTML
/// normally is not normalized in any way; normalization is performed only in limited
/// contexts and [only for] `\r\n` and `\r`.
///
/// # Parameters
///
/// - `normalized`: the string with the result of normalization
/// - `text`: the string to be normalized
/// - `index`: a byte index into `text` of character which is processed right now.
///   It always points to the first byte of character in UTF-8 encoding
/// - `ch`: a character that should be put to the string instead of newline sequence
///
/// Returns the index of next unprocessed byte in the `text`.
///
/// # Panics
///
/// Panics if the byte at `index` is not one of `\n`, `\r`, `0xC2` or `0xE2`.
///
/// [eof]: https://www.w3.org/TR/xml11/#sec-line-ends
/// [only for]: https://html.spec.whatwg.org/#normalize-newlines
fn normalize_xml11_eol_step(normalized: &mut String, text: &str, index: usize, ch: char) -> usize {
    let input = text.as_bytes();
    // Up to two bytes of lookahead; `None` when the input ends earlier
    let second = input.get(index + 1).copied();
    let third = input.get(index + 2).copied();

    match (input[index], second, third) {
        // \r\n
        (b'\r', Some(b'\n'), _) => {
            normalized.push(ch);
            index + 2
        }
        // \r + #x85 (UTF-8 encoding of #x85 character is [c2 85])
        (b'\r', Some(0xC2), Some(0x85)) => {
            normalized.push(ch);
            index + 3
        }
        // \r + some other two-byte character (c2 xx): the \r is an EOL on its own,
        // the character is copied as is
        (b'\r', Some(0xC2), _) => {
            normalized.push(ch);
            // NOTE: unsafe { text.get_unchecked(index + 1..index + 3) } could be used
            // because we are sure that index within string
            normalized.push_str(&text[index + 1..index + 3]);
            index + 3
        }
        // lone \r or \n
        (b'\r', _, _) | (b'\n', _, _) => {
            normalized.push(ch);
            index + 1
        }
        // #x85 character (c2 85)
        (0xC2, Some(0x85), _) => {
            normalized.push(ch);
            index + 2
        }
        // Some other two-byte character (c2 xx), copied as is
        (0xC2, _, _) => {
            // NOTE: unsafe { text.get_unchecked(index..index + 2) } could be used because
            // we are sure that index within string
            normalized.push_str(&text[index..index + 2]);
            index + 2
        }
        // #x2028 character (e2 80 a8)
        (0xE2, Some(0x80), Some(0xA8)) => {
            normalized.push(ch);
            index + 3
        }
        // Some other three-byte character (e2 xx xx), copied as is
        (0xE2, _, _) => {
            // NOTE: unsafe { text.get_unchecked(index..index + 3) } could be used because
            // we are sure that index within string
            normalized.push_str(&text[index..index + 3]);
            index + 3
        }
        (x, _, _) => unreachable!(
            "at {}: expected '\\n', '\\r', '\\xC2', or '\\xE2', found '{}' / {} / `0x{:X}`",
            index, x as char, x, x
        ),
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// TODO: It would be better to reuse buffer after
decoding if possible pub(crate) fn normalize_xml10_eols<'input>(text: &'input str) -> Cow<'input, str> { let bytes = text.as_bytes(); // The following sequences of UTF-8 encoded input should be translated into // a single `\n` (U+000a) character to normalize EOLs: // // |UTF-8 |String| // |--------|------| // |0d 0a |\r\n | // |0d |\r | if let Some(i) = memchr(b'\r', bytes) { // We found a character that requires normalization, so create new normalized // string, put the prefix as is and then put normalized character let mut normalized = String::with_capacity(text.len()); // NOTE: unsafe { text.get_unchecked(0..i) } could be used because // we are sure that index within string normalized.push_str(&text[0..i]); let mut pos = normalize_xml10_eol_step(&mut normalized, bytes, i, '\n'); while let Some(i) = memchr(b'\r', &bytes[pos..]) { let index = pos + i; // NOTE: unsafe { text.get_unchecked(pos..index) } could be used because // we are sure that index within string normalized.push_str(&text[pos..index]); pos = normalize_xml10_eol_step(&mut normalized, bytes, index, '\n'); } if let Some(rest) = text.get(pos..) { normalized.push_str(rest); } return normalized.into(); } Cow::Borrowed(text) } /// The text in HTML normally is not normalized in any way; normalization is /// performed only in limited contexts and [only for] `\r\n` and `\r`. /// /// # Parameters /// /// - `normalized`: the string with the result of normalization /// - `input`: UTF-8 bytes of the string to be normalized /// - `index`: a byte index into `input` of character which is processed right now. 
///   It always points to the first byte of character in UTF-8 encoding
/// - `ch`: a character that should be put to the string instead of newline sequence
///
/// Returns the index of the next unprocessed byte in the `input`.
///
/// [only for]: https://html.spec.whatwg.org/#normalize-newlines
fn normalize_xml10_eol_step(
    normalized: &mut String,
    input: &[u8],
    index: usize,
    ch: char,
) -> usize {
    // How many bytes of `input` make up the newline sequence at `index`
    let consumed = match input[index] {
        b'\r' if input.get(index + 1) == Some(&b'\n') => 2, // skip \r\n
        b'\r' | b'\n' => 1,                                 // skip \r or \n
        x => unreachable!(
            "at {}: expected ''\\n' or '\\r', found '{}' / {} / `0x{:X}`",
            index, x as char, x, x
        ),
    };
    normalized.push(ch);
    index + consumed
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Resolves predefined XML entities or all HTML5 entities depending on the feature
/// [`escape-html`](https://docs.rs/quick-xml/latest/quick_xml/#escape-html).
///
/// Behaves like [`resolve_xml_entity`] if feature is not enabled and as
/// [`resolve_html5_entity`] if enabled.
#[inline]
pub const fn resolve_predefined_entity(entity: &str) -> Option<&'static str> {
    // Exactly one of the two blocks below is compiled in
    #[cfg(not(feature = "escape-html"))]
    {
        resolve_xml_entity(entity)
    }

    #[cfg(feature = "escape-html")]
    {
        resolve_html5_entity(entity)
    }
}

/// Resolves predefined XML entities. If specified entity is not a predefined XML
/// entity, `None` is returned.
///
/// The complete list of predefined entities are defined in the [specification].
///
/// ```
/// # use quick_xml::escape::resolve_xml_entity;
/// # use pretty_assertions::assert_eq;
/// assert_eq!(resolve_xml_entity("lt"), Some("<"));
/// assert_eq!(resolve_xml_entity("gt"), Some(">"));
/// assert_eq!(resolve_xml_entity("amp"), Some("&"));
/// assert_eq!(resolve_xml_entity("apos"), Some("'"));
/// assert_eq!(resolve_xml_entity("quot"), Some("\""));
///
/// assert_eq!(resolve_xml_entity("foo"), None);
/// ```
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-predefined-ent
pub const fn resolve_xml_entity(entity: &str) -> Option<&'static str> {
    // match over strings are not allowed in const functions
    let s = match entity.as_bytes() {
        b"lt" => "<",
        b"gt" => ">",
        b"amp" => "&",
        b"apos" => "'",
        b"quot" => "\"",
        // Any other name (the comparison is case-sensitive) is not predefined
        _ => return None,
    };
    Some(s)
}

/// Resolves all HTML5 entities. For complete list see <https://dev.w3.org/html5/html-author/charref>.
#[cfg(feature = "escape-html")]
pub const fn resolve_html5_entity(entity: &str) -> Option<&'static str> {
    // imported from https://dev.w3.org/html5/html-author/charref
    // match over strings are not allowed in const functions
    //TODO: automate up-to-dating using https://html.spec.whatwg.org/entities.json
    //TODO: building this function increases compilation time by 10+ seconds (or 5x times)
    //      Maybe this is because of very long match
    //      See https://github.com/tafia/quick-xml/issues/763
    let s = match entity.as_bytes() {
        b"Tab" => "\u{09}",
        b"NewLine" => "\u{0A}",
        b"excl" => "\u{21}",
        b"quot" | b"QUOT" => "\u{22}",
        b"num" => "\u{23}",
        b"dollar" => "\u{24}",
        b"percnt" => "\u{25}",
        b"amp" | b"AMP" => "\u{26}",
        b"apos" => "\u{27}",
        b"lpar" => "\u{28}",
        b"rpar" => "\u{29}",
        b"ast" | b"midast" => "\u{2A}",
        b"plus" => "\u{2B}",
        b"comma" => "\u{2C}",
        b"period" => "\u{2E}",
        b"sol" => "\u{2F}",
        b"colon" => "\u{3A}",
        b"semi" => "\u{3B}",
        b"lt" | b"LT" => "\u{3C}",
        b"equals" => "\u{3D}",
        b"gt" | b"GT" => "\u{3E}",
        b"quest" => "\u{3F}",
        b"commat" => "\u{40}",
        b"lsqb" | b"lbrack" => "\u{5B}",
        b"bsol" => "\u{5C}",
        b"rsqb" | b"rbrack" => "\u{5D}",
        b"Hat" => "\u{5E}",
b"lowbar" => "\u{5F}", b"grave" | b"DiacriticalGrave" => "\u{60}", b"lcub" | b"lbrace" => "\u{7B}", b"verbar" | b"vert" | b"VerticalLine" => "\u{7C}", b"rcub" | b"rbrace" => "\u{7D}", b"nbsp" | b"NonBreakingSpace" => "\u{A0}", b"iexcl" => "\u{A1}", b"cent" => "\u{A2}", b"pound" => "\u{A3}", b"curren" => "\u{A4}", b"yen" => "\u{A5}", b"brvbar" => "\u{A6}", b"sect" => "\u{A7}", b"Dot" | b"die" | b"DoubleDot" | b"uml" => "\u{A8}", b"copy" | b"COPY" => "\u{A9}", b"ordf" => "\u{AA}", b"laquo" => "\u{AB}", b"not" => "\u{AC}", b"shy" => "\u{AD}", b"reg" | b"circledR" | b"REG" => "\u{AE}", b"macr" | b"OverBar" | b"strns" => "\u{AF}", b"deg" => "\u{B0}", b"plusmn" | b"pm" | b"PlusMinus" => "\u{B1}", b"sup2" => "\u{B2}", b"sup3" => "\u{B3}", b"acute" | b"DiacriticalAcute" => "\u{B4}", b"micro" => "\u{B5}", b"para" => "\u{B6}", b"middot" | b"centerdot" | b"CenterDot" => "\u{B7}", b"cedil" | b"Cedilla" => "\u{B8}", b"sup1" => "\u{B9}", b"ordm" => "\u{BA}", b"raquo" => "\u{BB}", b"frac14" => "\u{BC}", b"frac12" | b"half" => "\u{BD}", b"frac34" => "\u{BE}", b"iquest" => "\u{BF}", b"Agrave" => "\u{C0}", b"Aacute" => "\u{C1}", b"Acirc" => "\u{C2}", b"Atilde" => "\u{C3}", b"Auml" => "\u{C4}", b"Aring" => "\u{C5}", b"AElig" => "\u{C6}", b"Ccedil" => "\u{C7}", b"Egrave" => "\u{C8}", b"Eacute" => "\u{C9}", b"Ecirc" => "\u{CA}", b"Euml" => "\u{CB}", b"Igrave" => "\u{CC}", b"Iacute" => "\u{CD}", b"Icirc" => "\u{CE}", b"Iuml" => "\u{CF}", b"ETH" => "\u{D0}", b"Ntilde" => "\u{D1}", b"Ograve" => "\u{D2}", b"Oacute" => "\u{D3}", b"Ocirc" => "\u{D4}", b"Otilde" => "\u{D5}", b"Ouml" => "\u{D6}", b"times" => "\u{D7}", b"Oslash" => "\u{D8}", b"Ugrave" => "\u{D9}", b"Uacute" => "\u{DA}", b"Ucirc" => "\u{DB}", b"Uuml" => "\u{DC}", b"Yacute" => "\u{DD}", b"THORN" => "\u{DE}", b"szlig" => "\u{DF}", b"agrave" => "\u{E0}", b"aacute" => "\u{E1}", b"acirc" => "\u{E2}", b"atilde" => "\u{E3}", b"auml" => "\u{E4}", b"aring" => "\u{E5}", b"aelig" => "\u{E6}", b"ccedil" => "\u{E7}", b"egrave" => "\u{E8}", 
b"eacute" => "\u{E9}", b"ecirc" => "\u{EA}", b"euml" => "\u{EB}", b"igrave" => "\u{EC}", b"iacute" => "\u{ED}", b"icirc" => "\u{EE}", b"iuml" => "\u{EF}", b"eth" => "\u{F0}", b"ntilde" => "\u{F1}", b"ograve" => "\u{F2}", b"oacute" => "\u{F3}", b"ocirc" => "\u{F4}", b"otilde" => "\u{F5}", b"ouml" => "\u{F6}", b"divide" | b"div" => "\u{F7}", b"oslash" => "\u{F8}", b"ugrave" => "\u{F9}", b"uacute" => "\u{FA}", b"ucirc" => "\u{FB}", b"uuml" => "\u{FC}", b"yacute" => "\u{FD}", b"thorn" => "\u{FE}", b"yuml" => "\u{FF}", b"Amacr" => "\u{10}", b"amacr" => "\u{10}", b"Abreve" => "\u{10}", b"abreve" => "\u{10}", b"Aogon" => "\u{10}", b"aogon" => "\u{10}", b"Cacute" => "\u{10}", b"cacute" => "\u{10}", b"Ccirc" => "\u{10}", b"ccirc" => "\u{10}", b"Cdot" => "\u{10}", b"cdot" => "\u{10}", b"Ccaron" => "\u{10}", b"ccaron" => "\u{10}", b"Dcaron" => "\u{10}", b"dcaron" => "\u{10}", b"Dstrok" => "\u{11}", b"dstrok" => "\u{11}", b"Emacr" => "\u{11}", b"emacr" => "\u{11}", b"Edot" => "\u{11}", b"edot" => "\u{11}", b"Eogon" => "\u{11}", b"eogon" => "\u{11}", b"Ecaron" => "\u{11}", b"ecaron" => "\u{11}", b"Gcirc" => "\u{11}", b"gcirc" => "\u{11}", b"Gbreve" => "\u{11}", b"gbreve" => "\u{11}", b"Gdot" => "\u{12}", b"gdot" => "\u{12}", b"Gcedil" => "\u{12}", b"Hcirc" => "\u{12}", b"hcirc" => "\u{12}", b"Hstrok" => "\u{12}", b"hstrok" => "\u{12}", b"Itilde" => "\u{12}", b"itilde" => "\u{12}", b"Imacr" => "\u{12}", b"imacr" => "\u{12}", b"Iogon" => "\u{12}", b"iogon" => "\u{12}", b"Idot" => "\u{13}", b"imath" | b"inodot" => "\u{13}", b"IJlig" => "\u{13}", b"ijlig" => "\u{13}", b"Jcirc" => "\u{13}", b"jcirc" => "\u{13}", b"Kcedil" => "\u{13}", b"kcedil" => "\u{13}", b"kgreen" => "\u{13}", b"Lacute" => "\u{13}", b"lacute" => "\u{13}", b"Lcedil" => "\u{13}", b"lcedil" => "\u{13}", b"Lcaron" => "\u{13}", b"lcaron" => "\u{13}", b"Lmidot" => "\u{13}", b"lmidot" => "\u{14}", b"Lstrok" => "\u{14}", b"lstrok" => "\u{14}", b"Nacute" => "\u{14}", b"nacute" => "\u{14}", b"Ncedil" => "\u{14}", b"ncedil" 
=> "\u{14}", b"Ncaron" => "\u{14}", b"ncaron" => "\u{14}", b"napos" => "\u{14}", b"ENG" => "\u{14}", b"eng" => "\u{14}", b"Omacr" => "\u{14}", b"omacr" => "\u{14}", b"Odblac" => "\u{15}", b"odblac" => "\u{15}", b"OElig" => "\u{15}", b"oelig" => "\u{15}", b"Racute" => "\u{15}", b"racute" => "\u{15}", b"Rcedil" => "\u{15}", b"rcedil" => "\u{15}", b"Rcaron" => "\u{15}", b"rcaron" => "\u{15}", b"Sacute" => "\u{15}", b"sacute" => "\u{15}", b"Scirc" => "\u{15}", b"scirc" => "\u{15}", b"Scedil" => "\u{15}", b"scedil" => "\u{15}", b"Scaron" => "\u{16}", b"scaron" => "\u{16}", b"Tcedil" => "\u{16}", b"tcedil" => "\u{16}", b"Tcaron" => "\u{16}", b"tcaron" => "\u{16}", b"Tstrok" => "\u{16}", b"tstrok" => "\u{16}", b"Utilde" => "\u{16}", b"utilde" => "\u{16}", b"Umacr" => "\u{16}", b"umacr" => "\u{16}", b"Ubreve" => "\u{16}", b"ubreve" => "\u{16}", b"Uring" => "\u{16}", b"uring" => "\u{16}", b"Udblac" => "\u{17}", b"udblac" => "\u{17}", b"Uogon" => "\u{17}", b"uogon" => "\u{17}", b"Wcirc" => "\u{17}", b"wcirc" => "\u{17}", b"Ycirc" => "\u{17}", b"ycirc" => "\u{17}", b"Yuml" => "\u{17}", b"Zacute" => "\u{17}", b"zacute" => "\u{17}", b"Zdot" => "\u{17}", b"zdot" => "\u{17}", b"Zcaron" => "\u{17}", b"zcaron" => "\u{17}", b"fnof" => "\u{19}", b"imped" => "\u{1B}", b"gacute" => "\u{1F}", b"jmath" => "\u{23}", b"circ" => "\u{2C}", b"caron" | b"Hacek" => "\u{2C}", b"breve" | b"Breve" => "\u{2D}", b"dot" | b"DiacriticalDot" => "\u{2D}", b"ring" => "\u{2D}", b"ogon" => "\u{2D}", b"tilde" | b"DiacriticalTilde" => "\u{2D}", b"dblac" | b"DiacriticalDoubleAcute" => "\u{2D}", b"DownBreve" => "\u{31}", b"UnderBar" => "\u{33}", b"Alpha" => "\u{39}", b"Beta" => "\u{39}", b"Gamma" => "\u{39}", b"Delta" => "\u{39}", b"Epsilon" => "\u{39}", b"Zeta" => "\u{39}", b"Eta" => "\u{39}", b"Theta" => "\u{39}", b"Iota" => "\u{39}", b"Kappa" => "\u{39}", b"Lambda" => "\u{39}", b"Mu" => "\u{39}", b"Nu" => "\u{39}", b"Xi" => "\u{39}", b"Omicron" => "\u{39}", b"Pi" => "\u{3A}", b"Rho" => "\u{3A}", b"Sigma" => 
"\u{3A}", b"Tau" => "\u{3A}", b"Upsilon" => "\u{3A}", b"Phi" => "\u{3A}", b"Chi" => "\u{3A}", b"Psi" => "\u{3A}", b"Omega" => "\u{3A}", b"alpha" => "\u{3B}", b"beta" => "\u{3B}", b"gamma" => "\u{3B}", b"delta" => "\u{3B}", b"epsiv" | b"varepsilon" | b"epsilon" => "\u{3B}", b"zeta" => "\u{3B}", b"eta" => "\u{3B}", b"theta" => "\u{3B}", b"iota" => "\u{3B}", b"kappa" => "\u{3B}", b"lambda" => "\u{3B}", b"mu" => "\u{3B}", b"nu" => "\u{3B}", b"xi" => "\u{3B}", b"omicron" => "\u{3B}", b"pi" => "\u{3C}", b"rho" => "\u{3C}", b"sigmav" | b"varsigma" | b"sigmaf" => "\u{3C}", b"sigma" => "\u{3C}", b"tau" => "\u{3C}", b"upsi" | b"upsilon" => "\u{3C}", b"phi" | b"phiv" | b"varphi" => "\u{3C}", b"chi" => "\u{3C}", b"psi" => "\u{3C}", b"omega" => "\u{3C}", b"thetav" | b"vartheta" | b"thetasym" => "\u{3D}", b"Upsi" | b"upsih" => "\u{3D}", b"straightphi" => "\u{3D}", b"piv" | b"varpi" => "\u{3D}", b"Gammad" => "\u{3D}", b"gammad" | b"digamma" => "\u{3D}", b"kappav" | b"varkappa" => "\u{3F}", b"rhov" | b"varrho" => "\u{3F}", b"epsi" | b"straightepsilon" => "\u{3F}", b"bepsi" | b"backepsilon" => "\u{3F}", b"IOcy" => "\u{40}", b"DJcy" => "\u{40}", b"GJcy" => "\u{40}", b"Jukcy" => "\u{40}", b"DScy" => "\u{40}", b"Iukcy" => "\u{40}", b"YIcy" => "\u{40}", b"Jsercy" => "\u{40}", b"LJcy" => "\u{40}", b"NJcy" => "\u{40}", b"TSHcy" => "\u{40}", b"KJcy" => "\u{40}", b"Ubrcy" => "\u{40}", b"DZcy" => "\u{40}", b"Acy" => "\u{41}", b"Bcy" => "\u{41}", b"Vcy" => "\u{41}", b"Gcy" => "\u{41}", b"Dcy" => "\u{41}", b"IEcy" => "\u{41}", b"ZHcy" => "\u{41}", b"Zcy" => "\u{41}", b"Icy" => "\u{41}", b"Jcy" => "\u{41}", b"Kcy" => "\u{41}", b"Lcy" => "\u{41}", b"Mcy" => "\u{41}", b"Ncy" => "\u{41}", b"Ocy" => "\u{41}", b"Pcy" => "\u{41}", b"Rcy" => "\u{42}", b"Scy" => "\u{42}", b"Tcy" => "\u{42}", b"Ucy" => "\u{42}", b"Fcy" => "\u{42}", b"KHcy" => "\u{42}", b"TScy" => "\u{42}", b"CHcy" => "\u{42}", b"SHcy" => "\u{42}", b"SHCHcy" => "\u{42}", b"HARDcy" => "\u{42}", b"Ycy" => "\u{42}", b"SOFTcy" => "\u{42}", 
b"Ecy" => "\u{42}", b"YUcy" => "\u{42}", b"YAcy" => "\u{42}", b"acy" => "\u{43}", b"bcy" => "\u{43}", b"vcy" => "\u{43}", b"gcy" => "\u{43}", b"dcy" => "\u{43}", b"iecy" => "\u{43}", b"zhcy" => "\u{43}", b"zcy" => "\u{43}", b"icy" => "\u{43}", b"jcy" => "\u{43}", b"kcy" => "\u{43}", b"lcy" => "\u{43}", b"mcy" => "\u{43}", b"ncy" => "\u{43}", b"ocy" => "\u{43}", b"pcy" => "\u{43}", b"rcy" => "\u{44}", b"scy" => "\u{44}", b"tcy" => "\u{44}", b"ucy" => "\u{44}", b"fcy" => "\u{44}", b"khcy" => "\u{44}", b"tscy" => "\u{44}", b"chcy" => "\u{44}", b"shcy" => "\u{44}", b"shchcy" => "\u{44}", b"hardcy" => "\u{44}", b"ycy" => "\u{44}", b"softcy" => "\u{44}", b"ecy" => "\u{44}", b"yucy" => "\u{44}", b"yacy" => "\u{44}", b"iocy" => "\u{45}", b"djcy" => "\u{45}", b"gjcy" => "\u{45}", b"jukcy" => "\u{45}", b"dscy" => "\u{45}", b"iukcy" => "\u{45}", b"yicy" => "\u{45}", b"jsercy" => "\u{45}", b"ljcy" => "\u{45}", b"njcy" => "\u{45}", b"tshcy" => "\u{45}", b"kjcy" => "\u{45}", b"ubrcy" => "\u{45}", b"dzcy" => "\u{45}", b"ensp" => "\u{2002}", b"emsp" => "\u{2003}", b"emsp13" => "\u{2004}", b"emsp14" => "\u{2005}", b"numsp" => "\u{2007}", b"puncsp" => "\u{2008}", b"thinsp" | b"ThinSpace" => "\u{2009}", b"hairsp" | b"VeryThinSpace" => "\u{200A}", b"ZeroWidthSpace" | b"NegativeVeryThinSpace" | b"NegativeThinSpace" | b"NegativeMediumSpace" | b"NegativeThickSpace" => "\u{200B}", b"zwnj" => "\u{200C}", b"zwj" => "\u{200D}", b"lrm" => "\u{200E}", b"rlm" => "\u{200F}", b"hyphen" | b"dash" => "\u{2010}", b"ndash" => "\u{2013}", b"mdash" => "\u{2014}", b"horbar" => "\u{2015}", b"Verbar" | b"Vert" => "\u{2016}", b"lsquo" | b"OpenCurlyQuote" => "\u{2018}", b"rsquo" | b"rsquor" | b"CloseCurlyQuote" => "\u{2019}", b"lsquor" | b"sbquo" => "\u{201A}", b"ldquo" | b"OpenCurlyDoubleQuote" => "\u{201C}", b"rdquo" | b"rdquor" | b"CloseCurlyDoubleQuote" => "\u{201D}", b"ldquor" | b"bdquo" => "\u{201E}", b"dagger" => "\u{2020}", b"Dagger" | b"ddagger" => "\u{2021}", b"bull" | b"bullet" => "\u{2022}", 
b"nldr" => "\u{2025}", b"hellip" | b"mldr" => "\u{2026}", b"permil" => "\u{2030}", b"pertenk" => "\u{2031}", b"prime" => "\u{2032}", b"Prime" => "\u{2033}", b"tprime" => "\u{2034}", b"bprime" | b"backprime" => "\u{2035}", b"lsaquo" => "\u{2039}", b"rsaquo" => "\u{203A}", b"oline" => "\u{203E}", b"caret" => "\u{2041}", b"hybull" => "\u{2043}", b"frasl" => "\u{2044}", b"bsemi" => "\u{204F}", b"qprime" => "\u{2057}", b"MediumSpace" => "\u{205F}", b"NoBreak" => "\u{2060}", b"ApplyFunction" | b"af" => "\u{2061}", b"InvisibleTimes" | b"it" => "\u{2062}", b"InvisibleComma" | b"ic" => "\u{2063}", b"euro" => "\u{20AC}", b"tdot" | b"TripleDot" => "\u{20DB}", b"DotDot" => "\u{20DC}", b"Copf" | b"complexes" => "\u{2102}", b"incare" => "\u{2105}", b"gscr" => "\u{210A}", b"hamilt" | b"HilbertSpace" | b"Hscr" => "\u{210B}", b"Hfr" | b"Poincareplane" => "\u{210C}", b"quaternions" | b"Hopf" => "\u{210D}", b"planckh" => "\u{210E}", b"planck" | b"hbar" | b"plankv" | b"hslash" => "\u{210F}", b"Iscr" | b"imagline" => "\u{2110}", b"image" | b"Im" | b"imagpart" | b"Ifr" => "\u{2111}", b"Lscr" | b"lagran" | b"Laplacetrf" => "\u{2112}", b"ell" => "\u{2113}", b"Nopf" | b"naturals" => "\u{2115}", b"numero" => "\u{2116}", b"copysr" => "\u{2117}", b"weierp" | b"wp" => "\u{2118}", b"Popf" | b"primes" => "\u{2119}", b"rationals" | b"Qopf" => "\u{211A}", b"Rscr" | b"realine" => "\u{211B}", b"real" | b"Re" | b"realpart" | b"Rfr" => "\u{211C}", b"reals" | b"Ropf" => "\u{211D}", b"rx" => "\u{211E}", b"trade" | b"TRADE" => "\u{2122}", b"integers" | b"Zopf" => "\u{2124}", b"ohm" => "\u{2126}", b"mho" => "\u{2127}", b"Zfr" | b"zeetrf" => "\u{2128}", b"iiota" => "\u{2129}", b"angst" => "\u{212B}", b"bernou" | b"Bernoullis" | b"Bscr" => "\u{212C}", b"Cfr" | b"Cayleys" => "\u{212D}", b"escr" => "\u{212F}", b"Escr" | b"expectation" => "\u{2130}", b"Fscr" | b"Fouriertrf" => "\u{2131}", b"phmmat" | b"Mellintrf" | b"Mscr" => "\u{2133}", b"order" | b"orderof" | b"oscr" => "\u{2134}", b"alefsym" | b"aleph" => 
"\u{2135}", b"beth" => "\u{2136}", b"gimel" => "\u{2137}", b"daleth" => "\u{2138}", b"CapitalDifferentialD" | b"DD" => "\u{2145}", b"DifferentialD" | b"dd" => "\u{2146}", b"ExponentialE" | b"exponentiale" | b"ee" => "\u{2147}", b"ImaginaryI" | b"ii" => "\u{2148}", b"frac13" => "\u{2153}", b"frac23" => "\u{2154}", b"frac15" => "\u{2155}", b"frac25" => "\u{2156}", b"frac35" => "\u{2157}", b"frac45" => "\u{2158}", b"frac16" => "\u{2159}", b"frac56" => "\u{215A}", b"frac18" => "\u{215B}", b"frac38" => "\u{215C}", b"frac58" => "\u{215D}", b"frac78" => "\u{215E}", b"larr" | b"leftarrow" | b"LeftArrow" | b"slarr" | b"ShortLeftArrow" => "\u{2190}", b"uarr" | b"uparrow" | b"UpArrow" | b"ShortUpArrow" => "\u{2191}", b"rarr" | b"rightarrow" | b"RightArrow" | b"srarr" | b"ShortRightArrow" => "\u{2192}", b"darr" | b"downarrow" | b"DownArrow" | b"ShortDownArrow" => "\u{2193}", b"harr" | b"leftrightarrow" | b"LeftRightArrow" => "\u{2194}", b"varr" | b"updownarrow" | b"UpDownArrow" => "\u{2195}", b"nwarr" | b"UpperLeftArrow" | b"nwarrow" => "\u{2196}", b"nearr" | b"UpperRightArrow" | b"nearrow" => "\u{2197}", b"searr" | b"searrow" | b"LowerRightArrow" => "\u{2198}", b"swarr" | b"swarrow" | b"LowerLeftArrow" => "\u{2199}", b"nlarr" | b"nleftarrow" => "\u{219A}", b"nrarr" | b"nrightarrow" => "\u{219B}", b"rarrw" | b"rightsquigarrow" => "\u{219D}", b"Larr" | b"twoheadleftarrow" => "\u{219E}", b"Uarr" => "\u{219F}", b"Rarr" | b"twoheadrightarrow" => "\u{21A0}", b"Darr" => "\u{21A1}", b"larrtl" | b"leftarrowtail" => "\u{21A2}", b"rarrtl" | b"rightarrowtail" => "\u{21A3}", b"LeftTeeArrow" | b"mapstoleft" => "\u{21A4}", b"UpTeeArrow" | b"mapstoup" => "\u{21A5}", b"map" | b"RightTeeArrow" | b"mapsto" => "\u{21A6}", b"DownTeeArrow" | b"mapstodown" => "\u{21A7}", b"larrhk" | b"hookleftarrow" => "\u{21A9}", b"rarrhk" | b"hookrightarrow" => "\u{21AA}", b"larrlp" | b"looparrowleft" => "\u{21AB}", b"rarrlp" | b"looparrowright" => "\u{21AC}", b"harrw" | b"leftrightsquigarrow" => "\u{21AD}", 
b"nharr" | b"nleftrightarrow" => "\u{21AE}", b"lsh" | b"Lsh" => "\u{21B0}", b"rsh" | b"Rsh" => "\u{21B1}", b"ldsh" => "\u{21B2}", b"rdsh" => "\u{21B3}", b"crarr" => "\u{21B5}", b"cularr" | b"curvearrowleft" => "\u{21B6}", b"curarr" | b"curvearrowright" => "\u{21B7}", b"olarr" | b"circlearrowleft" => "\u{21BA}", b"orarr" | b"circlearrowright" => "\u{21BB}", b"lharu" | b"LeftVector" | b"leftharpoonup" => "\u{21BC}", b"lhard" | b"leftharpoondown" | b"DownLeftVector" => "\u{21BD}", b"uharr" | b"upharpoonright" | b"RightUpVector" => "\u{21BE}", b"uharl" | b"upharpoonleft" | b"LeftUpVector" => "\u{21BF}", b"rharu" | b"RightVector" | b"rightharpoonup" => "\u{21C0}", b"rhard" | b"rightharpoondown" | b"DownRightVector" => "\u{21C1}", b"dharr" | b"RightDownVector" | b"downharpoonright" => "\u{21C2}", b"dharl" | b"LeftDownVector" | b"downharpoonleft" => "\u{21C3}", b"rlarr" | b"rightleftarrows" | b"RightArrowLeftArrow" => "\u{21C4}", b"udarr" | b"UpArrowDownArrow" => "\u{21C5}", b"lrarr" | b"leftrightarrows" | b"LeftArrowRightArrow" => "\u{21C6}", b"llarr" | b"leftleftarrows" => "\u{21C7}", b"uuarr" | b"upuparrows" => "\u{21C8}", b"rrarr" | b"rightrightarrows" => "\u{21C9}", b"ddarr" | b"downdownarrows" => "\u{21CA}", b"lrhar" | b"ReverseEquilibrium" | b"leftrightharpoons" => "\u{21CB}", b"rlhar" | b"rightleftharpoons" | b"Equilibrium" => "\u{21CC}", b"nlArr" | b"nLeftarrow" => "\u{21CD}", b"nhArr" | b"nLeftrightarrow" => "\u{21CE}", b"nrArr" | b"nRightarrow" => "\u{21CF}", b"lArr" | b"Leftarrow" | b"DoubleLeftArrow" => "\u{21D0}", b"uArr" | b"Uparrow" | b"DoubleUpArrow" => "\u{21D1}", b"rArr" | b"Rightarrow" | b"Implies" | b"DoubleRightArrow" => "\u{21D2}", b"dArr" | b"Downarrow" | b"DoubleDownArrow" => "\u{21D3}", b"hArr" | b"Leftrightarrow" | b"DoubleLeftRightArrow" | b"iff" => "\u{21D4}", b"vArr" | b"Updownarrow" | b"DoubleUpDownArrow" => "\u{21D5}", b"nwArr" => "\u{21D6}", b"neArr" => "\u{21D7}", b"seArr" => "\u{21D8}", b"swArr" => "\u{21D9}", b"lAarr" | b"Lleftarrow" => 
"\u{21DA}", b"rAarr" | b"Rrightarrow" => "\u{21DB}", b"zigrarr" => "\u{21DD}", b"larrb" | b"LeftArrowBar" => "\u{21E4}", b"rarrb" | b"RightArrowBar" => "\u{21E5}", b"duarr" | b"DownArrowUpArrow" => "\u{21F5}", b"loarr" => "\u{21FD}", b"roarr" => "\u{21FE}", b"hoarr" => "\u{21FF}", b"forall" | b"ForAll" => "\u{2200}", b"comp" | b"complement" => "\u{2201}", b"part" | b"PartialD" => "\u{2202}", b"exist" | b"Exists" => "\u{2203}", b"nexist" | b"NotExists" | b"nexists" => "\u{2204}", b"empty" | b"emptyset" | b"emptyv" | b"varnothing" => "\u{2205}", b"nabla" | b"Del" => "\u{2207}", b"isin" | b"isinv" | b"Element" | b"in" => "\u{2208}", b"notin" | b"NotElement" | b"notinva" => "\u{2209}", b"niv" | b"ReverseElement" | b"ni" | b"SuchThat" => "\u{220B}", b"notni" | b"notniva" | b"NotReverseElement" => "\u{220C}", b"prod" | b"Product" => "\u{220F}", b"coprod" | b"Coproduct" => "\u{2210}", b"sum" | b"Sum" => "\u{2211}", b"minus" => "\u{2212}", b"mnplus" | b"mp" | b"MinusPlus" => "\u{2213}", b"plusdo" | b"dotplus" => "\u{2214}", b"setmn" | b"setminus" | b"Backslash" | b"ssetmn" | b"smallsetminus" => "\u{2216}", b"lowast" => "\u{2217}", b"compfn" | b"SmallCircle" => "\u{2218}", b"radic" | b"Sqrt" => "\u{221A}", b"prop" | b"propto" | b"Proportional" | b"vprop" | b"varpropto" => "\u{221D}", b"infin" => "\u{221E}", b"angrt" => "\u{221F}", b"ang" | b"angle" => "\u{2220}", b"angmsd" | b"measuredangle" => "\u{2221}", b"angsph" => "\u{2222}", b"mid" | b"VerticalBar" | b"smid" | b"shortmid" => "\u{2223}", b"nmid" | b"NotVerticalBar" | b"nsmid" | b"nshortmid" => "\u{2224}", b"par" | b"parallel" | b"DoubleVerticalBar" | b"spar" | b"shortparallel" => "\u{2225}", b"npar" | b"nparallel" | b"NotDoubleVerticalBar" | b"nspar" | b"nshortparallel" => { "\u{2226}" } b"and" | b"wedge" => "\u{2227}", b"or" | b"vee" => "\u{2228}", b"cap" => "\u{2229}", b"cup" => "\u{222A}", b"int" | b"Integral" => "\u{222B}", b"Int" => "\u{222C}", b"tint" | b"iiint" => "\u{222D}", b"conint" | b"oint" | 
b"ContourIntegral" => "\u{222E}", b"Conint" | b"DoubleContourIntegral" => "\u{222F}", b"Cconint" => "\u{2230}", b"cwint" => "\u{2231}", b"cwconint" | b"ClockwiseContourIntegral" => "\u{2232}", b"awconint" | b"CounterClockwiseContourIntegral" => "\u{2233}", b"there4" | b"therefore" | b"Therefore" => "\u{2234}", b"becaus" | b"because" | b"Because" => "\u{2235}", b"ratio" => "\u{2236}", b"Colon" | b"Proportion" => "\u{2237}", b"minusd" | b"dotminus" => "\u{2238}", b"mDDot" => "\u{223A}", b"homtht" => "\u{223B}", b"sim" | b"Tilde" | b"thksim" | b"thicksim" => "\u{223C}", b"bsim" | b"backsim" => "\u{223D}", b"ac" | b"mstpos" => "\u{223E}", b"acd" => "\u{223F}", b"wreath" | b"VerticalTilde" | b"wr" => "\u{2240}", b"nsim" | b"NotTilde" => "\u{2241}", b"esim" | b"EqualTilde" | b"eqsim" => "\u{2242}", b"sime" | b"TildeEqual" | b"simeq" => "\u{2243}", b"nsime" | b"nsimeq" | b"NotTildeEqual" => "\u{2244}", b"cong" | b"TildeFullEqual" => "\u{2245}", b"simne" => "\u{2246}", b"ncong" | b"NotTildeFullEqual" => "\u{2247}", b"asymp" | b"ap" | b"TildeTilde" | b"approx" | b"thkap" | b"thickapprox" => "\u{2248}", b"nap" | b"NotTildeTilde" | b"napprox" => "\u{2249}", b"ape" | b"approxeq" => "\u{224A}", b"apid" => "\u{224B}", b"bcong" | b"backcong" => "\u{224C}", b"asympeq" | b"CupCap" => "\u{224D}", b"bump" | b"HumpDownHump" | b"Bumpeq" => "\u{224E}", b"bumpe" | b"HumpEqual" | b"bumpeq" => "\u{224F}", b"esdot" | b"DotEqual" | b"doteq" => "\u{2250}", b"eDot" | b"doteqdot" => "\u{2251}", b"efDot" | b"fallingdotseq" => "\u{2252}", b"erDot" | b"risingdotseq" => "\u{2253}", b"colone" | b"coloneq" | b"Assign" => "\u{2254}", b"ecolon" | b"eqcolon" => "\u{2255}", b"ecir" | b"eqcirc" => "\u{2256}", b"cire" | b"circeq" => "\u{2257}", b"wedgeq" => "\u{2259}", b"veeeq" => "\u{225A}", b"trie" | b"triangleq" => "\u{225C}", b"equest" | b"questeq" => "\u{225F}", b"ne" | b"NotEqual" => "\u{2260}", b"equiv" | b"Congruent" => "\u{2261}", b"nequiv" | b"NotCongruent" => "\u{2262}", b"le" | b"leq" => 
"\u{2264}", b"ge" | b"GreaterEqual" | b"geq" => "\u{2265}", b"lE" | b"LessFullEqual" | b"leqq" => "\u{2266}", b"gE" | b"GreaterFullEqual" | b"geqq" => "\u{2267}", b"lnE" | b"lneqq" => "\u{2268}", b"gnE" | b"gneqq" => "\u{2269}", b"Lt" | b"NestedLessLess" | b"ll" => "\u{226A}", b"Gt" | b"NestedGreaterGreater" | b"gg" => "\u{226B}", b"twixt" | b"between" => "\u{226C}", b"NotCupCap" => "\u{226D}", b"nlt" | b"NotLess" | b"nless" => "\u{226E}", b"ngt" | b"NotGreater" | b"ngtr" => "\u{226F}", b"nle" | b"NotLessEqual" | b"nleq" => "\u{2270}", b"nge" | b"NotGreaterEqual" | b"ngeq" => "\u{2271}", b"lsim" | b"LessTilde" | b"lesssim" => "\u{2272}", b"gsim" | b"gtrsim" | b"GreaterTilde" => "\u{2273}", b"nlsim" | b"NotLessTilde" => "\u{2274}", b"ngsim" | b"NotGreaterTilde" => "\u{2275}", b"lg" | b"lessgtr" | b"LessGreater" => "\u{2276}", b"gl" | b"gtrless" | b"GreaterLess" => "\u{2277}", b"ntlg" | b"NotLessGreater" => "\u{2278}", b"ntgl" | b"NotGreaterLess" => "\u{2279}", b"pr" | b"Precedes" | b"prec" => "\u{227A}", b"sc" | b"Succeeds" | b"succ" => "\u{227B}", b"prcue" | b"PrecedesSlantEqual" | b"preccurlyeq" => "\u{227C}", b"sccue" | b"SucceedsSlantEqual" | b"succcurlyeq" => "\u{227D}", b"prsim" | b"precsim" | b"PrecedesTilde" => "\u{227E}", b"scsim" | b"succsim" | b"SucceedsTilde" => "\u{227F}", b"npr" | b"nprec" | b"NotPrecedes" => "\u{2280}", b"nsc" | b"nsucc" | b"NotSucceeds" => "\u{2281}", b"sub" | b"subset" => "\u{2282}", b"sup" | b"supset" | b"Superset" => "\u{2283}", b"nsub" => "\u{2284}", b"nsup" => "\u{2285}", b"sube" | b"SubsetEqual" | b"subseteq" => "\u{2286}", b"supe" | b"supseteq" | b"SupersetEqual" => "\u{2287}", b"nsube" | b"nsubseteq" | b"NotSubsetEqual" => "\u{2288}", b"nsupe" | b"nsupseteq" | b"NotSupersetEqual" => "\u{2289}", b"subne" | b"subsetneq" => "\u{228A}", b"supne" | b"supsetneq" => "\u{228B}", b"cupdot" => "\u{228D}", b"uplus" | b"UnionPlus" => "\u{228E}", b"sqsub" | b"SquareSubset" | b"sqsubset" => "\u{228F}", b"sqsup" | b"SquareSuperset" | 
b"sqsupset" => "\u{2290}", b"sqsube" | b"SquareSubsetEqual" | b"sqsubseteq" => "\u{2291}", b"sqsupe" | b"SquareSupersetEqual" | b"sqsupseteq" => "\u{2292}", b"sqcap" | b"SquareIntersection" => "\u{2293}", b"sqcup" | b"SquareUnion" => "\u{2294}", b"oplus" | b"CirclePlus" => "\u{2295}", b"ominus" | b"CircleMinus" => "\u{2296}", b"otimes" | b"CircleTimes" => "\u{2297}", b"osol" => "\u{2298}", b"odot" | b"CircleDot" => "\u{2299}", b"ocir" | b"circledcirc" => "\u{229A}", b"oast" | b"circledast" => "\u{229B}", b"odash" | b"circleddash" => "\u{229D}", b"plusb" | b"boxplus" => "\u{229E}", b"minusb" | b"boxminus" => "\u{229F}", b"timesb" | b"boxtimes" => "\u{22A0}", b"sdotb" | b"dotsquare" => "\u{22A1}", b"vdash" | b"RightTee" => "\u{22A2}", b"dashv" | b"LeftTee" => "\u{22A3}", b"top" | b"DownTee" => "\u{22A4}", b"bottom" | b"bot" | b"perp" | b"UpTee" => "\u{22A5}", b"models" => "\u{22A7}", b"vDash" | b"DoubleRightTee" => "\u{22A8}", b"Vdash" => "\u{22A9}", b"Vvdash" => "\u{22AA}", b"VDash" => "\u{22AB}", b"nvdash" => "\u{22AC}", b"nvDash" => "\u{22AD}", b"nVdash" => "\u{22AE}", b"nVDash" => "\u{22AF}", b"prurel" => "\u{22B0}", b"vltri" | b"vartriangleleft" | b"LeftTriangle" => "\u{22B2}", b"vrtri" | b"vartriangleright" | b"RightTriangle" => "\u{22B3}", b"ltrie" | b"trianglelefteq" | b"LeftTriangleEqual" => "\u{22B4}", b"rtrie" | b"trianglerighteq" | b"RightTriangleEqual" => "\u{22B5}", b"origof" => "\u{22B6}", b"imof" => "\u{22B7}", b"mumap" | b"multimap" => "\u{22B8}", b"hercon" => "\u{22B9}", b"intcal" | b"intercal" => "\u{22BA}", b"veebar" => "\u{22BB}", b"barvee" => "\u{22BD}", b"angrtvb" => "\u{22BE}", b"lrtri" => "\u{22BF}", b"xwedge" | b"Wedge" | b"bigwedge" => "\u{22C0}", b"xvee" | b"Vee" | b"bigvee" => "\u{22C1}", b"xcap" | b"Intersection" | b"bigcap" => "\u{22C2}", b"xcup" | b"Union" | b"bigcup" => "\u{22C3}", b"diam" | b"diamond" | b"Diamond" => "\u{22C4}", b"sdot" => "\u{22C5}", b"sstarf" | b"Star" => "\u{22C6}", b"divonx" | b"divideontimes" => "\u{22C7}", 
b"bowtie" => "\u{22C8}", b"ltimes" => "\u{22C9}", b"rtimes" => "\u{22CA}", b"lthree" | b"leftthreetimes" => "\u{22CB}", b"rthree" | b"rightthreetimes" => "\u{22CC}", b"bsime" | b"backsimeq" => "\u{22CD}", b"cuvee" | b"curlyvee" => "\u{22CE}", b"cuwed" | b"curlywedge" => "\u{22CF}", b"Sub" | b"Subset" => "\u{22D0}", b"Sup" | b"Supset" => "\u{22D1}", b"Cap" => "\u{22D2}", b"Cup" => "\u{22D3}", b"fork" | b"pitchfork" => "\u{22D4}", b"epar" => "\u{22D5}", b"ltdot" | b"lessdot" => "\u{22D6}", b"gtdot" | b"gtrdot" => "\u{22D7}", b"Ll" => "\u{22D8}", b"Gg" | b"ggg" => "\u{22D9}", b"leg" | b"LessEqualGreater" | b"lesseqgtr" => "\u{22DA}", b"gel" | b"gtreqless" | b"GreaterEqualLess" => "\u{22DB}", b"cuepr" | b"curlyeqprec" => "\u{22DE}", b"cuesc" | b"curlyeqsucc" => "\u{22DF}", b"nprcue" | b"NotPrecedesSlantEqual" => "\u{22E0}", b"nsccue" | b"NotSucceedsSlantEqual" => "\u{22E1}", b"nsqsube" | b"NotSquareSubsetEqual" => "\u{22E2}", b"nsqsupe" | b"NotSquareSupersetEqual" => "\u{22E3}", b"lnsim" => "\u{22E6}", b"gnsim" => "\u{22E7}", b"prnsim" | b"precnsim" => "\u{22E8}", b"scnsim" | b"succnsim" => "\u{22E9}", b"nltri" | b"ntriangleleft" | b"NotLeftTriangle" => "\u{22EA}", b"nrtri" | b"ntriangleright" | b"NotRightTriangle" => "\u{22EB}", b"nltrie" | b"ntrianglelefteq" | b"NotLeftTriangleEqual" => "\u{22EC}", b"nrtrie" | b"ntrianglerighteq" | b"NotRightTriangleEqual" => "\u{22ED}", b"vellip" => "\u{22EE}", b"ctdot" => "\u{22EF}", b"utdot" => "\u{22F0}", b"dtdot" => "\u{22F1}", b"disin" => "\u{22F2}", b"isinsv" => "\u{22F3}", b"isins" => "\u{22F4}", b"isindot" => "\u{22F5}", b"notinvc" => "\u{22F6}", b"notinvb" => "\u{22F7}", b"isinE" => "\u{22F9}", b"nisd" => "\u{22FA}", b"xnis" => "\u{22FB}", b"nis" => "\u{22FC}", b"notnivc" => "\u{22FD}", b"notnivb" => "\u{22FE}", b"barwed" | b"barwedge" => "\u{2305}", b"Barwed" | b"doublebarwedge" => "\u{2306}", b"lceil" | b"LeftCeiling" => "\u{2308}", b"rceil" | b"RightCeiling" => "\u{2309}", b"lfloor" | b"LeftFloor" => "\u{230A}", 
b"rfloor" | b"RightFloor" => "\u{230B}", b"drcrop" => "\u{230C}", b"dlcrop" => "\u{230D}", b"urcrop" => "\u{230E}", b"ulcrop" => "\u{230F}", b"bnot" => "\u{2310}", b"profline" => "\u{2312}", b"profsurf" => "\u{2313}", b"telrec" => "\u{2315}", b"target" => "\u{2316}", b"ulcorn" | b"ulcorner" => "\u{231C}", b"urcorn" | b"urcorner" => "\u{231D}", b"dlcorn" | b"llcorner" => "\u{231E}", b"drcorn" | b"lrcorner" => "\u{231F}", b"frown" | b"sfrown" => "\u{2322}", b"smile" | b"ssmile" => "\u{2323}", b"cylcty" => "\u{232D}", b"profalar" => "\u{232E}", b"topbot" => "\u{2336}", b"ovbar" => "\u{233D}", b"solbar" => "\u{233F}", b"angzarr" => "\u{237C}", b"lmoust" | b"lmoustache" => "\u{23B0}", b"rmoust" | b"rmoustache" => "\u{23B1}", b"tbrk" | b"OverBracket" => "\u{23B4}", b"bbrk" | b"UnderBracket" => "\u{23B5}", b"bbrktbrk" => "\u{23B6}", b"OverParenthesis" => "\u{23DC}", b"UnderParenthesis" => "\u{23DD}", b"OverBrace" => "\u{23DE}", b"UnderBrace" => "\u{23DF}", b"trpezium" => "\u{23E2}", b"elinters" => "\u{23E7}", b"blank" => "\u{2423}", b"oS" | b"circledS" => "\u{24C8}", b"boxh" | b"HorizontalLine" => "\u{2500}", b"boxv" => "\u{2502}", b"boxdr" => "\u{250C}", b"boxdl" => "\u{2510}", b"boxur" => "\u{2514}", b"boxul" => "\u{2518}", b"boxvr" => "\u{251C}", b"boxvl" => "\u{2524}", b"boxhd" => "\u{252C}", b"boxhu" => "\u{2534}", b"boxvh" => "\u{253C}", b"boxH" => "\u{2550}", b"boxV" => "\u{2551}", b"boxdR" => "\u{2552}", b"boxDr" => "\u{2553}", b"boxDR" => "\u{2554}", b"boxdL" => "\u{2555}", b"boxDl" => "\u{2556}", b"boxDL" => "\u{2557}", b"boxuR" => "\u{2558}", b"boxUr" => "\u{2559}", b"boxUR" => "\u{255A}", b"boxuL" => "\u{255B}", b"boxUl" => "\u{255C}", b"boxUL" => "\u{255D}", b"boxvR" => "\u{255E}", b"boxVr" => "\u{255F}", b"boxVR" => "\u{2560}", b"boxvL" => "\u{2561}", b"boxVl" => "\u{2562}", b"boxVL" => "\u{2563}", b"boxHd" => "\u{2564}", b"boxhD" => "\u{2565}", b"boxHD" => "\u{2566}", b"boxHu" => "\u{2567}", b"boxhU" => "\u{2568}", b"boxHU" => "\u{2569}", b"boxvH" => 
"\u{256A}", b"boxVh" => "\u{256B}", b"boxVH" => "\u{256C}", b"uhblk" => "\u{2580}", b"lhblk" => "\u{2584}", b"block" => "\u{2588}", b"blk14" => "\u{2591}", b"blk12" => "\u{2592}", b"blk34" => "\u{2593}", b"squ" | b"square" | b"Square" => "\u{25A1}", b"squf" | b"squarf" | b"blacksquare" | b"FilledVerySmallSquare" => "\u{25AA}", b"EmptyVerySmallSquare" => "\u{25AB}", b"rect" => "\u{25AD}", b"marker" => "\u{25AE}", b"fltns" => "\u{25B1}", b"xutri" | b"bigtriangleup" => "\u{25B3}", b"utrif" | b"blacktriangle" => "\u{25B4}", b"utri" | b"triangle" => "\u{25B5}", b"rtrif" | b"blacktriangleright" => "\u{25B8}", b"rtri" | b"triangleright" => "\u{25B9}", b"xdtri" | b"bigtriangledown" => "\u{25BD}", b"dtrif" | b"blacktriangledown" => "\u{25BE}", b"dtri" | b"triangledown" => "\u{25BF}", b"ltrif" | b"blacktriangleleft" => "\u{25C2}", b"ltri" | b"triangleleft" => "\u{25C3}", b"loz" | b"lozenge" => "\u{25CA}", b"cir" => "\u{25CB}", b"tridot" => "\u{25EC}", b"xcirc" | b"bigcirc" => "\u{25EF}", b"ultri" => "\u{25F8}", b"urtri" => "\u{25F9}", b"lltri" => "\u{25FA}", b"EmptySmallSquare" => "\u{25FB}", b"FilledSmallSquare" => "\u{25FC}", b"starf" | b"bigstar" => "\u{2605}", b"star" => "\u{2606}", b"phone" => "\u{260E}", b"female" => "\u{2640}", b"male" => "\u{2642}", b"spades" | b"spadesuit" => "\u{2660}", b"clubs" | b"clubsuit" => "\u{2663}", b"hearts" | b"heartsuit" => "\u{2665}", b"diams" | b"diamondsuit" => "\u{2666}", b"sung" => "\u{266A}", b"flat" => "\u{266D}", b"natur" | b"natural" => "\u{266E}", b"sharp" => "\u{266F}", b"check" | b"checkmark" => "\u{2713}", b"cross" => "\u{2717}", b"malt" | b"maltese" => "\u{2720}", b"sext" => "\u{2736}", b"VerticalSeparator" => "\u{2758}", b"lbbrk" => "\u{2772}", b"rbbrk" => "\u{2773}", b"lobrk" | b"LeftDoubleBracket" => "\u{27E6}", b"robrk" | b"RightDoubleBracket" => "\u{27E7}", b"lang" | b"LeftAngleBracket" | b"langle" => "\u{27E8}", b"rang" | b"RightAngleBracket" | b"rangle" => "\u{27E9}", b"Lang" => "\u{27EA}", b"Rang" => "\u{27EB}", 
b"loang" => "\u{27EC}", b"roang" => "\u{27ED}", b"xlarr" | b"longleftarrow" | b"LongLeftArrow" => "\u{27F5}", b"xrarr" | b"longrightarrow" | b"LongRightArrow" => "\u{27F6}", b"xharr" | b"longleftrightarrow" | b"LongLeftRightArrow" => "\u{27F7}", b"xlArr" | b"Longleftarrow" | b"DoubleLongLeftArrow" => "\u{27F8}", b"xrArr" | b"Longrightarrow" | b"DoubleLongRightArrow" => "\u{27F9}", b"xhArr" | b"Longleftrightarrow" | b"DoubleLongLeftRightArrow" => "\u{27FA}", b"xmap" | b"longmapsto" => "\u{27FC}", b"dzigrarr" => "\u{27FF}", b"nvlArr" => "\u{2902}", b"nvrArr" => "\u{2903}", b"nvHarr" => "\u{2904}", b"Map" => "\u{2905}", b"lbarr" => "\u{290C}", b"rbarr" | b"bkarow" => "\u{290D}", b"lBarr" => "\u{290E}", b"rBarr" | b"dbkarow" => "\u{290F}", b"RBarr" | b"drbkarow" => "\u{2910}", b"DDotrahd" => "\u{2911}", b"UpArrowBar" => "\u{2912}", b"DownArrowBar" => "\u{2913}", b"Rarrtl" => "\u{2916}", b"latail" => "\u{2919}", b"ratail" => "\u{291A}", b"lAtail" => "\u{291B}", b"rAtail" => "\u{291C}", b"larrfs" => "\u{291D}", b"rarrfs" => "\u{291E}", b"larrbfs" => "\u{291F}", b"rarrbfs" => "\u{2920}", b"nwarhk" => "\u{2923}", b"nearhk" => "\u{2924}", b"searhk" | b"hksearow" => "\u{2925}", b"swarhk" | b"hkswarow" => "\u{2926}", b"nwnear" => "\u{2927}", b"nesear" | b"toea" => "\u{2928}", b"seswar" | b"tosa" => "\u{2929}", b"swnwar" => "\u{292A}", b"rarrc" => "\u{2933}", b"cudarrr" => "\u{2935}", b"ldca" => "\u{2936}", b"rdca" => "\u{2937}", b"cudarrl" => "\u{2938}", b"larrpl" => "\u{2939}", b"curarrm" => "\u{293C}", b"cularrp" => "\u{293D}", b"rarrpl" => "\u{2945}", b"harrcir" => "\u{2948}", b"Uarrocir" => "\u{2949}", b"lurdshar" => "\u{294A}", b"ldrushar" => "\u{294B}", b"LeftRightVector" => "\u{294E}", b"RightUpDownVector" => "\u{294F}", b"DownLeftRightVector" => "\u{2950}", b"LeftUpDownVector" => "\u{2951}", b"LeftVectorBar" => "\u{2952}", b"RightVectorBar" => "\u{2953}", b"RightUpVectorBar" => "\u{2954}", b"RightDownVectorBar" => "\u{2955}", b"DownLeftVectorBar" => "\u{2956}", 
b"DownRightVectorBar" => "\u{2957}", b"LeftUpVectorBar" => "\u{2958}", b"LeftDownVectorBar" => "\u{2959}", b"LeftTeeVector" => "\u{295A}", b"RightTeeVector" => "\u{295B}", b"RightUpTeeVector" => "\u{295C}", b"RightDownTeeVector" => "\u{295D}", b"DownLeftTeeVector" => "\u{295E}", b"DownRightTeeVector" => "\u{295F}", b"LeftUpTeeVector" => "\u{2960}", b"LeftDownTeeVector" => "\u{2961}", b"lHar" => "\u{2962}", b"uHar" => "\u{2963}", b"rHar" => "\u{2964}", b"dHar" => "\u{2965}", b"luruhar" => "\u{2966}", b"ldrdhar" => "\u{2967}", b"ruluhar" => "\u{2968}", b"rdldhar" => "\u{2969}", b"lharul" => "\u{296A}", b"llhard" => "\u{296B}", b"rharul" => "\u{296C}", b"lrhard" => "\u{296D}", b"udhar" | b"UpEquilibrium" => "\u{296E}", b"duhar" | b"ReverseUpEquilibrium" => "\u{296F}", b"RoundImplies" => "\u{2970}", b"erarr" => "\u{2971}", b"simrarr" => "\u{2972}", b"larrsim" => "\u{2973}", b"rarrsim" => "\u{2974}", b"rarrap" => "\u{2975}", b"ltlarr" => "\u{2976}", b"gtrarr" => "\u{2978}", b"subrarr" => "\u{2979}", b"suplarr" => "\u{297B}", b"lfisht" => "\u{297C}", b"rfisht" => "\u{297D}", b"ufisht" => "\u{297E}", b"dfisht" => "\u{297F}", b"lopar" => "\u{2985}", b"ropar" => "\u{2986}", b"lbrke" => "\u{298B}", b"rbrke" => "\u{298C}", b"lbrkslu" => "\u{298D}", b"rbrksld" => "\u{298E}", b"lbrksld" => "\u{298F}", b"rbrkslu" => "\u{2990}", b"langd" => "\u{2991}", b"rangd" => "\u{2992}", b"lparlt" => "\u{2993}", b"rpargt" => "\u{2994}", b"gtlPar" => "\u{2995}", b"ltrPar" => "\u{2996}", b"vzigzag" => "\u{299A}", b"vangrt" => "\u{299C}", b"angrtvbd" => "\u{299D}", b"ange" => "\u{29A4}", b"range" => "\u{29A5}", b"dwangle" => "\u{29A6}", b"uwangle" => "\u{29A7}", b"angmsdaa" => "\u{29A8}", b"angmsdab" => "\u{29A9}", b"angmsdac" => "\u{29AA}", b"angmsdad" => "\u{29AB}", b"angmsdae" => "\u{29AC}", b"angmsdaf" => "\u{29AD}", b"angmsdag" => "\u{29AE}", b"angmsdah" => "\u{29AF}", b"bemptyv" => "\u{29B0}", b"demptyv" => "\u{29B1}", b"cemptyv" => "\u{29B2}", b"raemptyv" => "\u{29B3}", b"laemptyv" => 
"\u{29B4}", b"ohbar" => "\u{29B5}", b"omid" => "\u{29B6}", b"opar" => "\u{29B7}", b"operp" => "\u{29B9}", b"olcross" => "\u{29BB}", b"odsold" => "\u{29BC}", b"olcir" => "\u{29BE}", b"ofcir" => "\u{29BF}", b"olt" => "\u{29C0}", b"ogt" => "\u{29C1}", b"cirscir" => "\u{29C2}", b"cirE" => "\u{29C3}", b"solb" => "\u{29C4}", b"bsolb" => "\u{29C5}", b"boxbox" => "\u{29C9}", b"trisb" => "\u{29CD}", b"rtriltri" => "\u{29CE}", b"LeftTriangleBar" => "\u{29CF}", b"RightTriangleBar" => "\u{29D0}", b"race" => "\u{29DA}", b"iinfin" => "\u{29DC}", b"infintie" => "\u{29DD}", b"nvinfin" => "\u{29DE}", b"eparsl" => "\u{29E3}", b"smeparsl" => "\u{29E4}", b"eqvparsl" => "\u{29E5}", b"lozf" | b"blacklozenge" => "\u{29EB}", b"RuleDelayed" => "\u{29F4}", b"dsol" => "\u{29F6}", b"xodot" | b"bigodot" => "\u{2A00}", b"xoplus" | b"bigoplus" => "\u{2A01}", b"xotime" | b"bigotimes" => "\u{2A02}", b"xuplus" | b"biguplus" => "\u{2A04}", b"xsqcup" | b"bigsqcup" => "\u{2A06}", b"qint" | b"iiiint" => "\u{2A0C}", b"fpartint" => "\u{2A0D}", b"cirfnint" => "\u{2A10}", b"awint" => "\u{2A11}", b"rppolint" => "\u{2A12}", b"scpolint" => "\u{2A13}", b"npolint" => "\u{2A14}", b"pointint" => "\u{2A15}", b"quatint" => "\u{2A16}", b"intlarhk" => "\u{2A17}", b"pluscir" => "\u{2A22}", b"plusacir" => "\u{2A23}", b"simplus" => "\u{2A24}", b"plusdu" => "\u{2A25}", b"plussim" => "\u{2A26}", b"plustwo" => "\u{2A27}", b"mcomma" => "\u{2A29}", b"minusdu" => "\u{2A2A}", b"loplus" => "\u{2A2D}", b"roplus" => "\u{2A2E}", b"Cross" => "\u{2A2F}", b"timesd" => "\u{2A30}", b"timesbar" => "\u{2A31}", b"smashp" => "\u{2A33}", b"lotimes" => "\u{2A34}", b"rotimes" => "\u{2A35}", b"otimesas" => "\u{2A36}", b"Otimes" => "\u{2A37}", b"odiv" => "\u{2A38}", b"triplus" => "\u{2A39}", b"triminus" => "\u{2A3A}", b"tritime" => "\u{2A3B}", b"iprod" | b"intprod" => "\u{2A3C}", b"amalg" => "\u{2A3F}", b"capdot" => "\u{2A40}", b"ncup" => "\u{2A42}", b"ncap" => "\u{2A43}", b"capand" => "\u{2A44}", b"cupor" => "\u{2A45}", b"cupcap" => 
"\u{2A46}", b"capcup" => "\u{2A47}", b"cupbrcap" => "\u{2A48}", b"capbrcup" => "\u{2A49}", b"cupcup" => "\u{2A4A}", b"capcap" => "\u{2A4B}", b"ccups" => "\u{2A4C}", b"ccaps" => "\u{2A4D}", b"ccupssm" => "\u{2A50}", b"And" => "\u{2A53}", b"Or" => "\u{2A54}", b"andand" => "\u{2A55}", b"oror" => "\u{2A56}", b"orslope" => "\u{2A57}", b"andslope" => "\u{2A58}", b"andv" => "\u{2A5A}", b"orv" => "\u{2A5B}", b"andd" => "\u{2A5C}", b"ord" => "\u{2A5D}", b"wedbar" => "\u{2A5F}", b"sdote" => "\u{2A66}", b"simdot" => "\u{2A6A}", b"congdot" => "\u{2A6D}", b"easter" => "\u{2A6E}", b"apacir" => "\u{2A6F}", b"apE" => "\u{2A70}", b"eplus" => "\u{2A71}", b"pluse" => "\u{2A72}", b"Esim" => "\u{2A73}", b"Colone" => "\u{2A74}", b"Equal" => "\u{2A75}", b"eDDot" | b"ddotseq" => "\u{2A77}", b"equivDD" => "\u{2A78}", b"ltcir" => "\u{2A79}", b"gtcir" => "\u{2A7A}", b"ltquest" => "\u{2A7B}", b"gtquest" => "\u{2A7C}", b"les" | b"LessSlantEqual" | b"leqslant" => "\u{2A7D}", b"ges" | b"GreaterSlantEqual" | b"geqslant" => "\u{2A7E}", b"lesdot" => "\u{2A7F}", b"gesdot" => "\u{2A80}", b"lesdoto" => "\u{2A81}", b"gesdoto" => "\u{2A82}", b"lesdotor" => "\u{2A83}", b"gesdotol" => "\u{2A84}", b"lap" | b"lessapprox" => "\u{2A85}", b"gap" | b"gtrapprox" => "\u{2A86}", b"lne" | b"lneq" => "\u{2A87}", b"gne" | b"gneq" => "\u{2A88}", b"lnap" | b"lnapprox" => "\u{2A89}", b"gnap" | b"gnapprox" => "\u{2A8A}", b"lEg" | b"lesseqqgtr" => "\u{2A8B}", b"gEl" | b"gtreqqless" => "\u{2A8C}", b"lsime" => "\u{2A8D}", b"gsime" => "\u{2A8E}", b"lsimg" => "\u{2A8F}", b"gsiml" => "\u{2A90}", b"lgE" => "\u{2A91}", b"glE" => "\u{2A92}", b"lesges" => "\u{2A93}", b"gesles" => "\u{2A94}", b"els" | b"eqslantless" => "\u{2A95}", b"egs" | b"eqslantgtr" => "\u{2A96}", b"elsdot" => "\u{2A97}", b"egsdot" => "\u{2A98}", b"el" => "\u{2A99}", b"eg" => "\u{2A9A}", b"siml" => "\u{2A9D}", b"simg" => "\u{2A9E}", b"simlE" => "\u{2A9F}", b"simgE" => "\u{2AA0}", b"LessLess" => "\u{2AA1}", b"GreaterGreater" => "\u{2AA2}", b"glj" => "\u{2AA4}", 
b"gla" => "\u{2AA5}",
        b"ltcc" => "\u{2AA6}",
        b"gtcc" => "\u{2AA7}",
        b"lescc" => "\u{2AA8}",
        b"gescc" => "\u{2AA9}",
        b"smt" => "\u{2AAA}",
        b"lat" => "\u{2AAB}",
        b"smte" => "\u{2AAC}",
        b"late" => "\u{2AAD}",
        b"bumpE" => "\u{2AAE}",
        b"pre" | b"preceq" | b"PrecedesEqual" => "\u{2AAF}",
        b"sce" | b"succeq" | b"SucceedsEqual" => "\u{2AB0}",
        b"prE" => "\u{2AB3}",
        b"scE" => "\u{2AB4}",
        b"prnE" | b"precneqq" => "\u{2AB5}",
        b"scnE" | b"succneqq" => "\u{2AB6}",
        b"prap" | b"precapprox" => "\u{2AB7}",
        b"scap" | b"succapprox" => "\u{2AB8}",
        b"prnap" | b"precnapprox" => "\u{2AB9}",
        b"scnap" | b"succnapprox" => "\u{2ABA}",
        b"Pr" => "\u{2ABB}",
        b"Sc" => "\u{2ABC}",
        b"subdot" => "\u{2ABD}",
        b"supdot" => "\u{2ABE}",
        b"subplus" => "\u{2ABF}",
        b"supplus" => "\u{2AC0}",
        b"submult" => "\u{2AC1}",
        b"supmult" => "\u{2AC2}",
        b"subedot" => "\u{2AC3}",
        b"supedot" => "\u{2AC4}",
        b"subE" | b"subseteqq" => "\u{2AC5}",
        b"supE" | b"supseteqq" => "\u{2AC6}",
        b"subsim" => "\u{2AC7}",
        b"supsim" => "\u{2AC8}",
        b"subnE" | b"subsetneqq" => "\u{2ACB}",
        b"supnE" | b"supsetneqq" => "\u{2ACC}",
        b"csub" => "\u{2ACF}",
        b"csup" => "\u{2AD0}",
        b"csube" => "\u{2AD1}",
        b"csupe" => "\u{2AD2}",
        b"subsup" => "\u{2AD3}",
        b"supsub" => "\u{2AD4}",
        b"subsub" => "\u{2AD5}",
        b"supsup" => "\u{2AD6}",
        b"suphsub" => "\u{2AD7}",
        b"supdsub" => "\u{2AD8}",
        b"forkv" => "\u{2AD9}",
        b"topfork" => "\u{2ADA}",
        b"mlcp" => "\u{2ADB}",
        b"Dashv" | b"DoubleLeftTee" => "\u{2AE4}",
        b"Vdashl" => "\u{2AE6}",
        b"Barv" => "\u{2AE7}",
        b"vBar" => "\u{2AE8}",
        b"vBarv" => "\u{2AE9}",
        b"Vbar" => "\u{2AEB}",
        b"Not" => "\u{2AEC}",
        b"bNot" => "\u{2AED}",
        b"rnmid" => "\u{2AEE}",
        b"cirmid" => "\u{2AEF}",
        b"midcir" => "\u{2AF0}",
        b"topcir" => "\u{2AF1}",
        b"nhpar" => "\u{2AF2}",
        b"parsim" => "\u{2AF3}",
        b"parsl" => "\u{2AFD}",
        b"fflig" => "\u{FB00}",
        b"filig" => "\u{FB01}",
        b"fllig" => "\u{FB02}",
        b"ffilig" => "\u{FB03}",
        b"ffllig" => "\u{FB04}",
        // The remaining entities are Mathematical Alphanumeric Symbols. They live
        // outside the Basic Multilingual Plane, so every escape below needs all
        // FIVE hex digits; with only four, many entities collapse onto the same
        // unrelated character.
        //
        // Mathematical script letters
        b"Ascr" => "\u{1D49C}",
        b"Cscr" => "\u{1D49E}",
        b"Dscr" => "\u{1D49F}",
        b"Gscr" => "\u{1D4A2}",
        b"Jscr" => "\u{1D4A5}",
        b"Kscr" => "\u{1D4A6}",
        b"Nscr" => "\u{1D4A9}",
        b"Oscr" => "\u{1D4AA}",
        b"Pscr" => "\u{1D4AB}",
        b"Qscr" => "\u{1D4AC}",
        b"Sscr" => "\u{1D4AE}",
        b"Tscr" => "\u{1D4AF}",
        b"Uscr" => "\u{1D4B0}",
        b"Vscr" => "\u{1D4B1}",
        b"Wscr" => "\u{1D4B2}",
        b"Xscr" => "\u{1D4B3}",
        b"Yscr" => "\u{1D4B4}",
        b"Zscr" => "\u{1D4B5}",
        b"ascr" => "\u{1D4B6}",
        b"bscr" => "\u{1D4B7}",
        b"cscr" => "\u{1D4B8}",
        b"dscr" => "\u{1D4B9}",
        b"fscr" => "\u{1D4BB}",
        b"hscr" => "\u{1D4BD}",
        b"iscr" => "\u{1D4BE}",
        b"jscr" => "\u{1D4BF}",
        b"kscr" => "\u{1D4C0}",
        b"lscr" => "\u{1D4C1}",
        b"mscr" => "\u{1D4C2}",
        b"nscr" => "\u{1D4C3}",
        b"pscr" => "\u{1D4C5}",
        b"qscr" => "\u{1D4C6}",
        b"rscr" => "\u{1D4C7}",
        b"sscr" => "\u{1D4C8}",
        b"tscr" => "\u{1D4C9}",
        b"uscr" => "\u{1D4CA}",
        b"vscr" => "\u{1D4CB}",
        b"wscr" => "\u{1D4CC}",
        b"xscr" => "\u{1D4CD}",
        b"yscr" => "\u{1D4CE}",
        b"zscr" => "\u{1D4CF}",
        // Mathematical Fraktur letters
        b"Afr" => "\u{1D504}",
        b"Bfr" => "\u{1D505}",
        b"Dfr" => "\u{1D507}",
        b"Efr" => "\u{1D508}",
        b"Ffr" => "\u{1D509}",
        b"Gfr" => "\u{1D50A}",
        b"Jfr" => "\u{1D50D}",
        b"Kfr" => "\u{1D50E}",
        b"Lfr" => "\u{1D50F}",
        b"Mfr" => "\u{1D510}",
        b"Nfr" => "\u{1D511}",
        b"Ofr" => "\u{1D512}",
        b"Pfr" => "\u{1D513}",
        b"Qfr" => "\u{1D514}",
        b"Sfr" => "\u{1D516}",
        b"Tfr" => "\u{1D517}",
        b"Ufr" => "\u{1D518}",
        b"Vfr" => "\u{1D519}",
        b"Wfr" => "\u{1D51A}",
        b"Xfr" => "\u{1D51B}",
        b"Yfr" => "\u{1D51C}",
        b"afr" => "\u{1D51E}",
        b"bfr" => "\u{1D51F}",
        b"cfr" => "\u{1D520}",
        b"dfr" => "\u{1D521}",
        b"efr" => "\u{1D522}",
        b"ffr" => "\u{1D523}",
        b"gfr" => "\u{1D524}",
        b"hfr" => "\u{1D525}",
        b"ifr" => "\u{1D526}",
        b"jfr" => "\u{1D527}",
        b"kfr" => "\u{1D528}",
        b"lfr" => "\u{1D529}",
        b"mfr" => "\u{1D52A}",
        b"nfr" => "\u{1D52B}",
        b"ofr" => "\u{1D52C}",
        b"pfr" => "\u{1D52D}",
        b"qfr" => "\u{1D52E}",
        b"rfr" => "\u{1D52F}",
        b"sfr" => "\u{1D530}",
        b"tfr" => "\u{1D531}",
        b"ufr" => "\u{1D532}",
        b"vfr" => "\u{1D533}",
        b"wfr" => "\u{1D534}",
        b"xfr" => "\u{1D535}",
        b"yfr" => "\u{1D536}",
        b"zfr" => "\u{1D537}",
        // Mathematical double-struck letters
        b"Aopf" => "\u{1D538}",
        b"Bopf" => "\u{1D539}",
        b"Dopf" => "\u{1D53B}",
        b"Eopf" => "\u{1D53C}",
        b"Fopf" => "\u{1D53D}",
        b"Gopf" => "\u{1D53E}",
        b"Iopf" => "\u{1D540}",
        b"Jopf" => "\u{1D541}",
        b"Kopf" => "\u{1D542}",
        b"Lopf" => "\u{1D543}",
        b"Mopf" => "\u{1D544}",
        b"Oopf" => "\u{1D546}",
        b"Sopf" => "\u{1D54A}",
        b"Topf" => "\u{1D54B}",
        b"Uopf" => "\u{1D54C}",
        b"Vopf" => "\u{1D54D}",
        b"Wopf" => "\u{1D54E}",
        b"Xopf" => "\u{1D54F}",
        b"Yopf" => "\u{1D550}",
        b"aopf" => "\u{1D552}",
        b"bopf" => "\u{1D553}",
        b"copf" => "\u{1D554}",
        b"dopf" => "\u{1D555}",
        b"eopf" => "\u{1D556}",
        b"fopf" => "\u{1D557}",
        b"gopf" => "\u{1D558}",
        b"hopf" => "\u{1D559}",
        b"iopf" => "\u{1D55A}",
        b"jopf" => "\u{1D55B}",
        b"kopf" => "\u{1D55C}",
        b"lopf" => "\u{1D55D}",
        b"mopf" => "\u{1D55E}",
        b"nopf" => "\u{1D55F}",
        b"oopf" => "\u{1D560}",
        b"popf" => "\u{1D561}",
        b"qopf" => "\u{1D562}",
        b"ropf" => "\u{1D563}",
        b"sopf" => "\u{1D564}",
        b"topf" => "\u{1D565}",
        b"uopf" => "\u{1D566}",
        b"vopf" => "\u{1D567}",
        b"wopf" => "\u{1D568}",
        b"xopf" => "\u{1D569}",
        b"yopf" => "\u{1D56A}",
        b"zopf" => "\u{1D56B}",
        _ => return None,
    };
    Some(s)
}

/// Converts the numeric part of a character reference into a `char`.
///
/// `num` is interpreted as hexadecimal when it starts with `x` (the `x` itself
/// is not part of the number) and as decimal otherwise, so both `x41` and `65`
/// yield `'A'`.
///
/// # Errors
///
/// - [`ParseCharRefError::UnexpectedSign`] if the digits start with `+` or `-`;
/// - [`ParseCharRefError::InvalidNumber`] if the digits cannot be parsed as `u32`
///   in the selected radix;
/// - [`ParseCharRefError::IllegalCharacter`] if the number is `0`;
/// - [`ParseCharRefError::InvalidCodepoint`] if the number is not a valid `char`
///   (as decided by [`std::char::from_u32`]).
pub(crate) fn parse_number(num: &str) -> Result<char, ParseCharRefError> {
    let code = match num.strip_prefix('x') {
        Some(hex) => from_str_radix(hex, 16)?,
        None => from_str_radix(num, 10)?,
    };
    // The NUL character is rejected explicitly, before the generic code point check.
    if code == 0 {
        return Err(ParseCharRefError::IllegalCharacter(code));
    }
    std::char::from_u32(code).ok_or(ParseCharRefError::InvalidCodepoint(code))
}

/// Parses `src` as an unsigned number in the given `radix`, rejecting any
/// explicit sign.
///
/// Returns [`ParseCharRefError::UnexpectedSign`] when the first byte is `+` or
/// `-`, otherwise forwards to [`u32::from_str_radix`] and wraps its error in
/// [`ParseCharRefError::InvalidNumber`].
#[inline]
fn from_str_radix(src: &str, radix: u32) -> Result<u32, ParseCharRefError> {
    match src.as_bytes().first().copied() {
        // We should not allow sign numbers, but u32::from_str_radix will accept `+`.
        // We also handle `-` to be consistent in returned errors
        Some(b'+') | Some(b'-') => Err(ParseCharRefError::UnexpectedSign),
        _ => u32::from_str_radix(src, radix).map_err(ParseCharRefError::InvalidNumber),
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

#[cfg(test)]
mod normalization {
    use super::*;

    mod eol {
        use super::*;

        mod xml11 {
            use super::*;
            use pretty_assertions::assert_eq;

            #[test]
            fn empty() {
                assert_eq!(normalize_xml11_eols(""), "");
            }

            #[test]
            fn already_normalized() {
                assert_eq!(
                    normalize_xml11_eols("\nalready \n\n normalized\n"),
                    "\nalready \n\n normalized\n",
                );
            }

            #[test]
            fn cr_lf() {
                assert_eq!(
                    normalize_xml11_eols("\r\nsome\r\n\r\ntext"),
                    "\nsome\n\ntext"
                );
            }

            #[test]
            fn cr_u0085() {
                assert_eq!(
                    normalize_xml11_eols("\r\u{0085}some\r\u{0085}\r\u{0085}text"),
                    "\nsome\n\ntext",
                );
            }

            #[test]
            fn u0085() {
                assert_eq!(
                    normalize_xml11_eols("\u{0085}some\u{0085}\u{0085}text"),
                    "\nsome\n\ntext",
                );
            }

            #[test]
            fn u2028() {
                assert_eq!(
                    normalize_xml11_eols("\u{2028}some\u{2028}\u{2028}text"),
                    "\nsome\n\ntext",
                );
            }

            #[test]
            fn mixed() {
                assert_eq!(
                    normalize_xml11_eols("\r\r\r\u{2028}\n\r\nsome\n\u{0085}\r\u{0085}text"),
                    "\n\n\n\n\n\nsome\n\n\ntext",
                );
            }

            #[test]
            fn utf8_0xc2() {
                // All possible characters encoded in 2 bytes in UTF-8 which first byte is 0xC2 (0b11000010)
                // Second byte follows the pattern 10xxxxxx
                let first = std::str::from_utf8(&[0b11000010, 0b10000000])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let last = std::str::from_utf8(&[0b11000010, 0b10111111])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let mut utf8 = [0; 2];
                for ch in first..=last {
                    ch.encode_utf8(&mut utf8);
                    let description = format!("UTF-8 [{:02x} {:02x}] = `{}`", utf8[0], utf8[1], ch);
                    let input = std::str::from_utf8(&utf8).expect(&description);
                    dbg!((input, &description));
                    if ch == '\u{0085}' {
                        assert_eq!(normalize_xml11_eols(input), "\n", "{}", description);
                    } else {
                        assert_eq!(normalize_xml11_eols(input), input, "{}", description);
}
                }
                // Sanity check: the loop above covered every 0xC2-prefixed character.
                assert_eq!((first..=last).count(), 64);
            }

            #[test]
            fn utf8_0x0d_0xc2() {
                // All possible characters encoded in 2 bytes in UTF-8 whose first byte is 0xC2 (0b11000010),
                // each one preceded by a `\r`.
                // Second byte follows the pattern 10xxxxxx
                let first = std::str::from_utf8(&[0b11000010, 0b10000000])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let last = std::str::from_utf8(&[0b11000010, 0b10111111])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let mut utf8 = [b'\r', 0, 0];
                for ch in first..=last {
                    ch.encode_utf8(&mut utf8[1..]);
                    let description = format!(
                        "UTF-8 [{:02x} {:02x} {:02x}] = `{}`",
                        utf8[0], utf8[1], utf8[2], ch
                    );
                    let input = std::str::from_utf8(&utf8).expect(&description);
                    dbg!((input, &description));
                    if ch == '\u{0085}' {
                        // `\r` + U+0085 is expected to collapse into a single `\n`
                        assert_eq!(normalize_xml11_eols(input), "\n", "{}", description);
                    } else {
                        // Otherwise only the leading `\r` is expected to be replaced.
                        // utf8 is copied, because [u8; 3] implements Copy
                        let mut expected = utf8;
                        expected[0] = b'\n';
                        let expected = std::str::from_utf8(&expected).expect(&description);
                        assert_eq!(normalize_xml11_eols(input), expected, "{}", description);
                    }
                }
                assert_eq!((first..=last).count(), 64);
            }

            #[test]
            fn utf8_0xe2() {
                // All possible characters encoded in 3 bytes in UTF-8 whose first byte is 0xE2 (0b11100010)
                // Second and third bytes follow the pattern 10xxxxxx
                let first = std::str::from_utf8(&[0b11100010, 0b10000000, 0b10000000])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let last = std::str::from_utf8(&[0b11100010, 0b10111111, 0b10111111])
                    .unwrap()
                    .chars()
                    .next()
                    .unwrap();
                let mut buf = [0; 3];
                for ch in first..=last {
                    let input = &*ch.encode_utf8(&mut buf);
                    let buf = input.as_bytes();
                    let description = format!(
                        "UTF-8 [{:02x} {:02x} {:02x}] = `{}`",
                        buf[0], buf[1], buf[2], ch
                    );
                    dbg!((input, &description));
                    if ch == '\u{2028}' {
                        assert_eq!(normalize_xml11_eols(input), "\n", "{}", description);
                    } else {
                        assert_eq!(normalize_xml11_eols(input), input, "{}", description);
                    }
                }
                assert_eq!((first..=last).count(), 4096);
            }
        }

        // These tests expect `normalize_xml10_eols` to rewrite only `\r\n` and `\r`;
        // U+0085 and U+2028 must come out unchanged.
        mod xml10 {
            use super::*;
            use pretty_assertions::assert_eq;

            #[test]
            fn empty() {
                assert_eq!(normalize_xml10_eols(""), "");
            }

            #[test]
            fn already_normalized() {
                assert_eq!(
                    normalize_xml10_eols("\nalready \n\n normalized\n"),
                    "\nalready \n\n normalized\n",
                );
            }

            #[test]
            fn cr_lf() {
                assert_eq!(
                    normalize_xml10_eols("\r\nsome\r\n\r\ntext"),
                    "\nsome\n\ntext"
                );
            }

            #[test]
            fn cr_u0085() {
                assert_eq!(
                    normalize_xml10_eols("\r\u{0085}some\r\u{0085}\r\u{0085}text"),
                    "\n\u{0085}some\n\u{0085}\n\u{0085}text",
                );
            }

            #[test]
            fn u0085() {
                assert_eq!(
                    normalize_xml10_eols("\u{0085}some\u{0085}\u{0085}text"),
                    "\u{0085}some\u{0085}\u{0085}text",
                );
            }

            #[test]
            fn u2028() {
                assert_eq!(
                    normalize_xml10_eols("\u{2028}some\u{2028}\u{2028}text"),
                    "\u{2028}some\u{2028}\u{2028}text",
                );
            }

            #[test]
            fn mixed() {
                assert_eq!(
                    normalize_xml10_eols("\r\r\r\u{2028}\n\r\nsome\n\u{0085}\r\u{0085}text"),
                    "\n\n\n\u{2028}\n\nsome\n\u{0085}\n\u{0085}text",
                );
            }
        }
    }
}
quick-xml-0.38.4/src/events/attributes.rs000064400000000000000000002614441046102023000164740ustar 00000000000000//! Xml Attributes module
//!
//! Provides an iterator over attributes key/value pairs

use crate::encoding::Decoder;
use crate::errors::Result as XmlResult;
use crate::escape::{escape, resolve_predefined_entity, unescape_with};
use crate::name::{LocalName, Namespace, QName};
use crate::reader::NsReader;
use crate::utils::{is_whitespace, Bytes};
use std::fmt::{self, Debug, Display, Formatter};
use std::iter::FusedIterator;
use std::{borrow::Cow, ops::Range};

/// A struct representing a key/value XML attribute.
///
/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
/// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`]
/// functions.
///
/// [`unescape_value`]: Self::unescape_value
/// [`decode_and_unescape_value`]: Self::decode_and_unescape_value
#[derive(Clone, Eq, PartialEq)]
pub struct Attribute<'a> {
    /// The key to uniquely define the attribute.
    ///
    /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
    pub key: QName<'a>,
    /// The raw value of the attribute.
    pub value: Cow<'a, [u8]>,
}

impl<'a> Attribute<'a> {
    /// Decodes using UTF-8 then unescapes the value.
    ///
    /// This is normally the value you are interested in. Escape sequences such as `&gt;` are
    /// replaced with their unescaped equivalents such as `>`.
    ///
    /// This will allocate if the value contains any escape sequences.
    ///
    /// See also [`unescape_value_with()`](Self::unescape_value_with)
    ///
    ///
///
    /// NOTE: Because this method is available only if the [`encoding`] feature is **not**
    /// enabled, it should only be used by applications.
    /// Libraries should use [`decode_and_unescape_value()`](Self::decode_and_unescape_value)
    /// instead: if a library is used in a project which depends on quick_xml with the
    /// [`encoding`] feature enabled, the library will fail to compile due to
    /// [feature unification].
    ///
    ///
    ///
    /// [`encoding`]: ../../index.html#encoding
    /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
    #[cfg(any(doc, not(feature = "encoding")))]
    pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
        self.unescape_value_with(resolve_predefined_entity)
    }

    /// Decodes using UTF-8 then unescapes the value, using custom entities.
    ///
    /// This is normally the value you are interested in. Escape sequences such as `&gt;` are
    /// replaced with their unescaped equivalents such as `>`.
    /// A fallback resolver for additional custom entities can be provided via
    /// `resolve_entity`.
    ///
    /// This will allocate if the value contains any escape sequences.
    ///
    /// See also [`unescape_value()`](Self::unescape_value)
    ///
    ///
    ///
    /// NOTE: Because this method is available only if the [`encoding`] feature is **not**
    /// enabled, it should only be used by applications.
    /// Libraries should use
    /// [`decode_and_unescape_value_with()`](Self::decode_and_unescape_value_with)
    /// instead: if a library is used in a project which depends on quick_xml with the
    /// [`encoding`] feature enabled, the library will fail to compile due to
    /// [feature unification].
    ///
    ///
    ///
    /// [`encoding`]: ../../index.html#encoding
    /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
    #[cfg(any(doc, not(feature = "encoding")))]
    #[inline]
    pub fn unescape_value_with<'entity>(
        &self,
        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
    ) -> XmlResult<Cow<'a, str>> {
        self.decode_and_unescape_value_with(Decoder::utf8(), resolve_entity)
    }

    /// Decodes then unescapes the value.
    ///
    /// Entities are resolved with `resolve_predefined_entity`.
    ///
    /// This will allocate if the value contains any escape sequences or in
    /// non-UTF-8 encoding.
    pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
        self.decode_and_unescape_value_with(decoder, resolve_predefined_entity)
    }

    /// Decodes then unescapes the value with custom entities.
    ///
    /// `resolve_entity` is handed to `unescape_with` to look up entity names.
    ///
    /// This will allocate if the value contains any escape sequences or in
    /// non-UTF-8 encoding.
    // NOTE(review): the generic arguments of the return type were missing in this
    // copy of the file. `Cow<_, str>` follows from the body; the `'a` lifetime
    // assumes `Decoder::decode_cow` preserves the lifetime of `self.value` -- confirm.
    pub fn decode_and_unescape_value_with<'entity>(
        &self,
        decoder: Decoder,
        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
    ) -> XmlResult<Cow<'a, str>> {
        let decoded = decoder.decode_cow(&self.value)?;

        match unescape_with(&decoded, resolve_entity)? {
            // Because the result is borrowed, no replacements were made and we can
            // return the decoded string itself
            Cow::Borrowed(_) => Ok(decoded),
            Cow::Owned(s) => Ok(s.into()),
        }
    }

    /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
    ///
    /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let attr = Attribute::from(("attr", "false")); /// assert_eq!(attr.as_bool(), Some(false)); /// /// let attr = Attribute::from(("attr", "0")); /// assert_eq!(attr.as_bool(), Some(false)); /// /// let attr = Attribute::from(("attr", "true")); /// assert_eq!(attr.as_bool(), Some(true)); /// /// let attr = Attribute::from(("attr", "1")); /// assert_eq!(attr.as_bool(), Some(true)); /// /// let attr = Attribute::from(("attr", "bot bool")); /// assert_eq!(attr.as_bool(), None); /// ``` /// /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean #[inline] pub fn as_bool(&self) -> Option { match self.value.as_ref() { b"1" | b"true" => Some(true), b"0" | b"false" => Some(false), _ => None, } } } impl<'a> Debug for Attribute<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.debug_struct("Attribute") .field("key", &Bytes(self.key.as_ref())) .field("value", &Bytes(&self.value)) .finish() } } impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> { /// Creates new attribute from raw bytes. /// Does not apply any transformation to both key and value. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes())); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> { Attribute { key: QName(val.0), value: Cow::from(val.1), } } } impl<'a> From<(&'a str, &'a str)> for Attribute<'a> { /// Creates new attribute from text representation. /// Key is stored as-is, but the value will be escaped. 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features", "Bells & whistles")); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a str, &'a str)) -> Attribute<'a> { Attribute { key: QName(val.0.as_bytes()), value: match escape(val.1) { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()), }, } } } impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> { /// Creates new attribute from text representation. /// Key is stored as-is, but the value will be escaped. /// /// # Examples /// /// ``` /// # use std::borrow::Cow; /// use pretty_assertions::assert_eq; /// use quick_xml::events::attributes::Attribute; /// /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles"))); /// assert_eq!(features.value, "Bells & whistles".as_bytes()); /// ``` fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> { Attribute { key: QName(val.0.as_bytes()), value: match escape(val.1) { Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()), Cow::Owned(s) => Cow::Owned(s.into_bytes()), }, } } } impl<'a> From> for Attribute<'a> { #[inline] fn from(attr: Attr<&'a [u8]>) -> Self { Self { key: attr.key(), value: Cow::Borrowed(attr.value()), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator over XML attributes. /// /// Yields `Result`. An `Err` will be yielded if an attribute is malformed or duplicated. /// The duplicate check can be turned off by calling [`with_checks(false)`]. /// /// When [`serialize`] feature is enabled, can be converted to serde's deserializer. 
///
/// [`with_checks(false)`]: Self::with_checks
/// [`serialize`]: ../../index.html#serialize
#[derive(Clone)]
pub struct Attributes<'a> {
    /// Slice of `BytesStart` corresponding to attributes
    bytes: &'a [u8],
    /// Iterator state, independent from the actual source of bytes
    state: IterState,
    /// Encoding used for `bytes`
    decoder: Decoder,
}

impl<'a> Attributes<'a> {
    /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
    #[inline]
    pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
        Self {
            bytes: buf,
            state: IterState::new(pos, html),
            decoder,
        }
    }

    /// Creates a new attribute iterator from a buffer, which recognizes only XML-style
    /// attributes, i. e. those which are in the form `name = "value"` or `name = 'value'`.
    /// HTML style attributes (i. e. without quotes or only name) will return an error.
    ///
    /// # Parameters
    /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
    ///   string between `<` and `>` (or `/>`) of a tag;
    /// - `pos`: a position in the `buf` where tag name is finished and attributes
    ///   are started. It is not necessary to point exactly to the end of a tag name,
    ///   although it usually does. If it is more than the `buf` length,
    ///   then the iterator is documented to return `None` immediately.
    ///
    /// # Example
    /// ```
    /// # use quick_xml::events::attributes::{Attribute, Attributes};
    /// # use pretty_assertions::assert_eq;
    /// #
    /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
    /// //                              ^0       ^9
    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
    /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
    /// assert_eq!(iter.next(), None);
    /// ```
    pub const fn new(buf: &'a str, pos: usize) -> Self {
        Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
    }

    /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
/// /// # Parameters /// - `buf`: a buffer with a tag name and attributes, usually this is the whole /// string between `<` and `>` (or `/>`) of a tag; /// - `pos`: a position in the `buf` where tag name is finished and attributes /// is started. It is not necessary to point exactly to the end of a tag name, /// although that is usually that. If it will be more than the `buf` length, /// then the iterator will return `None`` immediately. /// /// # Example /// ``` /// # use quick_xml::events::attributes::{Attribute, Attributes}; /// # use pretty_assertions::assert_eq; /// # /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9); /// // ^0 ^9 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1"))))); /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", ""))))); /// assert_eq!(iter.next(), None); /// ``` pub const fn html(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8()) } /// Changes whether attributes should be checked for uniqueness. /// /// The XML specification requires attribute keys in the same element to be unique. This check /// can be disabled to improve performance slightly. /// /// (`true` by default) pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> { self.state.check_duplicates = val; self } /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in /// attributes. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::QName; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(" /// /// /// /// /// /// /// /// /// "); /// reader.config_mut().trim_text(true); /// /// macro_rules! 
check { /// ($reader:expr, $name:literal, $value:literal) => { /// let event = match $reader.read_event().unwrap() { /// Event::Empty(e) => e, /// e => panic!("Unexpected event {:?}", e), /// }; /// assert_eq!( /// (event.name(), event.attributes().has_nil(&$reader)), /// (QName($name.as_bytes()), $value), /// ); /// }; /// } /// /// let root = match reader.read_event().unwrap() { /// Event::Start(e) => e, /// e => panic!("Unexpected event {:?}", e), /// }; /// assert_eq!(root.attributes().has_nil(&reader), false); /// /// // definitely true /// check!(reader, "true", true); /// // definitely false /// check!(reader, "false", false); /// // absence of the attribute means that attribute is not set /// check!(reader, "none", false); /// // attribute not bound to the correct namespace /// check!(reader, "non-xsi", false); /// // attributes without prefix not bound to any namespace /// check!(reader, "unbound-nil", false); /// // prefix can be any while it is bound to the correct namespace /// check!(reader, "another-xmlns", true); /// ``` /// /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil pub fn has_nil(&mut self, reader: &NsReader) -> bool { use crate::name::ResolveResult::*; self.any(|attr| { if let Ok(attr) = attr { match reader.resolver().resolve_attribute(attr.key) { ( Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")), LocalName(b"nil"), ) => attr.as_bool().unwrap_or_default(), _ => false, } } else { false } }) } /// Get the decoder, used to decode bytes, read by the reader which produces /// this iterator, to the strings. /// /// When iterator was created manually or get from a manually created [`BytesStart`], /// encoding is UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. 
/// /// [`BytesStart`]: crate::events::BytesStart /// [`encoding`]: ../index.html#encoding #[inline] pub const fn decoder(&self) -> Decoder { self.decoder } } impl<'a> Debug for Attributes<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.debug_struct("Attributes") .field("bytes", &Bytes(self.bytes)) .field("state", &self.state) .field("decoder", &self.decoder) .finish() } } impl<'a> Iterator for Attributes<'a> { type Item = Result, AttrError>; #[inline] fn next(&mut self) -> Option { match self.state.next(self.bytes) { None => None, Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())), Some(Err(e)) => Some(Err(e)), } } } impl<'a> FusedIterator for Attributes<'a> {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// Errors that can be raised during parsing attributes. /// /// Recovery position in examples shows the position from which parsing of the /// next attribute will be attempted. #[derive(Clone, Debug, PartialEq, Eq)] pub enum AttrError { /// Attribute key was not followed by `=`, position relative to the start of /// the owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. ExpectedEq(usize), /// Attribute value was not found after `=`, position relative to the start /// of the owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// ``` /// /// This error can be returned only for the last attribute in the list, /// because otherwise any content after `=` will be threated as a value. /// The XML /// /// ```xml /// /// /// /// ``` /// /// will be treated as `Attribute { key = b"key", value = b"another-key" }` /// and or [`Attribute`] is returned, or [`AttrError::UnquotedValue`] is raised, /// depending on the parsing mode. 
ExpectedValue(usize), /// Attribute value is not quoted, position relative to the start of the /// owning tag is provided. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// ``` /// /// This error can be raised only when the iterator is in XML mode. UnquotedValue(usize), /// Attribute value was not finished with a matching quote, position relative /// to the start of owning tag and a quote is provided. That position is always /// a last character in the tag content. /// /// Example of input that raises this error: /// /// ```xml /// /// /// /// /// ``` /// /// This error is returned only when [`Attributes::with_checks()`] is set /// to `true` (that is default behavior). Duplicated(usize, usize), } impl Display for AttrError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Self::ExpectedEq(pos) => write!( f, r#"position {}: attribute key must be directly followed by `=` or space"#, pos ), Self::ExpectedValue(pos) => write!( f, r#"position {}: `=` must be followed by an attribute value"#, pos ), Self::UnquotedValue(pos) => write!( f, r#"position {}: attribute value must be enclosed in `"` or `'`"#, pos ), Self::ExpectedQuote(pos, quote) => write!( f, r#"position {}: missing closing quote `{}` in attribute value"#, pos, *quote as char ), Self::Duplicated(pos1, pos2) => write!( f, r#"position {}: duplicated attribute, previous declaration at position {}"#, pos1, pos2 ), } } } impl std::error::Error for AttrError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// A struct representing a key/value XML or HTML [attribute]. /// /// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Attr { /// Attribute with value enclosed in double quotes (`"`). Attribute key and /// value provided. This is a canonical XML-style attribute. DoubleQ(T, T), /// Attribute with value enclosed in single quotes (`'`). 
Attribute key and /// value provided. This is an XML-style attribute. SingleQ(T, T), /// Attribute with value not enclosed in quotes. Attribute key and value /// provided. This is HTML-style attribute, it can be returned in HTML-mode /// parsing only. In an XML mode [`AttrError::UnquotedValue`] will be raised /// instead. /// /// Attribute value can be invalid according to the [HTML specification], /// in particular, it can contain `"`, `'`, `=`, `<`, and ` /// characters. The absence of the `>` character is nevertheless guaranteed, /// since the parser extracts [events] based on them even before the start /// of parsing attributes. /// /// [HTML specification]: https://html.spec.whatwg.org/#unquoted /// [events]: crate::events::Event::Start Unquoted(T, T), /// Attribute without value. Attribute key provided. This is HTML-style attribute, /// it can be returned in HTML-mode parsing only. In XML mode /// [`AttrError::ExpectedEq`] will be raised instead. Empty(T), } impl Attr { /// Maps an `Attr` to `Attr` by applying a function to a contained key and value. #[inline] pub fn map(self, mut f: F) -> Attr where F: FnMut(T) -> U, { match self { Attr::DoubleQ(key, value) => Attr::DoubleQ(f(key), f(value)), Attr::SingleQ(key, value) => Attr::SingleQ(f(key), f(value)), Attr::Empty(key) => Attr::Empty(f(key)), Attr::Unquoted(key, value) => Attr::Unquoted(f(key), f(value)), } } } impl<'a> Attr<&'a [u8]> { /// Returns the key value #[inline] pub const fn key(&self) -> QName<'a> { QName(match self { Attr::DoubleQ(key, _) => key, Attr::SingleQ(key, _) => key, Attr::Empty(key) => key, Attr::Unquoted(key, _) => key, }) } /// Returns the attribute value. For [`Self::Empty`] variant an empty slice /// is returned according to the [HTML specification]. 
/// /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty #[inline] pub const fn value(&self) -> &'a [u8] { match self { Attr::DoubleQ(_, value) => value, Attr::SingleQ(_, value) => value, Attr::Empty(_) => &[], Attr::Unquoted(_, value) => value, } } } impl> Debug for Attr { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { Attr::DoubleQ(key, value) => f .debug_tuple("Attr::DoubleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::SingleQ(key, value) => f .debug_tuple("Attr::SingleQ") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), Attr::Empty(key) => f .debug_tuple("Attr::Empty") // Comment to prevent formatting and keep style consistent .field(&Bytes(key.as_ref())) .finish(), Attr::Unquoted(key, value) => f .debug_tuple("Attr::Unquoted") .field(&Bytes(key.as_ref())) .field(&Bytes(value.as_ref())) .finish(), } } } /// Unpacks attribute key and value into tuple of this two elements. /// `None` value element is returned only for [`Attr::Empty`] variant. impl From> for (T, Option) { #[inline] fn from(attr: Attr) -> Self { match attr { Attr::DoubleQ(key, value) => (key, Some(value)), Attr::SingleQ(key, value) => (key, Some(value)), Attr::Empty(key) => (key, None), Attr::Unquoted(key, value) => (key, Some(value)), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// type AttrResult = Result>, AttrError>; #[derive(Clone, Copy, Debug)] enum State { /// Iteration finished, iterator will return `None` to all [`IterState::next`] /// requests. Done, /// The last attribute returned was deserialized successfully. Contains an /// offset from which next attribute should be searched. Next(usize), /// The last attribute returns [`AttrError::UnquotedValue`], offset pointed /// to the beginning of the value. 
Recover should skip a value SkipValue(usize), /// The last attribute returns [`AttrError::Duplicated`], offset pointed to /// the equal (`=`) sign. Recover should skip it and a value SkipEqValue(usize), } /// External iterator over spans of attribute key and value #[derive(Clone, Debug)] pub(crate) struct IterState { /// Iteration state that determines what actions should be done before the /// actual parsing of the next attribute state: State, /// If `true`, enables ability to parse unquoted values and key-only (empty) /// attributes html: bool, /// If `true`, checks for duplicate names check_duplicates: bool, /// If `check_duplicates` is set, contains the ranges of already parsed attribute /// names. We store a ranges instead of slices to able to report a previous /// attribute position keys: Vec>, } impl IterState { pub const fn new(offset: usize, html: bool) -> Self { Self { state: State::Next(offset), html, check_duplicates: true, keys: Vec::new(), } } /// Recover from an error that could have been made on a previous step. /// Returns an offset from which parsing should continue. /// If there no input left, returns `None`. 
fn recover(&self, slice: &[u8]) -> Option { match self.state { State::Done => None, State::Next(offset) => Some(offset), State::SkipValue(offset) => self.skip_value(slice, offset), State::SkipEqValue(offset) => self.skip_eq_value(slice, offset), } } /// Skip all characters up to first space symbol or end-of-input #[inline] #[allow(clippy::manual_map)] fn skip_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); match iter.find(|(_, &b)| is_whitespace(b)) { // Input: ` key = value ` // | ^ // offset e Some((e, _)) => Some(e), // Input: ` key = value` // | ^ // offset e = len() None => None, } } /// Skip all characters up to first space symbol or end-of-input #[inline] fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option { let mut iter = (offset..).zip(slice[offset..].iter()); // Skip all up to the quote and get the quote type let quote = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key = "` // | ^ // offset Some((_, b'"')) => b'"', // Input: ` key = '` // | ^ // offset Some((_, b'\'')) => b'\'', // Input: ` key = x` // | ^ // offset Some((offset, _)) => return self.skip_value(slice, offset), // Input: ` key = ` // | ^ // offset None => return None, }; match iter.find(|(_, &b)| b == quote) { // Input: ` key = " "` // ^ Some((e, b'"')) => Some(e), // Input: ` key = ' '` // ^ Some((e, _)) => Some(e), // Input: ` key = " ` // Input: ` key = ' ` // ^ // Closing quote not found None => None, } } #[inline] fn check_for_duplicates( &mut self, slice: &[u8], key: Range, ) -> Result, AttrError> { if self.check_duplicates { if let Some(prev) = self .keys .iter() .find(|r| slice[(*r).clone()] == slice[key.clone()]) { return Err(AttrError::Duplicated(key.start, prev.start)); } self.keys.push(key.clone()); } Ok(key) } /// # Parameters /// /// - `slice`: content of the tag, used for checking for duplicates /// - `key`: Range of key in slice, if iterator in HTML mode /// - `offset`: Position of error if 
iterator in XML mode #[inline] fn key_only(&mut self, slice: &[u8], key: Range, offset: usize) -> Option { Some(if self.html { self.check_for_duplicates(slice, key).map(Attr::Empty) } else { Err(AttrError::ExpectedEq(offset)) }) } #[inline] fn double_q(&mut self, key: Range, value: Range) -> Option { self.state = State::Next(value.end + 1); // +1 for `"` Some(Ok(Attr::DoubleQ(key, value))) } #[inline] fn single_q(&mut self, key: Range, value: Range) -> Option { self.state = State::Next(value.end + 1); // +1 for `'` Some(Ok(Attr::SingleQ(key, value))) } pub fn next(&mut self, slice: &[u8]) -> Option { let mut iter = match self.recover(slice) { Some(offset) => (offset..).zip(slice[offset..].iter()), None => return None, }; // Index where next key started let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key` // ^ Some((s, _)) => s, // Input: ` ` // ^ None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return None; } }; // Span of a key let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) { // Input: ` key=` // | ^ // s e Some((e, b'=')) => (start_key..e, e), // Input: ` key ` // ^ Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key =` // | | ^ // start_key e Some((offset, b'=')) => (start_key..e, offset), // Input: ` key x` // | | ^ // start_key e // If HTML-like attributes is allowed, this is the result, otherwise error Some((offset, _)) => { // In any case, recovering is not required self.state = State::Next(offset); return self.key_only(slice, start_key..e, offset); } // Input: ` key ` // | | ^ // start_key e // If HTML-like attributes is allowed, this is the result, otherwise error None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return self.key_only(slice, start_key..e, slice.len()); } }, // Input: ` key` // | ^ // s e = len() // If HTML-like attributes is allowed, this is the result, otherwise 
error None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; let e = slice.len(); return self.key_only(slice, start_key..e, e); } }; let key = match self.check_for_duplicates(slice, key) { Err(e) => { self.state = State::SkipEqValue(offset); return Some(Err(e)); } Ok(key) => key, }; //////////////////////////////////////////////////////////////////////// // Gets the position of quote and quote type let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) { // Input: ` key = "` // ^ Some((s, b'"')) => (s + 1, b'"'), // Input: ` key = '` // ^ Some((s, b'\'')) => (s + 1, b'\''), // Input: ` key = x` // ^ // If HTML-like attributes is allowed, this is the start of the value Some((s, _)) if self.html => { // We do not check validity of attribute value characters as required // according to https://html.spec.whatwg.org/#unquoted. It can be done // during validation phase let end = match iter.find(|(_, &b)| is_whitespace(b)) { // Input: ` key = value ` // | ^ // s e Some((e, _)) => e, // Input: ` key = value` // | ^ // s e = len() None => slice.len(), }; self.state = State::Next(end); return Some(Ok(Attr::Unquoted(key, s..end))); } // Input: ` key = x` // ^ Some((s, _)) => { self.state = State::SkipValue(s); return Some(Err(AttrError::UnquotedValue(s))); } // Input: ` key = ` // ^ None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; return Some(Err(AttrError::ExpectedValue(slice.len()))); } }; match iter.find(|(_, &b)| b == quote) { // Input: ` key = " "` // ^ Some((e, b'"')) => self.double_q(key, start_value..e), // Input: ` key = ' '` // ^ Some((e, _)) => self.single_q(key, start_value..e), // Input: ` key = " ` // Input: ` key = ' ` // ^ // Closing quote not found None => { // Because we reach end-of-input, stop iteration on next call self.state = State::Done; Some(Err(AttrError::ExpectedQuote(slice.len(), quote))) } } } } 
//////////////////////////////////////////////////////////////////////////////////////////////////// /// Checks, how parsing of XML-style attributes works. Each attribute should /// have a value, enclosed in single or double quotes. #[cfg(test)] mod xml { use super::*; use pretty_assertions::assert_eq; /// Checked attribute is the single attribute mod single { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key="value""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key=value"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key"#, 3); // 0 ^ = 7 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(7)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key="#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checked attribute is the first attribute in the list of many attributes mod first { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), 
value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key=value regular='attribute'"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(8)))); // check error recovery assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key regular='attribute'"#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); // check error recovery assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=`. #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key= regular='attribute'"#, 3); // 0 ^ = 9 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // Because we do not check validity of keys and values during parsing, // "error='recovery'" is considered, as unquoted attribute value and // skipped during recovery and iteration finished assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular= 'attribute'"#, 3); // 0 ^ = 9 ^ = 29 // In that case "regular=" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case "'attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular ='attribute'"#, 3); // 0 ^ = 9 ^ = 29 // In that case "regular" considered as unquoted value assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case "='attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(29)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); 
//////////////////////////////////////////////////////////////////// let mut iter = Attributes::new(r#"tag key= regular = 'attribute'"#, 3); // 0 ^ = 9 ^ = 19 ^ = 30 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(9)))); // In that case second "=" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(19)))); // In that case "'attribute'" considered as a key, because we do not check // validity of key names assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(30)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Copy of single, but with additional spaces in markup mod sparsed { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key = "value" "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key = value "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key "#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::new(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::new(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::new(r#"tag key = "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks that duplicated attributes correctly reported and recovering is /// possible after that mod duplicated { use super::*; mod with_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( 
iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value, not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
                //                                0   ^ = 4       ^ = 16

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Only attribute key is present
            #[test]
            fn key_only() {
                let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3);
                //                                0                   ^ = 20

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20))));
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }
        }

        /// Check for duplicated names is disabled
        mod without_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// Attribute has a value enclosed in single quotes
            #[test]
            fn single_quoted() {
                let mut iter = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"dup"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a
value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::new(r#"tag key='value' key=dup another=''"#, 3); // 0 ^ = 20 iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::UnquotedValue(20)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::new(r#"tag key='value' key another=''"#, 3); // 0 ^ = 20 iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::ExpectedEq(20)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } } #[test] fn mixed_quote() { let mut iter = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"a"), value: Cow::Borrowed(b"a"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"b"), value: Cow::Borrowed(b"b"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"c"), value: Cow::Borrowed(br#"cc"cc"#), })) ); assert_eq!( iter.next(), 
Some(Ok(Attribute {
                key: QName(b"d"),
                value: Cow::Borrowed(b"dd'dd"),
            }))
        );
        assert_eq!(iter.next(), None);
        assert_eq!(iter.next(), None);
    }
}

/// Checks how parsing of HTML-style attributes works. Each attribute can be
/// in one of three forms:
/// - XML-like: has a value, enclosed in single or double quotes
/// - has a value, not enclosed in quotes
/// - without value, key only
#[cfg(test)]
mod html {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Checked attribute is the single attribute
    mod single {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::html(r#"tag key='value'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::html(r#"tag key="value""#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::html(r#"tag key=value"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::html(r#"tag key"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(&[]),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key starts with an invalid symbol (a single quote in this test).
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key="#, 3); // 0 ^ = 8 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checked attribute is the first attribute in the list of many attributes mod first { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), 
value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value, not enclosed in quotes
        #[test]
        fn unquoted() {
            let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Only attribute key is present
        #[test]
        fn key_only() {
            let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(&[]),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key starts with an invalid symbol (a single quote in this test).
        /// Because we do not check validity of keys and values during parsing,
        /// that invalid attribute will be returned
        #[test]
        fn key_start_invalid() {
            let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"'key'"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"regular"),
                    value: Cow::Borrowed(b"attribute"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Key contains an invalid symbol (an ampersand in this test).
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"regular"), value: Cow::Borrowed(b"attribute"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular='attribute'" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular='attribute'"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular=" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"regular="), })) ); // Because we do not check validity of keys and values during parsing, // "'attribute'" is considered as key-only attribute assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'attribute'"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); //////////////////////////////////////////////////////////////////// let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3); // Because we do not check validity of keys and values during parsing, // "regular" is considered as unquoted attribute value assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: 
Cow::Borrowed(b"regular"),
                }))
            );
            // Because we do not check validity of keys and values during parsing,
            // "='attribute'" is considered as key-only attribute
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"='attribute'"),
                    value: Cow::Borrowed(&[]),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);

            ////////////////////////////////////////////////////////////////////

            let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
            //                                 0        ^ = 9     ^ = 19     ^ = 30

            // Because we do not check validity of keys and values during parsing,
            // "regular" is considered as unquoted attribute value
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"regular"),
                }))
            );
            // Because we do not check validity of keys and values during parsing,
            // "=" is considered as key-only attribute
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"="),
                    value: Cow::Borrowed(&[]),
                }))
            );
            // Because we do not check validity of keys and values during parsing,
            // "'attribute'" is considered as key-only attribute
            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"'attribute'"),
                    value: Cow::Borrowed(&[]),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }
    }

    /// Copy of single, but with additional spaces in markup
    mod sparsed {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Attribute has a value enclosed in single quotes
        #[test]
        fn single_quoted() {
            let mut iter = Attributes::html(r#"tag key = 'value' "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a value enclosed in double quotes
        #[test]
        fn double_quoted() {
            let mut iter = Attributes::html(r#"tag key = "value" "#, 3);

            assert_eq!(
                iter.next(),
                Some(Ok(Attribute {
                    key: QName(b"key"),
                    value: Cow::Borrowed(b"value"),
                }))
            );
            assert_eq!(iter.next(), None);
            assert_eq!(iter.next(), None);
        }

        /// Attribute has a
value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag key = value "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(&[]), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key is started with an invalid symbol (a single quote in this test). /// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_start_invalid() { let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"'key'"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Key contains an invalid symbol (an ampersand in this test). 
/// Because we do not check validity of keys and values during parsing, /// that invalid attribute will be returned #[test] fn key_contains_invalid() { let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key&jey"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute value is missing after `=` #[test] fn missed_value() { let mut iter = Attributes::html(r#"tag key = "#, 3); // 0 ^ = 10 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10)))); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Checks that duplicated attributes correctly reported and recovering is /// possible after that mod duplicated { use super::*; mod with_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value, not enclosed in quotes #[test] fn unquoted() { let mut iter = Attributes::html(r#"tag 
key='value' key=dup another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Only attribute key is present #[test] fn key_only() { let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3); // 0 ^ = 4 ^ = 16 assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4)))); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } } /// Check for duplicated names is disabled mod without_check { use super::*; use pretty_assertions::assert_eq; /// Attribute have a value enclosed in single quotes #[test] fn single_quoted() { let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"another"), value: Cow::Borrowed(b""), })) ); assert_eq!(iter.next(), None); assert_eq!(iter.next(), None); } /// Attribute have a value enclosed in double quotes #[test] fn double_quoted() { let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3); iter.with_checks(false); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"value"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: QName(b"key"), value: Cow::Borrowed(b"dup"), })) ); assert_eq!( iter.next(), Some(Ok(Attribute { key: 
QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Attribute has a value, not enclosed in quotes
            #[test]
            fn unquoted() {
                let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"dup"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }

            /// Only attribute key is present
            #[test]
            fn key_only() {
                let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
                iter.with_checks(false);

                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(b"value"),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"key"),
                        value: Cow::Borrowed(&[]),
                    }))
                );
                assert_eq!(
                    iter.next(),
                    Some(Ok(Attribute {
                        key: QName(b"another"),
                        value: Cow::Borrowed(b""),
                    }))
                );
                assert_eq!(iter.next(), None);
                assert_eq!(iter.next(), None);
            }
        }
    }

    #[test]
    fn mixed_quote() {
        let mut iter = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"a"),
                value: Cow::Borrowed(b"a"),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"b"),
                value: Cow::Borrowed(b"b"),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"c"),
                value: Cow::Borrowed(br#"cc"cc"#),
            }))
        );
        assert_eq!(
            iter.next(),
            Some(Ok(Attribute {
                key: QName(b"d"),
                value: Cow::Borrowed(b"dd'dd"),
            }))
        );
        assert_eq!(iter.next(), None);
        assert_eq!(iter.next(), None);
    }
}
quick-xml-0.38.4/src/events/mod.rs000064400000000000000000001771631046102023000150710ustar 00000000000000
//! Defines zero-copy XML events used throughout this library.
//!
//!
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts in an XML document like the
//! XML declaration. Each Event usually contains further information,
//! like the tag name, the attribute or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
//! and [`Reader::read_event_into`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: crate::reader::Reader::read_event
//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
//! [`Reader`]: crate::reader::Reader
//! [`Writer`]: crate::writer::Writer
//! [`Event`]: crate::events::Event

pub mod attributes;

#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::iter::FusedIterator;
use std::mem::replace;
use std::ops::Deref;
use std::str::from_utf8;

use crate::encoding::{Decoder, EncodingError};
use crate::errors::{Error, IllFormedError};
use crate::escape::{
    escape, minimal_escape, normalize_xml10_eols, normalize_xml11_eols, parse_number,
    partial_escape, EscapeError,
};
use crate::name::{LocalName, QName};
use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
use attributes::{AttrError, Attribute, Attributes};

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
/// /// The name can be accessed using the [`name`] or [`local_name`] methods. /// An iterator over the attributes is returned by the [`attributes`] method. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between `<` and `>` or `/>`: /// /// ``` /// # use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// // Remember, that \ at the end of string literal strips /// // all space characters to the first non-space character /// let mut reader = Reader::from_str("\ /// \ /// " /// ); /// let content = "element a1 = 'val1' a2=\"val2\" "; /// let event = BytesStart::from_content(content, 7); /// /// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow())); /// // deref coercion of &BytesStart to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` /// /// [`name`]: Self::name /// [`local_name`]: Self::local_name /// [`attributes`]: Self::attributes #[derive(Clone, Eq, PartialEq)] pub struct BytesStart<'a> { /// content of the element, before any utf8 conversion pub(crate) buf: Cow<'a, [u8]>, /// end of the element name, the name starts at that the start of `buf` pub(crate) name_len: usize, /// Encoding used for `buf` decoder: Decoder, } impl<'a> BytesStart<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self { BytesStart { buf: Cow::Borrowed(content), name_len, decoder, } } /// Creates a new `BytesStart` from the given name. /// /// # Warning /// /// `name` must be a valid name. 
#[inline] pub fn new>>(name: C) -> Self { let buf = str_cow_to_bytes(name); BytesStart { name_len: buf.len(), buf, decoder: Decoder::utf8(), } } /// Creates a new `BytesStart` from the given content (name + attributes). /// /// # Warning /// /// `&content[..name_len]` must be a valid name, and the remainder of `content` /// must be correctly-formed attributes. Neither are checked, it is possible /// to generate invalid XML if `content` or `name_len` are incorrect. #[inline] pub fn from_content>>(content: C, name_len: usize) -> Self { BytesStart { buf: str_cow_to_bytes(content), name_len, decoder: Decoder::utf8(), } } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.into_owned()), name_len: self.name_len, decoder: self.decoder, } } /// Converts the event into an owned event without taking ownership of Event pub fn to_owned(&self) -> BytesStart<'static> { BytesStart { buf: Cow::Owned(self.buf.clone().into_owned()), name_len: self.name_len, decoder: self.decoder, } } /// Converts the event into a borrowed event. Most useful when paired with [`to_end`]. /// /// # Example /// /// ``` /// use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::writer::Writer; /// # use quick_xml::Error; /// /// struct SomeStruct<'a> { /// attrs: BytesStart<'a>, /// // ... /// } /// # impl<'a> SomeStruct<'a> { /// # fn example(&self) -> Result<(), Error> { /// # let mut writer = Writer::new(Vec::new()); /// /// writer.write_event(Event::Start(self.attrs.borrow()))?; /// // ... 
/// writer.write_event(Event::End(self.attrs.to_end()))?; /// # Ok(()) /// # }} /// ``` /// /// [`to_end`]: Self::to_end pub fn borrow(&self) -> BytesStart<'_> { BytesStart { buf: Cow::Borrowed(&self.buf), name_len: self.name_len, decoder: self.decoder, } } /// Creates new paired close tag #[inline] pub fn to_end(&self) -> BytesEnd<'_> { BytesEnd::from(self.name()) } /// Get the decoder, used to decode bytes, read by the reader which produces /// this event, to the strings. /// /// When event was created manually, encoding is UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. /// /// [`encoding`]: ../index.html#encoding #[inline] pub const fn decoder(&self) -> Decoder { self.decoder } /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName<'_> { QName(&self.buf[..self.name_len]) } /// Gets the undecoded raw local tag name (excluding namespace) as present /// in the input stream. /// /// All content up to and including the first `:` character is removed from the tag name. #[inline] pub fn local_name(&self) -> LocalName<'_> { self.name().into() } /// Edit the name of the BytesStart in-place /// /// # Warning /// /// `name` must be a valid name. pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> { let bytes = self.buf.to_mut(); bytes.splice(..self.name_len, name.iter().cloned()); self.name_len = name.len(); self } } /// Attribute-related methods impl<'a> BytesStart<'a> { /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { self.extend_attributes(attributes); self } /// Add additional attributes to this tag using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. 
pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a> where I: IntoIterator, I::Item: Into>, { for attr in attributes { self.push_attribute(attr); } self } /// Adds an attribute to this element. pub fn push_attribute<'b, A>(&mut self, attr: A) where A: Into>, { self.buf.to_mut().push(b' '); self.push_attr(attr.into()); } /// Remove all attributes from the ByteStart pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> { self.buf.to_mut().truncate(self.name_len); self } /// Returns an iterator over the attributes of this tag. pub fn attributes(&self) -> Attributes<'_> { Attributes::wrap(&self.buf, self.name_len, false, self.decoder) } /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`). pub fn html_attributes(&self) -> Attributes<'_> { Attributes::wrap(&self.buf, self.name_len, true, self.decoder) } /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`, /// including the whitespace after the tag name if there is any. #[inline] pub fn attributes_raw(&self) -> &[u8] { &self.buf[self.name_len..] } /// Try to get an attribute pub fn try_get_attribute + Sized>( &'a self, attr_name: N, ) -> Result>, AttrError> { for a in self.attributes().with_checks(false) { let a = a?; if a.key.as_ref() == attr_name.as_ref() { return Ok(Some(a)); } } Ok(None) } /// Adds an attribute to this element. 
pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) { let bytes = self.buf.to_mut(); bytes.extend_from_slice(attr.key.as_ref()); bytes.extend_from_slice(b"=\""); // FIXME: need to escape attribute content bytes.extend_from_slice(attr.value.as_ref()); bytes.push(b'"'); } /// Adds new line in existing element pub(crate) fn push_newline(&mut self) { self.buf.to_mut().push(b'\n'); } /// Adds indentation bytes in existing element pub(crate) fn push_indent(&mut self, indent: &[u8]) { self.buf.to_mut().extend_from_slice(indent); } } impl<'a> Debug for BytesStart<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesStart {{ buf: ")?; write_cow_string(f, &self.buf)?; write!(f, ", name_len: {} }}", self.name_len) } } impl<'a> Deref for BytesStart<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.buf } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { let s = <&str>::arbitrary(u)?; if s.is_empty() || !s.chars().all(char::is_alphanumeric) { return Err(arbitrary::Error::IncorrectFormat); } let mut result = Self::new(s); result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?); Ok(result) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Closing tag data (`Event::End`): ``. /// /// The name can be accessed using the [`name`] or [`local_name`] methods. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. 
/// /// Note, that inner text will not contain `>` character inside: /// /// ``` /// # use quick_xml::events::{BytesEnd, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str(r#""#); /// // Note, that this entire string considered as a .name() /// let content = "element a1 = 'val1' a2=\"val2\" "; /// let event = BytesEnd::new(content); /// /// reader.config_mut().trim_markup_names_in_closing_tags = false; /// reader.config_mut().check_end_names = false; /// reader.read_event().unwrap(); // Skip `` /// /// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow())); /// assert_eq!(event.name().as_ref(), content.as_bytes()); /// // deref coercion of &BytesEnd to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` /// /// [`name`]: Self::name /// [`local_name`]: Self::local_name #[derive(Clone, Eq, PartialEq)] pub struct BytesEnd<'a> { name: Cow<'a, [u8]>, } impl<'a> BytesEnd<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self { BytesEnd { name } } /// Creates a new `BytesEnd` borrowing a slice. /// /// # Warning /// /// `name` must be a valid name. #[inline] pub fn new>>(name: C) -> Self { Self::wrap(str_cow_to_bytes(name)) } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesEnd<'static> { BytesEnd { name: Cow::Owned(self.name.into_owned()), } } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesEnd<'_> { BytesEnd { name: Cow::Borrowed(&self.name), } } /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName<'_> { QName(&self.name) } /// Gets the undecoded raw local tag name (excluding namespace) as present /// in the input stream. 
/// /// All content up to and including the first `:` character is removed from the tag name. #[inline] pub fn local_name(&self) -> LocalName<'_> { self.name().into() } } impl<'a> Debug for BytesEnd<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesEnd {{ name: ")?; write_cow_string(f, &self.name)?; write!(f, " }}") } } impl<'a> Deref for BytesEnd<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.name } } impl<'a> From> for BytesEnd<'a> { #[inline] fn from(name: QName<'a>) -> Self { Self::wrap(name.into_inner().into()) } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { Ok(Self::new(<&str>::arbitrary(u)?)) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Data from various events (most notably, `Event::Text`) that stored in XML /// in escaped form. Internally data is stored in escaped form. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event. In case of comment this is everything /// between `` and the text of comment will not contain `-->` inside. /// In case of DTD this is everything between `` /// (i.e. 
in case of DTD the first character is never space): /// /// ``` /// # use quick_xml::events::{BytesText, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// // Remember, that \ at the end of string literal strips /// // all space characters to the first non-space character /// let mut reader = Reader::from_str("\ /// \ /// comment or text \ /// " /// ); /// let content = "comment or text "; /// let event = BytesText::new(content); /// /// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow())); /// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow())); /// // deref coercion of &BytesText to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` #[derive(Clone, Eq, PartialEq)] pub struct BytesText<'a> { /// Escaped then encoded content of the event. Content is encoded in the XML /// document encoding when event comes from the reader and should be in the /// document encoding when event passed to the writer content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, } impl<'a> BytesText<'a> { /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding. #[inline] pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), decoder, } } /// Creates a new `BytesText` from an escaped string. #[inline] pub fn from_escaped>>(content: C) -> Self { Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Creates a new `BytesText` from a string. The string is expected not to /// be escaped. #[inline] pub fn new(content: &'a str) -> Self { Self::from_escaped(escape(content)) } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. 
#[inline]
    pub fn into_owned(self) -> BytesText<'static> {
        BytesText {
            content: self.content.into_owned().into(),
            decoder: self.decoder,
        }
    }

    /// Extracts the inner `Cow` from the `BytesText` event container.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }

    /// Converts the event into a borrowed event.
    #[inline]
    pub fn borrow(&self) -> BytesText<'_> {
        BytesText {
            content: Cow::Borrowed(&self.content),
            decoder: self.decoder,
        }
    }

    /// Decodes the content of the event.
    ///
    /// This will allocate if the value contains any escape sequences or in
    /// non-UTF-8 encoding.
    ///
    /// This method does not normalize end-of-line characters as required by [specification].
    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.decode_cow(&self.content)
    }

    /// Decodes the content of the XML 1.0 or HTML event.
    ///
    /// When this event produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this text event.
    ///
    /// This will allocate if the value contains any escape sequences or in non-UTF-8
    /// encoding, or EOL normalization is required.
    ///
    /// Note, that this method should be used only if event represents XML 1.0 or HTML content,
    /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differ.
    ///
    /// This method also can be used to get HTML content, because the rules are the same.
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends
    /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml10_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml10_eols)
    }

    /// Decodes the content of the XML 1.1 event.
/// /// When this event produced by the reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within /// this text event. /// /// This will allocate if the value contains any escape sequences or in non-UTF-8 /// encoding, or EOL normalization is required. /// /// Note, that this method should be used only if event represents XML 1.1 content, /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs. /// /// To get HTML content use [`xml10_content()`](Self::xml10_content). /// /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines pub fn xml11_content(&self) -> Result, EncodingError> { self.decoder.content(&self.content, normalize_xml11_eols) } /// Alias for [`xml11_content()`](Self::xml11_content). #[inline] pub fn xml_content(&self) -> Result, EncodingError> { self.xml11_content() } /// Alias for [`xml10_content()`](Self::xml10_content). #[inline] pub fn html_content(&self) -> Result, EncodingError> { self.xml10_content() } /// Removes leading XML whitespace bytes from text content. /// /// Returns `true` if content is empty after that pub fn inplace_trim_start(&mut self) -> bool { self.content = trim_cow( replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_start, ); self.content.is_empty() } /// Removes trailing XML whitespace bytes from text content. 
/// /// Returns `true` if content is empty after that pub fn inplace_trim_end(&mut self) -> bool { self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end); self.content.is_empty() } } impl<'a> Debug for BytesText<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesText {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesText<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { let s = <&str>::arbitrary(u)?; if !s.chars().all(char::is_alphanumeric) { return Err(arbitrary::Error::IncorrectFormat); } Ok(Self::new(s)) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// CDATA content contains unescaped data from the reader. If you want to write them as a text, /// [convert](Self::escape) it to [`BytesText`]. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. 
/// /// Note, that inner text will not contain `]]>` sequence inside: /// /// ``` /// # use quick_xml::events::{BytesCData, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str(""); /// let content = " CDATA section "; /// let event = BytesCData::new(content); /// /// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow())); /// // deref coercion of &BytesCData to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` #[derive(Clone, Eq, PartialEq)] pub struct BytesCData<'a> { content: Cow<'a, [u8]>, /// Encoding in which the `content` is stored inside the event decoder: Decoder, } impl<'a> BytesCData<'a> { /// Creates a new `BytesCData` from a byte sequence in the specified encoding. #[inline] pub(crate) fn wrap>>(content: C, decoder: Decoder) -> Self { Self { content: content.into(), decoder, } } /// Creates a new `BytesCData` from a string. /// /// # Warning /// /// `content` must not contain the `]]>` sequence. You can use /// [`BytesCData::escaped`] to escape the content instead. #[inline] pub fn new>>(content: C) -> Self { Self::wrap(str_cow_to_bytes(content), Decoder::utf8()) } /// Creates an iterator of `BytesCData` from a string. /// /// If a string contains `]]>`, it needs to be split into multiple `CDATA` /// sections, splitting the `]]` and `>` characters, because the CDATA closing /// sequence cannot be escaped. This iterator yields a `BytesCData` instance /// for each of those sections. 
/// /// # Examples /// /// ``` /// # use quick_xml::events::BytesCData; /// # use pretty_assertions::assert_eq; /// let content = ""; /// let cdata = BytesCData::escaped(content).collect::>(); /// assert_eq!(cdata, &[BytesCData::new("")]); /// /// let content = "Certain tokens like ]]> can be difficult and "; /// let cdata = BytesCData::escaped(content).collect::>(); /// assert_eq!(cdata, &[ /// BytesCData::new("Certain tokens like ]]"), /// BytesCData::new("> can be difficult and "), /// ]); /// /// let content = "foo]]>bar]]>baz]]>quux"; /// let cdata = BytesCData::escaped(content).collect::>(); /// assert_eq!(cdata, &[ /// BytesCData::new("foo]]"), /// BytesCData::new(">bar]]"), /// BytesCData::new(">baz]]"), /// BytesCData::new(">quux"), /// ]); /// ``` #[inline] pub fn escaped(content: &'a str) -> CDataIterator<'a> { CDataIterator { inner: utils::CDataIterator::new(content), } } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. #[inline] pub fn into_owned(self) -> BytesCData<'static> { BytesCData { content: self.content.into_owned().into(), decoder: self.decoder, } } /// Extracts the inner `Cow` from the `BytesCData` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesCData<'_> { BytesCData { content: Cow::Borrowed(&self.content), decoder: self.decoder, } } /// Converts this CDATA content to an escaped version, that can be written /// as an usual text in XML. 
///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `>`       | `&gt;`
    /// | `&`       | `&amp;`
    /// | `'`       | `&apos;`
    /// | `"`       | `&quot;`
    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
        let decoded = self.decode()?;
        // The decoded string is UTF-8, so the resulting text event always
        // uses the UTF-8 decoder regardless of the original encoding.
        Ok(BytesText::wrap(
            match escape(decoded) {
                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Converts this CDATA content to an escaped version, that can be written
    /// as an usual text in XML.
    ///
    /// In XML text content, it is allowed (though not recommended) to leave
    /// the quote special characters `"` and `'` unescaped.
    ///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `>`       | `&gt;`
    /// | `&`       | `&amp;`
    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match partial_escape(decoded) {
                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Converts this CDATA content to an escaped version, that can be written
    /// as an usual text in XML. This method escapes only those characters that
    /// must be escaped according to the [specification].
    ///
    /// This function performs following replacements:
    ///
    /// | Character | Replacement
    /// |-----------|------------
    /// | `<`       | `&lt;`
    /// | `&`       | `&amp;`
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#syntax
    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match minimal_escape(decoded) {
                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Decodes the raw input byte content of the CDATA section into a string,
    /// without performing XML entity escaping.
/// /// When this event produced by the XML reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within this /// CDATA event. /// /// This method does not normalizes end-of-line characters as required by [specification]. /// Usually you need [`xml_content()`](Self::xml_content) instead of this method. /// /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends pub fn decode(&self) -> Result, EncodingError> { self.decoder.decode_cow(&self.content) } /// Decodes the raw input byte content of the CDATA section of the XML 1.0 or /// HTML event into a string. /// /// When this event produced by the reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within /// this CDATA event. /// /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization /// is required. /// /// Note, that this method should be used only if event represents XML 1.0 or HTML content, /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs. /// /// This method also can be used to get HTML content, because rules the same. /// /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines pub fn xml10_content(&self) -> Result, EncodingError> { self.decoder.content(&self.content, normalize_xml10_eols) } /// Decodes the raw input byte content of the CDATA section of the XML 1.1 event /// into a string. /// /// When this event produced by the reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within /// this CDATA event. /// /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization /// is required. 
/// /// Note, that this method should be used only if event represents XML 1.1 content, /// because rules for normalizing EOLs for [XML 1.0], [XML 1.1] and [HTML] differs. /// /// To get HTML content use [`xml10_content()`](Self::xml10_content). /// /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines pub fn xml11_content(&self) -> Result, EncodingError> { self.decoder.content(&self.content, normalize_xml11_eols) } /// Alias for [`xml11_content()`](Self::xml11_content). #[inline] pub fn xml_content(&self) -> Result, EncodingError> { self.xml11_content() } /// Alias for [`xml10_content()`](Self::xml10_content). #[inline] pub fn html_content(&self) -> Result, EncodingError> { self.xml10_content() } } impl<'a> Debug for BytesCData<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesCData {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesCData<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { Ok(Self::new(<&str>::arbitrary(u)?)) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } /// Iterator over `CDATA` sections in a string. /// /// This iterator is created by the [`BytesCData::escaped`] method. 
#[derive(Debug, Clone)] pub struct CDataIterator<'a> { inner: utils::CDataIterator<'a>, } impl<'a> Iterator for CDataIterator<'a> { type Item = BytesCData<'a>; fn next(&mut self) -> Option> { self.inner .next() .map(|slice| BytesCData::wrap(slice.as_bytes(), Decoder::utf8())) } } impl FusedIterator for CDataIterator<'_> {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications. /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. /// /// Note, that inner text will not contain `?>` sequence inside: /// /// ``` /// # use quick_xml::events::{BytesPI, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str(":-<~ ?>"); /// let content = "processing instruction >:-<~ "; /// let event = BytesPI::new(content); /// /// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow())); /// // deref coercion of &BytesPI to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` /// /// [PI]: https://www.w3.org/TR/xml11/#sec-pi #[derive(Clone, Eq, PartialEq)] pub struct BytesPI<'a> { content: BytesStart<'a>, } impl<'a> BytesPI<'a> { /// Creates a new `BytesPI` from a byte sequence in the specified encoding. #[inline] pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self { Self { content: BytesStart::wrap(content, target_len, decoder), } } /// Creates a new `BytesPI` from a string. /// /// # Warning /// /// `content` must not contain the `?>` sequence. 
#[inline] pub fn new>>(content: C) -> Self { let buf = str_cow_to_bytes(content); let name_len = name_len(&buf); Self { content: BytesStart { buf, name_len, decoder: Decoder::utf8(), }, } } /// Ensures that all data is owned to extend the object's lifetime if /// necessary. #[inline] pub fn into_owned(self) -> BytesPI<'static> { BytesPI { content: self.content.into_owned(), } } /// Extracts the inner `Cow` from the `BytesPI` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content.buf } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesPI<'_> { BytesPI { content: self.content.borrow(), } } /// A target used to identify the application to which the instruction is directed. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::BytesPI; /// /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); /// assert_eq!(instruction.target(), b"xml-stylesheet"); /// ``` #[inline] pub fn target(&self) -> &[u8] { self.content.name().0 } /// Content of the processing instruction. Contains everything between target /// name and the end of the instruction. A direct consequence is that the first /// character is always a space character. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::BytesPI; /// /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); /// assert_eq!(instruction.content(), br#" href="style.css""#); /// ``` #[inline] pub fn content(&self) -> &[u8] { self.content.attributes_raw() } /// A view of the processing instructions' content as a list of key-value pairs. /// /// Key-value pairs are used in some processing instructions, for example in /// ``. /// /// Returned iterator does not validate attribute values as may required by /// target's rules. For example, it doesn't check that substring `?>` is not /// present in the attribute value. 
That shouldn't be the problem when event /// is produced by the reader, because reader detects end of processing instruction /// by the first `?>` sequence, as required by the specification, and therefore /// this sequence cannot appear inside it. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::borrow::Cow; /// use quick_xml::events::attributes::Attribute; /// use quick_xml::events::BytesPI; /// use quick_xml::name::QName; /// /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#); /// for attr in instruction.attributes() { /// assert_eq!(attr, Ok(Attribute { /// key: QName(b"href"), /// value: Cow::Borrowed(b"style.css"), /// })); /// } /// ``` #[inline] pub fn attributes(&self) -> Attributes<'_> { self.content.attributes() } } impl<'a> Debug for BytesPI<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesPI {{ content: ")?; write_cow_string(f, &self.content.buf)?; write!(f, " }}") } } impl<'a> Deref for BytesPI<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { Ok(Self::new(<&str>::arbitrary(u)?)) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// An XML declaration (`Event::Decl`). /// /// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd) /// /// This event implements `Deref`. The `deref()` implementation /// returns the content of this event between ``. 
/// /// Note, that inner text will not contain `?>` sequence inside: /// /// ``` /// # use quick_xml::events::{BytesDecl, BytesStart, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str(""); /// let content = "xml version = '1.0' "; /// let event = BytesDecl::from_start(BytesStart::from_content(content, 3)); /// /// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow())); /// // deref coercion of &BytesDecl to &[u8] /// assert_eq!(&event as &[u8], content.as_bytes()); /// // AsRef<[u8]> for &T + deref coercion /// assert_eq!(event.as_ref(), content.as_bytes()); /// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct BytesDecl<'a> { content: BytesStart<'a>, } impl<'a> BytesDecl<'a> { /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`), /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`) /// attribute. /// /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values. /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since /// the double quote character is not allowed in any of the attribute values. 
pub fn new( version: &str, encoding: Option<&str>, standalone: Option<&str>, ) -> BytesDecl<'static> { // Compute length of the buffer based on supplied attributes // ' encoding=""' => 12 let encoding_attr_len = if let Some(xs) = encoding { 12 + xs.len() } else { 0 }; // ' standalone=""' => 14 let standalone_attr_len = if let Some(xs) = standalone { 14 + xs.len() } else { 0 }; // 'xml version=""' => 14 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len); buf.push_str("xml version=\""); buf.push_str(version); if let Some(encoding_val) = encoding { buf.push_str("\" encoding=\""); buf.push_str(encoding_val); } if let Some(standalone_val) = standalone { buf.push_str("\" standalone=\""); buf.push_str(standalone_val); } buf.push('"'); BytesDecl { content: BytesStart::from_content(buf, 3), } } /// Creates a `BytesDecl` from a `BytesStart` pub const fn from_start(start: BytesStart<'a>) -> Self { Self { content: start } } /// Gets xml version, excluding quotes (`'` or `"`). /// /// According to the [grammar], the version *must* be the first thing in the declaration. /// This method tries to extract the first thing in the declaration and return it. /// In case of multiple attributes value of the first one is returned. /// /// If version is missed in the declaration, or the first thing is not a version, /// [`IllFormedError::MissingDeclVersion`] will be returned. 
/// /// # Examples /// /// ``` /// use quick_xml::errors::{Error, IllFormedError}; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0)); /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.version() { /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0)); /// match decl.version() { /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0)); /// match decl.version() { /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {}, /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn version(&self) -> Result, Error> { // The version *must* be the first thing in the declaration. match self.content.attributes().with_checks(false).next() { Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value), // first attribute was not "version" Some(Ok(a)) => { let found = from_utf8(a.key.as_ref()) .map_err(|_| IllFormedError::MissingDeclVersion(None))? .to_string(); Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some( found, )))) } // error parsing attributes Some(Err(e)) => Err(e.into()), // no attributes None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))), } } /// Gets xml encoding, excluding quotes (`'` or `"`). 
/// /// Although according to the [grammar] encoding must appear before `"standalone"` /// and after `"version"`, this method does not check that. The first occurrence /// of the attribute will be returned even if there are several. Also, method does /// not restrict symbols that can forming the encoding, so the returned encoding /// name may not correspond to the grammar. /// /// # Examples /// /// ``` /// use std::borrow::Cow; /// use quick_xml::Error; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.encoding().is_none()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0)); /// match decl.encoding() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0)); /// match decl.encoding() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"), /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn encoding(&self) -> Option, AttrError>> { self.content .try_get_attribute("encoding") .map(|a| a.map(|a| a.value)) .transpose() } /// Gets xml standalone, excluding quotes (`'` or `"`). /// /// Although according to the [grammar] standalone flag must appear after `"version"` /// and `"encoding"`, this method does not check that. The first occurrence of the /// attribute will be returned even if there are several. Also, method does not /// restrict symbols that can forming the value, so the returned flag name may not /// correspond to the grammar. 
/// /// # Examples /// /// ``` /// use std::borrow::Cow; /// use quick_xml::Error; /// use quick_xml::events::{BytesDecl, BytesStart}; /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0)); /// assert!(decl.standalone().is_none()); /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"), /// _ => assert!(false), /// } /// /// // /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0)); /// match decl.standalone() { /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"), /// _ => assert!(false), /// } /// ``` /// /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl pub fn standalone(&self) -> Option, AttrError>> { self.content .try_get_attribute("standalone") .map(|a| a.map(|a| a.value)) .transpose() } /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) /// algorithm. /// /// If encoding in not known, or `encoding` key was not found, returns `None`. /// In case of duplicated `encoding` key, encoding, corresponding to the first /// one, is returned. #[cfg(feature = "encoding")] pub fn encoder(&self) -> Option<&'static Encoding> { self.encoding() .and_then(|e| e.ok()) .and_then(|e| Encoding::for_label(&e)) } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesDecl<'static> { BytesDecl { content: self.content.into_owned(), } } /// Converts the event into a borrowed event. 
#[inline]
    pub fn borrow(&self) -> BytesDecl<'_> {
        BytesDecl {
            content: self.content.borrow(),
        }
    }
}

impl<'a> Deref for BytesDecl<'a> {
    type Target = [u8];

    /// Gives access to the raw content bytes of the declaration.
    fn deref(&self) -> &[u8] {
        &self.content
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Raw bytes of the reference, without the surrounding `&` and `;`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}

impl<'a> BytesRef<'a> {
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
        Self {
            content: Cow::Borrowed(content),
            decoder,
        }
    }

    /// Creates a new `BytesRef` borrowing a slice.
    ///
    /// # Warning
    ///
    /// `name` must be a valid name.
#[inline] pub fn new>>(name: C) -> Self { Self { content: str_cow_to_bytes(name), decoder: Decoder::utf8(), } } /// Converts the event into an owned event. pub fn into_owned(self) -> BytesRef<'static> { BytesRef { content: Cow::Owned(self.content.into_owned()), decoder: self.decoder, } } /// Extracts the inner `Cow` from the `BytesRef` event container. #[inline] pub fn into_inner(self) -> Cow<'a, [u8]> { self.content } /// Converts the event into a borrowed event. #[inline] pub fn borrow(&self) -> BytesRef<'_> { BytesRef { content: Cow::Borrowed(&self.content), decoder: self.decoder, } } /// Decodes the content of the event. /// /// This will allocate if the value contains any escape sequences or in /// non-UTF-8 encoding. /// /// This method does not normalizes end-of-line characters as required by [specification]. /// Usually you need [`xml_content()`](Self::xml_content) instead of this method. /// /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends pub fn decode(&self) -> Result, EncodingError> { self.decoder.decode_cow(&self.content) } /// Decodes the content of the XML 1.0 or HTML event. /// /// When this event produced by the reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within /// this general reference event. /// /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization /// is required. /// /// Note, that this method should be used only if event represents XML 1.0 or HTML content, /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs. /// /// This method also can be used to get HTML content, because rules the same. 
/// /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines pub fn xml10_content(&self) -> Result, EncodingError> { self.decoder.content(&self.content, normalize_xml10_eols) } /// Decodes the content of the XML 1.1 event. /// /// When this event produced by the reader, it uses the encoding information /// associated with that reader to interpret the raw bytes contained within /// this general reference event. /// /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization /// is required. /// /// Note, that this method should be used only if event represents XML 1.1 content, /// because rules for normalizing EOLs for [XML 1.0] / [HTML] and [XML 1.1] differs. /// /// To get HTML content use [`xml10_content()`](Self::xml10_content). /// /// [XML 1.0]: https://www.w3.org/TR/xml/#sec-line-ends /// [XML 1.1]: https://www.w3.org/TR/xml11/#sec-line-ends /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines pub fn xml11_content(&self) -> Result, EncodingError> { self.decoder.content(&self.content, normalize_xml11_eols) } /// Alias for [`xml11_content()`](Self::xml11_content). #[inline] pub fn xml_content(&self) -> Result, EncodingError> { self.xml11_content() } /// Alias for [`xml10_content()`](Self::xml10_content). #[inline] pub fn html_content(&self) -> Result, EncodingError> { self.xml10_content() } /// Returns `true` if the specified reference represents the character reference /// (`&#;`). 
/// /// ``` /// # use quick_xml::events::BytesRef; /// # use pretty_assertions::assert_eq; /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true); /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true); /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false); /// ``` pub fn is_char_ref(&self) -> bool { matches!(self.content.first(), Some(b'#')) } /// If this reference represents character reference, then resolves it and /// returns the character, otherwise returns `None`. /// /// This method does not check if character is allowed for XML, in other words, /// well-formedness constraint [WFC: Legal Char] is not enforced. /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`. /// /// ``` /// # use quick_xml::events::BytesRef; /// # use pretty_assertions::assert_eq; /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0')); /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1')); /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None); /// ``` /// /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar pub fn resolve_char_ref(&self) -> Result, Error> { if let Some(num) = self.decode()?.strip_prefix('#') { let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?; return Ok(Some(ch)); } Ok(None) } } impl<'a> Debug for BytesRef<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "BytesRef {{ content: ")?; write_cow_string(f, &self.content)?; write!(f, " }}") } } impl<'a> Deref for BytesRef<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { &self.content } } #[cfg(feature = "arbitrary")] impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { Ok(Self::new(<&str>::arbitrary(u)?)) } fn size_hint(depth: usize) -> (usize, Option) { <&str as arbitrary::Arbitrary>::size_hint(depth) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Event 
emitted by [`Reader::read_event_into`]. /// /// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Event<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), /// End tag ``. End(BytesEnd<'a>), /// Empty element tag (with attributes) ``. Empty(BytesStart<'a>), /// Escaped character data between tags. Text(BytesText<'a>), /// Unescaped character data stored in ``. CData(BytesCData<'a>), /// Comment ``. Comment(BytesText<'a>), /// XML declaration ``. Decl(BytesDecl<'a>), /// Processing instruction ``. PI(BytesPI<'a>), /// Document type definition data (DTD) stored in ``. DocType(BytesText<'a>), /// General reference `&entity;` in the textual data. Can be either an entity /// reference, or a character reference. GeneralRef(BytesRef<'a>), /// End of XML document. Eof, } impl<'a> Event<'a> { /// Converts the event to an owned version, untied to the lifetime of /// buffer used when reading but incurring a new, separate allocation. pub fn into_owned(self) -> Event<'static> { match self { Event::Start(e) => Event::Start(e.into_owned()), Event::End(e) => Event::End(e.into_owned()), Event::Empty(e) => Event::Empty(e.into_owned()), Event::Text(e) => Event::Text(e.into_owned()), Event::Comment(e) => Event::Comment(e.into_owned()), Event::CData(e) => Event::CData(e.into_owned()), Event::Decl(e) => Event::Decl(e.into_owned()), Event::PI(e) => Event::PI(e.into_owned()), Event::DocType(e) => Event::DocType(e.into_owned()), Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()), Event::Eof => Event::Eof, } } /// Converts the event into a borrowed event. 
#[inline]
    pub fn borrow(&self) -> Event<'_> {
        match self {
            Event::Start(e) => Event::Start(e.borrow()),
            Event::End(e) => Event::End(e.borrow()),
            Event::Empty(e) => Event::Empty(e.borrow()),
            Event::Text(e) => Event::Text(e.borrow()),
            Event::Comment(e) => Event::Comment(e.borrow()),
            Event::CData(e) => Event::CData(e.borrow()),
            Event::Decl(e) => Event::Decl(e.borrow()),
            Event::PI(e) => Event::PI(e.borrow()),
            Event::DocType(e) => Event::DocType(e.borrow()),
            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
            Event::Eof => Event::Eof,
        }
    }
}

impl<'a> Deref for Event<'a> {
    type Target = [u8];

    /// Dereferences to the bytes of the wrapped event; `Eof` yields an empty slice.
    fn deref(&self) -> &[u8] {
        match *self {
            Event::Start(ref e) | Event::Empty(ref e) => e,
            Event::End(ref e) => e,
            Event::Text(ref e) => e,
            Event::Decl(ref e) => e,
            Event::PI(ref e) => e,
            Event::CData(ref e) => e,
            Event::Comment(ref e) => e,
            Event::DocType(ref e) => e,
            Event::GeneralRef(ref e) => e,
            Event::Eof => &[],
        }
    }
}

impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Converts a string `Cow` into a byte `Cow` without copying: a borrowed `&str`
/// stays borrowed, an owned `String` hands over its buffer.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    match content.into() {
        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
    }
}

/// Applies `trim` to the bytes of `value`, keeping the borrowed/owned kind of the input.
///
/// A borrowed value is simply re-sliced. An owned value is replaced by a copy of
/// the trimmed slice only when `trim` actually shortened it.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
        Cow::Owned(mut bytes) => {
            let trimmed = trim(&bytes);
            if trimmed.len() != bytes.len() {
                bytes = trimmed.to_vec();
            }
            Cow::Owned(bytes)
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}
quick-xml-0.38.4/src/lib.rs000064400000000000000000000054201046102023000135360ustar 00000000000000
//! High performance XML reader/writer.
//!
//! # Description
//!
//! quick-xml contains two modes of operation:
//!
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
//! cannot be completely read into memory at once.
//!
//! The user has to explicitly _ask_ for the next XML event, similar to a database cursor.
//! This is achieved by the following two structs:
//!
//! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user.
//! - [`Writer`]: A XML writer. Can be nested with readers if you want to transform XMLs.
//!
//! Especially for nested XML elements, the user must keep track _where_ (how deep)
//! in the XML document the current event is located.
//!
//! quick-xml contains optional support of asynchronous reading and writing using [tokio].
//! To get it enable the [`async-tokio`](#async-tokio) feature.
//!
//! Furthermore, quick-xml also contains optional [Serde] support to directly
//! serialize and deserialize from structs, without having to deal with the XML events.
//! To get it enable the [`serialize`](#serialize) feature. Read more about mapping Rust types
//! to XML in the documentation of [`de`] module. Also check [`serde_helpers`]
//! module.
//!
//! # Examples
//!
//! - For a reading example see [`Reader`]
//! - For a writing example see [`Writer`]
//!
//! # Features
//!
//! `quick-xml` supports the following features:
//!
//! [StAX]: https://en.wikipedia.org/wiki/StAX
//! [tokio]: https://tokio.rs/
//! [Serde]: https://serde.rs/
//! [`de`]: ./de/index.html
// Every feature label carries an anchor named after the feature, so that the
// `#async-tokio` and `#serialize` links in the text above have a target.
#![cfg_attr(
    feature = "document-features",
    cfg_attr(doc, doc = ::document_features::document_features!(
        feature_label = "<a id=\"{feature}\" href=\"#{feature}\"><strong><code>{feature}</code></strong></a>"
    ))
)]
#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![recursion_limit = "1024"]
// Enable feature requirements in the docs from 1.57
// See https://stackoverflow.com/questions/61417452
// docs.rs defines `docsrs` when building documentation
// Since 1.92 `doc_auto_cfg` was merged into `doc_cfg`
#![cfg_attr(docsrs, feature(doc_cfg))]

#[cfg(feature = "serialize")]
pub mod de;
pub mod encoding;
pub mod errors;
pub mod escape;
pub mod events;
pub mod name;
pub mod parser;
pub mod reader;
#[cfg(feature = "serialize")]
pub mod se;
#[cfg(feature = "serde-types")]
pub mod serde_helpers;
/// Not an official API, public for integration tests
#[doc(hidden)]
pub mod utils;
pub mod writer;

// reexports
pub use crate::encoding::Decoder;
#[cfg(feature = "serialize")]
pub use crate::errors::serialize::{DeError, SeError};
pub use crate::errors::{Error, Result};
pub use crate::reader::{NsReader, Reader};
pub use crate::writer::{ElementWriter, Writer};
quick-xml-0.38.4/src/name.rs000064400000000000000000001733001046102023000137130ustar 00000000000000
//! Module for handling names according to the W3C [Namespaces in XML 1.1 (Second Edition)][spec]
//! specification
//!
//! [spec]: https://www.w3.org/TR/xml-names11

use crate::events::attributes::Attribute;
use crate::events::{BytesStart, Event};
use crate::utils::write_byte_string;
use memchr::memchr;
use std::fmt::{self, Debug, Formatter};
use std::iter::FusedIterator;

/// Some namespace was invalid
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NamespaceError {
    /// Specified namespace prefix is unknown, cannot resolve namespace for it
    UnknownPrefix(Vec<u8>),
    /// Attempts to bind the `xml` prefix to something other than `http://www.w3.org/XML/1998/namespace`.
    ///
    /// `xml` prefix can be bound only to `http://www.w3.org/XML/1998/namespace`.
    ///
    /// Contains the namespace to which `xml` tried to be bound.
    InvalidXmlPrefixBind(Vec<u8>),
    /// Attempts to bind the `xmlns` prefix.
/// /// `xmlns` prefix is always bound to `http://www.w3.org/2000/xmlns/` and cannot be bound /// to any other namespace or even to `http://www.w3.org/2000/xmlns/`. /// /// Contains the namespace to which `xmlns` tried to be bound. InvalidXmlnsPrefixBind(Vec), /// Attempts to bind some prefix (except `xml`) to `http://www.w3.org/XML/1998/namespace`. /// /// Only `xml` prefix can be bound to `http://www.w3.org/XML/1998/namespace`. /// /// Contains the prefix that is tried to be bound. InvalidPrefixForXml(Vec), /// Attempts to bind some prefix to `http://www.w3.org/2000/xmlns/`. /// /// `http://www.w3.org/2000/xmlns/` cannot be bound to any prefix, even to `xmlns`. /// /// Contains the prefix that is tried to be bound. InvalidPrefixForXmlns(Vec), } impl fmt::Display for NamespaceError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::UnknownPrefix(prefix) => { f.write_str("unknown namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("'") } Self::InvalidXmlPrefixBind(namespace) => { f.write_str("the namespace prefix 'xml' cannot be bound to '")?; write_byte_string(f, namespace)?; f.write_str("'") } Self::InvalidXmlnsPrefixBind(namespace) => { f.write_str("the namespace prefix 'xmlns' cannot be bound to '")?; write_byte_string(f, namespace)?; f.write_str("'") } Self::InvalidPrefixForXml(prefix) => { f.write_str("the namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("' cannot be bound to 'http://www.w3.org/XML/1998/namespace'") } Self::InvalidPrefixForXmlns(prefix) => { f.write_str("the namespace prefix '")?; write_byte_string(f, prefix)?; f.write_str("' cannot be bound to 'http://www.w3.org/2000/xmlns/'") } } } } impl std::error::Error for NamespaceError {} //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [qualified name] of an element or an attribute, including an optional /// namespace [prefix](Prefix) and a [local name](LocalName). 
/// /// [qualified name]: https://www.w3.org/TR/xml-names11/#dt-qualname #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct QName<'a>(pub &'a [u8]); impl<'a> QName<'a> { /// Converts this name to an internal slice representation. #[inline(always)] pub const fn into_inner(self) -> &'a [u8] { self.0 } /// Returns local part of this qualified name. /// /// All content up to and including the first `:` character is removed from /// the tag name. /// /// # Examples /// /// ``` /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.local_name().as_ref(), b"simple-name"); /// /// let qname = QName(b"namespace:simple-name"); /// assert_eq!(qname.local_name().as_ref(), b"simple-name"); /// ``` pub fn local_name(&self) -> LocalName<'a> { LocalName(self.index().map_or(self.0, |i| &self.0[i + 1..])) } /// Returns namespace part of this qualified name or `None` if namespace part /// is not defined (symbol `':'` not found). /// /// # Examples /// /// ``` /// # use std::convert::AsRef; /// # use quick_xml::name::QName; /// let simple = QName(b"simple-name"); /// assert_eq!(simple.prefix(), None); /// /// let qname = QName(b"prefix:simple-name"); /// assert_eq!(qname.prefix().as_ref().map(|n| n.as_ref()), Some(b"prefix".as_ref())); /// ``` pub fn prefix(&self) -> Option> { self.index().map(|i| Prefix(&self.0[..i])) } /// The same as `(qname.local_name(), qname.prefix())`, but does only one /// lookup for a `':'` symbol. pub fn decompose(&self) -> (LocalName<'a>, Option>) { match self.index() { None => (LocalName(self.0), None), Some(i) => (LocalName(&self.0[i + 1..]), Some(Prefix(&self.0[..i]))), } } /// If that `QName` represents `"xmlns"` series of names, returns `Some`, /// otherwise `None` is returned. 
/// /// # Examples /// /// ``` /// # use quick_xml::name::{QName, PrefixDeclaration}; /// let qname = QName(b"xmlns"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Default)); /// /// let qname = QName(b"xmlns:prefix"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b"prefix"))); /// /// // Be aware that this method does not check the validity of the prefix - it can be empty! /// let qname = QName(b"xmlns:"); /// assert_eq!(qname.as_namespace_binding(), Some(PrefixDeclaration::Named(b""))); /// /// let qname = QName(b"other-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// /// // https://www.w3.org/TR/xml-names11/#xmlReserved /// let qname = QName(b"xmlns-reserved-name"); /// assert_eq!(qname.as_namespace_binding(), None); /// ``` pub fn as_namespace_binding(&self) -> Option> { if self.0.starts_with(b"xmlns") { return match self.0.get(5) { None => Some(PrefixDeclaration::Default), Some(&b':') => Some(PrefixDeclaration::Named(&self.0[6..])), _ => None, }; } None } /// Returns the index in the name where prefix ended #[inline(always)] fn index(&self) -> Option { memchr(b':', self.0) } } impl<'a> Debug for QName<'a> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "QName(")?; write_byte_string(f, self.0)?; write!(f, ")") } } impl<'a> AsRef<[u8]> for QName<'a> { #[inline] fn as_ref(&self) -> &[u8] { self.0 } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A [local (unqualified) name] of an element or an attribute, i.e. a name /// without [prefix](Prefix). /// /// [local (unqualified) name]: https://www.w3.org/TR/xml-names11/#dt-localname #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] pub struct LocalName<'a>(pub(crate) &'a [u8]); impl<'a> LocalName<'a> { /// Converts this name to an internal slice representation. 
#[inline(always)]
    pub const fn into_inner(self) -> &'a [u8] {
        self.0
    }
}

impl<'a> Debug for LocalName<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "LocalName(")?;
        write_byte_string(f, self.0)?;
        write!(f, ")")
    }
}

impl<'a> AsRef<[u8]> for LocalName<'a> {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0
    }
}

impl<'a> From<QName<'a>> for LocalName<'a> {
    /// Creates `LocalName` from a [`QName`]
    ///
    /// # Examples
    ///
    /// ```
    /// # use quick_xml::name::{LocalName, QName};
    ///
    /// let local: LocalName = QName(b"unprefixed").into();
    /// assert_eq!(local.as_ref(), b"unprefixed");
    ///
    /// let local: LocalName = QName(b"some:prefix").into();
    /// assert_eq!(local.as_ref(), b"prefix");
    /// ```
    #[inline]
    fn from(name: QName<'a>) -> Self {
        // Same split at the first `:` that `QName::local_name` performs
        name.local_name()
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A [namespace prefix] part of the [qualified name](QName) of an element tag
/// or an attribute: a `prefix` in `<prefix:local-element-name>` or
/// `prefix:local-attribute-name="attribute value"`.
///
/// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
pub struct Prefix<'a>(&'a [u8]);

impl<'a> Prefix<'a> {
    /// Extracts internal slice
    #[inline(always)]
    pub const fn into_inner(self) -> &'a [u8] {
        self.0
    }

    /// Checks if this prefix is a special prefix `xml`.
    #[inline(always)]
    pub const fn is_xml(&self) -> bool {
        matches!(self.0, b"xml")
    }

    /// Checks if this prefix is a special prefix `xmlns`.
#[inline(always)]
    pub const fn is_xmlns(&self) -> bool {
        matches!(self.0, b"xmlns")
    }
}

impl<'a> Debug for Prefix<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "Prefix(")?;
        write_byte_string(f, self.0)?;
        write!(f, ")")
    }
}

impl<'a> AsRef<[u8]> for Prefix<'a> {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A namespace prefix declaration, `xmlns` or `xmlns:<name>`, as defined in
/// [Namespaces in XML specification](https://www.w3.org/TR/xml-names11/#ns-decl)
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum PrefixDeclaration<'a> {
    /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."`
    Default,
    /// XML attribute binds a specified prefix to a namespace. Corresponds to a
    /// `prefix` in `xmlns:prefix="..."`, which is stored as payload of this variant.
    Named(&'a [u8]),
}

impl<'a> Debug for PrefixDeclaration<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::Default => f.write_str("PrefixDeclaration::Default"),
            Self::Named(prefix) => {
                f.write_str("PrefixDeclaration::Named(")?;
                write_byte_string(f, prefix)?;
                f.write_str(")")
            }
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A [namespace name] that is declared in a `xmlns[:prefix]="namespace name"`.
///
/// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
pub struct Namespace<'a>(pub &'a [u8]);

impl<'a> Namespace<'a> {
    /// Converts this namespace to an internal slice representation.
    ///
    /// This is [non-normalized] attribute value, i.e. any entity references are
    /// not expanded and space characters are not removed. This means, that
    /// different byte slices, returned from this method, can represent the same
    /// namespace and would be treated by parser as identical.
    ///
    /// For example, if the entity **eacute** has been defined to be **é**,
    /// the empty tags below all contain namespace declarations binding the
    /// prefix `p` to the same [IRI reference], `http://example.org/rosé`.
    ///
    /// ```xml
    /// <p:foo xmlns:p="http://example.org/rosé" />
    /// <p:foo xmlns:p="http://example.org/ros&#xe9;" />
    /// <p:foo xmlns:p="http://example.org/ros&#xE9;" />
    /// <p:foo xmlns:p="http://example.org/ros&eacute;" />
    /// ```
    ///
    /// This is because XML entity references are expanded during attribute value
    /// normalization.
    ///
    /// [non-normalized]: https://www.w3.org/TR/xml11/#AVNormalize
    /// [IRI reference]: https://datatracker.ietf.org/doc/html/rfc3987
    #[inline(always)]
    pub const fn into_inner(self) -> &'a [u8] {
        self.0
    }
    //TODO: implement value normalization and use it when comparing namespaces
}

impl<'a> Debug for Namespace<'a> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "Namespace(")?;
        write_byte_string(f, self.0)?;
        write!(f, ")")
    }
}

impl<'a> AsRef<[u8]> for Namespace<'a> {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Result of [prefix] resolution which is created by [`NamespaceResolver::resolve`], [`NsReader::resolve_attribute`],
/// [`NsReader::resolve_element`], [`NsReader::read_resolved_event`] and
/// [`NsReader::read_resolved_event_into`] methods.
///
/// [prefix]: Prefix
/// [`NsReader::resolve_attribute`]: crate::reader::NsReader::resolve_attribute
/// [`NsReader::resolve_element`]: crate::reader::NsReader::resolve_element
/// [`NsReader::read_resolved_event`]: crate::reader::NsReader::read_resolved_event
/// [`NsReader::read_resolved_event_into`]: crate::reader::NsReader::read_resolved_event_into
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ResolveResult<'ns> {
    /// Qualified name does not contain prefix, and resolver does not define
    /// default namespace, so name is not bound to any namespace
    Unbound,
    /// [`Prefix`] resolved to the specified namespace
    Bound(Namespace<'ns>),
    /// Specified prefix was not found in scope
    Unknown(Vec<u8>),
}

impl<'ns> Debug for ResolveResult<'ns> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Unbound => write!(f, "Unbound"),
            Self::Bound(ns) => write!(f, "Bound({:?})", ns),
            Self::Unknown(p) => {
                write!(f, "Unknown(")?;
                write_byte_string(f, p)?;
                write!(f, ")")
            }
        }
    }
}

impl<'ns> TryFrom<ResolveResult<'ns>> for Option<Namespace<'ns>> {
    type Error = NamespaceError;

    /// Try to convert this result to an optional namespace and returns
    /// [`NamespaceError::UnknownPrefix`] if this result represents unknown prefix
    fn try_from(result: ResolveResult<'ns>) -> Result<Self, NamespaceError> {
        use ResolveResult::*;

        match result {
            Unbound => Ok(None),
            Bound(ns) => Ok(Some(ns)),
            Unknown(p) => Err(NamespaceError::UnknownPrefix(p)),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// An entry that contains index into the buffer with namespace bindings.
///
/// Defines a mapping from *[namespace prefix]* to *[namespace name]*.
/// If prefix is empty, defines a *default namespace* binding that applies to
/// unprefixed element names (unprefixed attribute names do not bind to any
/// namespace and their processing is dependent on the element in which they're
/// defined).
/// /// [namespace prefix]: https://www.w3.org/TR/xml-names11/#dt-prefix /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName #[derive(Debug, Clone)] struct NamespaceBinding { /// Index of the namespace in the buffer start: usize, /// Length of the prefix /// * if greater than zero, then binds this namespace to the slice /// `[start..start + prefix_len]` in the buffer. /// * else defines the current default namespace. prefix_len: usize, /// The length of a namespace name (the URI) of this namespace declaration. /// Name started just after prefix and extend for `value_len` bytes. /// /// The XML standard [specifies] that an empty namespace value 'removes' a namespace declaration /// for the extent of its scope. For prefix declarations that's not very interesting, but it is /// vital for default namespace declarations. With `xmlns=""` you can revert back to the default /// behaviour of leaving unqualified element names unqualified. /// /// [specifies]: https://www.w3.org/TR/xml-names11/#scoping value_len: usize, /// Level of nesting at which this namespace was declared. The declaring element is included, /// i.e., a declaration on the document root has `level = 1`. /// This is used to pop the namespace when the element gets closed. level: u16, } impl NamespaceBinding { /// Get the namespace prefix, bound to this namespace declaration, or `None`, /// if this declaration is for default namespace (`xmlns="..."`). 
#[inline] fn prefix<'b>(&self, ns_buffer: &'b [u8]) -> Option> { if self.prefix_len == 0 { None } else { Some(Prefix(&ns_buffer[self.start..self.start + self.prefix_len])) } } /// Gets the namespace name (the URI) slice out of namespace buffer /// /// Returns `None` if namespace for this prefix was explicitly removed from /// scope, using `xmlns[:prefix]=""` #[inline] fn namespace<'ns>(&self, buffer: &'ns [u8]) -> ResolveResult<'ns> { if self.value_len == 0 { ResolveResult::Unbound } else { let start = self.start + self.prefix_len; ResolveResult::Bound(Namespace(&buffer[start..start + self.value_len])) } } } /// A storage for currently defined namespace bindings, which is used to resolve /// prefixes into namespaces. /// /// Holds all internal logic to push/pop namespaces with their levels. #[derive(Debug, Clone)] pub struct NamespaceResolver { /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. buffer: Vec, /// A stack of namespace bindings to prefixes that currently in scope bindings: Vec, /// The number of open tags at the moment. We need to keep track of this to know which namespace /// declarations to remove when we encounter an `End` event. nesting_level: u16, } /// That constant define the one of [reserved namespaces] for the xml standard. /// /// The prefix `xml` is by definition bound to the namespace name /// `http://www.w3.org/XML/1998/namespace`. It may, but need not, be declared, and must not be /// undeclared or bound to any other namespace name. Other prefixes must not be bound to this /// namespace name, and it must not be declared as the default namespace. /// /// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved const RESERVED_NAMESPACE_XML: (Prefix, Namespace) = ( Prefix(b"xml"), Namespace(b"http://www.w3.org/XML/1998/namespace"), ); /// That constant define the one of [reserved namespaces] for the xml standard. 
/// /// The prefix `xmlns` is used only to declare namespace bindings and is by definition bound /// to the namespace name `http://www.w3.org/2000/xmlns/`. It must not be declared or /// undeclared. Other prefixes must not be bound to this namespace name, and it must not be /// declared as the default namespace. Element names must not have the prefix `xmlns`. /// /// [reserved namespaces]: https://www.w3.org/TR/xml-names11/#xmlReserved const RESERVED_NAMESPACE_XMLNS: (Prefix, Namespace) = ( Prefix(b"xmlns"), Namespace(b"http://www.w3.org/2000/xmlns/"), ); impl Default for NamespaceResolver { fn default() -> Self { let mut buffer = Vec::new(); let mut bindings = Vec::new(); for ent in &[RESERVED_NAMESPACE_XML, RESERVED_NAMESPACE_XMLNS] { let prefix = ent.0.into_inner(); let uri = ent.1.into_inner(); bindings.push(NamespaceBinding { start: buffer.len(), prefix_len: prefix.len(), value_len: uri.len(), level: 0, }); buffer.extend(prefix); buffer.extend(uri); } Self { buffer, bindings, nesting_level: 0, } } } impl NamespaceResolver { /// Adds new binding of prefix to namespace, returns the result of operation. /// /// Binding will be added on current nesting level and will be removed, when /// level will be [popped out]. /// /// The operation may fail if you try to (re-)declare reserved prefixes `xml` and `xmlns`. /// /// Note, that method does not check if namespace was already added on that level. /// Use `resolver.bindings_of(resolver.level()).any()` if you want to check that. /// New definition will be added and replace the old. /// /// Implementation detail: memory occupied by old binding of that level still will be used. 
/// /// ``` /// # use pretty_assertions::assert_eq; /// # use quick_xml::name::{Namespace, NamespaceResolver, PrefixDeclaration, QName, ResolveResult}; /// # /// let mut resolver = NamespaceResolver::default(); /// // names without prefix are unbound by default /// assert_eq!( /// resolver.resolve_element(QName(b"name")).0, /// ResolveResult::Unbound, /// ); /// // names with undeclared prefix are unknown /// assert_eq!( /// resolver.resolve_element(QName(b"ns:name")).0, /// ResolveResult::Unknown(b"ns".to_vec()), /// ); /// /// resolver.add(PrefixDeclaration::Default, Namespace(b"example.com")); /// resolver.add(PrefixDeclaration::Named(b"ns"), Namespace(b"my:namespace")); /// /// assert_eq!( /// resolver.resolve_element(QName(b"name")).0, /// ResolveResult::Bound(Namespace(b"example.com")), /// ); /// assert_eq!( /// resolver.resolve_element(QName(b"ns:name")).0, /// ResolveResult::Bound(Namespace(b"my:namespace")), /// ); /// /// // adding empty namespace clears the binding /// resolver.add(PrefixDeclaration::Default, Namespace(b"")); /// resolver.add(PrefixDeclaration::Named(b"ns"), Namespace(b"")); /// /// assert_eq!( /// resolver.resolve_element(QName(b"name")).0, /// ResolveResult::Unbound, /// ); /// assert_eq!( /// resolver.resolve_element(QName(b"ns:name")).0, /// ResolveResult::Unknown(b"ns".to_vec()), /// ); /// ``` /// [popped out]: Self::pop pub fn add( &mut self, prefix: PrefixDeclaration, namespace: Namespace, ) -> Result<(), NamespaceError> { let level = self.nesting_level; match prefix { PrefixDeclaration::Default => { let start = self.buffer.len(); self.buffer.extend_from_slice(namespace.0); self.bindings.push(NamespaceBinding { start, prefix_len: 0, value_len: namespace.0.len(), level, }); } PrefixDeclaration::Named(b"xml") => { if namespace != RESERVED_NAMESPACE_XML.1 { // error, `xml` prefix explicitly set to different value return Err(NamespaceError::InvalidXmlPrefixBind(namespace.0.to_vec())); } // don't add another NamespaceEntry for the 
`xml` namespace prefix } PrefixDeclaration::Named(b"xmlns") => { // error, `xmlns` prefix explicitly set return Err(NamespaceError::InvalidXmlnsPrefixBind(namespace.0.to_vec())); } PrefixDeclaration::Named(prefix) => { // error, non-`xml` prefix set to xml uri if namespace == RESERVED_NAMESPACE_XML.1 { return Err(NamespaceError::InvalidPrefixForXml(prefix.to_vec())); } else // error, non-`xmlns` prefix set to xmlns uri if namespace == RESERVED_NAMESPACE_XMLNS.1 { return Err(NamespaceError::InvalidPrefixForXmlns(prefix.to_vec())); } let start = self.buffer.len(); self.buffer.extend_from_slice(prefix); self.buffer.extend_from_slice(namespace.0); self.bindings.push(NamespaceBinding { start, prefix_len: prefix.len(), value_len: namespace.0.len(), level, }); } } Ok(()) } /// Begins a new scope and add to it all [namespace bindings] that found in /// the specified start element. /// /// [namespace bindings]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn push(&mut self, start: &BytesStart) -> Result<(), NamespaceError> { self.nesting_level += 1; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' // (default namespace) attribute. for a in start.attributes().with_checks(false) { if let Ok(Attribute { key: k, value: v }) = a { if let Some(prefix) = k.as_namespace_binding() { self.add(prefix, Namespace(&v))?; } } else { break; } } Ok(()) } /// Ends a top-most scope by popping all [namespace bindings], that was added by /// last call to [`Self::push()`] and [`Self::add()`]. 
///
    /// [namespace bindings]: https://www.w3.org/TR/xml-names11/#dt-NSDecl
    pub fn pop(&mut self) {
        self.nesting_level = self.nesting_level.saturating_sub(1);
        let level = self.nesting_level;

        // Scan from the most deeply nested end for the last binding that
        // still belongs to a live scope.
        let last_kept = self.bindings.iter().rposition(|b| b.level <= level);
        let first_dropped = match last_kept {
            Some(pos) => pos + 1,
            // Nothing survives: forget every binding and its stored bytes
            None => {
                self.buffer.clear();
                self.bindings.clear();
                return;
            }
        };

        // If there is anything past the last live binding, cut both the
        // binding list and the byte buffer right where it starts.
        if let Some(binding) = self.bindings.get(first_dropped) {
            let buffer_len = binding.start;
            self.buffer.truncate(buffer_len);
            self.bindings.truncate(first_dropped);
        }
    }

    /// Resolves a potentially qualified **element name** or **attribute name**
    /// into _(namespace name, local name)_.
    ///
    /// _Qualified_ names have the form `local-name` or `prefix:local-name` where the `prefix`
    /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the name in question.
    ///
    /// The method returns following results depending on the `name` shape, the `use_default`
    /// flag and the presence of the default namespace on element or any of its parents:
    ///
    /// |use_default|`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-----------|-------------|-------------------|-----------------------|------------
    /// |`false`    |_(any)_      |`local-name`       |[`Unbound`]            |`local-name`
    /// |`false`    |_(any)_      |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    /// |`true`     |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |`true`     |Defined      |`local-name`       |[`Bound`] (to `xmlns`) |`local-name`
    /// |`true`     |_(any)_      |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Parameters
    /// - `name`: probably qualified name to resolve;
    /// - `use_default`: whether to try to translate `None` prefix to the currently default namespace
    ///   (bound using `xmlns="default namespace"`) or return [`ResolveResult::Unbound`].
    ///   For attribute names this should be set to `false` and for element names to `true`.
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of a name. Returned local name will be bound to the same
    ///   lifetime as the name in question.
    /// - returned namespace name will be bound to the resolver itself
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    #[inline]
    pub fn resolve<'n>(
        &self,
        name: QName<'n>,
        use_default: bool,
    ) -> (ResolveResult<'_>, LocalName<'n>) {
        // Split the name first, then look the prefix part up in the bindings
        let (local_name, prefix) = name.decompose();
        let namespace = self.resolve_prefix(prefix, use_default);
        (namespace, local_name)
    }

    /// Shorthand for `resolve(name, true)`: states explicitly that an element
    /// name (and not an attribute name) is being resolved.
    #[inline]
    pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
        self.resolve(name, true)
    }

    /// Convenient method to call `resolve(name, false)`.
May be used to clearly /// express that we want to resolve an attribute name, and not an element name. #[inline] pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) { self.resolve(name, false) } /// Finds a [namespace name] for a given event, if applicable. /// /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. /// For all other events the concept of namespace is not defined, so /// a [`ResolveResult::Unbound`] is returned. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// let event = reader.read_event().unwrap(); /// match reader.resolver().resolve_event(event) { /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag1").into()); /// } /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag2").into()); /// } /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` /// /// [namespace name]: https://www.w3.org/TR/xml-names11/#dt-NSName /// [`Empty`]: Event::Empty /// [`Start`]: Event::Start /// [`End`]: Event::End pub fn resolve_event<'i>(&self, event: Event<'i>) -> (ResolveResult<'_>, Event<'i>) { use Event::*; match event { Empty(e) => (self.resolve_prefix(e.name().prefix(), true), Empty(e)), Start(e) => (self.resolve_prefix(e.name().prefix(), true), Start(e)), End(e) => 
(self.resolve_prefix(e.name().prefix(), true), End(e)), e => (ResolveResult::Unbound, e), } } /// Resolves given optional prefix (usually got from [`QName`]) into a corresponding namespace. /// /// # Parameters /// - `prefix`: prefix to resolve, usually result of [`QName::prefix()`]; /// - `use_default`: whether to try to translate `None` prefix to the currently default namespace /// (bound using `xmlns="default namespace"`) or return [`ResolveResult::Unbound`]. /// For attribute names this should be set to `false` and for element names to `true`. pub fn resolve_prefix(&self, prefix: Option, use_default: bool) -> ResolveResult<'_> { // Find the last defined binding that corresponds to the given prefix let mut iter = self.bindings.iter().rev(); match (prefix, use_default) { // Attribute name has no explicit prefix -> Unbound (None, false) => ResolveResult::Unbound, // Element name has no explicit prefix -> find nearest xmlns binding (None, true) => match iter.find(|n| n.prefix_len == 0) { Some(n) => n.namespace(&self.buffer), None => ResolveResult::Unbound, }, // Attribute or element name with explicit prefix (Some(p), _) => match iter.find(|n| n.prefix(&self.buffer) == prefix) { Some(n) if n.value_len != 0 => n.namespace(&self.buffer), // Not found or binding reset (corresponds to `xmlns:p=""`) _ => ResolveResult::Unknown(p.into_inner().to_vec()), }, } } /// Returns all the bindings currently in effect except the default `xml` and `xmlns` bindings. /// /// # Examples /// /// This example shows what results the returned iterator would return after /// reading each event of a simple XML. 
///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::name::{Namespace, PrefixDeclaration};
    /// use quick_xml::NsReader;
    ///
    /// let src = "<root>
    ///   <a xmlns=\"a1\" xmlns:a=\"a2\">
    ///     <b xmlns=\"b1\" xmlns:b=\"b2\">
    ///       <c/>
    ///     </b>
    ///     <d/>
    ///   </a>
    /// </root>";
    /// let mut reader = NsReader::from_str(src);
    /// reader.config_mut().trim_text(true);
    /// // No bindings at the beginning
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![]);
    ///
    /// reader.read_resolved_event()?; // <root>
    /// // No bindings declared on root
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![]);
    ///
    /// reader.read_resolved_event()?; // <a>
    /// // Two bindings declared on "a"
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // <b>
    /// // The default prefix got overridden and new "b" prefix
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // <c/>
    /// // Still the same
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // </b>
    /// // Still the same
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // <d/>
    /// // <b> got closed so back to the bindings declared on <a>
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // </a>
    /// // Still the same
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // </root>
    /// // <a> got closed
    /// assert_eq!(reader.resolver().bindings().collect::<Vec<_>>(), vec![]);
    /// # quick_xml::Result::Ok(())
    /// ```
    #[inline]
    pub const fn bindings(&self) -> NamespaceBindingsIter<'_> {
        NamespaceBindingsIter {
            resolver: self,
            // We initialize the cursor to 2 to skip the two default namespaces xml: and xmlns:
            cursor: 2,
        }
    }

    /// Returns all the bindings on the specified level, including the default
    /// `xml` and `xmlns` bindings.
    ///
    /// # Parameters
    /// - `level`: the nesting level of an XML tag. The document without tags has
    ///   level 0, at which default bindings are declared. The root tag has level 1
    ///   and all other tags have levels > 1. If the specified level is greater than
    ///   the [current] one, an empty iterator is returned.
    ///
    /// # Examples
    ///
    /// This example shows what results the returned iterator would return on each
    /// level after reading some events of a simple XML.
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::name::{Namespace, PrefixDeclaration};
    /// use quick_xml::NsReader;
    ///
    /// let src = "<root>
    ///   <a xmlns=\"a1\" xmlns:a=\"a2\">
    ///     <b xmlns=\"b1\" xmlns:b=\"b2\">
    ///       <c/>
    ///     </b>
    ///     <d/>
    ///   </a>
    /// </root>";
    /// let mut reader = NsReader::from_str(src);
    /// reader.config_mut().trim_text(true);
    /// reader.read_resolved_event()?; // <root>
    /// reader.read_resolved_event()?; // <a>
    /// reader.read_resolved_event()?; // <b>
    /// reader.read_resolved_event()?; // <c/>
    ///
    /// // Default bindings at the beginning
    /// assert_eq!(reader.resolver().bindings_of(0).collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"xml"), Namespace(b"http://www.w3.org/XML/1998/namespace")),
    ///     (PrefixDeclaration::Named(b"xmlns"), Namespace(b"http://www.w3.org/2000/xmlns/")),
    /// ]);
    ///
    /// // No bindings declared on root
    /// assert_eq!(reader.resolver().bindings_of(1).collect::<Vec<_>>(), vec![]);
    ///
    /// // Two bindings declared on "a"
    /// assert_eq!(reader.resolver().bindings_of(2).collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    /// ]);
    ///
    /// // Two bindings declared on "b"
    /// assert_eq!(reader.resolver().bindings_of(3).collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2")),
    /// ]);
    ///
    /// // No bindings declared on "c"
    /// assert_eq!(reader.resolver().bindings_of(4).collect::<Vec<_>>(), vec![]);
    ///
    /// // No bindings on non-existent level
    /// assert_eq!(reader.resolver().bindings_of(5).collect::<Vec<_>>(), vec![]);
    /// # quick_xml::Result::Ok(())
    /// ```
    ///
    /// [current]: Self::level
    pub const fn bindings_of(&self, level: u16) -> NamespaceBindingsOfLevelIter<'_> {
        NamespaceBindingsOfLevelIter {
            resolver: self,
            // Start from the very first binding; the iterator filters by level itself
            cursor: 0,
            level,
        }
    }

    /// Returns the number of [`push`] calls that were not followed by [`pop`] calls.
    ///
    /// Due to use of `u16` for level number the number of nested tags in XML
    /// are limited by [`u16::MAX`], but that is enough for any real application.
/// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// # use quick_xml::events::BytesStart; /// # use quick_xml::name::{Namespace, NamespaceResolver, PrefixDeclaration, QName, ResolveResult}; /// # /// let mut resolver = NamespaceResolver::default(); /// /// assert_eq!(resolver.level(), 0); /// /// resolver.push(&BytesStart::new("tag")); /// assert_eq!(resolver.level(), 1); /// /// resolver.pop(); /// assert_eq!(resolver.level(), 0); /// /// // pop from empty resolver does nothing /// resolver.pop(); /// assert_eq!(resolver.level(), 0); /// ``` /// /// [`push`]: Self::push /// [`pop`]: Self::pop pub const fn level(&self) -> u16 { self.nesting_level } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Iterator on the current declared namespace bindings. Returns pairs of the _(prefix, namespace)_. /// /// See [`NamespaceResolver::bindings`] for documentation. #[derive(Debug, Clone)] pub struct NamespaceBindingsIter<'a> { resolver: &'a NamespaceResolver, cursor: usize, } impl<'a> Iterator for NamespaceBindingsIter<'a> { type Item = (PrefixDeclaration<'a>, Namespace<'a>); fn next(&mut self) -> Option<(PrefixDeclaration<'a>, Namespace<'a>)> { while let Some(binding) = self.resolver.bindings.get(self.cursor) { self.cursor += 1; // We increment for next read // We check if the key has not been overridden by having a look // at the namespaces declared after in the array let prefix = binding.prefix(&self.resolver.buffer); if self.resolver.bindings[self.cursor..] 
.iter() .any(|ne| prefix == ne.prefix(&self.resolver.buffer)) { continue; // Overridden } if let ResolveResult::Bound(namespace) = binding.namespace(&self.resolver.buffer) { let prefix = match prefix { Some(Prefix(prefix)) => PrefixDeclaration::Named(prefix), None => PrefixDeclaration::Default, }; return Some((prefix, namespace)); } } None // We have exhausted the array } fn size_hint(&self) -> (usize, Option) { // Real count could be less if some namespaces was overridden (0, Some(self.resolver.bindings.len() - self.cursor)) } } impl<'a> FusedIterator for NamespaceBindingsIter<'a> {} /// The previous name for [`NamespaceBindingsIter`]. pub type PrefixIter<'a> = NamespaceBindingsIter<'a>; /// Iterator on the declared namespace bindings on specified level. Returns pairs of the _(prefix, namespace)_. /// /// See [`NamespaceResolver::bindings_of`] for documentation. #[derive(Debug, Clone)] pub struct NamespaceBindingsOfLevelIter<'a> { resolver: &'a NamespaceResolver, cursor: usize, level: u16, } impl<'a> Iterator for NamespaceBindingsOfLevelIter<'a> { type Item = (PrefixDeclaration<'a>, Namespace<'a>); fn next(&mut self) -> Option<(PrefixDeclaration<'a>, Namespace<'a>)> { while let Some(binding) = self.resolver.bindings.get(self.cursor) { self.cursor += 1; // We increment for next read if binding.level < self.level { continue; } if binding.level > self.level { break; } if let ResolveResult::Bound(namespace) = binding.namespace(&self.resolver.buffer) { let prefix = match binding.prefix(&self.resolver.buffer) { Some(Prefix(prefix)) => PrefixDeclaration::Named(prefix), None => PrefixDeclaration::Default, }; return Some((prefix, namespace)); } } None // We have exhausted the array } fn size_hint(&self) -> (usize, Option) { // Real count could be less (0, Some(self.resolver.bindings.len() - self.cursor)) } } impl<'a> FusedIterator for NamespaceBindingsOfLevelIter<'a> {} //////////////////////////////////////////////////////////////////////////////////////////////////// 
#[cfg(test)]
mod namespaces {
    use super::*;
    use pretty_assertions::assert_eq;
    use ResolveResult::*;

    /// Unprefixed attribute names (resolved with `false` flag) never have a namespace
    /// according to <https://www.w3.org/TR/xml-names11/#defaulting>:
    ///
    /// > A default namespace declaration applies to all unprefixed element names
    /// > within its scope. Default namespace declarations do not apply directly
    /// > to attribute names; the interpretation of unprefixed attributes is
    /// > determined by the element on which they appear.
    mod unprefixed {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Basic tests that checks that basic resolver functionality is working
        #[test]
        fn basic() {
            let name = QName(b"simple");
            let ns = Namespace(b"default");

            let mut resolver = NamespaceResolver::default();
            // Offset past the built-in bindings already stored in the buffer
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns='default'", 0))
                .unwrap();
            assert_eq!(&resolver.buffer[s..], b"default");

            // Check that tags without namespaces does not change result
            resolver.push(&BytesStart::from_content("", 0)).unwrap();
            assert_eq!(&resolver.buffer[s..], b"default");
            resolver.pop();

            assert_eq!(&resolver.buffer[s..], b"default");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(ns), LocalName(b"simple"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unbound, LocalName(b"simple"))
            );
        }

        /// Test adding a second level of namespaces, which replaces the previous binding
        #[test]
        fn override_namespace() {
            let name = QName(b"simple");
            let old_ns = Namespace(b"old");
            let new_ns = Namespace(b"new");

            let mut resolver = NamespaceResolver::default();
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns='old'", 0))
                .unwrap();
            resolver
                .push(&BytesStart::from_content(" xmlns='new'", 0))
                .unwrap();

            assert_eq!(&resolver.buffer[s..], b"oldnew");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(new_ns), LocalName(b"simple"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unbound, LocalName(b"simple"))
            );

            resolver.pop();
            assert_eq!(&resolver.buffer[s..], b"old");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(old_ns), LocalName(b"simple"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unbound, LocalName(b"simple"))
            );
        }

        /// Test adding a second level of namespaces, which reset the previous binding
        /// to not bound state by specifying an empty namespace name.
        ///
        /// See <https://www.w3.org/TR/xml-names11/#scoping>
        #[test]
        fn reset() {
            let name = QName(b"simple");
            let old_ns = Namespace(b"old");

            let mut resolver = NamespaceResolver::default();
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns='old'", 0))
                .unwrap();
            resolver
                .push(&BytesStart::from_content(" xmlns=''", 0))
                .unwrap();

            // The empty default namespace adds no bytes to the buffer
            assert_eq!(&resolver.buffer[s..], b"old");
            assert_eq!(
                resolver.resolve(name, true),
                (Unbound, LocalName(b"simple"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unbound, LocalName(b"simple"))
            );

            resolver.pop();
            assert_eq!(&resolver.buffer[s..], b"old");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(old_ns), LocalName(b"simple"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unbound, LocalName(b"simple"))
            );
        }
    }

    mod declared_prefix {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Basic tests that checks that basic resolver functionality is working
        #[test]
        fn basic() {
            let name = QName(b"p:with-declared-prefix");
            let ns = Namespace(b"default");

            let mut resolver = NamespaceResolver::default();
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns:p='default'", 0))
                .unwrap();
            // Prefix bytes are stored right before the namespace bytes
            assert_eq!(&resolver.buffer[s..], b"pdefault");

            // Check that tags without namespaces does not change result
            resolver.push(&BytesStart::from_content("", 0)).unwrap();
            assert_eq!(&resolver.buffer[s..], b"pdefault");
            resolver.pop();

            assert_eq!(&resolver.buffer[s..], b"pdefault");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(ns), LocalName(b"with-declared-prefix"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Bound(ns), LocalName(b"with-declared-prefix"))
            );
        }

        /// Test adding a second level of namespaces, which replaces the previous binding
        #[test]
        fn override_namespace() {
            let name = QName(b"p:with-declared-prefix");
            let old_ns = Namespace(b"old");
            let new_ns = Namespace(b"new");

            let mut resolver = NamespaceResolver::default();
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns:p='old'", 0))
                .unwrap();
            resolver
                .push(&BytesStart::from_content(" xmlns:p='new'", 0))
                .unwrap();

            assert_eq!(&resolver.buffer[s..], b"poldpnew");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(new_ns), LocalName(b"with-declared-prefix"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Bound(new_ns), LocalName(b"with-declared-prefix"))
            );

            resolver.pop();
            assert_eq!(&resolver.buffer[s..], b"pold");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(old_ns), LocalName(b"with-declared-prefix"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Bound(old_ns), LocalName(b"with-declared-prefix"))
            );
        }

        /// Test adding a second level of namespaces, which reset the previous binding
        /// to not bound state by specifying an empty namespace name.
        ///
        /// See <https://www.w3.org/TR/xml-names11/#scoping>
        #[test]
        fn reset() {
            let name = QName(b"p:with-declared-prefix");
            let old_ns = Namespace(b"old");

            let mut resolver = NamespaceResolver::default();
            let s = resolver.buffer.len();

            resolver
                .push(&BytesStart::from_content(" xmlns:p='old'", 0))
                .unwrap();
            resolver
                .push(&BytesStart::from_content(" xmlns:p=''", 0))
                .unwrap();

            // The reset binding stores only its prefix (`p`), no namespace bytes
            assert_eq!(&resolver.buffer[s..], b"poldp");
            assert_eq!(
                resolver.resolve(name, true),
                (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Unknown(b"p".to_vec()), LocalName(b"with-declared-prefix"))
            );

            resolver.pop();
            assert_eq!(&resolver.buffer[s..], b"pold");
            assert_eq!(
                resolver.resolve(name, true),
                (Bound(old_ns), LocalName(b"with-declared-prefix"))
            );
            assert_eq!(
                resolver.resolve(name, false),
                (Bound(old_ns), LocalName(b"with-declared-prefix"))
            );
        }
    }

    /// Tests for `xml` and `xmlns` built-in prefixes.
    ///
    /// See <https://www.w3.org/TR/xml-names11/#xmlReserved>
    mod builtin_prefixes {
        use super::*;

        mod xml {
            use super::*;
            use pretty_assertions::assert_eq;

            /// `xml` prefix are always defined, it is not required to define it explicitly.
            #[test]
            fn undeclared() {
                let name = QName(b"xml:random");
                let namespace = RESERVED_NAMESPACE_XML.1;

                let resolver = NamespaceResolver::default();

                assert_eq!(
                    resolver.resolve(name, true),
                    (Bound(namespace), LocalName(b"random"))
                );

                assert_eq!(
                    resolver.resolve(name, false),
                    (Bound(namespace), LocalName(b"random"))
                );
            }

            /// `xml` prefix can be declared but it must be bound to the value
            /// `http://www.w3.org/XML/1998/namespace`
            #[test]
            fn rebound_to_correct_ns() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                resolver.push(
                    &BytesStart::from_content(
                        " xmlns:xml='http://www.w3.org/XML/1998/namespace'",
                        0,
                    ),
                ).expect("`xml` prefix should be possible to bound to `http://www.w3.org/XML/1998/namespace`");
                // An accepted re-declaration of `xml` stores nothing new
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// `xml` prefix cannot be re-declared to another namespace
            #[test]
            fn rebound_to_incorrect_ns() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(
                        " xmlns:xml='not_correct_namespace'",
                        0,
                    )),
                    Err(NamespaceError::InvalidXmlPrefixBind(
                        b"not_correct_namespace".to_vec()
                    )),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// `xml` prefix cannot be unbound
            #[test]
            fn unbound() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(" xmlns:xml=''", 0)),
                    Err(NamespaceError::InvalidXmlPrefixBind(b"".to_vec())),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// Other prefix cannot be bound to `xml` namespace
            #[test]
            fn other_prefix_bound_to_xml_namespace() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(
                        " xmlns:not_xml='http://www.w3.org/XML/1998/namespace'",
                        0,
                    )),
                    Err(NamespaceError::InvalidPrefixForXml(b"not_xml".to_vec())),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }
        }

        mod xmlns {
            use super::*;
            use pretty_assertions::assert_eq;

            /// `xmlns` prefix are always defined, it is forbidden to define it explicitly
            #[test]
            fn undeclared() {
                let name = QName(b"xmlns:random");
                let namespace = RESERVED_NAMESPACE_XMLNS.1;

                let resolver = NamespaceResolver::default();

                assert_eq!(
                    resolver.resolve(name, true),
                    (Bound(namespace), LocalName(b"random"))
                );

                assert_eq!(
                    resolver.resolve(name, false),
                    (Bound(namespace), LocalName(b"random"))
                );
            }

            /// `xmlns` prefix cannot be re-declared even to its own namespace
            #[test]
            fn rebound_to_correct_ns() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(
                        " xmlns:xmlns='http://www.w3.org/2000/xmlns/'",
                        0,
                    )),
                    Err(NamespaceError::InvalidXmlnsPrefixBind(
                        b"http://www.w3.org/2000/xmlns/".to_vec()
                    )),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// `xmlns` prefix cannot be re-declared
            #[test]
            fn rebound_to_incorrect_ns() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(
                        " xmlns:xmlns='not_correct_namespace'",
                        0,
                    )),
                    Err(NamespaceError::InvalidXmlnsPrefixBind(
                        b"not_correct_namespace".to_vec()
                    )),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// `xmlns` prefix cannot be unbound
            #[test]
            fn unbound() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(" xmlns:xmlns=''", 0)),
                    Err(NamespaceError::InvalidXmlnsPrefixBind(b"".to_vec())),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }

            /// Other prefix cannot be bound to `xmlns` namespace
            #[test]
            fn other_prefix_bound_to_xmlns_namespace() {
                let mut resolver = NamespaceResolver::default();
                let s = resolver.buffer.len();
                assert_eq!(
                    resolver.push(&BytesStart::from_content(
                        " xmlns:not_xmlns='http://www.w3.org/2000/xmlns/'",
                        0,
                    )),
                    Err(NamespaceError::InvalidPrefixForXmlns(b"not_xmlns".to_vec())),
                );
                assert_eq!(&resolver.buffer[s..], b"");
            }
        }
    }

    /// A prefix that was never declared resolves to `Unknown` for both
    /// element and attribute names.
    #[test]
    fn undeclared_prefix() {
        let name = QName(b"unknown:prefix");

        let resolver = NamespaceResolver::default();

        // A fresh resolver holds only the two built-in bindings
        assert_eq!(
            resolver.buffer,
            b"xmlhttp://www.w3.org/XML/1998/namespacexmlnshttp://www.w3.org/2000/xmlns/"
        );
        assert_eq!(
            resolver.resolve(name, true),
            (Unknown(b"unknown".to_vec()), LocalName(b"prefix"))
        );
        assert_eq!(
            resolver.resolve(name, false),
            (Unknown(b"unknown".to_vec()), LocalName(b"prefix"))
        );
    }

    /// Checks how the QName is decomposed to a prefix and a local name
    #[test]
    fn prefix_and_local_name() {
        let name = QName(b"foo:bus");
        assert_eq!(name.prefix(), Some(Prefix(b"foo")));
        assert_eq!(name.local_name(), LocalName(b"bus"));
        assert_eq!(name.decompose(), (LocalName(b"bus"), Some(Prefix(b"foo"))));

        let name = QName(b"foo:");
        assert_eq!(name.prefix(), Some(Prefix(b"foo")));
        assert_eq!(name.local_name(), LocalName(b""));
        assert_eq!(name.decompose(), (LocalName(b""), Some(Prefix(b"foo"))));

        let name = QName(b":foo");
        assert_eq!(name.prefix(), Some(Prefix(b"")));
        assert_eq!(name.local_name(), LocalName(b"foo"));
        assert_eq!(name.decompose(), (LocalName(b"foo"), Some(Prefix(b""))));

        // Only the first colon separates the prefix from the local name
        let name = QName(b"foo:bus:baz");
        assert_eq!(name.prefix(), Some(Prefix(b"foo")));
        assert_eq!(name.local_name(), LocalName(b"bus:baz"));
        assert_eq!(
            name.decompose(),
            (LocalName(b"bus:baz"), Some(Prefix(b"foo")))
        );
    }
}
quick-xml-0.38.4/src/parser/element.rs000064400000000000000000000106651046102023000157240ustar 00000000000000//! Contains a parser for an XML element.

use crate::errors::SyntaxError;
use crate::parser::Parser;

/// A parser that search a `>` symbol in the slice outside of quoted regions.
///
/// The parser considers two quoted regions: a double-quoted (`"..."`) and
/// a single-quoted (`'...'`) region. Matches found inside those regions are not
/// considered as results.
Each region starts and ends by its quote symbol, /// which cannot be escaped (but can be encoded as XML character entity or named /// entity. Anyway, that encoding does not contain literal quotes). /// /// To use a parser create an instance of parser and [`feed`] data into it. /// After successful search the parser will return [`Some`] with position of /// found symbol. If search is unsuccessful, a [`None`] will be returned. You /// typically would expect positive result of search, so that you should feed /// new data until you get it. /// /// NOTE: after successful match the parser does not returned to the initial /// state and should not be used anymore. Create a new parser if you want to perform /// new search. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::parser::{ElementParser, Parser}; /// /// let mut parser = ElementParser::default(); /// /// // Parse `and the text follow...` /// // splitted into three chunks /// assert_eq!(parser.feed(b"and the text follow..."), Some(8)); /// // ^ ^ /// // 0 8 /// ``` /// /// [`feed`]: Self::feed() #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ElementParser { /// The initial state (inside element, but outside of attribute value). Outside, /// Inside a single-quoted region (`'...'`). SingleQ, /// Inside a double-quoted region (`"..."`). DoubleQ, } impl Parser for ElementParser { /// Returns number of consumed bytes or `None` if `>` was not found in `bytes`. 
#[inline] fn feed(&mut self, bytes: &[u8]) -> Option { for i in memchr::memchr3_iter(b'>', b'\'', b'"', bytes) { *self = match (*self, bytes[i]) { // only allowed to match `>` while we are in state `Outside` (Self::Outside, b'>') => return Some(i), (Self::Outside, b'\'') => Self::SingleQ, (Self::Outside, b'\"') => Self::DoubleQ, // the only end_byte that gets us out if the same character (Self::SingleQ, b'\'') | (Self::DoubleQ, b'"') => Self::Outside, // all other bytes: no state change _ => continue, }; } None } #[inline] fn eof_error() -> SyntaxError { SyntaxError::UnclosedTag } } impl Default for ElementParser { #[inline] fn default() -> Self { Self::Outside } } #[test] fn parse() { use pretty_assertions::assert_eq; use ElementParser::*; /// Returns `Ok(pos)` with the position in the buffer where element is ended. /// /// Returns `Err(internal_state)` if parsing does not done yet. fn parse_element(bytes: &[u8], mut parser: ElementParser) -> Result { match parser.feed(bytes) { Some(i) => Ok(i), None => Err(parser), } } assert_eq!(parse_element(b"", Outside), Err(Outside)); assert_eq!(parse_element(b"", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"'", Outside), Err(SingleQ)); assert_eq!(parse_element(b"'", SingleQ), Err(Outside)); assert_eq!(parse_element(b"'", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"\"", Outside), Err(DoubleQ)); assert_eq!(parse_element(b"\"", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"\"", DoubleQ), Err(Outside)); assert_eq!(parse_element(b">", Outside), Ok(0)); assert_eq!(parse_element(b">", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b">", DoubleQ), Err(DoubleQ)); assert_eq!(parse_element(b"''>", Outside), Ok(2)); assert_eq!(parse_element(b"''>", SingleQ), Err(SingleQ)); assert_eq!(parse_element(b"''>", DoubleQ), Err(DoubleQ)); } quick-xml-0.38.4/src/parser/mod.rs000064400000000000000000000020151046102023000150400ustar 00000000000000//! 
Contains low-level parsers of different XML pieces. use crate::errors::SyntaxError; mod element; mod pi; pub use element::ElementParser; pub use pi::PiParser; /// Used to decouple reading of data from data source and parsing XML structure from it. /// This is a state preserved between getting chunks of bytes from the reader. /// /// This trait is implemented for every parser that processes piece of XML grammar. pub trait Parser { /// Process new data and try to determine end of the parsed thing. /// /// Returns position of the end of thing in `bytes` in case of successful search /// and `None` otherwise. /// /// # Parameters /// - `bytes`: a slice to find the end of a thing. /// Should contain text in ASCII-compatible encoding fn feed(&mut self, bytes: &[u8]) -> Option; /// Returns parse error produced by this parser in case of reaching end of /// input without finding the end of a parsed thing. fn eof_error() -> SyntaxError; } quick-xml-0.38.4/src/parser/pi.rs000064400000000000000000000101211046102023000146660ustar 00000000000000//! Contains a parser for an XML processing instruction. use crate::errors::SyntaxError; use crate::parser::Parser; /// A parser that search a `?>` sequence in the slice. /// /// To use a parser create an instance of parser and [`feed`] data into it. /// After successful search the parser will return [`Some`] with position where /// processing instruction is ended (the position after `?>`). If search was /// unsuccessful, a [`None`] will be returned. You typically would expect positive /// result of search, so that you should feed new data until you get it. /// /// NOTE: after successful match the parser does not returned to the initial /// state and should not be used anymore. Create a new parser if you want to perform /// new search. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::parser::{Parser, PiParser}; /// /// let mut parser = PiParser::default(); /// /// // Parse ` and ?' 
inside?>and the text follow...` /// // splitted into three chunks /// assert_eq!(parser.feed(b" and ?"), None); /// // ...get another chunk of data /// assert_eq!(parser.feed(b"' inside?>and the text follow..."), Some(9)); /// // ^ ^ /// // 0 9 /// ``` /// /// [`feed`]: Self::feed() #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct PiParser( /// A flag that indicates was the `bytes` in the previous attempt to find the /// end ended with `?`. pub bool, ); impl Parser for PiParser { /// Determines the end position of a processing instruction in the provided slice. /// Processing instruction ends on the first occurrence of `?>` which cannot be /// escaped. /// /// Returns position after the `?>` or `None` if such sequence was not found. /// /// [Section 2.6]: Parameter entity references MUST NOT be recognized within /// processing instructions, so parser do not search for them. /// /// # Parameters /// - `bytes`: a slice to find the end of a processing instruction. /// Should contain text in ASCII-compatible encoding /// /// [Section 2.6]: https://www.w3.org/TR/xml11/#sec-pi #[inline] fn feed(&mut self, bytes: &[u8]) -> Option { for i in memchr::memchr_iter(b'>', bytes) { match i { 0 if self.0 => return Some(0), // If the previous byte is `?`, then we found `?>` i if i > 0 && bytes[i - 1] == b'?' => return Some(i), _ => {} } } self.0 = bytes.last().copied() == Some(b'?'); None } #[inline] fn eof_error() -> SyntaxError { SyntaxError::UnclosedPIOrXmlDecl } } #[test] fn pi() { use pretty_assertions::assert_eq; /// Returns `Ok(pos)` with the position in the buffer where processing /// instruction is ended. /// /// Returns `Err(internal_state)` if parsing is not done yet. fn parse_pi(bytes: &[u8], had_question_mark: bool) -> Result { let mut parser = PiParser(had_question_mark); match parser.feed(bytes) { Some(i) => Ok(i), None => Err(parser.0), } } // Comments shows which character was seen the last before calling `feed`. 
// `x` means any character, pipe denotes start of the buffer that passed to `feed` assert_eq!(parse_pi(b"", false), Err(false)); // x| assert_eq!(parse_pi(b"", true), Err(false)); // ?| assert_eq!(parse_pi(b"?", false), Err(true)); // x|? assert_eq!(parse_pi(b"?", true), Err(true)); // ?|? assert_eq!(parse_pi(b">", false), Err(false)); // x|> assert_eq!(parse_pi(b">", true), Ok(0)); // ?|> assert_eq!(parse_pi(b"?>", false), Ok(1)); // x|?> assert_eq!(parse_pi(b"?>", true), Ok(1)); // ?|?> assert_eq!(parse_pi(b">?>", false), Ok(2)); // x|>?> assert_eq!(parse_pi(b">?>", true), Ok(0)); // ?|>?> } quick-xml-0.38.4/src/reader/async_tokio.rs000064400000000000000000000403331046102023000165560ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a [`AsyncBufRead`] //! as underlying byte stream. This reader fully implements async/await so reading //! can use non-blocking I/O. use std::pin::Pin; use std::task::{Context, Poll}; use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, ReadBuf}; use crate::errors::{Error, IllFormedError, Result, SyntaxError}; use crate::events::{BytesRef, Event}; use crate::name::{QName, ResolveResult}; use crate::parser::{ElementParser, Parser, PiParser}; use crate::reader::buffered_reader::impl_buffered_source; use crate::reader::{ BangType, BinaryStream, NsReader, ParseState, ReadRefResult, ReadTextResult, Reader, Span, }; use crate::utils::is_whitespace; /// A struct for read XML asynchronously from an [`AsyncBufRead`]. /// /// Having own struct allows us to implement anything without risk of name conflicts /// and does not suffer from the impossibility of having `async` in traits. 
struct TokioAdapter<'a, R>(&'a mut R); impl<'a, R: AsyncBufRead + Unpin> TokioAdapter<'a, R> { impl_buffered_source!('b, 0, async, await); } //////////////////////////////////////////////////////////////////////////////////////////////////// impl<'r, R> AsyncRead for BinaryStream<'r, R> where R: AsyncRead + Unpin, { fn poll_read( self: Pin<&mut Self>, cx: &mut Context<'_>, buf: &mut ReadBuf<'_>, ) -> Poll> { let start = buf.remaining(); let this = self.get_mut(); let poll = Pin::new(&mut *this.inner).poll_read(cx, buf); // If something was read, update offset if let Poll::Ready(Ok(_)) = poll { let amt = start - buf.remaining(); *this.offset += amt as u64; } poll } } impl<'r, R> AsyncBufRead for BinaryStream<'r, R> where R: AsyncBufRead + Unpin, { #[inline] fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { Pin::new(&mut *self.get_mut().inner).poll_fill_buf(cx) } #[inline] fn consume(self: Pin<&mut Self>, amt: usize) { let this = self.get_mut(); this.inner.consume(amt); *this.offset += amt as u64; } } //////////////////////////////////////////////////////////////////////////////////////////////////// impl Reader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// This is the main entry point for reading XML `Event`s when using an async reader. /// /// See the documentation of [`read_event_into()`] for more information. /// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// // This explicitly uses `from_reader("...".as_bytes())` to use a buffered /// // reader instead of relying on the zero-copy optimizations for reading /// // from byte slices, which provides the sync interface anyway. 
/// let mut reader = Reader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await { /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// Ok(Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: Reader::read_event_into pub async fn read_event_into_async<'b>( &mut self, mut buf: &'b mut Vec, ) -> Result> { read_event_impl!( self, buf, TokioAdapter(&mut self.reader), read_until_close_async, await ) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_reader(r#" /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... 
/// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Eof); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { Ok(read_to_end!( self, end, buf, read_event_into_async, { buf.clear(); }, await )) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { read_until_close!(self, buf, TokioAdapter(&mut self.reader), await) } } //////////////////////////////////////////////////////////////////////////////////////////////////// impl NsReader { /// An asynchronous version of [`read_event_into()`]. Reads the next event into /// given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolver().resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into_async()`] instead if you want /// to resolve namespace as soon as you get an event. 
/// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into_async(&mut buf).await.unwrap() { /// Event::Start(e) => { /// count += 1; /// let (ns, local) = reader.resolver().resolve_element(e.name()); /// match local.as_ref() { /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), /// _ => unreachable!(), /// } /// } /// Event::Text(e) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_event_into()`]: NsReader::read_event_into /// [`resolver().resolve_element()`]: crate::name::NamespaceResolver::resolve_element /// [`read_resolved_event_into_async()`]: Self::read_resolved_event_into_async pub async fn read_event_into_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { self.pop(); let event = self.reader.read_event_into_async(buf).await; self.process_event(event) } /// An asynchronous version of [`read_to_end_into()`]. /// Reads asynchronously until end element is found using provided buffer as /// intermediate storage for events content. This function is supposed to be /// called after you already read a [`Start`] event. /// /// See the documentation of [`read_to_end_into()`] for more information. /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. 
/// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!( /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// # }) // tokio_test::block_on /// ``` /// /// [`read_to_end_into()`]: Self::read_to_end_into /// [`Start`]: Event::Start pub async fn read_to_end_into_async<'n>( &mut self, // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` end: QName<'n>, buf: &mut Vec, ) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation self.reader.read_to_end_into_async(end, buf).await } /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next /// event into given buffer asynchronously and resolves its namespace (if applicable). 
/// /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. /// For all other events the concept of namespace is not defined, so /// a [`ResolveResult::Unbound`] is returned. /// /// If you are not interested in namespaces, you can use [`read_event_into_async()`] /// which will not automatically resolve namespaces for you. /// /// # Examples /// /// ``` /// # tokio_test::block_on(async { /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_reader(r#" /// /// Test /// Test 2 /// /// "#.as_bytes()); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_resolved_event_into_async(&mut buf).await.unwrap() { /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag1").into()); /// } /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag2").into()); /// } /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// # }) // tokio_test::block_on /// ``` /// /// [`read_resolved_event_into()`]: NsReader::read_resolved_event_into /// [`Start`]: Event::Start /// [`Empty`]: Event::Empty /// [`End`]: Event::End /// [`read_event_into_async()`]: Self::read_event_into_async pub async fn read_resolved_event_into_async<'ns, 'b>( // Name 'ns lifetime, because otherwise we get an error // "implicit elided lifetime not allowed here" on ResolveResult &'ns mut self, buf: &'b mut Vec, ) -> Result<(ResolveResult<'ns>, Event<'b>)> { let event = 
self.read_event_into_async(buf).await?; Ok(self.resolver().resolve_event(event)) } } #[cfg(test)] mod test { use super::TokioAdapter; use crate::reader::test::check; check!( #[tokio::test] read_event_into_async, read_until_close_async, TokioAdapter, &mut Vec::new(), async, await ); #[test] fn test_future_is_send() { // This test should just compile, no actual runtime checks are performed here. use super::*; use tokio::io::BufReader; fn check_send(_: T) {} let input = vec![]; let mut reading_buf = vec![]; let mut reader = Reader::from_reader(BufReader::new(input.as_slice())); check_send(reader.read_event_into_async(&mut reading_buf)); } } quick-xml-0.38.4/src/reader/buffered_reader.rs000064400000000000000000000474331046102023000173500ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a [`BufRead`] as //! underlying byte stream. use std::fs::File; use std::io::{self, BufRead, BufReader}; use std::path::Path; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; use crate::parser::Parser; use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource}; use crate::utils::is_whitespace; macro_rules! impl_buffered_source { ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => { #[cfg(not(feature = "encoding"))] #[inline] $($async)? fn remove_utf8_bom(&mut self) -> io::Result<()> { use crate::encoding::UTF8_BOM; loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => { if n.starts_with(UTF8_BOM) { self $(.$reader)? .consume(UTF8_BOM.len()); } Ok(()) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(e), }; } } #[cfg(feature = "encoding")] #[inline] $($async)? fn detect_encoding(&mut self) -> io::Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => if let Some((enc, bom_len)) = crate::encoding::detect_encoding(n) { self $(.$reader)? 
.consume(bom_len); Ok(Some(enc)) } else { Ok(None) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(e), }; } } #[inline] $($async)? fn read_text $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut u64, ) -> ReadTextResult<'b, &'b mut Vec> { let mut read = 0; let start = buf.len(); loop { let available = match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => break, Ok(n) => n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return ReadTextResult::Err(e); } }; // Search for start of markup or an entity or character reference match memchr::memchr2(b'<', b'&', available) { // Special handling is needed only on the first iteration. // On next iterations we already read something and should emit Text event Some(0) if read == 0 && available[0] == b'<' => { self $(.$reader)? .consume(1); *position += 1; return ReadTextResult::Markup(buf); } // Do not consume `&` because it may be lone and we would be need to // return it as part of Text event Some(0) if read == 0 => return ReadTextResult::Ref(buf), Some(i) if available[i] == b'<' => { buf.extend_from_slice(&available[..i]); // +1 to skip `<` let used = i + 1; self $(.$reader)? .consume(used); read += used as u64; *position += read; return ReadTextResult::UpToMarkup(&buf[start..]); } Some(i) => { buf.extend_from_slice(&available[..i]); self $(.$reader)? .consume(i); read += i as u64; *position += read; return ReadTextResult::UpToRef(&buf[start..]); } None => { buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used as u64; } } } *position += read; ReadTextResult::UpToEof(&buf[start..]) } #[inline] $($async)? fn read_ref $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut u64, ) -> ReadRefResult<'b> { let mut read = 0; let start = buf.len(); loop { let available = match self $(.$reader)? .fill_buf() $(.$await)? 
{ Ok(n) if n.is_empty() => break, Ok(n) => n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return ReadRefResult::Err(e); } }; // `read_ref` called when the first character is `&`, so we // should explicitly skip it at first iteration lest we confuse // it with the end if read == 0 { debug_assert_eq!( available.first(), Some(&b'&'), "`read_ref` must be called at `&`" ); // If that ampersand is lone, then it will be part of text // and we should keep it buf.push(b'&'); self $(.$reader)? .consume(1); read += 1; continue; } match memchr::memchr3(b';', b'&', b'<', available) { // Do not consume `&` because it may be lone and we would be need to // return it as part of Text event Some(i) if available[i] == b'&' => { buf.extend_from_slice(&available[..i]); self $(.$reader)? .consume(i); read += i as u64; *position += read; return ReadRefResult::UpToRef(&buf[start..]); } Some(i) => { let is_end = available[i] == b';'; buf.extend_from_slice(&available[..i]); // +1 -- skip the end `;` or `<` let used = i + 1; self $(.$reader)? .consume(used); read += used as u64; *position += read; return if is_end { ReadRefResult::Ref(&buf[start..]) } else { ReadRefResult::UpToMarkup(&buf[start..]) }; } None => { buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used as u64; } } } *position += read; ReadRefResult::UpToEof(&buf[start..]) } #[inline] $($async)? fn read_with<$($lf,)? P: Parser>( &mut self, mut parser: P, buf: &'b mut Vec, position: &mut u64, ) -> Result<&'b [u8]> { let mut read = 0; let start = buf.len(); loop { let available = match self $(.$reader)? .fill_buf() $(.$await)? 
{ Ok(n) if n.is_empty() => break, Ok(n) => n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return Err(Error::Io(e.into())); } }; if let Some(i) = parser.feed(available) { buf.extend_from_slice(&available[..i]); // +1 for `>` which we do not include self $(.$reader)? .consume(i + 1); read += i as u64 + 1; *position += read; return Ok(&buf[start..]); } // The `>` symbol not yet found, continue reading buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used as u64; } *position += read; Err(Error::Syntax(P::eof_error())) } #[inline] $($async)? fn read_bang_element $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut u64, ) -> Result<(BangType, &'b [u8])> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. let start = buf.len(); let mut read = 1; buf.push(b'!'); self $(.$reader)? .consume(1); let mut bang_type = BangType::new(self.peek_one() $(.$await)? ?)?; loop { match self $(.$reader)? .fill_buf() $(.$await)? { // Note: Do not update position, so the error points to // somewhere sane rather than at the EOF Ok(n) if n.is_empty() => break, Ok(available) => { // We only parse from start because we don't want to consider // whatever is in the buffer before the bang element if let Some((consumed, used)) = bang_type.parse(&buf[start..], available) { buf.extend_from_slice(consumed); self $(.$reader)? .consume(used); read += used as u64; *position += read; return Ok((bang_type, &buf[start..])); } else { buf.extend_from_slice(available); let used = available.len(); self $(.$reader)? .consume(used); read += used as u64; } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => { *position += read; return Err(Error::Io(e.into())); } } } *position += read; Err(bang_type.to_err().into()) } #[inline] $($async)? fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> { loop { break match self $(.$reader)? 
.fill_buf() $(.$await)? { Ok(n) => { let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); if count > 0 { self $(.$reader)? .consume(count); *position += count as u64; continue; } else { Ok(()) } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(e), }; } } #[inline] $($async)? fn peek_one(&mut self) -> io::Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => Ok(n.first().cloned()), Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => Err(e), }; } } }; } // Make it public for use in async implementations. // New rustc reports // > warning: the item `impl_buffered_source` is imported redundantly // so make it public only when async feature is enabled #[cfg(feature = "async-tokio")] pub(super) use impl_buffered_source; /// Implementation of `XmlSource` for any `BufRead` reader using a user-given /// `Vec` as buffer that will be borrowed by events. impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { impl_buffered_source!(); } //////////////////////////////////////////////////////////////////////////////////////////////////// /// This is an implementation for reading from a [`BufRead`] as underlying byte stream. impl Reader { /// Reads the next `Event`. /// /// This is the main entry point for reading XML `Event`s. /// /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` /// internally). /// /// Having the possibility to control the internal buffers gives you some additional benefits /// such as: /// /// - Reduce the number of allocations by reusing the same buffer. For constrained systems, /// you can call `buf.clear()` once you are done with processing the event (typically at the /// end of your loop). /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); /// reader.config_mut().trim_text(true); /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// Ok(Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { self.read_event_impl(buf) } /// Reads until end element is found using provided buffer as intermediate /// storage for events content. This function is supposed to be called after /// you already read a [`Start`] event. /// /// Returns a span that cover content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// If a corresponding [`End`] event is not found, an error of type [`Error::IllFormed`] /// will be returned. In particularly, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// If your reader created from a string slice or byte array slice, it is /// better to use [`read_to_end()`] method, because it will not copy bytes /// into intermediate buffer. /// /// The provided `buf` buffer will be filled only by one event content at time. /// Before reading of each event the buffer will be cleared. 
If you know an /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close /// ``, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. 
/// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`read_to_end()`]: Self::read_to_end /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements /// [`check_end_names`]: crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { Ok(read_to_end!(self, end, buf, read_event_impl, { buf.clear(); })) } } impl Reader> { /// Creates an XML reader from a file path. pub fn from_file>(path: P) -> Result { let file = File::open(path)?; let reader = BufReader::new(file); Ok(Self::from_reader(reader)) } } #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, &mut Vec::new() ); } quick-xml-0.38.4/src/reader/mod.rs000064400000000000000000002725701046102023000150250ustar 00000000000000//! Contains high-level interface for a pull-based XML parser. #[cfg(feature = "encoding")] use encoding_rs::Encoding; use std::io; use std::ops::Range; use crate::encoding::Decoder; use crate::errors::{Error, IllFormedError, SyntaxError}; use crate::events::{BytesRef, Event}; use crate::parser::{ElementParser, Parser, PiParser}; use crate::reader::state::ReaderState; /// A struct that holds a parser configuration. /// /// Current parser configuration can be retrieved by calling [`Reader::config()`] /// and changed by changing properties of the object returned by a call to /// [`Reader::config_mut()`]. 
/// /// [`Reader::config()`]: crate::reader::Reader::config /// [`Reader::config_mut()`]: crate::reader::Reader::config_mut #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))] #[non_exhaustive] pub struct Config { /// Whether lone ampersand character (without a paired semicolon) should be /// allowed in textual content. Unless enabled, in case of a dangling ampersand, /// the [`Error::IllFormed(UnclosedReference)`] is returned from read methods. /// /// Default: `false` /// /// # Example /// /// ``` /// # use quick_xml::events::{BytesRef, BytesText, Event}; /// # use quick_xml::reader::Reader; /// # use pretty_assertions::assert_eq; /// let mut reader = Reader::from_str("text with & & & alone"); /// reader.config_mut().allow_dangling_amp = true; /// /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new("text with "))); /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& "))); /// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(BytesRef::new("amp"))); /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::new(" "))); /// assert_eq!(reader.read_event().unwrap(), Event::Text(BytesText::from_escaped("& alone"))); /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Error::IllFormed(UnclosedReference)`]: crate::errors::IllFormedError::UnclosedReference pub allow_dangling_amp: bool, /// Whether unmatched closing tag names should be allowed. Unless enabled, /// in case of a dangling end tag, the [`Error::IllFormed(UnmatchedEndTag)`] /// is returned from read methods. /// /// When set to `true`, it won't check if a closing tag has a corresponding /// opening tag at all. For example, `` will be permitted. /// /// Note that the emitted [`End`] event will not be modified if this is enabled, /// ie. it will contain the data of the unmatched end tag. 
/// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. /// /// Default: `false` /// /// [`Error::IllFormed(UnmatchedEndTag)`]: crate::errors::IllFormedError::UnmatchedEndTag /// [`End`]: crate::events::Event::End pub allow_unmatched_ends: bool, /// Whether comments should be validated. If enabled, in case of invalid comment /// [`Error::IllFormed(DoubleHyphenInComment)`] is returned from read methods. /// /// When set to `true`, every [`Comment`] event will be checked for not /// containing `--`, which [is not allowed] in XML comments. Most of the time /// we don't want comments at all so we don't really care about comment /// correctness, thus the default value is `false` to improve performance. /// /// Default: `false` /// /// [`Error::IllFormed(DoubleHyphenInComment)`]: crate::errors::IllFormedError::DoubleHyphenInComment /// [`Comment`]: crate::events::Event::Comment /// [is not allowed]: https://www.w3.org/TR/xml11/#sec-comments pub check_comments: bool, /// Whether mismatched closing tag names should be detected. If enabled, in /// case of mismatch the [`Error::IllFormed(MismatchedEndTag)`] is returned from /// read methods. /// /// Note, that start and end tags [should match literally][spec], they cannot /// have different prefixes even if both prefixes resolve to the same namespace. /// The XML /// /// ```xml /// /// /// ``` /// /// is not valid, even though semantically the start tag is the same as the /// end tag. The reason is that namespaces are an extension of the original /// XML specification (without namespaces) and it should be backward-compatible. /// /// When set to `false`, it won't check if a closing tag matches the corresponding /// opening tag. For example, `` will be permitted. /// /// If the XML is known to be sane (already processed, etc.) this saves extra time. /// /// Note that the emitted [`End`] event will not be modified if this is disabled, /// ie. 
it will contain the data of the mismatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. However if [`expand_empty_elements`] /// is also set, only one additional allocation will be performed that support /// both these options. /// /// Default: `true` /// /// [`Error::IllFormed(MismatchedEndTag)`]: crate::errors::IllFormedError::MismatchedEndTag /// [spec]: https://www.w3.org/TR/xml11/#dt-etag /// [`End`]: crate::events::Event::End /// [`expand_empty_elements`]: Self::expand_empty_elements pub check_end_names: bool, /// Whether empty elements should be split into an `Open` and a `Close` event. /// /// When set to `true`, all [`Empty`] events produced by a self-closing tag /// like `` are expanded into a [`Start`] event followed by an [`End`] /// event. When set to `false` (the default), those tags are represented by /// an [`Empty`] event instead. /// /// Note, that setting this to `true` will lead to additional allocates that /// needed to store tag name for an [`End`] event. However if [`check_end_names`] /// is also set, only one additional allocation will be performed that support /// both these options. /// /// Default: `false` /// /// [`Empty`]: crate::events::Event::Empty /// [`Start`]: crate::events::Event::Start /// [`End`]: crate::events::Event::End /// [`check_end_names`]: Self::check_end_names pub expand_empty_elements: bool, /// Whether trailing whitespace after the markup name are trimmed in closing /// tags ``. /// /// If `true` the emitted [`End`] event is stripped of trailing whitespace /// after the markup name. /// /// Note that if set to `false` and [`check_end_names`] is `true` the comparison /// of markup names is going to fail erroneously if a closing tag contains /// trailing whitespace. 
/// /// Default: `true` /// /// [`End`]: crate::events::Event::End /// [`check_end_names`]: Self::check_end_names pub trim_markup_names_in_closing_tags: bool, /// Whether whitespace before character data should be removed. /// /// When set to `true`, leading whitespace is trimmed in [`Text`] events. /// If after that the event is empty it will not be pushed. /// /// Default: `false` /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
/// /// [`Text`]: crate::events::Event::Text /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub trim_text_start: bool, /// Whether whitespace after character data should be removed. /// /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. /// If after that the event is empty it will not be pushed. /// /// Default: `false` /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
/// /// [`Text`]: crate::events::Event::Text /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end pub trim_text_end: bool, } impl Config { /// Set both [`trim_text_start`] and [`trim_text_end`] to the same value. /// ///
/// /// WARNING: With this option every text events will be trimmed which is /// incorrect behavior when text events delimited by comments, processing /// instructions or CDATA sections. To correctly trim data manually apply /// [`BytesText::inplace_trim_start`] and [`BytesText::inplace_trim_end`] /// only to necessary events. ///
///
    /// [`trim_text_start`]: Self::trim_text_start
    /// [`trim_text_end`]: Self::trim_text_end
    /// [`BytesText::inplace_trim_start`]: crate::events::BytesText::inplace_trim_start
    /// [`BytesText::inplace_trim_end`]: crate::events::BytesText::inplace_trim_end
    #[inline]
    pub fn trim_text(&mut self, trim: bool) {
        self.trim_text_start = trim;
        self.trim_text_end = trim;
    }

    /// Turn on or off all checks for well-formedness. Currently this affects
    /// the following settings:
    /// - [`check_comments`](Self::check_comments)
    /// - [`check_end_names`](Self::check_end_names)
    #[inline]
    pub fn enable_all_checks(&mut self, enable: bool) {
        self.check_comments = enable;
        self.check_end_names = enable;
    }
}

impl Default for Config {
    /// Returns the configuration with the defaults documented on each field:
    /// only `check_end_names` and `trim_markup_names_in_closing_tags` are enabled.
    fn default() -> Self {
        Self {
            allow_dangling_amp: false,
            allow_unmatched_ends: false,
            check_comments: false,
            check_end_names: true,
            expand_empty_elements: false,
            trim_markup_names_in_closing_tags: true,
            trim_text_start: false,
            trim_text_end: false,
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Body of the `read_event_impl` methods, shared between sync and async readers.
//
// Parameters:
// - `$self`: the reader, gives access to `state` (parse state, config, offsets)
// - `$buf`: the buffer passed to the source; it is rebound when the source
//   hands the buffer back without producing an event
// - `$reader`: expression that evaluates to the data source
// - `$read_until_close`: name of the method called in the `InsideMarkup` state
// - `$await`: optional `await` token for the async flavour
//
// The `loop` runs until some state produces an event or an error; states that
// produce nothing (`Init`, or text that is immediately followed by `<` / `&`)
// switch the state and `continue`.
macro_rules! read_event_impl {
    (
        $self:ident, $buf:ident,
        $reader:expr,
        $read_until_close:ident
        $(, $await:ident)?
    ) => {{
        let event = loop {
            break match $self.state.state {
                ParseState::Init => { // Go to InsideText state
                    // If encoding set explicitly, we not need to detect it. For example,
                    // explicit UTF-8 set automatically if Reader was created using `from_str`.
                    // But we still need to remove BOM for consistency with no encoding
                    // feature enabled path
                    #[cfg(feature = "encoding")]
                    if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
                        if $self.state.encoding.can_be_refined() {
                            $self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
                        }
                    }

                    // Removes UTF-8 BOM if it is present
                    #[cfg(not(feature = "encoding"))]
                    $reader.remove_utf8_bom() $(.$await)? ?;

                    $self.state.state = ParseState::InsideText;
                    continue;
                },
                ParseState::InsideRef => { // Go to InsideText
                    // Remember where `&` is, errors are reported at that position
                    let start = $self.state.offset;
                    match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
                        // Emit reference, go to InsideText state
                        ReadRefResult::Ref(bytes) => {
                            $self.state.state = ParseState::InsideText;
                            // +1 to skip start `&`
                            Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..], $self.decoder())))
                        }
                        // Go to Done state
                        ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::Done;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToEof(_) => {
                            $self.state.state = ParseState::Done;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Do not change state, stay in InsideRef
                        ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToRef(_) => {
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        // Go to InsideMarkup state
                        ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
                            $self.state.state = ParseState::InsideMarkup;
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadRefResult::UpToMarkup(_) => {
                            $self.state.state = ParseState::InsideMarkup;
                            $self.state.last_error_offset = start;
                            Err(Error::IllFormed(IllFormedError::UnclosedReference))
                        }
                        ReadRefResult::Err(e) => Err(Error::Io(e.into())),
                    }
                }
                ParseState::InsideText => { // Go to InsideMarkup, InsideRef or Done state
                    if $self.state.config.trim_text_start {
                        $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
                    }

                    match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
                        ReadTextResult::Markup(buf) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // Pass `buf` to the next next iteration of parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::Ref(buf) => {
                            $self.state.state = ParseState::InsideRef;
                            // Pass `buf` to the next next iteration of parsing loop
                            $buf = buf;
                            continue;
                        }
                        ReadTextResult::UpToMarkup(bytes) => {
                            $self.state.state = ParseState::InsideMarkup;
                            // FIXME: Can produce an empty event if:
                            // - event contains only spaces
                            // - trim_text_start = false
                            // - trim_text_end = true
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToRef(bytes) => {
                            $self.state.state = ParseState::InsideRef;
                            // Return Text event with `bytes` content
                            Ok(Event::Text($self.state.emit_text(bytes)))
                        }
                        ReadTextResult::UpToEof(bytes) => {
                            $self.state.state = ParseState::Done;
                            // Trim bytes from end if required
                            let event = $self.state.emit_text(bytes);
                            if event.is_empty() {
                                Ok(Event::Eof)
                            } else {
                                Ok(Event::Text(event))
                            }
                        }
                        ReadTextResult::Err(e) => Err(Error::Io(e.into())),
                    }
                },
                // Go to InsideText state in next two arms
                ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
                ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
                ParseState::Done => Ok(Event::Eof),
            };
        };
        match event {
            // #513: In case of ill-formed errors we already consume the wrong data
            // and change the state. We can continue parsing if we wish
            Err(Error::IllFormed(_)) => {}
            Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
            _ => {}
        }
        event
    }};
}

/// Read bytes up to the `>` and skip it. This method is expected to be called
/// after seeing the `<` symbol and skipping it. Inspects the next (current)
/// symbol and returns an appropriate [`Event`]:
///
/// |Symbol |Event
/// |-------|-------------------------------------
/// |`!`    |[`Comment`], [`CData`] or [`DocType`]
/// |`/`    |[`End`]
/// |`?`    |[`PI`]
/// |_other_|[`Start`] or [`Empty`]
///
/// Moves parser to the `InsideText` state.
///
/// [`Comment`]: Event::Comment
/// [`CData`]: Event::CData
/// [`DocType`]: Event::DocType
/// [`End`]: Event::End
/// [`PI`]: Event::PI
/// [`Start`]: Event::Start
/// [`Empty`]: Event::Empty
macro_rules! read_until_close {
    (
        $self:ident, $buf:ident,
        $reader:expr
        $(, $await:ident)?
    ) => {{
        $self.state.state = ParseState::InsideText;

        let start = $self.state.offset;
        match $reader.peek_one() $(.$await)? {
            // `<!` - comment, CDATA or DOCTYPE declaration
            Ok(Some(b'!')) => match $reader
                .read_bang_element($buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `</` - closing tag
            // The end tag is read with `ElementParser` instead of a plain search
            // for `>`, because if we just look for `>` we will parse
            // `</tag attr=">" >` as end tag `</tag attr=">` and text `" >` which
            // probably no one existing parser does. This is malformed XML, however
            // it is tolerated by some parsers (e.g. the one used by Adobe Flash)
            // and such documents do exist in the wild.
            Ok(Some(b'/')) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_end(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<?` - processing instruction
            Ok(Some(b'?')) => match $reader
                .read_with(PiParser(false), $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => $self.state.emit_question_mark(bytes),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<...` - opening or self-closed tag
            Ok(Some(_)) => match $reader
                .read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
                $(.$await)?
            {
                Ok(bytes) => Ok($self.state.emit_start(bytes)),
                Err(e) => {
                    // We want to report error at `<`, but offset was increased,
                    // so return it back (-1 for `<`)
                    $self.state.last_error_offset = start - 1;
                    Err(e)
                }
            },
            // `<` - syntax error, tag not closed
            Ok(None) => {
                // We want to report error at `<`, but offset was increased,
                // so return it back (-1 for `<`)
                $self.state.last_error_offset = start - 1;
                Err(Error::Syntax(SyntaxError::UnclosedTag))
            }
            Err(e) => Err(Error::Io(e.into())),
        }
    }};
}

/// Generalization of `read_to_end` method for buffered and borrowed readers
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that performs clearing of internal buffer after read of each event
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Because we take position after the event before the End event,
        // it is important that this position indicates beginning of the End event.
        // If between last event and the End event would be only spaces, then we
        // take position before the spaces, but spaces would be skipped without
        // generating event if `trim_text_start` is set to `true`. To prevent that
        // we temporary disable start text trimming.
        //
        // We also cannot take position after getting End event, because if
        // `trim_markup_names_in_closing_tags` is set to `true` (which is the default),
        // we do not known the real size of the End event that it is occupies in
        // the source and cannot correct the position after the End event.
        // So, we in any case should tweak parser configuration.
        let config = $self.config_mut();
        let trim = config.trim_text_start;
        config.trim_text_start = false;

        let start = $self.buffer_position();
        // Number of currently open nested elements with the same name as `$end`
        let mut depth = 0;
        loop {
            $clear
            let end = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => {
                    // Restore the user's setting on every exit path
                    $self.config_mut().trim_text_start = trim;
                    return Err(e);
                }

                Ok(Event::Start(e)) if e.name() == $end => depth += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if depth == 0 {
                        $self.config_mut().trim_text_start = trim;
                        break start..end;
                    }
                    depth -= 1;
                }
                Ok(Event::Eof) => {
                    $self.config_mut().trim_text_start = trim;
                    return Err(Error::missed_end($end, $self.decoder()));
                }
                _ => (),
            }
        }
    }};
}

#[cfg(feature = "async-tokio")]
mod async_tokio;
mod buffered_reader;
mod ns_reader;
mod slice_reader;
mod state;

pub use ns_reader::NsReader;

/// Range of input in bytes, that corresponds to some piece of XML
pub type Span = Range<u64>;

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Possible reader states.
/// The state transition diagram (`true` and `false` show the
/// value of the [`Config::expand_empty_elements`] option):
///
/// ```mermaid
/// flowchart LR
///   subgraph _
///     direction LR
///
///     Init         -- "(no event)"\n                                       --> InsideText
///     InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText
///     InsideText   -- "#lt;false#gt;\n(no event)"\nText                    --> InsideMarkup
///     InsideRef    -- "(no event)"\nGeneralRef                             --> InsideText
///   end
///   InsideText     -- "#lt;true#gt;"\nStart --> InsideEmpty
///   InsideEmpty    -- End                   --> InsideText
///   _ -. Eof .-> Done
/// ```
///
/// The diagram omits two transitions performed by the event-reading loop:
/// `InsideText` → `InsideRef` (when `&` is found in the text) and
/// `InsideRef` → `InsideMarkup` (when an unterminated reference is followed by `<`).
#[derive(Clone, Debug)]
enum ParseState {
    /// Initial state in which reader stay after creation. Leaving this state
    /// does not produce an event: the reader only detects the encoding (or
    /// removes the UTF-8 BOM when the `encoding` feature is disabled) and then
    /// always moves to the `InsideText` state. The event-reading loop never
    /// assigns this state again.
    Init,
    /// State after seeing the `&` symbol in textual content.
    ///
    /// If the reference is terminated by `;`, a `GeneralRef` event is emitted and
    /// the reader moves to the `InsideText` state. Otherwise, depending on
    /// [`Config::allow_dangling_amp`], either a `Text` event or an
    /// `UnclosedReference` error is produced and the reader stays in this state
    /// (another `&` follows), moves to the `InsideMarkup` state (`<` follows)
    /// or to the `Done` state (end of input).
    InsideRef,
    /// State after seeing the `<` symbol. Depending on the next symbol all other
    /// events could be generated.
    ///
    /// After generating one event the reader moves to the `InsideText` state.
    InsideMarkup,
    /// State in which reader searches the `<` symbol of a markup or the `&`
    /// symbol of a reference. All bytes before that symbol will be returned in
    /// the [`Event::Text`] event. After that the reader moves to the
    /// `InsideMarkup` or `InsideRef` state respectively, or to the `Done` state
    /// if the end of input was reached.
    InsideText,
    /// This state is used only if option [`expand_empty_elements`] is set to `true`.
    /// Reader enters this state when it emits an [`Event::Start`] event for a
    /// self-closed tag (NOTE(review): the state is assigned outside of the
    /// event-reading loop, presumably while emitting the start event — confirm).
    /// The next event emitted will be an [`Event::End`], after which reader
    /// returns to the `InsideText` state.
    ///
    /// [`expand_empty_elements`]: Config::expand_empty_elements
    InsideEmpty,
    /// Reader enters this state when `Eof` event generated or an error other than
    /// an ill-formed error occurred (after an ill-formed error parsing can be
    /// continued, unless the end of input was reached).
    /// This is the last state, the reader stay in it forever.
    Done,
}

/// A reference to an encoding together with information about how it was retrieved.
///
/// The state transition diagram:
///
/// ```mermaid
/// flowchart LR
///   Implicit    -- from_str       --> Explicit
///   Implicit    -- BOM            --> BomDetected
///   Implicit    -- "encoding=..." --> XmlDetected
///   BomDetected -- "encoding=..." --> XmlDetected
/// ```
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// Encoding was implicitly assumed to have a specified value. It can be refined
    /// using BOM or by the XML declaration event (`<?xml encoding=… ?>`)
    Implicit(&'static Encoding),
    /// Encoding was explicitly set to the desired value. It cannot be changed
    /// nor by BOM, nor by parsing XML declaration (`<?xml encoding=… ?>`)
    Explicit(&'static Encoding),
    /// Encoding was detected from a byte order mark (BOM) or by the first bytes
    /// of the content. It can be refined by the XML declaration event (`<?xml encoding=… ?>`)
    BomDetected(&'static Encoding),
    /// Encoding was detected using XML declaration event (`<?xml encoding=… ?>`).
    /// It can no longer change
    XmlDetected(&'static Encoding),
}

#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding regardless of how it was obtained.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match self {
            Self::Implicit(e) => e,
            Self::Explicit(e) => e,
            Self::BomDetected(e) => e,
            Self::XmlDetected(e) => e,
        }
    }
    /// Returns `true` if a more reliable source (BOM or XML declaration) is
    /// still allowed to replace this encoding.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        match self {
            Self::Implicit(_) | Self::BomDetected(_) => true,
            Self::Explicit(_) | Self::XmlDetected(_) => false,
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A direct stream to the underlying [`Reader`]s reader which updates
/// [`Reader::buffer_position()`] when read from it.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The underlying source of bytes, borrowed from the reader
    inner: &'r mut R,
    /// The reader's position counter; increased by every byte consumed
    /// through this stream
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns current position in bytes in the original source.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Gets a reference to the underlying reader.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        self.inner
    }

    /// Gets a mutable reference to the underlying reader.
    ///
    /// Avoid read from this reader because this will not update reader's position
    /// and will lead to incorrect positions of errors. Read from this stream instead.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the underlying reader and advances the tracked offset by the
    /// number of bytes actually read. On error the offset is left untouched.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let amt = self.inner.read(buf)?;
        *self.offset += amt as u64;
        Ok(amt)
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Exposes the buffer of the underlying reader. Nothing is consumed here,
    /// so the tracked offset does not change.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Consumes `amt` bytes of the underlying reader and advances the tracked
    /// offset by the same amount.
    #[inline]
    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt);
        *self.offset += amt as u64;
    }
}

//////////////////////////////////////////////////////////////////////////////////////////////////// /// A low level encoding-agnostic XML event reader. /// /// Consumes bytes and streams XML [`Event`]s. /// /// This reader does not manage namespace declarations and not able to resolve /// prefixes. If you want these features, use the [`NsReader`].
/// /// # Examples /// /// ``` /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_str(xml); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// let mut buf = Vec::new(); /// /// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) /// loop { /// // NOTE: this is the generic case when we don't know about the input BufRead. /// // when the input is a &str or a &[u8], we don't actually need to use another /// // buffer, we could directly call `reader.read_event()` /// match reader.read_event_into(&mut buf) { /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// // exits the loop when reaching end of file /// Ok(Event::Eof) => break, /// /// Ok(Event::Start(e)) => { /// match e.name().as_ref() { /// b"tag1" => println!("attributes values: {:?}", /// e.attributes().map(|a| a.unwrap().value) /// .collect::>()), /// b"tag2" => count += 1, /// _ => (), /// } /// } /// Ok(Event::Text(e)) => txt.push(e.decode().unwrap().into_owned()), /// /// // There are several other `Event`s we do not consider here /// _ => (), /// } /// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low /// buf.clear(); /// } /// ``` /// /// [`NsReader`]: crate::reader::NsReader #[derive(Debug, Clone)] pub struct Reader { /// Source of data for parse reader: R, /// Configuration and current parse state state: ReaderState, } /// Builder methods impl Reader { /// Creates a `Reader` that reads from a given reader. 
pub fn from_reader(reader: R) -> Self { Self { reader, state: ReaderState::default(), } } /// Returns reference to the parser configuration pub const fn config(&self) -> &Config { &self.state.config } /// Returns mutable reference to the parser configuration pub fn config_mut(&mut self) -> &mut Config { &mut self.state.config } } /// Getters impl Reader { /// Consumes `Reader` returning the underlying reader /// /// Can be used to compute line and column of a parsing error position /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::{str, io::Cursor}; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let xml = r#" /// Test /// Test 2 /// "#; /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// /// fn into_line_and_column(reader: Reader>) -> (usize, usize) { /// // We known that size cannot exceed usize::MAX because we created parser from single &[u8] /// let end_pos = reader.buffer_position() as usize; /// let mut cursor = reader.into_inner(); /// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned()) /// .expect("can't make a string"); /// let mut line = 1; /// let mut column = 0; /// for c in s.chars() { /// if c == '\n' { /// line += 1; /// column = 0; /// } else { /// column += 1; /// } /// } /// (line, column) /// } /// /// loop { /// match reader.read_event_into(&mut buf) { /// Ok(Event::Start(ref e)) => match e.name().as_ref() { /// b"tag1" | b"tag2" => (), /// tag => { /// assert_eq!(b"tag3", tag); /// assert_eq!((3, 22), into_line_and_column(reader)); /// break; /// } /// }, /// Ok(Event::Eof) => unreachable!(), /// _ => (), /// } /// buf.clear(); /// } /// ``` pub fn into_inner(self) -> R { self.reader } /// Gets a reference to the underlying reader. pub const fn get_ref(&self) -> &R { &self.reader } /// Gets a mutable reference to the underlying reader. 
/// /// Avoid read from this reader because this will not update reader's position /// and will lead to incorrect positions of errors. If you want to read, use /// [`stream()`] instead. /// /// [`stream()`]: Self::stream pub fn get_mut(&mut self) -> &mut R { &mut self.reader } /// Gets the byte position in the input data just after the last emitted event /// (i.e. this is position where data of last event ends). /// /// Note, that for text events which is originally ended with whitespace characters /// (` `, `\t`, `\r`, and `\n`) if [`Config::trim_text_end`] is set this is position /// before trim, not the position of the last byte of the [`Event::Text`] content. pub const fn buffer_position(&self) -> u64 { // when internal state is InsideMarkup, we have actually read until '<', // which we don't want to show if let ParseState::InsideMarkup = self.state.state { self.state.offset - 1 } else { self.state.offset } } /// Gets the last error byte position in the input data. If there is no errors /// yet, returns `0`. /// /// Unlike `buffer_position` it will point to the place where it is rational /// to report error to the end user. For example, all [`SyntaxError`]s are /// reported when the parser sees EOF inside of some kind of markup. The /// `buffer_position()` will point to the last byte of input which is not /// very useful. `error_position()` will point to the start of corresponding /// markup element (i. e. to the `<` character). /// /// This position is always `<= buffer_position()`. pub const fn error_position(&self) -> u64 { self.state.last_error_offset } /// Get the decoder, used to decode bytes, read by this reader, to the strings. /// /// If [`encoding`] feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. 
/// /// [`encoding`]: ../index.html#encoding #[inline] pub const fn decoder(&self) -> Decoder { self.state.decoder() } /// Get the direct access to the underlying reader, but tracks the amount of /// read data and update [`Reader::buffer_position()`] accordingly. /// /// Note, that this method gives you access to the internal reader and read /// data will not be returned in any subsequent events read by `read_event` /// family of methods. /// /// # Example /// /// This example demonstrates how to read stream raw bytes from an XML document. /// This could be used to implement streaming read of text, or to read raw binary /// bytes embedded in an XML document. (Documents with embedded raw bytes are not /// valid XML, but XML-derived file formats exist where such documents are valid). /// /// ``` /// # use pretty_assertions::assert_eq; /// use std::io::{BufRead, Read}; /// use quick_xml::events::{BytesEnd, BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str("binary << data&>"); /// // ^ ^ ^ ^ /// // 0 5 21 27 /// /// assert_eq!( /// (reader.read_event().unwrap(), reader.buffer_position()), /// // 5 - end of the `` /// (Event::Start(BytesStart::new("tag")), 5) /// ); /// /// // Reading directly from underlying reader will not update position /// // let mut inner = reader.get_mut(); /// /// // Reading from the stream() advances position /// let mut inner = reader.stream(); /// /// // Read binary data. 
We must know its size /// let mut binary = [0u8; 16]; /// inner.read_exact(&mut binary).unwrap(); /// assert_eq!(&binary, b"binary << data&>"); /// // 21 - end of the `binary << data&>` /// assert_eq!(inner.offset(), 21); /// assert_eq!(reader.buffer_position(), 21); /// /// assert_eq!( /// (reader.read_event().unwrap(), reader.buffer_position()), /// // 27 - end of the `` /// (Event::End(BytesEnd::new("tag")), 27) /// ); /// /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` #[inline] pub fn stream(&mut self) -> BinaryStream<'_, R> { BinaryStream { inner: &mut self.reader, offset: &mut self.state.offset, } } } /// Private sync reading methods impl Reader { /// Read text into the given buffer, and return an event that borrows from /// either that buffer or from the input itself, based on the type of the /// reader. fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result, Error> where R: XmlSource<'i, B>, { read_event_impl!(self, buf, self.reader, read_until_close) } /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. fn read_until_close<'i, B>(&mut self, buf: B) -> Result, Error> where R: XmlSource<'i, B>, { read_until_close!(self, buf, self.reader) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Result of an attempt to read XML textual data from the source. #[derive(Debug)] enum ReadTextResult<'r, B> { /// Start of markup (`<` character) was found in the first byte. `<` was consumed. /// Contains buffer that should be returned back to the next iteration cycle /// to satisfy borrow checker requirements. Markup(B), /// Start of reference (`&` character) was found in the first byte. /// `&` was not consumed. /// Contains buffer that should be returned back to the next iteration cycle /// to satisfy borrow checker requirements. Ref(B), /// Contains text block up to start of markup (`<` character). `<` was consumed. 
UpToMarkup(&'r [u8]), /// Contains text block up to start of reference (`&` character). /// `&` was not consumed. UpToRef(&'r [u8]), /// Contains text block up to EOF, neither start of markup (`<` character) /// or start of reference (`&` character) was found. UpToEof(&'r [u8]), /// IO error occurred. Err(io::Error), } /// Result of an attempt to read general reference from the reader. #[derive(Debug)] enum ReadRefResult<'r> { /// Contains text block up to end of reference (`;` character). /// Result includes start `&`, but not end `;`. Ref(&'r [u8]), /// Contains text block up to EOF. Neither end of reference (`;`), start of /// another reference (`&`) or start of markup (`<`) characters was found. /// Result includes start `&`. UpToEof(&'r [u8]), /// Contains text block up to next possible reference (`&` character). /// Result includes start `&`. UpToRef(&'r [u8]), /// Contains text block up to start of markup (`<` character). /// Result includes start `&`. UpToMarkup(&'r [u8]), /// IO error occurred. Err(io::Error), } /// Represents an input for a reader that can return borrowed data. /// /// There are two implementors of this trait: generic one that read data from /// `Self`, copies some part of it into a provided buffer of type `B` and then /// returns data that borrow from that buffer. /// /// The other implementor is for `&[u8]` and instead of copying data returns /// borrowed data from `Self` instead. This implementation allows zero-copy /// deserialization. 
/// /// # Parameters /// - `'r`: lifetime of a buffer from which events will borrow /// - `B`: a type of a buffer that can be used to store data read from `Self` and /// from which events can borrow trait XmlSource<'r, B> { /// Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] fn remove_utf8_bom(&mut self) -> io::Result<()>; /// Determines encoding from the start of input and removes BOM if it is present #[cfg(feature = "encoding")] fn detect_encoding(&mut self) -> io::Result>; /// Read input until start of markup (the `<`) is found, start of general entity /// reference (the `&`) is found or end of input is reached. /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>; /// Read input until end of general reference (the `;`) is found, start of /// another general reference (the `&`) is found or end of input is reached. /// /// This method must be called when current character is `&`. /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>; /// Read input until processing instruction is finished. /// /// This method expect that start sequence of a parser already was read. /// /// Returns a slice of data read up to the end of the thing being parsed. /// The end of thing and the returned content is determined by the used parser. /// /// If input (`Self`) is exhausted and no bytes was read, or if the specified /// parser could not find the ending sequence of the thing, returns `SyntaxError`. 
/// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// A `P` type parameter is used to preserve state between calls to the underlying /// reader which provides bytes fed into the parser. /// /// [events]: crate::events::Event fn read_with

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "#); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// /// // Now we can enable checks again /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Reader::decoder() #[inline] pub fn read_text(&mut self, end: QName) -> Result> { self.reader.read_text(end) } } impl Deref for NsReader { type Target = Reader; #[inline] fn deref(&self) -> &Self::Target { &self.reader } } quick-xml-0.38.4/src/reader/slice_reader.rs000064400000000000000000000357271046102023000166700ustar 00000000000000//! This is an implementation of [`Reader`] for reading from a `&[u8]` as //! underlying byte stream. This implementation supports not using an //! intermediate buffer as the byte slice itself can be used to borrow from. use std::borrow::Cow; use std::io; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_8}; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; use crate::parser::Parser; use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource}; use crate::utils::is_whitespace; /// This is an implementation for reading from a `&[u8]` as underlying byte stream. /// This implementation supports not using an intermediate buffer as the byte slice /// itself can be used to borrow from. impl<'a> Reader<&'a [u8]> { /// Creates an XML reader from a string slice. 
#[allow(clippy::should_implement_trait)] pub fn from_str(s: &'a str) -> Self { // Rust strings are guaranteed to be UTF-8, so lock the encoding #[cfg(feature = "encoding")] { let mut reader = Self::from_reader(s.as_bytes()); reader.state.encoding = EncodingRef::Explicit(UTF_8); reader } #[cfg(not(feature = "encoding"))] Self::from_reader(s.as_bytes()) } /// Read an event that borrows from the input rather than a buffer. /// /// There is no asynchronous `read_event_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// match reader.read_event().unwrap() { /// Event::Start(e) => count += 1, /// Event::Text(e) => txt.push(e.decode().unwrap().into_owned()), /// Event::Eof => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event(&mut self) -> Result> { self.read_event_impl(()) } /// Reads until end element is found. This function is supposed to be called /// after you already read a [`Start`] event. /// /// Returns a span that cover content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// If a corresponding [`End`] event is not found, an error of type [`Error::IllFormed`] /// will be returned. 
In particular, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// The `end` parameter should contain name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// /// The correctness of the skipped events is not checked if you disabled /// the [`check_end_names`] option. /// /// There is no asynchronous `read_to_end_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Namespaces /// /// While the `Reader` does not support namespace resolution, namespaces /// do not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolve to the /// same namespace, are semantically equivalent, `</b:name>` cannot close /// `<a:name>`, because according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows, how you can skip XML content after you read the /// start event. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(r#" /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("outer"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// /// // ...then, we could skip all events to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces.
/// reader.read_to_end(end.name()).unwrap(); /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: crate::reader::Config::expand_empty_elements /// [`check_end_names`]: crate::reader::Config::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result { Ok(read_to_end!(self, end, (), read_event_impl, {})) } /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// /// Manages nested cases where parent and child elements have the _literally_ /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. /// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::Reader; /// /// let mut reader = Reader::from_str(" /// /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// /// "); /// reader.config_mut().trim_text(true); /// /// let start = BytesStart::new("html"); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); /// // ...and disable checking of end names because we expect HTML further... /// reader.config_mut().check_end_names = false; /// /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); /// assert_eq!(text, Cow::Borrowed(r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" /// "#)); /// assert!(matches!(text, Cow::Borrowed(_))); /// /// // Now we can enable checks again /// reader.config_mut().check_end_names = true; /// /// // At the end we should get an Eof event, because we ate the whole XML /// assert_eq!(reader.read_event().unwrap(), Event::Eof); /// ``` /// /// [`Start`]: Event::Start /// [`decoder()`]: Self::decoder() pub fn read_text(&mut self, end: QName) -> Result> { // self.reader will be changed, so store original reference let buffer = self.reader; let span = self.read_to_end(end)?; let len = span.end - span.start; // SAFETY: `span` can only contain indexes up to usize::MAX because it // was created from offsets from a single &[u8] slice Ok(self.decoder().decode(&buffer[0..len as usize])?) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implementation of `XmlSource` for `&[u8]` reader using a `Self` as buffer /// that will be borrowed by events. 
This implementation provides a zero-copy deserialization impl<'a> XmlSource<'a, ()> for &'a [u8] { #[cfg(not(feature = "encoding"))] #[inline] fn remove_utf8_bom(&mut self) -> io::Result<()> { if self.starts_with(crate::encoding::UTF8_BOM) { *self = &self[crate::encoding::UTF8_BOM.len()..]; } Ok(()) } #[cfg(feature = "encoding")] #[inline] fn detect_encoding(&mut self) -> io::Result> { if let Some((enc, bom_len)) = crate::encoding::detect_encoding(self) { *self = &self[bom_len..]; return Ok(Some(enc)); } Ok(None) } #[inline] fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> { // Search for start of markup or an entity or character reference match memchr::memchr2(b'<', b'&', self) { Some(0) if self[0] == b'<' => { *self = &self[1..]; *position += 1; ReadTextResult::Markup(()) } // Do not consume `&` because it may be lone and we would be need to // return it as part of Text event Some(0) => ReadTextResult::Ref(()), Some(i) if self[i] == b'<' => { let bytes = &self[..i]; *self = &self[i + 1..]; *position += i as u64 + 1; ReadTextResult::UpToMarkup(bytes) } Some(i) => { let (bytes, rest) = self.split_at(i); *self = rest; *position += i as u64; ReadTextResult::UpToRef(bytes) } None => { let bytes = &self[..]; *self = &[]; *position += bytes.len() as u64; ReadTextResult::UpToEof(bytes) } } } #[inline] fn read_ref(&mut self, _buf: (), position: &mut u64) -> ReadRefResult<'a> { debug_assert_eq!( self.first(), Some(&b'&'), "`read_ref` must be called at `&`" ); // Search for the end of reference or a start of another reference or a markup match memchr::memchr3(b';', b'&', b'<', &self[1..]) { // Do not consume `&` because it may be lone and we would be need to // return it as part of Text event Some(i) if self[i + 1] == b'&' => { let (bytes, rest) = self.split_at(i + 1); *self = rest; *position += i as u64 + 1; ReadRefResult::UpToRef(bytes) } Some(i) => { let end = i + 1; let is_end = self[end] == b';'; let bytes = &self[..end]; // +1 -- skip 
the end `;` or `<` *self = &self[end + 1..]; *position += end as u64 + 1; if is_end { ReadRefResult::Ref(bytes) } else { ReadRefResult::UpToMarkup(bytes) } } None => { let bytes = &self[..]; *self = &[]; *position += bytes.len() as u64; ReadRefResult::UpToEof(bytes) } } } #[inline] fn read_with

(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]> where P: Parser, { if let Some(i) = parser.feed(self) { // +1 for `>` which we do not include *position += i as u64 + 1; let bytes = &self[..i]; *self = &self[i + 1..]; return Ok(bytes); } *position += self.len() as u64; Err(Error::Syntax(P::eof_error())) } #[inline] fn read_bang_element(&mut self, _buf: (), position: &mut u64) -> Result<(BangType, &'a [u8])> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. debug_assert_eq!(self[0], b'!'); let mut bang_type = BangType::new(self[1..].first().copied())?; if let Some((bytes, i)) = bang_type.parse(&[], self) { *position += i as u64; *self = &self[i..]; return Ok((bang_type, bytes)); } *position += self.len() as u64; Err(bang_type.to_err().into()) } #[inline] fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> { let whitespaces = self .iter() .position(|b| !is_whitespace(*b)) .unwrap_or(self.len()); *position += whitespaces as u64; *self = &self[whitespaces..]; Ok(()) } #[inline] fn peek_one(&mut self) -> io::Result> { Ok(self.first().copied()) } } #[cfg(test)] mod test { use crate::reader::test::check; use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test fn identity(input: T) -> T { input } check!( #[test] read_event_impl, read_until_close, identity, () ); } quick-xml-0.38.4/src/reader/state.rs000064400000000000000000000351401046102023000153540ustar 00000000000000#[cfg(feature = "encoding")] use encoding_rs::UTF_8; use crate::encoding::Decoder; use crate::errors::{Error, IllFormedError, Result, SyntaxError}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; use crate::reader::{BangType, Config, ParseState}; use crate::utils::{is_whitespace, name_len}; /// A struct that holds a current reader state and a parser configuration. 
/// It is independent on a way of reading data: the reader feed data into it and /// get back produced [`Event`]s. #[derive(Clone, Debug)] pub(super) struct ReaderState { /// Number of bytes read from the source of data since the reader was created pub offset: u64, /// A snapshot of an `offset` of the last error returned. It can be less than /// `offset`, because some errors conveniently report at earlier position, /// and changing `offset` is not possible, because `Error::IllFormed` errors /// are recoverable. pub last_error_offset: u64, /// Defines how to process next byte pub state: ParseState, /// User-defined settings that affect parsing pub config: Config, /// All currently Started elements which didn't have a matching /// End element yet. /// /// For an XML /// /// ```xml /// | /// ``` /// when cursor at the `|` position buffer contains: /// /// ```text /// rootinner /// ^ ^ /// ``` /// /// The `^` symbols shows which positions stored in the [`Self::opened_starts`] /// (0 and 4 in that case). opened_buffer: Vec, /// Opened name start indexes into [`Self::opened_buffer`]. See documentation /// for that field for details opened_starts: Vec, #[cfg(feature = "encoding")] /// Reference to the encoding used to read an XML pub encoding: EncodingRef, } impl ReaderState { /// Trims end whitespaces from `bytes`, if required, and returns a text event. /// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> BytesText<'b> { let mut content = bytes; if self.config.trim_text_end { // Skip the ending '<' let len = bytes .iter() .rposition(|&b| !is_whitespace(b)) .map_or(0, |p| p + 1); content = &bytes[..len]; } BytesText::wrap(content, self.decoder()) } /// Returns `Comment`, `CData` or `DocType` event. 
/// /// `buf` contains data between `<` and `>`: /// - CDATA: `![CDATA[...]]` /// - Comment: `!--...--` /// - Doctype (uppercase): `!D...` /// - Doctype (lowercase): `!d...` pub fn emit_bang<'b>(&mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { debug_assert_eq!( buf.first(), Some(&b'!'), "CDATA, comment or DOCTYPE should start from '!'" ); let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; let len = buf.len(); match bang_type { BangType::Comment if buf.starts_with(b"!--") => { debug_assert!(buf.ends_with(b"--")); if self.config.check_comments { // search if '--' not in comments let mut haystack = &buf[3..len - 2]; let mut off = 0; while let Some(p) = memchr::memchr(b'-', haystack) { off += p + 1; // if next byte after `-` is also `-`, return an error if buf[3 + off] == b'-' { // Explanation of the magic: // // - `self.offset`` just after `>`, // - `buf` contains `!-- con--tent --` // - `p` is counted from byte after `: // ~~~~~~~~~~~~~~~~ : - buf // : =========== : - zone of search (possible values of `p`) // : |---p : - p is counted from | (| is 0) // : : : ^ - self.offset // ^ : : - self.offset - len // ^ : - self.offset - len + 2 // ^ - self.offset - len + 2 + p self.last_error_offset = self.offset - len as u64 + 2 + p as u64; return Err(Error::IllFormed(IllFormedError::DoubleHyphenInComment)); } // Continue search after single `-` (+1 to skip it) haystack = &haystack[p + 1..]; } } Ok(Event::Comment(BytesText::wrap( // Cut of `!--` and `--` from start and end &buf[3..len - 2], self.decoder(), ))) } // XML requires uppercase only: // https://www.w3.org/TR/xml11/#sec-cdata-sect // Even HTML5 required uppercase only: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state BangType::CData if buf.starts_with(b"![CDATA[") => { debug_assert!(buf.ends_with(b"]]")); Ok(Event::CData(BytesCData::wrap( // Cut of `![CDATA[` and `]]` from start 
and end &buf[8..len - 2], self.decoder(), ))) } // XML requires uppercase only, but we will check that on validation stage: // https://www.w3.org/TR/xml11/#sec-prolog-dtd // HTML5 allows mixed case for doctype declarations: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state BangType::DocType(0) if uncased_starts_with(buf, b"!DOCTYPE") => { match buf[8..].iter().position(|&b| !is_whitespace(b)) { Some(start) => Ok(Event::DocType(BytesText::wrap( // Cut of `!DOCTYPE` and any number of spaces from start &buf[8 + start..], self.decoder(), ))), None => { // Because we here, we at least read `` and offset after `>`. // We want report error at place where name is expected - this is just // before `>` self.last_error_offset = self.offset - 1; Err(Error::IllFormed(IllFormedError::MissingDoctypeName)) } } } _ => { // // ^^^^^ - `buf` does not contain `<` and `>`, but `self.offset` is after `>`. // ^------- We report error at that position, so we need to subtract 2 and buf len self.last_error_offset = self.offset - len as u64 - 2; Err(bang_type.to_err().into()) } } } /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.config.check_end_names` is set. /// /// `buf` contains data between `<` and `>`, for example `/tag`. pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result> { debug_assert_eq!( buf.first(), Some(&b'/'), "closing tag should start from '/'" ); // Strip the `/` character. `content` contains data between `` let content = &buf[1..]; // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. 
let name = if self.config.trim_markup_names_in_closing_tags { if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) { &content[..pos_end_name + 1] } else { content } } else { content }; let decoder = self.decoder(); // Get the index in self.opened_buffer of the name of the last opened tag match self.opened_starts.pop() { Some(start) => { if self.config.check_end_names { let expected = &self.opened_buffer[start..]; if name != expected { let expected = decoder.decode(expected).unwrap_or_default().into_owned(); // #513: In order to allow error recovery we should drop content of the buffer self.opened_buffer.truncate(start); // Report error at start of the end tag at `<` character // -2 for `<` and `>` self.last_error_offset = self.offset - buf.len() as u64 - 2; return Err(Error::IllFormed(IllFormedError::MismatchedEndTag { expected, found: decoder.decode(name).unwrap_or_default().into_owned(), })); } } self.opened_buffer.truncate(start); } None => { if !self.config.allow_unmatched_ends { // Report error at start of the end tag at `<` character // -2 for `<` and `>` self.last_error_offset = self.offset - buf.len() as u64 - 2; return Err(Error::IllFormed(IllFormedError::UnmatchedEndTag( decoder.decode(name).unwrap_or_default().into_owned(), ))); } } } Ok(Event::End(BytesEnd::wrap(name.into()))) } /// `buf` contains data between `<` and `>` and the first byte is `?`. /// `self.offset` already after the `>` /// /// Returns `Decl` or `PI` event pub fn emit_question_mark<'b>(&mut self, buf: &'b [u8]) -> Result> { debug_assert!(!buf.is_empty()); debug_assert_eq!(buf[0], b'?'); let len = buf.len(); // We accept at least // ~~ - len = 2 if len > 1 && buf[len - 1] == b'?' 
{ // Cut of `?` and `?` from start and end let content = &buf[1..len - 1]; let len = content.len(); if content.starts_with(b"xml") && (len == 3 || is_whitespace(content[3])) { let event = BytesDecl::from_start(BytesStart::wrap(content, 3, self.decoder())); // Try getting encoding from the declaration event #[cfg(feature = "encoding")] if self.encoding.can_be_refined() { if let Some(encoding) = event.encoder() { self.encoding = EncodingRef::XmlDetected(encoding); } } Ok(Event::Decl(event)) } else { Ok(Event::PI(BytesPI::wrap( content, name_len(content), self.decoder(), ))) } } else { // `) self.last_error_offset = self.offset - len as u64 - 2; Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl)) } } /// Converts content of a tag to a `Start` or an `Empty` event /// /// # Parameters /// - `content`: Content of a tag between `<` and `>` pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Event<'b> { if let Some(content) = content.strip_suffix(b"/") { // This is self-closed tag `` let event = BytesStart::wrap(content, name_len(content), self.decoder()); if self.config.expand_empty_elements { self.state = ParseState::InsideEmpty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); Event::Start(event) } else { Event::Empty(event) } } else { let event = BytesStart::wrap(content, name_len(content), self.decoder()); // #514: Always store names event when .check_end_names == false, // because checks can be temporary disabled and when they would be // enabled, we should have that information self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); Event::Start(event) } } #[inline] pub fn close_expanded_empty(&mut self) -> BytesEnd<'static> { self.state = ParseState::InsideText; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); BytesEnd::wrap(name.into()) } /// Get the decoder, used to decode bytes, read by this reader, to the strings. 
/// /// If [`encoding`] feature is enabled, the used encoding may change after /// parsing the XML declaration, otherwise encoding is fixed to UTF-8. /// /// If [`encoding`] feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. /// /// [`encoding`]: ../../index.html#encoding pub const fn decoder(&self) -> Decoder { Decoder { #[cfg(feature = "encoding")] encoding: self.encoding.encoding(), } } } impl Default for ReaderState { fn default() -> Self { Self { offset: 0, last_error_offset: 0, state: ParseState::Init, config: Config::default(), opened_buffer: Vec::new(), opened_starts: Vec::new(), #[cfg(feature = "encoding")] encoding: EncodingRef::Implicit(UTF_8), } } } quick-xml-0.38.4/src/se/content.rs000064400000000000000000001751451046102023000150650ustar 00000000000000//! Contains serializer for content of an XML element use crate::de::TEXT_KEY; use crate::se::element::{ElementSerializer, Struct, Tuple}; use crate::se::simple_type::{QuoteTarget, SimpleTypeSerializer}; use crate::se::{Indent, QuoteLevel, SeError, TextFormat, WriteResult, XmlName}; use serde::ser::{ Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { self.into_simple_type_serializer()?.$method(value)?; Ok(WriteResult::Text) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize content of an element. It does not write /// surrounding tags. Unlike the [`ElementSerializer`], this serializer serializes /// enums using variant names as tag names, i. e. as `...`. /// /// Returns the classification of the last written type. 
///
/// This serializer does the following:
/// - numbers converted to a decimal representation and serialized as naked strings;
/// - booleans serialized either as `"true"` or `"false"`;
/// - strings and characters are serialized as naked strings;
/// - `None` does not write anything;
/// - `Some` and newtypes are serialized as an inner type using the same serializer;
/// - units (`()`) and unit structs do not write anything;
/// - sequences, tuples and tuple structs are serialized without delimiters
///   between the items. A primitive item is rejected with [`SeError::Unsupported`]
///   when [`Self::allow_primitive`] is `false`, because adjacent text items
///   could not be told apart when reading the document back;
/// - structs are passed to [`ElementSerializer`], with the name of the struct
///   type used as the key;
/// - maps are not supported ([`SeError::Unsupported`] is returned);
/// - enums:
///   - unit variants are serialized as self-closed `<variant/>`;
///   - newtype variants are serialized as inner value wrapped in `<variant>...</variant>`;
///   - tuple variants are serialized as sequences where each element is wrapped
///     in `<variant>...</variant>`;
///   - struct variants are serialized as a sequence of fields wrapped in
///     `<variant>...</variant>`. Each field is serialized recursively using
///     either [`ElementSerializer`], `ContentSerializer` (`$value` fields), or
///     [`SimpleTypeSerializer`] (`$text` fields). In particular, the empty struct
///     is serialized as `<variant/>`;
///
/// Usage of empty tags depends on the [`Self::expand_empty_elements`] setting.
///
/// The difference between this serializer and [`SimpleTypeSerializer`] is in how
/// sequences and maps are serialized. Unlike `SimpleTypeSerializer` it supports
/// any types in sequences and serializes them as list of elements, but that has
/// drawbacks. Sequence of primitives would be serialized without delimiters and
/// it will be impossible to distinguish between them. Even worse, when serializing
/// with indent, sequence of strings become one big string with additional content
/// and it would be impossible to distinguish between content of the original
/// strings and inserted indent characters.
pub struct ContentSerializer<'w, 'i, W: Write> {
    /// Destination that receives the serialized data
    pub writer: &'w mut W,
    /// Defines which XML characters need to be escaped in text content
    pub level: QuoteLevel,
    /// Current indentation level. Note, that `Indent::None` means that there is
    /// no indentation at all, but `write_indent == false` means only, that indent
    /// writing is disabled in this instantiation of `ContentSerializer`, but
    /// child serializers should have access to the actual state of indentation.
    pub(super) indent: Indent<'i>,
    /// If `true`, then current indent will be written before writing the content,
    /// but only if content is not empty. This flag is reset after writing indent.
    pub write_indent: bool,
    /// Defines how text content should be serialized (as escaped text or CDATA)
    pub text_format: TextFormat,
    /// If `true`, then primitive types that serialize to a text content without
    /// surrounding tag will be allowed, otherwise the [`SeError::Unsupported`]
    /// will be returned.
    ///
    /// This flag protects from the situation when two consecutive values are
    /// serialized as a text, which makes it impossible to distinguish between
    /// them during deserialization. Instead of ambiguous serialization the error
    /// is returned.
    pub allow_primitive: bool,
    /// If `true`, then empty elements will be serialized as `<element></element>`
    /// instead of `<element/>`.
pub expand_empty_elements: bool,
}

impl<'w, 'i, W: Write> ContentSerializer<'w, 'i, W> {
    /// Turns this serializer into serializer of a text content.
    ///
    /// The quoting target follows `text_format`; `allow_primitive` is not checked.
    #[inline]
    pub fn into_simple_type_serializer_impl(self) -> SimpleTypeSerializer<&'w mut W> {
        SimpleTypeSerializer {
            writer: self.writer,
            target: match self.text_format {
                TextFormat::Text => QuoteTarget::Text,
                TextFormat::CData => QuoteTarget::CData,
            },
            level: self.level,
        }
    }

    /// Turns this serializer into serializer of a text content if that is allowed,
    /// otherwise error is returned
    ///
    /// # Errors
    /// Returns [`SeError::Unsupported`] when `allow_primitive` is `false`.
    #[inline]
    pub fn into_simple_type_serializer(
        self,
    ) -> Result<SimpleTypeSerializer<&'w mut W>, SeError> {
        if self.allow_primitive {
            Ok(self.into_simple_type_serializer_impl())
        } else {
            Err(SeError::Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back".into()))
        }
    }

    /// Creates new serializer that shares state with this serializer and
    /// writes to the same underlying writer
    ///
    /// Every setting is copied from `self`, except `allow_primitive`, which is
    /// taken from the argument.
    #[inline]
    pub fn new_seq_element_serializer(
        &mut self,
        allow_primitive: bool,
    ) -> ContentSerializer<'_, '_, W> {
        ContentSerializer {
            writer: self.writer,
            level: self.level,
            indent: self.indent.borrow(),
            write_indent: self.write_indent,
            text_format: self.text_format,
            allow_primitive,
            expand_empty_elements: self.expand_empty_elements,
        }
    }

    /// Writes `name` as an empty element: `<name></name>` if
    /// `expand_empty_elements` is set, self-closed `<name/>` otherwise.
    /// The pending indent (if any) is written first.
    #[inline]
    pub(super) fn write_empty(mut self, name: XmlName) -> Result<WriteResult, SeError> {
        self.write_indent()?;
        // Both forms start with `<name`
        self.writer.write_char('<')?;
        self.writer.write_str(name.0)?;
        if self.expand_empty_elements {
            self.writer.write_str("></")?;
            self.writer.write_str(name.0)?;
            self.writer.write_char('>')?;
        } else {
            self.writer.write_str("/>")?;
        }
        Ok(WriteResult::Element)
    }

    /// Writes simple type content between `name` tags
    ///
    /// `serialize` receives a text serializer over the same writer and must
    /// give the writer back, so that the closing tag can be written after it.
    pub(super) fn write_wrapped<S>(
        mut self,
        name: XmlName,
        serialize: S,
    ) -> Result<WriteResult, SeError>
    where
        S: for<'a> FnOnce(SimpleTypeSerializer<&'a mut W>) -> Result<&'a mut W, SeError>,
    {
        self.write_indent()?;
        self.writer.write_char('<')?;
        self.writer.write_str(name.0)?;
        self.writer.write_char('>')?;

        let writer = serialize(self.into_simple_type_serializer_impl())?;

        writer.write_str("</")?;
        writer.write_str(name.0)?;
        writer.write_char('>')?;
        Ok(WriteResult::Element)
    }

    /// Writes the current indent if `write_indent` is set and then resets the
    /// flag, so the indent is written at most once.
    pub(super) fn write_indent(&mut self) -> Result<(), SeError> {
        if self.write_indent {
            self.indent.write_indent(&mut self.writer)?;
            self.write_indent = false;
        }
        Ok(())
    }
}

impl<'w, 'i, W: Write> Serializer for ContentSerializer<'w, 'i, W> {
    type Ok = WriteResult;
    type Error = SeError;

    type SerializeSeq = Seq<'w, 'i, W>;
    type SerializeTuple = Seq<'w, 'i, W>;
    type SerializeTupleStruct = Seq<'w, 'i, W>;
    type SerializeTupleVariant = Tuple<'w, 'i, W>;
    type SerializeMap = Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = Struct<'w, 'i, W>;
    type SerializeStructVariant = Struct<'w, 'i, W>;

    write_primitive!(serialize_bool(bool));

    write_primitive!(serialize_i8(i8));
    write_primitive!(serialize_i16(i16));
    write_primitive!(serialize_i32(i32));
    write_primitive!(serialize_i64(i64));

    write_primitive!(serialize_u8(u8));
    write_primitive!(serialize_u16(u16));
    write_primitive!(serialize_u32(u32));
    write_primitive!(serialize_u64(u64));

    serde_if_integer128! {
        write_primitive!(serialize_i128(i128));
        write_primitive!(serialize_u128(u128));
    }

    write_primitive!(serialize_f32(f32));
    write_primitive!(serialize_f64(f64));

    write_primitive!(serialize_bytes(&[u8]));

    /// Writes the character as text and classifies it as whitespace-sensitive
    #[inline]
    fn serialize_char(self, value: char) -> Result<Self::Ok, Self::Error> {
        self.into_simple_type_serializer()?.serialize_char(value)?;
        Ok(WriteResult::SensitiveText)
    }

    /// Writes the string as text and classifies it as whitespace-sensitive.
    /// An empty string writes nothing and does not check `allow_primitive`.
    #[inline]
    fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
        if !value.is_empty() {
            self.into_simple_type_serializer()?.serialize_str(value)?;
        }
        Ok(WriteResult::SensitiveText)
    }

    /// Does not write anything
    #[inline]
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        // Classify `None` as sensitive to whitespaces, because this can be `Option<String>`.
        // Unfortunately, we do not know what type the option contains, so have no chance
        // to adapt our behavior to it. The safe variant is to assume sensitivity
        Ok(WriteResult::SensitiveNothing)
    }

    /// Serializes the inner value using this same serializer
    fn serialize_some<T: ?Sized + Serialize>(self, value: &T) -> Result<Self::Ok, Self::Error> {
        value.serialize(self)
    }

    /// Does not write anything
    #[inline]
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        Ok(WriteResult::Nothing)
    }

    /// Does not write anything
    #[inline]
    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Ok(WriteResult::Nothing)
    }

    /// If `variant` is a special `$text` variant, then do nothing, otherwise
    /// checks `variant` for XML name validity and writes `<${variant}/>`.
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        if variant == TEXT_KEY {
            Ok(WriteResult::Nothing)
        } else {
            let name = XmlName::try_from(variant)?;
            self.write_empty(name)
        }
    }

    /// Serializes the inner value using this same serializer
    fn serialize_newtype_struct<T: ?Sized + Serialize>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        value.serialize(self)
    }

    /// If `variant` is a special `$text` variant, then writes `value` as a `xs:simpleType`,
    /// otherwise checks `variant` for XML name validity and writes `value` as a new
    /// `<${variant}>` element.
    fn serialize_newtype_variant<T: ?Sized + Serialize>(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        if variant == TEXT_KEY {
            value.serialize(self.into_simple_type_serializer()?)?;
            Ok(WriteResult::SensitiveText)
        } else {
            value.serialize(ElementSerializer {
                key: XmlName::try_from(variant)?,
                ser: self,
            })?;
            Ok(WriteResult::Element)
        }
    }

    /// Starts a sequence; each item is serialized by a serializer derived from this one
    #[inline]
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Ok(Seq {
            ser: self,
            // If sequence is empty, nothing will be serialized. Because sequence can be of `Option`s
            // we need to assume that writing indent may change the data and do not write anything
            last: WriteResult::SensitiveNothing,
        })
    }

    /// Tuples are serialized in the same way as sequences
    #[inline]
    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        self.serialize_seq(Some(len))
    }

    /// Tuple structs are serialized in the same way as tuples
    #[inline]
    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        self.serialize_tuple(len)
    }

    /// Serializes variant as a tuple with name `variant`, producing
    ///
    /// ```xml
    /// <variant><!-- 1st element of a tuple --></variant>
    /// <variant><!-- 2nd element of a tuple --></variant>
    /// <!-- ... -->
    /// <variant><!-- Nth element of a tuple --></variant>
    /// ```
    ///
    /// The special `$text` variant is passed to the text serializer instead.
    #[inline]
    fn serialize_tuple_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        if variant == TEXT_KEY {
            self.into_simple_type_serializer()?
                .serialize_tuple_struct(name, len)
                .map(Tuple::Text)
        } else {
            let ser = ElementSerializer {
                key: XmlName::try_from(variant)?,
                ser: self,
            };
            ser.serialize_tuple_struct(name, len).map(Tuple::Element)
        }
    }

    /// Always fails: maps cannot be serialized by this serializer
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        Err(SeError::Unsupported(
            "serialization of map types is not supported in `$value` field".into(),
        ))
    }

    /// Passes the struct to [`ElementSerializer`], using the struct `name` as the key.
    ///
    /// # Errors
    /// Fails if `name` is rejected by `XmlName::try_from`.
    #[inline]
    fn serialize_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        ElementSerializer {
            ser: self,
            key: XmlName::try_from(name)?,
        }
        .serialize_struct(name, len)
    }

    /// Serializes variant as an element with name `variant`, producing
    ///
    /// ```xml
    /// <variant>
    ///   <!-- struct fields... -->
    /// </variant>
    /// ```
    ///
    /// If struct has no fields which is represented by nested elements or a text,
    /// it may be serialized as self-closed element `<variant/>`.
#[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { Err(SeError::Unsupported( format!("cannot serialize `$text` struct variant of `{}` enum", name).into(), )) } else { let ser = ElementSerializer { key: XmlName::try_from(variant)?, ser: self, }; ser.serialize_struct(name, len) } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Helper struct which remembers the classification of the last serialized element /// and reports it when the sequence ends pub struct Seq<'w, 'k, W: Write> { ser: ContentSerializer<'w, 'k, W>, /// Classification of the result of the last serialized element. last: WriteResult, } impl<'w, 'i, W: Write> SerializeSeq for Seq<'w, 'i, W> { type Ok = WriteResult; type Error = SeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { self.last = value.serialize(self.ser.new_seq_element_serializer(self.last.is_text()))?; // Write indent for next element if indents are used self.ser.write_indent = self.last.allow_indent(); Ok(()) } #[inline] fn end(self) -> Result { Ok(self.last) } } impl<'w, 'i, W: Write> SerializeTuple for Seq<'w, 'i, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'w, 'i, W: Write> SerializeTupleStruct for Seq<'w, 'i, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Make 
tests public to reuse types in `elements::tests` module #[cfg(test)] pub(super) mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; use WriteResult::*; #[derive(Debug, Serialize, PartialEq)] pub struct Unit; #[derive(Debug, Serialize, PartialEq)] #[serde(rename = "<\"&'>")] pub struct UnitEscaped; #[derive(Debug, Serialize, PartialEq)] pub struct Newtype(pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct Tuple(pub &'static str, pub usize); #[derive(Debug, Serialize, PartialEq)] pub struct Struct { pub key: &'static str, pub val: (usize, usize), } /// Struct with a special `$text` field #[derive(Debug, Serialize, PartialEq)] pub struct Text { pub before: &'static str, #[serde(rename = "$text")] pub content: T, pub after: &'static str, } /// Struct with a special `$value` field #[derive(Debug, Serialize, PartialEq)] pub struct Value { pub before: &'static str, #[serde(rename = "$value")] pub content: T, pub after: &'static str, } /// Attributes identified by starting with `@` character #[derive(Debug, Serialize, PartialEq)] pub struct Attributes { #[serde(rename = "@key")] pub key: &'static str, #[serde(rename = "@val")] pub val: (usize, usize), } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesBefore { #[serde(rename = "@key")] pub key: &'static str, pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub struct AttributesAfter { pub key: &'static str, #[serde(rename = "@val")] pub val: usize, } #[derive(Debug, Serialize, PartialEq)] pub enum Enum { Unit, /// Variant name becomes a tag name, but the name of variant is invalid /// XML name. 
Serialization of this element should be forbidden #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, /// Should be serialized as elements val: (usize, usize), }, Attributes { #[serde(rename = "@key")] key: &'static str, #[serde(rename = "@val")] val: (usize, usize), }, AttributesBefore { #[serde(rename = "@key")] key: &'static str, val: usize, }, AttributesAfter { key: &'static str, #[serde(rename = "@val")] val: usize, }, } #[derive(Debug, Serialize, PartialEq)] pub enum SpecialEnum { /// Struct variant with a special `$text` field Text { before: &'static str, #[serde(rename = "$text")] content: T, after: &'static str, }, /// Struct variant with a special `$value` field Value { before: &'static str, #[serde(rename = "$value")] content: T, after: &'static str, }, } mod without_indent { use super::Struct; use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { serialize_as!($name: $data => $expected, WriteResult::Element); }; ($name:ident: $data:expr => $expected:expr, $result:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }; let result = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(result, $result); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We could write something before fail // assert_eq!(buffer, ""); } }; } // Primitives is serialized in the same way as for SimpleTypeSerializer serialize_as!(false_: false => "false", Text); serialize_as!(true_: true => "true", Text); serialize_as!(i8_: -42i8 => "-42", Text); serialize_as!(i16_: -4200i16 => "-4200", Text); serialize_as!(i32_: -42000000i32 => "-42000000", Text); serialize_as!(i64_: -42000000000000i64 => "-42000000000000", Text); serialize_as!(isize_: -42000000isize => "-42000000", Text); serialize_as!(u8_: 42u8 => "42", Text); serialize_as!(u16_: 4200u16 => "4200", Text); serialize_as!(u32_: 42000000u32 => "42000000", Text); serialize_as!(u64_: 42000000000000u64 => "42000000000000", Text); serialize_as!(usize_: 42000000usize => "42000000", Text); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000", Text); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000", Text); } serialize_as!(f32_: 4.2f32 => "4.2", Text); serialize_as!(f64_: 4.2f64 => "4.2", Text); serialize_as!(char_non_escaped: 'h' => "h", SensitiveText); serialize_as!(char_lt: '<' => "<", SensitiveText); serialize_as!(char_gt: '>' => ">", SensitiveText); serialize_as!(char_amp: '&' => "&", SensitiveText); serialize_as!(char_apos: '\'' => "'", SensitiveText); serialize_as!(char_quot: '"' => """, SensitiveText); serialize_as!(char_space: ' ' => " ", SensitiveText); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string", SensitiveText); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>", SensitiveText); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => "", SensitiveNothing); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string", SensitiveText); serialize_as!(option_some_empty_str: Some("") => "", SensitiveText); serialize_as!(unit: () => "", Nothing); serialize_as!(unit_struct: Unit => "", Nothing); serialize_as!(unit_struct_escaped: UnitEscaped => "", Nothing); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42", Text); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
err!(seq: vec![1, 2, 3] => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(seq_empty: Vec::::new() => "", SensitiveNothing); err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: Tuple("first", 42) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); /// Special field name `$text` should be serialized as a text content mod text_field { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => "\ answer\ 42 42\ answer\ "); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\ answer\ 42 42\ answer\ "); } /// `$text` field inside a struct variant of an enum mod enum_with_text_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), after: 
"answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$value` field inside a struct variant of an enum mod enum_with_value_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: 
Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! err!(seq: SpecialEnum::Value { before: "answer", content: vec![1, 2, 3], after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); err!(tuple: SpecialEnum::Value { before: "answer", content: ("<\"&'>", "with\t\n\r spaces", 3usize), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: SpecialEnum::Value { before: "answer", content: Tuple("first", 42), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => "\ answer\ \ answer\ 42\ 42\ \ answer\ "); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => r#"42"#); 
serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => r#"answer"#); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => r#"answer"#); } } mod with_indent { use super::Struct; use super::*; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { serialize_as!($name: $data => $expected, WriteResult::Element); }; ($name:ident: $data:expr => $expected:expr, $result:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }; let result = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(result, $result); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false", Text); serialize_as!(true_: true => "true", Text); serialize_as!(i8_: -42i8 => "-42", Text); serialize_as!(i16_: -4200i16 => "-4200", Text); serialize_as!(i32_: -42000000i32 => "-42000000", Text); serialize_as!(i64_: -42000000000000i64 => "-42000000000000", Text); serialize_as!(isize_: -42000000isize => "-42000000", Text); serialize_as!(u8_: 42u8 => "42", Text); serialize_as!(u16_: 4200u16 => "4200", Text); serialize_as!(u32_: 42000000u32 => "42000000", Text); serialize_as!(u64_: 42000000000000u64 => "42000000000000", Text); serialize_as!(usize_: 42000000usize => "42000000", Text); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000", Text); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000", Text); } serialize_as!(f32_: 4.2f32 => "4.2", Text); serialize_as!(f64_: 4.2f64 => "4.2", Text); serialize_as!(char_non_escaped: 'h' => "h", SensitiveText); serialize_as!(char_lt: '<' => "<", SensitiveText); serialize_as!(char_gt: '>' => ">", SensitiveText); serialize_as!(char_amp: '&' => "&", SensitiveText); serialize_as!(char_apos: '\'' => "'", SensitiveText); serialize_as!(char_quot: '"' => """, SensitiveText); serialize_as!(char_space: ' ' => " ", SensitiveText); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string", SensitiveText); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>", SensitiveText); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::::None => "", SensitiveNothing); serialize_as!(option_some: Some(Enum::Unit) => ""); serialize_as!(unit: () => "", Nothing); serialize_as!(unit_struct: Unit => "", Nothing); serialize_as!(unit_struct_escaped: UnitEscaped => "", Nothing); // Unlike SimpleTypeSerializer, enumeration values serialized as tags serialize_as!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); // Newtypes recursively applies ContentSerializer serialize_as!(newtype: Newtype(42) => "42", Text); serialize_as!(enum_newtype: Enum::Newtype(42) => "42"); err!(seq: vec![1, 2, 3] => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(seq_empty: Vec::::new() => "", SensitiveNothing); err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); 
err!(tuple_struct: Tuple("first", 42) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); serialize_as!(enum_tuple: Enum::Tuple("first", 42) => "first\n\ 42"); // Structured types cannot be serialized without surrounding tag, which // only `enum` can provide err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); serialize_as!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); /// Special field name `$text` should be serialized as text content mod text_field { use super::*; use pretty_assertions::assert_eq; err!(map: BTreeMap::from([("$text", 2), ("_3", 4)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Text { before: "answer", content: (42, 42), after: "answer", } => "\n \ answer42 42answer\n\ "); serialize_as!(enum_struct: SpecialEnum::Text { before: "answer", content: (42, 42), after: "answer", } => "\n \ answer42 42answer\n\ "); } /// `$text` field inside a struct variant of an enum mod enum_with_text_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer", $expected, "answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: SpecialEnum::Text { before: "answer", content: Enum::Newtype(42), 
after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: SpecialEnum::Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: SpecialEnum::Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: SpecialEnum::Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: SpecialEnum::Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$value` field inside a struct variant of an enum mod enum_with_value_field { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: SpecialEnum::Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer", $expected, "answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: SpecialEnum::Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => "\n "); value!(unit_struct: Unit => "\n "); value!(unit_struct_escaped: UnitEscaped => "\n "); value!(enum_unit: Enum::Unit => "\n \n "); err!(enum_unit_escaped: SpecialEnum::Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name 
`<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "\n 42\n "); // Note that sequences of primitives serialized without delimiters! err!(seq: SpecialEnum::Value { before: "answer", content: vec![1, 2, 3], after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); err!(tuple: SpecialEnum::Value { before: "answer", content: ("<\"&'>", "with\t\n\r spaces", 3usize), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: SpecialEnum::Value { before: "answer", content: Tuple("first", 42), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ 42\n "); // We cannot wrap map or struct in any container and should not // flatten it, so it is impossible to serialize maps and structs err!(map: SpecialEnum::Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: SpecialEnum::Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => "\n \ \n \ answer\n \ \n \ answer\n \ 42\n \ 42\n \ \n \ answer\n \ \n "); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ \n \ answer\n \ 42\n \ 42\n \ \n "); } mod attributes { use super::*; use pretty_assertions::assert_eq; err!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); err!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => Unsupported("serialization of map types is not supported in `$value` field")); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } 
=> r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); serialize_as!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(enum_before: Enum::AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(enum_after: Enum::AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); } } } quick-xml-0.38.4/src/se/element.rs000064400000000000000000002630071046102023000150370ustar 00000000000000//! Contains serializer for an XML element use crate::de::{TEXT_KEY, VALUE_KEY}; use crate::se::content::ContentSerializer; use crate::se::key::QNameSerializer; use crate::se::simple_type::{QuoteTarget, SimpleSeq, SimpleTypeSerializer}; use crate::se::text::TextSerializer; use crate::se::{SeError, WriteResult, XmlName}; use serde::ser::{ Impossible, Serialize, SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, SerializeTuple, SerializeTupleStruct, SerializeTupleVariant, Serializer, }; use serde::serde_if_integer128; use std::fmt::Write; /// Writes simple type content between [`ElementSerializer::key`] tags. macro_rules! write_primitive { ($method:ident ( $ty:ty )) => { fn $method(self, value: $ty) -> Result { self.ser.write_wrapped(self.key, |ser| ser.$method(value)) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize element with specified name. Unlike the [`ContentSerializer`], /// this serializer never uses variant names of enum variants, and because of that /// it is unable to serialize any enum values, except unit variants. /// /// Returns the classification of the last written type. /// /// This serializer is used for an ordinary fields in structs, which are not special /// fields named `$text` ([`TEXT_KEY`]) or `$value` ([`VALUE_KEY`]). 
`$text` field /// should be serialized using [`SimpleTypeSerializer`] and `$value` field should be /// serialized using [`ContentSerializer`]. /// /// This serializer does the following: /// - numbers are converted to a decimal representation and serialized as `<key>value</key>`; /// - booleans are serialized either as `<key>true</key>` or `<key>false</key>`; /// - strings and characters are serialized as `<key>value</key>`. In particular, /// an empty string is serialized as `<key/>`; /// - `None` is serialized as `<key/>`; /// - `Some` and newtypes are serialized as an inner type using the same serializer; /// - units (`()`) and unit structs are serialized as `<key/>`; /// - sequences, tuples and tuple structs are serialized as repeated `<key>` tag. /// In particular, empty sequence is serialized to nothing; /// - structs are serialized as a sequence of fields wrapped in a `<key>` tag. Each /// field is serialized recursively using either `ElementSerializer`, [`ContentSerializer`] /// (`$value` fields), or [`SimpleTypeSerializer`] (`$text` fields). /// In particular, the empty struct is serialized as `<key/>`; /// - maps are serialized as a sequence of entries wrapped in a `<key>` tag. If key is /// serialized to a special name, the same rules as for struct fields are applied. /// In particular, the empty map is serialized as `<key/>`; /// - enums: /// - unit variants are serialized as `<key>variant</key>`; /// - other variants are not supported ([`SeError::Unsupported`] is returned); /// /// Usage of empty tags depends on the [`ContentSerializer::expand_empty_elements`] setting. 
pub struct ElementSerializer<'w, 'k, W: Write> { /// The inner serializer that contains the settings and mostly do the actual work pub ser: ContentSerializer<'w, 'k, W>, /// Tag name used to wrap serialized types except enum variants which uses the variant name pub(super) key: XmlName<'k>, } impl<'w, 'k, W: Write> Serializer for ElementSerializer<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; type SerializeSeq = Self; type SerializeTuple = Self; type SerializeTupleStruct = Self; type SerializeTupleVariant = Impossible; type SerializeMap = Map<'w, 'k, W>; type SerializeStruct = Struct<'w, 'k, W>; type SerializeStructVariant = Struct<'w, 'k, W>; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_bytes(&[u8])); fn serialize_str(self, value: &str) -> Result { if value.is_empty() { self.ser.write_empty(self.key) } else { self.ser .write_wrapped(self.key, |ser| ser.serialize_str(value)) } } /// By serde contract we should serialize key of [`None`] values. If someone /// wants to skip the field entirely, he should use /// `#[serde(skip_serializing_if = "Option::is_none")]`. 
/// /// In XML when we serialize field, we write field name as: /// - element name, or /// - attribute name /// /// and field value as /// - content of the element, or /// - attribute value /// /// So serialization of `None` works the same as [serialization of `()`](#method.serialize_unit) fn serialize_none(self) -> Result { self.serialize_unit() } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit(self) -> Result { self.ser.write_empty(self.key) } fn serialize_unit_struct(self, _name: &'static str) -> Result { self.ser.write_empty(self.key) } /// Writes a tag with name [`Self::key`] and content of unit variant inside. /// If variant is a special `$text` value, then empty tag `` is written. /// Otherwise a `variant` is written. fn serialize_unit_variant( self, name: &'static str, variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { self.ser.write_empty(self.key) } else { self.ser.write_wrapped(self.key, |ser| { ser.serialize_unit_variant(name, variant_index, variant) }) } } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// Always returns [`SeError::Unsupported`]. Newtype variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}`", name, variant ) .into(), )) } #[inline] fn serialize_seq(self, _len: Option) -> Result { Ok(self) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.serialize_seq(Some(len)) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, len: usize, ) -> Result { self.serialize_tuple(len) } /// Always returns [`SeError::Unsupported`]. 
Tuple variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}`", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Ok(Map { ser: self.serialize_struct("", 0)?, key: None, }) } #[inline] fn serialize_struct( mut self, _name: &'static str, _len: usize, ) -> Result { self.ser.write_indent()?; self.ser.indent.increase(); self.ser.writer.write_char('<')?; self.ser.writer.write_str(self.key.0)?; Ok(Struct { ser: self, children: String::new(), write_indent: true, }) } /// Always returns [`SeError::Unsupported`]. Struct variants can be serialized /// only in `$value` fields, which is serialized using [`ContentSerializer`]. #[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}`", name, variant ) .into(), )) } } impl<'w, 'k, W: Write> SerializeSeq for ElementSerializer<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { value.serialize(ElementSerializer { ser: self.ser.new_seq_element_serializer(true), key: self.key, })?; // Write indent for the next element self.ser.write_indent = true; Ok(()) } #[inline] fn end(self) -> Result { Ok(WriteResult::Element) } } impl<'w, 'k, W: Write> SerializeTuple for ElementSerializer<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl<'w, 'k, W: Write> 
SerializeTupleStruct for ElementSerializer<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for tuple variants. Tuples can be serialized in two modes: /// - wrapping each tuple field into a tag /// - without wrapping, fields are delimited by a space pub enum Tuple<'w, 'k, W: Write> { /// Serialize each tuple field as an element Element(ElementSerializer<'w, 'k, W>), /// Serialize tuple as an `xs:list`: space-delimited content of fields Text(SimpleSeq<&'w mut W>), } impl<'w, 'k, W: Write> SerializeTupleVariant for Tuple<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { match self { Self::Element(ser) => SerializeTuple::serialize_element(ser, value), Self::Text(ser) => SerializeTuple::serialize_element(ser, value), } } #[inline] fn end(self) -> Result { match self { Self::Element(ser) => SerializeTuple::end(ser), // Do not write indent after `$text` fields because it may be interpreted as // part of content when deserialize Self::Text(ser) => SerializeTuple::end(ser).map(|_| WriteResult::SensitiveText), } } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for struct variants, which serializes the struct contents inside /// of wrapping tags (`<${tag}>...`). /// /// Returns the classification of the last written type. 
/// /// Serialization of each field depends on it representation: /// - attributes written directly to the higher serializer /// - elements buffered into internal buffer and at the end written into higher /// serializer pub struct Struct<'w, 'k, W: Write> { ser: ElementSerializer<'w, 'k, W>, /// Buffer to store serialized elements // TODO: Customization point: allow direct writing of elements, but all // attributes should be listed first. Fail, if attribute encountered after // element. Use feature to configure children: String, /// Whether need to write indent after the last written field write_indent: bool, } impl<'w, 'k, W: Write> Struct<'w, 'k, W> { #[inline] fn write_field(&mut self, key: &str, value: &T) -> Result<(), SeError> where T: ?Sized + Serialize, { //TODO: Customization point: allow user to determine if field is attribute or not if let Some(key) = key.strip_prefix('@') { let key = XmlName::try_from(key)?; self.write_attribute(key, value) } else { self.write_element(key, value) } } /// Writes `value` as an attribute #[inline] fn write_attribute(&mut self, key: XmlName, value: &T) -> Result<(), SeError> where T: ?Sized + Serialize, { //TODO: Customization point: each attribute on new line self.ser.ser.writer.write_char(' ')?; self.ser.ser.writer.write_str(key.0)?; self.ser.ser.writer.write_char('=')?; //TODO: Customization point: preferred quote style self.ser.ser.writer.write_char('"')?; value.serialize(SimpleTypeSerializer { writer: &mut self.ser.ser.writer, target: QuoteTarget::DoubleQAttr, level: self.ser.ser.level, })?; self.ser.ser.writer.write_char('"')?; Ok(()) } /// Writes `value` either as a text content, or as an element. /// /// If `key` has a magic value [`TEXT_KEY`], then `value` serialized as a /// [simple type]. /// /// If `key` has a magic value [`VALUE_KEY`], then `value` serialized as a /// [content] without wrapping in tags, otherwise it is wrapped in /// `<${key}>...`. 
/// /// [simple type]: SimpleTypeSerializer /// [content]: ContentSerializer fn write_element(&mut self, key: &str, value: &T) -> Result<(), SeError> where T: ?Sized + Serialize, { let ser = ContentSerializer { writer: &mut self.children, level: self.ser.ser.level, indent: self.ser.ser.indent.borrow(), // If previous field does not require indent, do not write it write_indent: self.write_indent, text_format: self.ser.ser.text_format, allow_primitive: true, expand_empty_elements: self.ser.ser.expand_empty_elements, }; if key == TEXT_KEY { value.serialize(TextSerializer(ser.into_simple_type_serializer()?))?; // Text was written so we don't need to indent next field self.write_indent = false; } else if key == VALUE_KEY { // If element was written then we need to indent next field unless it is a text field self.write_indent = value.serialize(ser)?.allow_indent(); } else { value.serialize(ElementSerializer { key: XmlName::try_from(key)?, ser, })?; // Element was written so we need to indent next field unless it is a text field self.write_indent = true; } Ok(()) } } impl<'w, 'k, W: Write> SerializeStruct for Struct<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { self.write_field(key, value) } fn end(mut self) -> Result { self.ser.ser.indent.decrease(); if self.children.is_empty() { if self.ser.ser.expand_empty_elements { self.ser.ser.writer.write_str(">')?; } else { self.ser.ser.writer.write_str("/>")?; } } else { self.ser.ser.writer.write_char('>')?; self.ser.ser.writer.write_str(&self.children)?; if self.write_indent { self.ser.ser.indent.write_indent(&mut self.ser.ser.writer)?; } self.ser.ser.writer.write_str("')?; } Ok(WriteResult::Element) } } impl<'w, 'k, W: Write> SerializeStructVariant for Struct<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; #[inline] fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), 
Self::Error> where T: ?Sized + Serialize, { SerializeStruct::serialize_field(self, key, value) } #[inline] fn end(self) -> Result { SerializeStruct::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// pub struct Map<'w, 'k, W: Write> { ser: Struct<'w, 'k, W>, /// Key, serialized by `QNameSerializer` if consumer uses `serialize_key` + /// `serialize_value` calls instead of `serialize_entry` key: Option, } impl<'w, 'k, W: Write> Map<'w, 'k, W> { fn make_key(&mut self, key: &T) -> Result where T: ?Sized + Serialize, { key.serialize(QNameSerializer { writer: String::new(), }) } } impl<'w, 'k, W: Write> SerializeMap for Map<'w, 'k, W> { type Ok = WriteResult; type Error = SeError; fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if self.key.take().is_some() { return Err(SeError::Custom( "calling `serialize_key` twice without `serialize_value`".to_string(), )); } self.key = Some(self.make_key(key)?); Ok(()) } fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if let Some(key) = self.key.take() { return self.ser.write_field(&key, value); } Err(SeError::Custom( "calling `serialize_value` without call of `serialize_key`".to_string(), )) } fn serialize_entry(&mut self, key: &K, value: &V) -> Result<(), Self::Error> where K: ?Sized + Serialize, V: ?Sized + Serialize, { let key = self.make_key(key)?; self.ser.write_field(&key, value) } fn end(mut self) -> Result { if let Some(key) = self.key.take() { return Err(SeError::Custom(format!( "calling `end` without call of `serialize_value` for key `{key}`" ))); } SerializeStruct::end(self.ser) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::se::content::tests::*; use crate::se::{Indent, QuoteLevel, TextFormat}; use crate::utils::Bytes; use serde::Serialize; use 
std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct OptionalElements { a: Option<&'static str>, #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } #[derive(Debug, Serialize, PartialEq)] struct OptionalAttributes { #[serde(rename = "@a")] a: Option<&'static str>, #[serde(rename = "@b")] #[serde(skip_serializing_if = "Option::is_none")] b: Option<&'static str>, } mod without_indent { use super::*; use crate::se::content::tests::Struct; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), }; let result = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(result, WriteResult::Element); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000isize => "-42000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype`")); serialize_as!(seq: vec![1, 2, 3] => "1\ 2\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\ with\t\n\r spaces\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\ 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple`")); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\ <_1>2\ <_3>4\ "); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); err!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant 
`Enum::Struct`")); /// Special field name `$text` should be serialized as text content. /// Sequences serialized as an `xs:list` content mod text_field { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$text", $data)]) => concat!("", $expected,"")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: 
Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! 
text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Text { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! { text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot 
serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value_field { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! 
value { ($name:ident: $data:expr) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: BTreeMap::from([("$value", $data)]) => concat!("", $expected,"")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! { value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => 
"42"); // Note that sequences of primitives serialized without delimiters! err!(seq: BTreeMap::from([("$value", vec![1, 2, 3])]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new()); err!(tuple: BTreeMap::from([("$value", ("<\"&'>", "with\t\n\r spaces", 3usize))]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: BTreeMap::from([("$value", Tuple("first", 42))]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map in any container and should not // flatten it, so it is impossible to serialize maps err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: Value { before: "answer", content: $data, after: "answer", } => concat!( "answer", $expected, "answer", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => ""); value!(unit_struct: Unit => ""); value!(unit_struct_escaped: UnitEscaped => ""); value!(enum_unit: Enum::Unit => ""); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "42"); // Note that sequences of primitives serialized without delimiters! 
err!(seq: Value { before: "answer", content: vec![1, 2, 3], after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); err!(tuple: Value { before: "answer", content: ("<\"&'>", "with\t\n\r spaces", 3usize), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: Value { before: "answer", content: Tuple("first", 42), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "first\ 42"); // We cannot wrap map in any container and should not // flatten it, so it is impossible to serialize maps err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\ answer\ 42\ 42\ "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => r#"2"#); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => r#"42"#); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => r#"answer"#); err!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant `Enum::Attributes`")); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); 
serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some(""), } => r#""#); serialize_as!(some_non_empty: OptionalAttributes { a: Some("1"), b: Some("2"), } => r#""#); } } /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalElements { a: None, b: None } => "\ \ "); serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some(""), } => "\ \ \ "); serialize_as!(some_non_empty: OptionalElements { a: Some("1"), b: Some("2"), } => "\ 1\ 2\ "); } } mod with_indent { use super::*; use crate::se::content::tests::Struct; use crate::writer::Indentation; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected`. /// Writes `$data` using [`ElementSerializer`] with indent of two spaces. macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), }; let result = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(result, WriteResult::Element); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::Owned(Indentation::new(b' ', 2)), write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, key: XmlName("root"), }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } // We can write something before fail // assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000isize => "-42000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(char_space: ' ' => " "); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(option_some_empty: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype`")); serialize_as!(seq: vec![1, 2, 3] => "1\n\ 2\n\ 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'>\n\ with\t\n\r spaces\n\ 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first\n\ 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple`")); serialize_as!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => "\n \ <_1>2\n \ <_3>4\n\ "); serialize_as!(struct_: Struct { key: "answer", val: (42, 42) } => "\n \ answer\n \ 42\n \ 42\n\ "); err!(enum_struct: Enum::Struct { key: "answer", val: (42, 
42) } => Unsupported("cannot serialize enum struct variant `Enum::Struct`")); /// Special field name `$text` should be serialized as text content. /// Sequences serialized as an `xs:list` content mod text_field { use super::*; /// `$text` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_map BTreeMap::from([("$text", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_map BTreeMap::from([("$text", $data)]) => concat!("", $expected,"")); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("")); text!(unit: ()); text!(unit_struct: Unit); text!(unit_struct_escaped: UnitEscaped); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new()); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: Text { before: "answer", content: 
BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } /// `$text` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! text { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_struct Text { before: "answer", content: $data, after: "answer", } => concat!( "\n answer", $expected, "answer\n", )); }; } text!(false_: false => "false"); text!(true_: true => "true"); text!(i8_: -42i8 => "-42"); text!(i16_: -4200i16 => "-4200"); text!(i32_: -42000000i32 => "-42000000"); text!(i64_: -42000000000000i64 => "-42000000000000"); text!(isize_: -42000000isize => "-42000000"); text!(u8_: 42u8 => "42"); text!(u16_: 4200u16 => "4200"); text!(u32_: 42000000u32 => "42000000"); text!(u64_: 42000000000000u64 => "42000000000000"); text!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ text!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); text!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } text!(f32_: 4.2f32 => "4.2"); text!(f64_: 4.2f64 => "4.2"); text!(char_non_escaped: 'h' => "h"); text!(char_lt: '<' => "<"); text!(char_gt: '>' => ">"); text!(char_amp: '&' => "&"); text!(char_apos: '\'' => "'"); text!(char_quot: '"' => """); text!(char_space: ' ' => " "); text!(str_non_escaped: "non-escaped string" => "non-escaped string"); text!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Text { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); text!(option_none: Option::<&str>::None => ""); text!(option_some: Some("non-escaped string") => "non-escaped string"); text!(option_some_empty_str: Some("") => ""); text!(unit: () => ""); text!(unit_struct: Unit => ""); text!(unit_struct_escaped: UnitEscaped => ""); text!(enum_unit: Enum::Unit => "Unit"); text!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); text!(newtype: Newtype(42) => "42"); // We have no space where name of a variant can be stored err!(enum_newtype: Text { before: "answer", content: Enum::Newtype(42), after: "answer", } => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as text content value")); // Sequences are serialized separated by spaces, all spaces inside are escaped text!(seq: vec![1, 2, 3] => "1 2 3"); text!(seq_empty: Vec::::new() => ""); text!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> \ with spaces \ 3"); text!(tuple_struct: Tuple("first", 42) => "first 42"); // We have no space where name of a variant can be stored err!(enum_tuple: Text { before: "answer", content: Enum::Tuple("first", 42), after: "answer", } => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as text content value")); // Complex types cannot be serialized in `$text` field err!(map: 
Text { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("cannot serialize map as text content value")); err!(struct_: Text { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize struct `Struct` as text content value")); err!(enum_struct: Text { before: "answer", content: Enum::Struct { key: "answer", val: (42, 42) }, after: "answer", } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as text content value")); } } /// Special field name `$value` should be serialized using name, provided /// by the type of value instead of a key. Sequences serialized as a list /// of tags with that name (each element can have their own name) mod value_field { use super::*; /// `$value` key in a map mod map { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_map BTreeMap::from([("$value", $data)]) => ""); }; ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_map BTreeMap::from([("$value", $data)]) => concat!("", $expected,"")); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: BTreeMap::from([("$value", Bytes(b"<\"escaped & bytes'>"))]) => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("")); value!(unit: ()); value!(unit_struct: Unit); value!(unit_struct_escaped: UnitEscaped); value!(enum_unit: Enum::Unit => "\n \n"); err!(enum_unit_escaped: BTreeMap::from([("$value", Enum::UnitEscaped)]) => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "\n 42\n"); err!(seq: BTreeMap::from([("$value", vec![1, 2, 3])]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new()); err!(tuple: BTreeMap::from([("$value", ("<\"&'>", "with\t\n\r spaces", 3usize))]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: BTreeMap::from([("$value", Tuple("first", 42))]) => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ 42\n"); // We cannot wrap map in any container and should not // flatten it, so it is 
impossible to serialize maps err!(map: BTreeMap::from([("$value", BTreeMap::from([("_1", 2), ("_3", 4)]))]) => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: Struct { key: "answer", val: (42, 42) } => "\n \ \n \ answer\n \ 42\n \ 42\n \ \n"); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ \n \ answer\n \ 42\n \ 42\n \ \n"); } /// `$value` field inside a struct mod struct_ { use super::*; use pretty_assertions::assert_eq; macro_rules! value { ($name:ident: $data:expr => $expected:literal) => { serialize_as!($name: // Serialization started from ElementSerializer::serialize_struct Value { before: "answer", content: $data, after: "answer", } => concat!( "\n answer", $expected, "answer\n", )); }; } value!(false_: false => "false"); value!(true_: true => "true"); value!(i8_: -42i8 => "-42"); value!(i16_: -4200i16 => "-4200"); value!(i32_: -42000000i32 => "-42000000"); value!(i64_: -42000000000000i64 => "-42000000000000"); value!(isize_: -42000000isize => "-42000000"); value!(u8_: 42u8 => "42"); value!(u16_: 4200u16 => "4200"); value!(u32_: 42000000u32 => "42000000"); value!(u64_: 42000000000000u64 => "42000000000000"); value!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ value!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); value!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } value!(f32_: 4.2f32 => "4.2"); value!(f64_: 4.2f64 => "4.2"); value!(char_non_escaped: 'h' => "h"); value!(char_lt: '<' => "<"); value!(char_gt: '>' => ">"); value!(char_amp: '&' => "&"); value!(char_apos: '\'' => "'"); value!(char_quot: '"' => """); value!(char_space: ' ' => " "); value!(str_non_escaped: "non-escaped string" => "non-escaped string"); value!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Value { before: "answer", content: Bytes(b"<\"escaped & bytes'>"), after: "answer", } => Unsupported("`serialize_bytes` not supported yet")); value!(option_none: Option::<&str>::None => ""); value!(option_some: Some("non-escaped string") => "non-escaped string"); value!(option_some_empty_str: Some("") => ""); value!(unit: () => "\n "); value!(unit_struct: Unit => "\n "); value!(unit_struct_escaped: UnitEscaped => "\n "); value!(enum_unit: Enum::Unit => "\n \n "); err!(enum_unit_escaped: Value { before: "answer", content: Enum::UnitEscaped, after: "answer", } => Unsupported("character `<` is not allowed at the start of an XML name `<\"&'>`")); value!(newtype: Newtype(42) => "42"); value!(enum_newtype: Enum::Newtype(42) => "\n 42\n "); err!(seq: Value { before: "answer", content: vec![1, 2, 3], after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); value!(seq_empty: Vec::::new() => ""); err!(tuple: Value { before: "answer", content: ("<\"&'>", "with\t\n\r spaces", 3usize), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be deserialized back")); err!(tuple_struct: Value { before: "answer", content: Tuple("first", 42), after: "answer", } => Unsupported("consequent primitives would be serialized without delimiter and cannot be 
deserialized back")); value!(enum_tuple: Enum::Tuple("first", 42) => "\n \ first\n \ 42\n "); // We cannot wrap map in any container and should not // flatten it, so it is impossible to serialize maps err!(map: Value { before: "answer", content: BTreeMap::from([("_1", 2), ("_3", 4)]), after: "answer", } => Unsupported("serialization of map types is not supported in `$value` field")); value!(struct_: Value { before: "answer", content: Struct { key: "answer", val: (42, 42) }, after: "answer", } => "\n \ \n \ answer\n \ \n \ answer\n \ 42\n \ 42\n \ \n \ answer\n \ \n "); value!(enum_struct: Enum::Struct { key: "answer", val: (42, 42) } => "\n \ \n \ answer\n \ 42\n \ 42\n \ \n "); } } mod attributes { use super::*; use pretty_assertions::assert_eq; serialize_as!(map_attr: BTreeMap::from([("@key1", 1), ("@key2", 2)]) => r#""#); serialize_as!(map_mixed: BTreeMap::from([("@key1", 1), ("key2", 2)]) => "\n \ 2\n\ "); serialize_as!(struct_: Attributes { key: "answer", val: (42, 42) } => r#""#); serialize_as!(struct_before: AttributesBefore { key: "answer", val: 42 } => "\n \ 42\n\ "); serialize_as!(struct_after: AttributesAfter { key: "answer", val: 42 } => "\n \ answer\n\ "); err!(enum_: Enum::Attributes { key: "answer", val: (42, 42) } => Unsupported("cannot serialize enum struct variant `Enum::Attributes`")); /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalAttributes { a: None, b: None } => r#""#); serialize_as!(some_empty_str: OptionalAttributes { a: Some(""), b: Some("") } => r#""#); serialize_as!(some_non_empty: OptionalAttributes { a: Some("a"), b: Some("b") } => r#""#); } } /// Test for https://github.com/tafia/quick-xml/issues/252 mod optional { use super::*; use pretty_assertions::assert_eq; serialize_as!(none: OptionalElements { a: None, b: None } => "\n \ \n\ "); serialize_as!(some_empty_str: OptionalElements { a: Some(""), b: Some("") } => "\n \ \n \ \n\ "); 
serialize_as!(some_non_empty: OptionalElements { a: Some("a"), b: Some("b") } => "\n \ a\n \ b\n\ "); } } mod expand_empty_elements { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:expr) => { #[test] fn $name() { let mut buffer = String::new(); let ser = ElementSerializer { ser: ContentSerializer { writer: &mut buffer, level: QuoteLevel::Full, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: true, }, key: XmlName("root"), }; let result = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(result, WriteResult::Element); } }; } serialize_as!(option_some_empty: Some("") => ""); serialize_as!(option_some_empty_str: Some("") => ""); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(unit_struct_escaped: UnitEscaped => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); } } quick-xml-0.38.4/src/se/key.rs000064400000000000000000000252651046102023000142000ustar 00000000000000use crate::se::SeError; use serde::ser::{Impossible, Serialize, Serializer}; use serde::serde_if_integer128; use std::fmt::{self, Write}; /// A serializer, that ensures, that only plain types can be serialized, /// so result can be used as an XML tag or attribute name. /// /// This serializer does not check that name does not contain characters that /// [not allowed] in XML names, because in some cases it should pass names /// that would be filtered on higher level. /// /// [not allowed]: https://www.w3.org/TR/xml11/#sec-common-syn pub struct QNameSerializer { /// Writer to which this serializer writes content pub writer: W, } impl QNameSerializer { #[inline] fn write_str(&mut self, value: &str) -> Result<(), SeError> { Ok(self.writer.write_str(value)?) 
} #[inline] fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> Result<(), SeError> { Ok(self.writer.write_fmt(args)?) } } impl Serializer for QNameSerializer { type Ok = W; type Error = SeError; type SerializeSeq = Impossible; type SerializeTuple = Impossible; type SerializeTupleStruct = Impossible; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { self.write_str(value)?; Ok(self.writer) } /// Because unit type can be represented only by empty string which is not /// a valid XML name, serialization of unit returns `Err(Unsupported)` fn serialize_unit(self) -> Result { Err(SeError::Unsupported( "cannot serialize unit type `()` as an XML tag name".into(), )) } /// Because unit struct can be represented only by empty string which is not /// a valid XML name, serialization of unit struct returns `Err(Unsupported)` fn serialize_unit_struct(self, name: &'static str) -> Result { Err(SeError::Unsupported( format!("cannot serialize unit struct `{}` as an XML tag name", name).into(), )) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } fn serialize_seq(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize sequence as an XML tag name".into(), )) } fn serialize_tuple(self, _len: usize) -> Result { Err(SeError::Unsupported( "cannot serialize tuple as an XML tag name".into(), )) } fn serialize_tuple_struct( self, name: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize tuple struct `{}` as an XML 
tag name", name ) .into(), )) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize map as an XML tag name".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize struct `{}` as an XML tag name", name).into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as an XML tag name", name, variant ) .into(), )) } } #[cfg(test)] mod tests { use super::*; use crate::utils::Bytes; use pretty_assertions::assert_eq; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct Unit; #[derive(Debug, Serialize, PartialEq)] struct Newtype(bool); #[derive(Debug, Serialize, PartialEq)] struct Tuple(&'static str, usize); #[derive(Debug, Serialize, PartialEq)] struct Struct { key: &'static str, val: usize, } #[derive(Debug, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(bool), Tuple(&'static str, usize), Struct { key: &'static str, val: usize, }, } /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = QNameSerializer { writer: String::new(), }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! 
err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = QNameSerializer { writer: &mut buffer, }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000isize => "-42000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000usize => "42000000"); serde_if_integer128! { serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => "\""); serialize_as!(str_valid_name: "valid-name" => "valid-name"); serialize_as!(str_space: "string with spaces" => "string with spaces"); serialize_as!(str_lt: "string<" => "string<"); serialize_as!(str_gt: "string>" => "string>"); serialize_as!(str_amp: "string&" => "string&"); serialize_as!(str_apos: "string'" => "string'"); serialize_as!(str_quot: "string\"" => "string\""); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); 
serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); err!(unit: () => Unsupported("cannot serialize unit type `()` as an XML tag name")); err!(unit_struct: Unit => Unsupported("cannot serialize unit struct `Unit` as an XML tag name")); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<\"&'>"); serialize_as!(newtype: Newtype(true) => "true"); err!(enum_newtype: Enum::Newtype(false) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an XML tag name")); err!(seq: vec![1, 2, 3] => Unsupported("cannot serialize sequence as an XML tag name")); err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) => Unsupported("cannot serialize tuple as an XML tag name")); err!(tuple_struct: Tuple("first", 42) => Unsupported("cannot serialize tuple struct `Tuple` as an XML tag name")); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as an XML tag name")); err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) => Unsupported("cannot serialize map as an XML tag name")); err!(struct_: Struct { key: "answer", val: 42 } => Unsupported("cannot serialize struct `Struct` as an XML tag name")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an XML tag name")); } quick-xml-0.38.4/src/se/mod.rs000064400000000000000000001014411046102023000141560ustar 00000000000000//! Module to handle custom serde `Serializer` /// Implements writing primitives to the underlying writer. /// Implementor must provide `write_str(self, &str) -> Result<(), DeError>` method macro_rules! 
write_primitive { ($method:ident ( $ty:ty )) => { fn $method(mut self, value: $ty) -> Result { self.write_fmt(format_args!("{}", value))?; Ok(self.writer) } }; () => { fn serialize_bool(mut self, value: bool) -> Result { self.write_str(if value { "true" } else { "false" })?; Ok(self.writer) } write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! { write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); fn serialize_char(self, value: char) -> Result { self.serialize_str(value.encode_utf8(&mut [0u8; 4])) } fn serialize_bytes(self, _value: &[u8]) -> Result { //TODO: customization point - allow user to decide how to encode bytes Err(Self::Error::Unsupported( "`serialize_bytes` not supported yet".into(), )) } fn serialize_none(self) -> Result { Ok(self.writer) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { self.serialize_str(variant) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// mod content; mod element; pub(crate) mod key; pub(crate) mod simple_type; mod text; use self::content::ContentSerializer; use self::element::{ElementSerializer, Map, Struct, Tuple}; use crate::de::TEXT_KEY; use crate::writer::{Indentation, ToFmtWrite}; use serde::ser::{self, Serialize}; use serde::serde_if_integer128; use std::fmt::Write; use std::str::from_utf8; pub use self::simple_type::SimpleTypeSerializer; pub use 
crate::errors::serialize::SeError; /// Serialize struct into a `Write`r. /// /// Returns the classification of the last written type. /// /// # Examples /// /// ``` /// # use quick_xml::se::to_writer; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// let mut buffer = String::new(); /// to_writer(&mut buffer, &data).unwrap(); /// assert_eq!( /// buffer, /// // The root tag name is automatically deduced from the struct name /// // This will not work for other types or struct with #[serde(flatten)] fields /// "\ /// element content\ /// text content\ /// " /// ); /// ``` pub fn to_writer(mut writer: W, value: &T) -> Result where W: Write, T: ?Sized + Serialize, { value.serialize(Serializer::new(&mut writer)) } /// Serialize struct into a `io::Write`r restricted to utf-8 encoding. /// /// Returns the classification of the last written type. 
/// /// # Examples /// /// ``` /// # use quick_xml::se::to_utf8_io_writer; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// # use std::io::BufWriter; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// let mut buffer = Vec::new(); /// to_utf8_io_writer(&mut BufWriter::new(&mut buffer), &data).unwrap(); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// // The root tag name is automatically deduced from the struct name /// // This will not work for other types or struct with #[serde(flatten)] fields /// "\ /// element content\ /// text content\ /// " /// ); /// ``` pub fn to_utf8_io_writer(writer: W, value: &T) -> Result where W: std::io::Write, T: ?Sized + Serialize, { value.serialize(Serializer::new(&mut ToFmtWrite(writer))) } /// Serialize struct into a `String`. 
/// /// # Examples /// /// ``` /// # use quick_xml::se::to_string; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// assert_eq!( /// to_string(&data).unwrap(), /// // The root tag name is automatically deduced from the struct name /// // This will not work for other types or struct with #[serde(flatten)] fields /// "\ /// element content\ /// text content\ /// " /// ); /// ``` pub fn to_string(value: &T) -> Result where T: ?Sized + Serialize, { let mut buffer = String::new(); to_writer(&mut buffer, value)?; Ok(buffer) } /// Serialize struct into a `Write`r using specified root tag name. /// `root_tag` should be valid [XML name], otherwise error is returned. /// /// Returns the classification of the last written type. /// /// # Examples /// /// ``` /// # use quick_xml::se::to_writer_with_root; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// let mut buffer = String::new(); /// to_writer_with_root(&mut buffer, "top-level", &data).unwrap(); /// assert_eq!( /// buffer, /// "\ /// element content\ /// text content\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn to_writer_with_root( mut writer: W, root_tag: &str, value: &T, ) -> Result where W: Write, T: ?Sized + Serialize, { value.serialize(Serializer::with_root(&mut writer, Some(root_tag))?) 
} /// Serialize struct into a `String` using specified root tag name. /// `root_tag` should be valid [XML name], otherwise error is returned. /// /// # Examples /// /// ``` /// # use quick_xml::se::to_string_with_root; /// # use serde::Serialize; /// # use pretty_assertions::assert_eq; /// #[derive(Serialize)] /// struct Root<'a> { /// #[serde(rename = "@attribute")] /// attribute: &'a str, /// element: &'a str, /// #[serde(rename = "$text")] /// text: &'a str, /// } /// /// let data = Root { /// attribute: "attribute content", /// element: "element content", /// text: "text content", /// }; /// /// assert_eq!( /// to_string_with_root("top-level", &data).unwrap(), /// "\ /// element content\ /// text content\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn to_string_with_root(root_tag: &str, value: &T) -> Result where T: ?Sized + Serialize, { let mut buffer = String::new(); to_writer_with_root(&mut buffer, root_tag, value)?; Ok(buffer) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Defines the format for text content serialization #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] pub enum TextFormat { /// Serialize as regular text content with escaping Text, /// Serialize as CDATA section without escaping CData, } /// Defines which characters would be escaped in [`Text`] events and attribute /// values. /// /// [`Text`]: crate::events::Event::Text #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum QuoteLevel { /// Performs escaping, escape all characters that could have special meaning /// in the XML. This mode is compatible with SGML specification. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `>` | `>` /// `&` | `&` /// `"` | `"` /// `'` | `'` Full, /// Performs escaping that is compatible with SGML specification. 
/// /// This level adds escaping of `>` to the `Minimal` level, which is [required] /// for compatibility with SGML. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `>` | `>` /// `&` | `&` /// /// [required]: https://www.w3.org/TR/xml11/#syntax Partial, /// Performs the minimal possible escaping, escape only strictly necessary /// characters. /// /// Characters that will be replaced: /// /// Original | Replacement /// ---------|------------ /// `<` | `<` /// `&` | `&` Minimal, } /// Classification of the type written by the serializer. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum WriteResult { /// Text with insignificant spaces was written, for example a number. Adding indent to the /// serialized data does not change meaning of the data. Text, /// The XML tag was written. Adding indent to the serialized data does not change meaning of the data. Element, /// Nothing was written (i. e. serialized type not represented in XML a all). Adding indent to the /// serialized data does not change meaning of the data. This is returned for units, unit structs /// and unit variants. Nothing, /// Text with significant spaces was written, for example a string. Adding indent to the /// serialized data may change meaning of the data. SensitiveText, /// `None` was serialized and nothing was written. `None` does not represented in XML, /// but adding indent after it may change meaning of the data. SensitiveNothing, } impl WriteResult { /// Returns `true` if indent should be written after the object (if configured) and `false` otherwise. #[inline] pub fn allow_indent(&self) -> bool { matches!(self, Self::Element | Self::Nothing) } /// Returns `true` if self is `Text` or `SensitiveText`. 
#[inline] pub fn is_text(&self) -> bool { matches!(self, Self::Text | Self::SensitiveText) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Implements serialization method by forwarding it to the serializer created by /// the helper method [`Serializer::ser`]. macro_rules! forward { ($name:ident($ty:ty)) => { fn $name(self, value: $ty) -> Result { self.ser(&concat!("`", stringify!($ty), "`"))?.$name(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Almost all characters can form a name. Citation from : /// /// > The overall philosophy of names has changed since XML 1.0. Whereas XML 1.0 /// > provided a rigid definition of names, wherein everything that was not permitted /// > was forbidden, XML 1.1 names are designed so that everything that is not /// > forbidden (for a specific reason) is permitted. Since Unicode will continue /// > to grow past version 4.0, further changes to XML can be avoided by allowing /// > almost any character, including those not yet assigned, in names. /// /// const fn is_xml11_name_start_char(ch: char) -> bool { // Not need to use macro when core primitives is enough #[allow(clippy::match_like_matches_macro)] match ch { ':' | 'A'..='Z' | '_' | 'a'..='z' | '\u{00C0}'..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}' | '\u{10000}'..='\u{EFFFF}' => true, _ => false, } } /// const fn is_xml11_name_char(ch: char) -> bool { match ch { '-' | '.' 
| '0'..='9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => { true } _ => is_xml11_name_start_char(ch), } } /// Helper struct to self-defense from errors #[derive(Clone, Copy, Debug, PartialEq)] struct XmlName<'n>(&'n str); impl<'n> XmlName<'n> { /// Checks correctness of the XML name according to [XML 1.1 specification] /// /// [XML 1.1 specification]: https://www.w3.org/TR/xml11/#NT-Name pub fn try_from(name: &'n str) -> Result, SeError> { //TODO: Customization point: allow user to decide if he want to reject or encode the name match name.chars().next() { Some(ch) if !is_xml11_name_start_char(ch) => Err(SeError::Unsupported( format!("character `{ch}` is not allowed at the start of an XML name `{name}`") .into(), )), _ => match name.matches(|ch| !is_xml11_name_char(ch)).next() { Some(s) => Err(SeError::Unsupported( format!("character `{s}` is not allowed in an XML name `{name}`").into(), )), None => Ok(XmlName(name)), }, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) enum Indent<'i> { /// No indent should be written before the element None, /// The specified indent should be written. The type owns the buffer with indent Owned(Indentation), /// The specified indent should be written. 
The type borrows buffer with indent /// from its owner Borrow(&'i mut Indentation), } impl<'i> Indent<'i> { pub fn borrow(&mut self) -> Indent<'_> { match self { Self::None => Indent::None, Self::Owned(ref mut i) => Indent::Borrow(i), Self::Borrow(i) => Indent::Borrow(i), } } pub fn increase(&mut self) { match self { Self::None => {} Self::Owned(i) => i.grow(), Self::Borrow(i) => i.grow(), } } pub fn decrease(&mut self) { match self { Self::None => {} Self::Owned(i) => i.shrink(), Self::Borrow(i) => i.shrink(), } } pub fn write_indent(&mut self, mut writer: W) -> Result<(), SeError> { match self { Self::None => {} Self::Owned(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } Self::Borrow(i) => { writer.write_char('\n')?; writer.write_str(from_utf8(i.current())?)?; } } Ok(()) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A Serializer. /// /// Returns the classification of the last written type. pub struct Serializer<'w, 'r, W: Write> { ser: ContentSerializer<'w, 'r, W>, /// Name of the root tag. If not specified, deduced from the structure name root_tag: Option>, } impl<'w, 'r, W: Write> Serializer<'w, 'r, W> { /// Creates a new `Serializer` that uses struct name as a root tag name. /// /// Note, that attempt to serialize a non-struct (including unit structs /// and newtype structs) will end up to an error. Use `with_root` to create /// serializer with explicitly defined root element name pub fn new(writer: &'w mut W) -> Self { Self { ser: ContentSerializer { writer, level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, root_tag: None, } } /// Creates a new `Serializer` that uses specified root tag name. `name` should /// be valid [XML name], otherwise error is returned. 
/// /// # Examples /// /// When serializing a primitive type, only its representation will be written: /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// let mut buffer = String::new(); /// let ser = Serializer::with_root(&mut buffer, Some("root")).unwrap(); /// /// "node".serialize(ser).unwrap(); /// assert_eq!(buffer, "node"); /// ``` /// /// When serializing a struct, newtype struct, unit struct or tuple `root_tag` /// is used as tag name of root(s) element(s): /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Struct { /// question: String, /// answer: u32, /// } /// /// let mut buffer = String::new(); /// let ser = Serializer::with_root(&mut buffer, Some("root")).unwrap(); /// /// let data = Struct { /// question: "The Ultimate Question of Life, the Universe, and Everything".into(), /// answer: 42, /// }; /// /// data.serialize(ser).unwrap(); /// assert_eq!( /// buffer, /// "\ /// The Ultimate Question of Life, the Universe, and Everything\ /// 42\ /// " /// ); /// ``` /// /// [XML name]: https://www.w3.org/TR/xml11/#NT-Name pub fn with_root(writer: &'w mut W, root_tag: Option<&'r str>) -> Result { Ok(Self { ser: ContentSerializer { writer, level: QuoteLevel::Partial, indent: Indent::None, write_indent: false, text_format: TextFormat::Text, allow_primitive: true, expand_empty_elements: false, }, root_tag: root_tag.map(XmlName::try_from).transpose()?, }) } /// Enable or disable expansion of empty elements. Defaults to `false`. 
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::Serializer; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Struct { /// question: Option, /// } /// /// let mut buffer = String::new(); /// let mut ser = Serializer::new(&mut buffer); /// ser.expand_empty_elements(true); /// /// let data = Struct { /// question: None, /// }; /// /// data.serialize(ser).unwrap(); /// assert_eq!( /// buffer, /// "" /// ); /// ``` pub fn expand_empty_elements(&mut self, expand: bool) -> &mut Self { self.ser.expand_empty_elements = expand; self } /// Set the text format used for serializing text content. /// /// - [`TextFormat::Text`]: Regular XML escaping (default) /// - [`TextFormat::CData`]: CDATA sections for text content /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::se::{Serializer, TextFormat}; /// /// #[derive(Debug, PartialEq, Serialize)] /// struct Document { /// #[serde(rename = "$text")] /// content: String, /// } /// /// let mut buffer = String::new(); /// let mut ser = Serializer::with_root(&mut buffer, Some("doc")).unwrap(); /// ser.text_format(TextFormat::CData); /// /// let data = Document { /// content: "Content with & entities".to_string(), /// }; /// /// data.serialize(ser).unwrap(); /// assert_eq!(buffer, " & entities]]>"); /// ``` pub fn text_format(&mut self, format: TextFormat) -> &mut Self { self.ser.text_format = format; self } /// Configure indent for a serializer pub fn indent(&mut self, indent_char: char, indent_size: usize) -> &mut Self { self.ser.indent = Indent::Owned(Indentation::new(indent_char as u8, indent_size)); self } /// Set the level of quoting used when writing texts /// /// Default: [`QuoteLevel::Minimal`] pub fn set_quote_level(&mut self, level: QuoteLevel) -> &mut Self { self.ser.level = level; self } /// Set the indent object for a serializer pub(crate) fn set_indent(&mut self, indent: 
Indent<'r>) -> &mut Self { self.ser.indent = indent; self } /// Creates actual serializer or returns an error if root tag is not defined. /// In that case `err` contains the name of type that cannot be serialized. fn ser(self, err: &str) -> Result, SeError> { if let Some(key) = self.root_tag { Ok(ElementSerializer { ser: self.ser, key }) } else { Err(SeError::Unsupported( format!("cannot serialize {} without defined root tag", err).into(), )) } } /// Creates actual serializer using root tag or a specified `key` if root tag /// is not defined. Returns an error if root tag is not defined and a `key` /// does not conform [XML rules](XmlName::try_from) for names. fn ser_name(self, key: &'static str) -> Result, SeError> { Ok(ElementSerializer { ser: self.ser, key: match self.root_tag { Some(key) => key, None => XmlName::try_from(key)?, }, }) } } impl<'w, 'r, W: Write> ser::Serializer for Serializer<'w, 'r, W> { type Ok = WriteResult; type Error = SeError; type SerializeSeq = ElementSerializer<'w, 'r, W>; type SerializeTuple = ElementSerializer<'w, 'r, W>; type SerializeTupleStruct = ElementSerializer<'w, 'r, W>; type SerializeTupleVariant = Tuple<'w, 'r, W>; type SerializeMap = Map<'w, 'r, W>; type SerializeStruct = Struct<'w, 'r, W>; type SerializeStructVariant = Struct<'w, 'r, W>; forward!(serialize_bool(bool)); forward!(serialize_i8(i8)); forward!(serialize_i16(i16)); forward!(serialize_i32(i32)); forward!(serialize_i64(i64)); forward!(serialize_u8(u8)); forward!(serialize_u16(u16)); forward!(serialize_u32(u32)); forward!(serialize_u64(u64)); serde_if_integer128! { forward!(serialize_i128(i128)); forward!(serialize_u128(u128)); } forward!(serialize_f32(f32)); forward!(serialize_f64(f64)); forward!(serialize_char(char)); forward!(serialize_str(&str)); forward!(serialize_bytes(&[u8])); fn serialize_none(self) -> Result { // Do not write indent after `Option` field with `None` value, because // this can be `Option`. 
Unfortunately, we do not known what the // type the option contains, so have no chance to adapt our behavior to it. // The safe variant is not to write indent Ok(WriteResult::SensitiveNothing) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } fn serialize_unit(self) -> Result { self.ser("`()`")?.serialize_unit() } fn serialize_unit_struct(self, name: &'static str) -> Result { self.ser_name(name)?.serialize_unit_struct(name) } fn serialize_unit_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { // We should write some text but we don't known what text to write Err(SeError::Unsupported( format!( "cannot serialize enum unit variant `{}::$text` as text content value", name ) .into(), )) } else { let name = XmlName::try_from(variant)?; self.ser.write_empty(name) } } fn serialize_newtype_struct( self, name: &'static str, value: &T, ) -> Result { self.ser_name(name)?.serialize_newtype_struct(name, value) } fn serialize_newtype_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, value: &T, ) -> Result { if variant == TEXT_KEY { value.serialize(self.ser.into_simple_type_serializer()?)?; // Do not write indent after `$text` variant because it may be interpreted as // part of content when deserialize Ok(WriteResult::SensitiveText) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; value.serialize(ser) } } fn serialize_seq(self, len: Option) -> Result { self.ser("sequence")?.serialize_seq(len) } fn serialize_tuple(self, len: usize) -> Result { self.ser("unnamed tuple")?.serialize_tuple(len) } fn serialize_tuple_struct( self, name: &'static str, len: usize, ) -> Result { self.ser_name(name)?.serialize_tuple_struct(name, len) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { self.ser .into_simple_type_serializer()? 
.serialize_tuple_struct(name, len) .map(Tuple::Text) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; ser.serialize_tuple_struct(name, len).map(Tuple::Element) } } fn serialize_map(self, len: Option) -> Result { self.ser("map")?.serialize_map(len) } fn serialize_struct( self, name: &'static str, len: usize, ) -> Result { self.ser_name(name)?.serialize_struct(name, len) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, len: usize, ) -> Result { if variant == TEXT_KEY { Err(SeError::Unsupported( format!( "cannot serialize enum struct variant `{}::$text` as text content value", name ) .into(), )) } else { let ser = ElementSerializer { ser: self.ser, key: XmlName::try_from(variant)?, }; ser.serialize_struct(name, len) } } } #[cfg(test)] mod quote_level { use super::*; use pretty_assertions::assert_eq; use serde::Serialize; #[derive(Debug, PartialEq, Serialize)] struct Element(&'static str); #[derive(Debug, PartialEq, Serialize)] struct Example { #[serde(rename = "@attribute")] attribute: &'static str, element: Element, } #[test] fn default_() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let ser = Serializer::new(&mut buffer); example.serialize(ser).unwrap(); assert_eq!( buffer, "\ special chars: &, <, >, \", '\ " ); } #[test] fn minimal() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let mut ser = Serializer::new(&mut buffer); ser.set_quote_level(QuoteLevel::Minimal); example.serialize(ser).unwrap(); assert_eq!( buffer, ", ", '\">\ special chars: &, <, >, \", '\ " ); } #[test] fn partial() { let example = Example { attribute: "special chars: &, <, >, \", '", element: Element("special chars: &, <, >, \", '"), }; let mut buffer = String::new(); let mut ser = 
/// Describes the place in an XML document a value is written to.
///
/// The escaping routines of this module match on this value (together with a
/// `QuoteLevel`) to decide which characters of the value have to be escaped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuoteTarget {
    /// Escape data for a text content. No additional escape symbols
    Text,
    /// Escape data for a double-quoted attribute. `"` always escaped
    DoubleQAttr,
    /// Escape data for a single-quoted attribute. `'` always escaped
    SingleQAttr,
    /// Escape data for a CDATA content. No escaping for `&` and `>`, but split
    /// content on `]]>` and make several CDATA sections
    CData,
}
{ writer.write_str(raw)?; } Ok(()) } /// Escapes atomic value that could be part of a `xs:list`. All whitespace characters /// additionally escaped fn escape_item(mut writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result where W: Write, { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (CData, _) => { let mut it = CDataIterator::new(value); if let Some(part) = it.next() { writer.write_str(part)?; } for part in it { writer.write_str("]]> escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items, cannot be used in the item b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), 
//---------------------------------------------------------------------- (SingleQAttr, Partial) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' | b'>' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), (SingleQAttr, Minimal) => escape_into(writer, value, |ch| match ch { // Spaces used as delimiters of list items b' ' | b'\r' | b'\n' | b'\t' => true, // Required characters to escape b'&' | b'<' => true, // Single quoted attribute should escape quote b'\'' => true, _ => false, }), } } /// Escapes XSD simple type value fn escape_list(mut writer: W, value: &str, target: QuoteTarget, level: QuoteLevel) -> fmt::Result where W: Write, { use QuoteLevel::*; use QuoteTarget::*; match (target, level) { (CData, _) => { for part in CDataIterator::new(value) { writer.write_str("")?; } Ok(()) } (_, Full) => escape_into(writer, value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' | b'\'' | b'\"' => true, _ => false, }), //---------------------------------------------------------------------- (Text, Partial) => escape_into(writer, value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, _ => false, }), (Text, Minimal) => escape_into(writer, value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, _ => false, }), //---------------------------------------------------------------------- (DoubleQAttr, Partial) => escape_into(writer, value, |ch| match ch { // Required characters to escape b'&' | b'<' | b'>' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), (DoubleQAttr, Minimal) => escape_into(writer, value, |ch| match ch { // Required characters to escape b'&' | b'<' => true, // Double quoted attribute should escape quote b'"' => true, _ => false, }), //---------------------------------------------------------------------- 
/// Generates a `Serializer` method named `$method` that accepts a value of
/// type `$ty`, writes it using its `Display` representation through
/// `self.write_fmt` and reports that something was written by returning
/// `Ok(true)`.
///
/// The macro must be expanded inside an `impl` that defines the associated
/// types `Ok` (constructible from `true`) and `Error` (convertible from the
/// error of `self.write_fmt`).
macro_rules! write_atomic {
    ($method:ident ( $ty:ty )) => {
        fn $method(mut self, value: $ty) -> Result<Self::Ok, Self::Error> {
            self.write_fmt(format_args!("{}", value))?;
            Ok(true)
        }
    };
}
/// /// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct AtomicSerializer { pub writer: W, pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, /// When `true` an `xs:list` delimiter (a space) should be written pub(crate) write_delimiter: bool, } impl AtomicSerializer { fn write_delimiter(&mut self) -> fmt::Result { if self.write_delimiter { // TODO: Customization point -- possible non-XML compatible extension to specify delimiter char return self.writer.write_char(' '); } Ok(()) } fn write_str(&mut self, value: &str) -> Result<(), SeError> { self.write_delimiter()?; Ok(self.writer.write_str(value)?) } fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> Result<(), SeError> { self.write_delimiter()?; Ok(self.writer.write_fmt(args)?) } } impl Serializer for AtomicSerializer { type Ok = bool; type Error = SeError; type SerializeSeq = Impossible; type SerializeTuple = Impossible; type SerializeTupleStruct = Impossible; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; fn serialize_bool(mut self, value: bool) -> Result { self.write_str(if value { "true" } else { "false" })?; Ok(true) } write_atomic!(serialize_i8(i8)); write_atomic!(serialize_i16(i16)); write_atomic!(serialize_i32(i32)); write_atomic!(serialize_i64(i64)); write_atomic!(serialize_u8(u8)); write_atomic!(serialize_u16(u16)); write_atomic!(serialize_u32(u32)); write_atomic!(serialize_u64(u64)); serde_if_integer128! 
{ write_atomic!(serialize_i128(i128)); write_atomic!(serialize_u128(u128)); } write_atomic!(serialize_f32(f32)); write_atomic!(serialize_f64(f64)); fn serialize_char(self, value: char) -> Result { self.serialize_str(value.encode_utf8(&mut [0u8; 4])) } fn serialize_str(mut self, value: &str) -> Result { if !value.is_empty() { self.write_delimiter()?; escape_item(self.writer, value, self.target, self.level)?; } Ok(!value.is_empty()) } fn serialize_bytes(self, _value: &[u8]) -> Result { //TODO: Customization point - allow user to decide how to encode bytes Err(SeError::Unsupported( "`serialize_bytes` not supported yet".into(), )) } fn serialize_none(self) -> Result { Ok(false) } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit(self) -> Result { Err(SeError::Unsupported( "cannot serialize unit type `()` as an `xs:list` item".into(), )) } /// We cannot store anything, so the absence of a unit and presence of it /// does not differ, so serialization of unit returns `Err(Unsupported)` fn serialize_unit_struct(self, name: &'static str) -> Result { Err(SeError::Unsupported( format!( "cannot serialize unit struct `{}` as an `xs:list` item", name ) .into(), )) } fn serialize_unit_variant( self, _name: &'static str, _variant_index: u32, variant: &'static str, ) -> Result { self.serialize_str(variant) } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as an `xs:list` item", name, variant ) 
.into(), )) } fn serialize_seq(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize sequence as an `xs:list` item".into(), )) } fn serialize_tuple(self, _len: usize) -> Result { Err(SeError::Unsupported( "cannot serialize tuple as an `xs:list` item".into(), )) } fn serialize_tuple_struct( self, name: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize tuple struct `{}` as an `xs:list` item", name ) .into(), )) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as an `xs:list` item", name, variant ) .into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize map as an `xs:list` item".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize struct `{}` as an `xs:list` item", name).into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as an `xs:list` item", name, variant ) .into(), )) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer for a values representing XSD [simple types], which used in: /// - attribute values (`<... ...="value" ...>`) /// - text content (`<...>text`) /// - CDATA content (`<...>`) /// /// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub struct SimpleTypeSerializer { /// Writer to which this serializer writes content pub writer: W, /// Target for which element is serializing. Affects additional characters to escape. 
pub target: QuoteTarget, /// Defines which XML characters need to be escaped pub level: QuoteLevel, } impl SimpleTypeSerializer { #[inline] fn write_str(&mut self, value: &str) -> Result<(), SeError> { Ok(self.writer.write_str(value)?) } #[inline] fn write_fmt(&mut self, args: fmt::Arguments<'_>) -> Result<(), SeError> { Ok(self.writer.write_fmt(args)?) } } impl Serializer for SimpleTypeSerializer { type Ok = W; type Error = SeError; type SerializeSeq = SimpleSeq; type SerializeTuple = SimpleSeq; type SerializeTupleStruct = SimpleSeq; type SerializeTupleVariant = Impossible; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(); fn serialize_str(mut self, value: &str) -> Result { if !value.is_empty() { escape_list(&mut self.writer, value, self.target, self.level)?; } Ok(self.writer) } /// Does not write anything fn serialize_unit(self) -> Result { Ok(self.writer) } /// Does not write anything fn serialize_unit_struct(self, _name: &'static str) -> Result { Ok(self.writer) } /// We cannot store both a variant discriminant and a variant value, /// so serialization of enum newtype variant returns `Err(Unsupported)` fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize enum newtype variant `{}::{}` as an attribute or text content value", name, variant).into(), )) } #[inline] fn serialize_seq(mut self, _len: Option) -> Result { if let QuoteTarget::CData = self.target { self.writer.write_str(" Result { self.serialize_seq(None) } #[inline] fn serialize_tuple_struct( self, _name: &'static str, _len: usize, ) -> Result { self.serialize_seq(None) } fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize enum tuple variant `{}::{}` as an attribute or text content 
value", name, variant).into(), )) } fn serialize_map(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize map as an attribute or text content value".into(), )) } fn serialize_struct( self, name: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize struct `{}` as an attribute or text content value", name ) .into(), )) } fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize enum struct variant `{}::{}` as an attribute or text content value", name, variant).into(), )) } } /// Serializer for a sequence of atomic values delimited by space pub struct SimpleSeq { writer: W, target: QuoteTarget, level: QuoteLevel, /// If `true`, nothing was written yet to the `writer` is_empty: bool, } impl SerializeSeq for SimpleSeq { type Ok = W; type Error = SeError; fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { if value.serialize(AtomicSerializer { writer: &mut self.writer, target: self.target, level: self.level, write_delimiter: !self.is_empty, })? 
{ self.is_empty = false; } Ok(()) } #[inline] fn end(mut self) -> Result { if let QuoteTarget::CData = self.target { self.writer.write_str("]]>")?; } Ok(self.writer) } } impl SerializeTuple for SimpleSeq { type Ok = W; type Error = SeError; #[inline] fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl SerializeTupleStruct for SimpleSeq { type Ok = W; type Error = SeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } impl SerializeTupleVariant for SimpleSeq { type Ok = W; type Error = SeError; #[inline] fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> where T: ?Sized + Serialize, { SerializeSeq::serialize_element(self, value) } #[inline] fn end(self) -> Result { SerializeSeq::end(self) } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use crate::utils::Bytes; use serde::Serialize; use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq)] struct Unit; #[derive(Debug, Serialize, PartialEq)] struct Newtype(usize); #[derive(Debug, Serialize, PartialEq)] struct Tuple(&'static str, usize); #[derive(Debug, Serialize, PartialEq)] struct Struct { key: &'static str, val: usize, } #[derive(Debug, Serialize, PartialEq)] enum Enum { Unit, #[serde(rename = "<\"&'>")] UnitEscaped, Newtype(usize), Tuple(&'static str, usize), Struct { key: &'static str, val: usize, }, } mod escape_item { use super::*; use pretty_assertions::assert_eq; fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> String { let mut result = String::new(); super::escape_item(&mut result, value, target, level).unwrap(); result } mod full { 
use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), "text<"'&> text" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> text" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> text" ); } #[test] fn double_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> text" ); } #[test] fn single_quote_attr() { assert_eq!( escape_item( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> text" ); } } /// Escape function does not surround text with ``, that should be done outside #[test] fn cdata() { assert_eq!( escape_item( "text<\"'&>]]> \t\n\rtext", QuoteTarget::CData, QuoteLevel::Full ), "text<\"'&>]]]]> \t\n\rtext" ); assert_eq!( escape_item( "text<\"'&>]]> \t\n\rtext", QuoteTarget::CData, QuoteLevel::Partial ), "text<\"'&>]]]]> \t\n\rtext" ); assert_eq!( escape_item( "text<\"'&>]]> \t\n\rtext", QuoteTarget::CData, QuoteLevel::Minimal ), "text<\"'&>]]]]> \t\n\rtext" ); } } mod escape_list 
{ use super::*; use pretty_assertions::assert_eq; fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> String { let mut result = String::new(); super::escape_list(&mut result, value, target, level).unwrap(); result } mod full { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list("text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Full), "text<"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Full ), "text<"'&> \t\n\rtext" ); } } mod partial { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Partial ), "text<\"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Partial ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Partial ), "text<\"'&> \t\n\rtext" ); } } mod minimal { use super::*; use pretty_assertions::assert_eq; #[test] fn text() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::Text, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } #[test] fn double_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::DoubleQAttr, QuoteLevel::Minimal ), "text<"'&> \t\n\rtext" ); } #[test] fn single_quote_attr() { assert_eq!( escape_list( "text<\"'&> \t\n\rtext", QuoteTarget::SingleQAttr, QuoteLevel::Minimal ), "text<\"'&> \t\n\rtext" ); } } #[test] fn cdata() { assert_eq!( escape_list( "text<\"'&>]]> \t\n\rtext", QuoteTarget::CData, QuoteLevel::Full ), "]]]]> \t\n\rtext]]>" ); assert_eq!( escape_list( "text<\"'&>]]> \t\n\rtext", 
QuoteTarget::CData, QuoteLevel::Partial ), "]]]]> \t\n\rtext]]>" ); assert_eq!( escape_list( "text<\"'&>]]> \t\n\rtext", QuoteTarget::CData, QuoteLevel::Minimal ), "]]]]> \t\n\rtext]]>" ); } } /// Tests for serialize atomic and union values, as defined in XSD specification mod atomic { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let mut buffer = String::new(); let ser = AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, write_delimiter: false, }; let has_written = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); assert_eq!(has_written, !buffer.is_empty()); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = AtomicSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, write_delimiter: false, }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000isize => "-42000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped-string" => "non-escaped-string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); err!(unit: () => Unsupported("cannot serialize unit type `()` as an `xs:list` item")); err!(unit_struct: Unit => Unsupported("cannot serialize unit struct `Unit` as an `xs:list` item")); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an `xs:list` item")); err!(seq: vec![1, 2, 3] => Unsupported("cannot serialize sequence as an `xs:list` item")); err!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => Unsupported("cannot serialize tuple as an `xs:list` item")); err!(tuple_struct: Tuple("first", 42) => Unsupported("cannot serialize tuple struct `Tuple` as an `xs:list` item")); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as an `xs:list` item")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("cannot serialize map as an `xs:list` item")); err!(struct_: Struct { key: "answer", val: 42 } => 
Unsupported("cannot serialize struct `Struct` as an `xs:list` item")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an `xs:list` item")); } mod simple_type { use super::*; use pretty_assertions::assert_eq; /// Checks that given `$data` successfully serialized as `$expected` macro_rules! serialize_as { ($name:ident: $data:expr => $expected:literal) => { #[test] fn $name() { let ser = SimpleTypeSerializer { writer: String::new(), target: QuoteTarget::Text, level: QuoteLevel::Full, }; let buffer = $data.serialize(ser).unwrap(); assert_eq!(buffer, $expected); } }; } /// Checks that attempt to serialize given `$data` results to a /// serialization error `$kind` with `$reason` macro_rules! err { ($name:ident: $data:expr => $kind:ident($reason:literal)) => { #[test] fn $name() { let mut buffer = String::new(); let ser = SimpleTypeSerializer { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, }; match $data.serialize(ser).unwrap_err() { SeError::$kind(e) => assert_eq!(e, $reason), e => panic!( "Expected `Err({}({}))`, but got `{:?}`", stringify!($kind), $reason, e ), } assert_eq!(buffer, ""); } }; } serialize_as!(false_: false => "false"); serialize_as!(true_: true => "true"); serialize_as!(i8_: -42i8 => "-42"); serialize_as!(i16_: -4200i16 => "-4200"); serialize_as!(i32_: -42000000i32 => "-42000000"); serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); serialize_as!(isize_: -42000000isize => "-42000000"); serialize_as!(u8_: 42u8 => "42"); serialize_as!(u16_: 4200u16 => "4200"); serialize_as!(u32_: 42000000u32 => "42000000"); serialize_as!(u64_: 42000000000000u64 => "42000000000000"); serialize_as!(usize_: 42000000usize => "42000000"); serde_if_integer128! 
{ serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); } serialize_as!(f32_: 4.2f32 => "4.2"); serialize_as!(f64_: 4.2f64 => "4.2"); serialize_as!(char_non_escaped: 'h' => "h"); serialize_as!(char_lt: '<' => "<"); serialize_as!(char_gt: '>' => ">"); serialize_as!(char_amp: '&' => "&"); serialize_as!(char_apos: '\'' => "'"); serialize_as!(char_quot: '"' => """); serialize_as!(str_non_escaped: "non-escaped string" => "non-escaped string"); serialize_as!(str_escaped: "<\"escaped & string'>" => "<"escaped & string'>"); err!(bytes: Bytes(b"<\"escaped & bytes'>") => Unsupported("`serialize_bytes` not supported yet")); serialize_as!(option_none: Option::<&str>::None => ""); serialize_as!(option_some: Some("non-escaped string") => "non-escaped string"); serialize_as!(unit: () => ""); serialize_as!(unit_struct: Unit => ""); serialize_as!(enum_unit: Enum::Unit => "Unit"); serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<"&'>"); serialize_as!(newtype: Newtype(42) => "42"); err!(enum_newtype: Enum::Newtype(42) => Unsupported("cannot serialize enum newtype variant `Enum::Newtype` as an attribute or text content value")); serialize_as!(seq: vec![1, 2, 3] => "1 2 3"); serialize_as!(seq_empty: Vec::::new() => ""); serialize_as!(seq_with_1_empty_str: vec![""] => ""); serialize_as!(seq_with_2_empty_strs: vec!["", ""] => ""); serialize_as!(tuple: ("<\"&'>", "with\t\n\r spaces", 3usize) => "<"&'> with spaces 3"); serialize_as!(tuple_struct: Tuple("first", 42) => "first 42"); err!(enum_tuple: Enum::Tuple("first", 42) => Unsupported("cannot serialize enum tuple variant `Enum::Tuple` as an attribute or text content value")); err!(map: BTreeMap::from([(1, 2), (3, 4)]) => Unsupported("cannot serialize map as an attribute or text content value")); err!(struct_: Struct { key: "answer", val: 42 } => Unsupported("cannot serialize struct `Struct` as an 
attribute or text content value")); err!(enum_struct: Enum::Struct { key: "answer", val: 42 } => Unsupported("cannot serialize enum struct variant `Enum::Struct` as an attribute or text content value")); } mod simple_seq { use super::*; use pretty_assertions::assert_eq; #[test] fn empty_seq() { let mut buffer = String::new(); let ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, is_empty: true, }; SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, ""); } #[test] fn all_items_empty() { let mut buffer = String::new(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, is_empty: true, }; SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, ""); } #[test] fn some_items_empty1() { let mut buffer = String::new(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, is_empty: true, }; SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, "1"); } #[test] fn some_items_empty2() { let mut buffer = String::new(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, is_empty: true, }; SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut ser, "").unwrap(); SerializeSeq::serialize_element(&mut ser, &2).unwrap(); SerializeSeq::end(ser).unwrap(); assert_eq!(buffer, "1 2"); } #[test] fn items() { let mut buffer = String::new(); let mut ser = SimpleSeq { writer: &mut buffer, target: QuoteTarget::Text, level: QuoteLevel::Full, is_empty: true, }; SerializeSeq::serialize_element(&mut ser, &1).unwrap(); SerializeSeq::serialize_element(&mut 
//! Contains serializer for a special `$text` field
write_primitive { ($method:ident ( $ty:ty )) => { #[inline] fn $method(self, value: $ty) -> Result { self.0.$method(value) } }; } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A serializer used to serialize a `$text` field of a struct or map. /// /// This serializer a very similar to [`SimpleTypeSerializer`], but different /// from it in how it processes unit enum variants. Unlike [`SimpleTypeSerializer`] /// this serializer does not write anything for the unit variant. pub struct TextSerializer(pub SimpleTypeSerializer); impl Serializer for TextSerializer { type Ok = W; type Error = SeError; type SerializeSeq = SimpleSeq; type SerializeTuple = SimpleSeq; type SerializeTupleStruct = SimpleSeq; type SerializeTupleVariant = SimpleSeq; type SerializeMap = Impossible; type SerializeStruct = Impossible; type SerializeStructVariant = Impossible; write_primitive!(serialize_bool(bool)); write_primitive!(serialize_i8(i8)); write_primitive!(serialize_i16(i16)); write_primitive!(serialize_i32(i32)); write_primitive!(serialize_i64(i64)); write_primitive!(serialize_u8(u8)); write_primitive!(serialize_u16(u16)); write_primitive!(serialize_u32(u32)); write_primitive!(serialize_u64(u64)); serde_if_integer128! 
{ write_primitive!(serialize_i128(i128)); write_primitive!(serialize_u128(u128)); } write_primitive!(serialize_f32(f32)); write_primitive!(serialize_f64(f64)); write_primitive!(serialize_char(char)); write_primitive!(serialize_str(&str)); write_primitive!(serialize_bytes(&[u8])); #[inline] fn serialize_none(self) -> Result { self.0.serialize_none() } fn serialize_some(self, value: &T) -> Result { value.serialize(self) } #[inline] fn serialize_unit(self) -> Result { self.0.serialize_unit() } #[inline] fn serialize_unit_struct(self, name: &'static str) -> Result { self.0.serialize_unit_struct(name) } #[inline] fn serialize_unit_variant( self, name: &'static str, variant_index: u32, variant: &'static str, ) -> Result { if variant == TEXT_KEY { Ok(self.0.writer) } else { self.0.serialize_unit_variant(name, variant_index, variant) } } fn serialize_newtype_struct( self, _name: &'static str, value: &T, ) -> Result { value.serialize(self) } #[inline] fn serialize_newtype_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _value: &T, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum newtype variant `{}::{}` as text content value", name, variant ) .into(), )) } #[inline] fn serialize_seq(self, len: Option) -> Result { self.0.serialize_seq(len) } #[inline] fn serialize_tuple(self, len: usize) -> Result { self.0.serialize_tuple(len) } #[inline] fn serialize_tuple_struct( self, name: &'static str, len: usize, ) -> Result { self.0.serialize_tuple_struct(name, len) } #[inline] fn serialize_tuple_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum tuple variant `{}::{}` as text content value", name, variant ) .into(), )) } #[inline] fn serialize_map(self, _len: Option) -> Result { Err(SeError::Unsupported( "cannot serialize map as text content value".into(), )) } #[inline] fn serialize_struct( self, name: &'static str, 
_len: usize, ) -> Result { Err(SeError::Unsupported( format!("cannot serialize struct `{}` as text content value", name).into(), )) } #[inline] fn serialize_struct_variant( self, name: &'static str, _variant_index: u32, variant: &'static str, _len: usize, ) -> Result { Err(SeError::Unsupported( format!( "cannot serialize enum struct variant `{}::{}` as text content value", name, variant ) .into(), )) } } quick-xml-0.38.4/src/serde_helpers.rs000064400000000000000000000343261046102023000156230ustar 00000000000000//! Provides helper functions to glue an XML with a serde content model. use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[macro_export] #[doc(hidden)] macro_rules! deserialize_variant { // Produce struct enum variant ( $de:expr, $enum:tt, $variant:ident { $( $(#[$meta:meta])* $field:ident : $typ:ty ),* $(,)? } ) => ({ let var = { // Create anonymous type #[derive(serde::Deserialize)] struct $variant { $( $(#[$meta])* $field: $typ, )* } <$variant>::deserialize($de)? }; // Due to https://github.com/rust-lang/rust/issues/86935 we cannot use // <$enum> :: $variant use $enum :: *; $variant { $($field: var.$field,)* } }); // Produce newtype enum variant ( $de:expr, $enum:tt, $variant:ident($typ:ty) ) => ({ let var = <$typ>::deserialize($de)?; <$enum> :: $variant(var) }); // Produce unit enum variant ( $de:expr, $enum:tt, $variant:ident ) => ({ serde::de::IgnoredAny::deserialize($de)?; <$enum> :: $variant }); } /// Helper macro that generates different match expressions depending on the presence /// of default variant #[macro_export] #[doc(hidden)] macro_rules! deserialize_match { // Only default variant ( $tag:ident, $de:ident, $enum:ty, (_ => $($default_variant:tt)+ ) $(,)? ) => ( Ok($crate::deserialize_variant!( $de, $enum, $($default_variant)+ )) ); // With default variant ( $tag:ident, $de:ident, $enum:ty, $( ($variant_tag:literal => $($variant:tt)+ ) ),* , (_ => $($default_variant:tt)+ ) $(,)? 
) => ( match $tag.as_ref() { $( $variant_tag => Ok($crate::deserialize_variant!( $de, $enum, $($variant)+ )), )* _ => Ok($crate::deserialize_variant!( $de, $enum, $($default_variant)+ )), } ); // Without default variant ( $tag:ident, $de:ident, $enum:ty, $( ($variant_tag:literal => $($variant:tt)+ ) ),* $(,)? ) => ( match $tag.as_ref() { $( $variant_tag => Ok($crate::deserialize_variant!( $de, $enum, $($variant)+ )), )* _ => Err(A::Error::unknown_field(&$tag, &[$($variant_tag),+])), } ); } /// A helper to implement [`Deserialize`] for [internally tagged] enums which /// does not use [`Deserializer::deserialize_any`] that produces wrong results /// with XML because of [serde#1183]. /// /// In contrast to deriving [`Deserialize`] this macro assumes that a tag will be /// the first element or attribute in the XML. /// /// # Example /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::impl_deserialize_for_internally_tagged_enum; /// use serde::Deserialize; /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Root { /// one: InternallyTaggedEnum, /// two: InternallyTaggedEnum, /// three: InternallyTaggedEnum, /// } /// /// #[derive(Debug, PartialEq)] /// // #[serde(tag = "@tag")] /// enum InternallyTaggedEnum { /// Unit, /// Newtype(Newtype), /// Struct { /// // #[serde(rename = "@attribute")] /// attribute: u32, /// element: f32, /// }, /// } /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Newtype { /// #[serde(rename = "@attribute")] /// attribute: u64, /// } /// /// // The macro needs the type of the enum, the tag name, /// // and information about all the variants /// impl_deserialize_for_internally_tagged_enum!{ /// InternallyTaggedEnum, "@tag", /// ("Unit" => Unit), /// ("Newtype" => Newtype(Newtype)), /// ("Struct" => Struct { /// #[serde(rename = "@attribute")] /// attribute: u32, /// element: f32, /// }), /// } /// /// assert_eq!( /// from_str::(r#" /// /// /// /// /// 4.2 /// /// /// 
"#).unwrap(), /// Root { /// one: InternallyTaggedEnum::Unit, /// two: InternallyTaggedEnum::Newtype(Newtype { attribute: 42 }), /// three: InternallyTaggedEnum::Struct { /// attribute: 42, /// element: 4.2, /// }, /// }, /// ); /// ``` /// /// You don't necessarily have to provide all the enumeration variants and can use /// `_` to put every undefined tag into an enumeration variant. /// This default variant (`_ => ...`) must be the last one to appear in the macro, /// like `_ => Other` in the example below: /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::impl_deserialize_for_internally_tagged_enum; /// use serde::Deserialize; /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Root { /// one: InternallyTaggedEnum, /// two: InternallyTaggedEnum, /// three: InternallyTaggedEnum, /// } /// /// #[derive(Debug, PartialEq)] /// enum InternallyTaggedEnum { /// NewType(Newtype), /// Other, /// } /// /// #[derive(Deserialize, Debug, PartialEq)] /// struct Newtype { /// #[serde(rename = "@attribute")] /// attribute: u64, /// } /// /// // The macro needs the type of the enum, the tag name, /// // and information about all the variants /// impl_deserialize_for_internally_tagged_enum!{ /// InternallyTaggedEnum, "@tag", /// ("NewType" => NewType(Newtype)), /// (_ => Other), /// } /// /// assert_eq!( /// from_str::(r#" /// /// /// /// /// /// /// /// "#).unwrap(), /// Root { /// one: InternallyTaggedEnum::NewType(Newtype { attribute: 42 }), /// two: InternallyTaggedEnum::Other, /// three: InternallyTaggedEnum::Other, /// }, /// ); /// ``` /// /// [internally tagged]: https://serde.rs/enum-representations.html#internally-tagged /// [serde#1183]: https://github.com/serde-rs/serde/issues/1183 #[macro_export(local_inner_macros)] macro_rules! 
impl_deserialize_for_internally_tagged_enum { ( $enum:ty, $tag:literal, $($cases:tt)* ) => { impl<'de> serde::de::Deserialize<'de> for $enum { fn deserialize(deserializer: D) -> Result where D: serde::de::Deserializer<'de>, { use serde::de::{Error, MapAccess, Visitor}; // The Visitor struct is normally used for state, but none is needed struct TheVisitor; // The main logic of the deserializing happens in the Visitor trait impl<'de> Visitor<'de> for TheVisitor { // The type that is being deserialized type Value = $enum; // Try to give a better error message when this is used wrong fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.write_str("expecting map with tag in ")?; f.write_str($tag) } // The xml data is provided as an opaque map, // that map is parsed into the type fn visit_map(self, mut map: A) -> Result where A: MapAccess<'de>, { // Here the assumption is made that only one attribute // exists and it's the discriminator (enum "tag"). let entry: Option<(String, String)> = map.next_entry()?; // If there are more attributes those would need // to be parsed as well. let tag = match entry { // Return an error if the no attributes are found, // and indicate that the @tag attribute is missing. 
None => Err(A::Error::missing_field($tag)), // Check if the attribute is the tag Some((attribute, value)) => { if attribute == $tag { // return the value of the tag Ok(value) } else { // The attribute is not @tag, return an error // indicating that there is an unexpected attribute Err(A::Error::unknown_field(&attribute, &[$tag])) } } }?; let de = serde::de::value::MapAccessDeserializer::new(map); $crate::deserialize_match!( tag, de, $enum, $($cases)* ) } } // Tell the deserializer to deserialize the data as a map, // using the TheVisitor as the decoder deserializer.deserialize_map(TheVisitor) } } } } /// Provides helper functions to serialization and deserialization of types /// (usually enums) as a text content of an element and intended to use with /// [`#[serde(with = "...")]`][with], [`#[serde(deserialize_with = "...")]`][de-with] /// and [`#[serde(serialize_with = "...")]`][se-with]. /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::de::from_str; /// use quick_xml::se::to_string; /// use serde::{Serialize, Deserialize}; /// /// #[derive(Serialize, Deserialize, PartialEq, Debug)] /// enum SomeEnum { /// // Default implementation serializes enum as an `` element /// EnumValue, /// # /* /// ... 
/// # */
/// }
///
/// #[derive(Serialize, Deserialize, PartialEq, Debug)]
/// #[serde(rename = "some-container")]
/// struct SomeContainer {
///     #[serde(with = "quick_xml::serde_helpers::text_content")]
///     field: SomeEnum,
/// }
///
/// let container = SomeContainer {
///     field: SomeEnum::EnumValue,
/// };
/// let xml = "\
///     <some-container>\
///         <field>EnumValue</field>\
///     </some-container>";
///
/// assert_eq!(to_string(&container).unwrap(), xml);
/// assert_eq!(from_str::<SomeContainer>(xml).unwrap(), container);
/// ```
///
/// Using this module is equivalent to replacing `field`'s type with this:
///
/// ```
/// # use serde::{Deserialize, Serialize};
/// # type SomeEnum = ();
/// #[derive(Serialize, Deserialize)]
/// struct Field {
///     // Use a special name `$text` to map field to the text content
///     #[serde(rename = "$text")]
///     content: SomeEnum,
/// }
///
/// #[derive(Serialize, Deserialize)]
/// #[serde(rename = "some-container")]
/// struct SomeContainer {
///     field: Field,
/// }
/// ```
/// Read about the meaning of a special [`$text`] field.
///
/// In versions of quick-xml before 0.31.0 this module used to represent enum
/// unit variants as `<field>EnumUnitVariant</field>` instead of `<field><EnumUnitVariant/></field>`.
/// Since version 0.31.0 this is the default representation of enums in normal fields,
/// and `<field><EnumUnitVariant/></field>` requires a `$value` field:
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::de::from_str;
/// use quick_xml::se::to_string;
/// use serde::{Serialize, Deserialize};
///
/// #[derive(Serialize, Deserialize, PartialEq, Debug)]
/// enum SomeEnum {
///     // Default implementation serializes enum as an `<EnumValue/>` element
///     EnumValue,
/// # /*
///     ...
/// # */ /// } /// /// #[derive(Serialize, Deserialize, PartialEq, Debug)] /// #[serde(rename = "some-container")] /// struct SomeContainer { /// #[serde(rename = "$value")] /// field: SomeEnum, /// } /// /// let container = SomeContainer { /// field: SomeEnum::EnumValue, /// }; /// let xml = "\ /// \ /// \ /// "; /// /// assert_eq!(to_string(&container).unwrap(), xml); /// assert_eq!(from_str::(xml).unwrap(), container); /// ``` /// /// [with]: https://serde.rs/field-attrs.html#with /// [de-with]: https://serde.rs/field-attrs.html#deserialize_with /// [se-with]: https://serde.rs/field-attrs.html#serialize_with /// [`$text`]: ../../de/index.html#text pub mod text_content { use super::*; /// Serializes `value` as an XSD [simple type]. Intended to use with /// `#[serde(serialize_with = "...")]`. See example at [`text_content`] /// module level. /// /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub fn serialize(value: &T, serializer: S) -> Result where S: Serializer, T: Serialize, { #[derive(Serialize)] struct Field<'a, T> { #[serde(rename = "$text")] value: &'a T, } Field { value }.serialize(serializer) } /// Deserializes XSD's [simple type]. Intended to use with /// `#[serde(deserialize_with = "...")]`. See example at [`text_content`] /// module level. 
/// /// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition pub fn deserialize<'de, D, T>(deserializer: D) -> Result where D: Deserializer<'de>, T: Deserialize<'de>, { #[derive(Deserialize)] struct Field { #[serde(rename = "$text")] value: T, } Ok(Field::deserialize(deserializer)?.value) } } quick-xml-0.38.4/src/utils.rs000064400000000000000000000363501046102023000141360ustar 00000000000000use std::borrow::{Borrow, Cow}; use std::fmt::{self, Debug, Formatter}; use std::io; use std::iter::FusedIterator; use std::ops::Deref; #[cfg(feature = "async-tokio")] use std::{ pin::Pin, task::{Context, Poll}, }; #[cfg(feature = "serialize")] use serde::de::{Deserialize, Deserializer, Error, Visitor}; #[cfg(feature = "serialize")] use serde::ser::{Serialize, Serializer}; #[allow(clippy::ptr_arg)] pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result { match cow_string { Cow::Owned(s) => { write!(f, "Owned(")?; write_byte_string(f, s)?; } Cow::Borrowed(s) => { write!(f, "Borrowed(")?; write_byte_string(f, s)?; } } write!(f, ")") } pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result { write!(f, "\"")?; for b in byte_string { match *b { 32..=33 | 35..=126 => write!(f, "{}", *b as char)?, 34 => write!(f, "\\\"")?, _ => write!(f, "{:#02X}", b)?, } } write!(f, "\"")?; Ok(()) } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A version of [`Cow`] that can borrow from two different buffers, one of them /// is a deserializer input. 
///
/// # Lifetimes
///
/// - `'i`: lifetime of the data that the deserializer borrows from the parsed input
/// - `'s`: lifetime of the data that is owned by a deserializer
pub enum CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
{
    /// An input borrowed from the parsed data
    Input(&'i B),
    /// An input borrowed from the buffer owned by another deserializer
    Slice(&'s B),
    /// An input taken from an external deserializer, owned by that deserializer
    Owned(<B as ToOwned>::Owned),
}

impl<'i, 's, B> Deref for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized,
    B::Owned: Borrow<B>,
{
    type Target = B;

    /// Gives a shared view of the wrapped data regardless of which variant holds it.
    fn deref(&self) -> &B {
        match self {
            Self::Input(input) => *input,
            Self::Slice(slice) => *slice,
            Self::Owned(value) => value.borrow(),
        }
    }
}

impl<'i, 's, B> Debug for CowRef<'i, 's, B>
where
    B: ToOwned + ?Sized + Debug,
    B::Owned: Debug,
{
    /// Formats the wrapped data only; the variant name is not printed.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::Input(input) => Debug::fmt(*input, f),
            Self::Slice(slice) => Debug::fmt(*slice, f),
            Self::Owned(value) => Debug::fmt(value, f),
        }
    }
}

impl<'i, 's> CowRef<'i, 's, str> {
    /// Hands the string to the `visitor` using the most specific method
    /// available for the way the data is held:
    ///
    /// - [`Self::Input`]: `visitor.visit_borrowed_str`
    /// - [`Self::Slice`]: `visitor.visit_str`
    /// - [`Self::Owned`]: `visitor.visit_string`
    #[cfg(feature = "serialize")]
    pub fn deserialize_str<V, E>(self, visitor: V) -> Result<V::Value, E>
    where
        V: Visitor<'i>,
        E: Error,
    {
        match self {
            Self::Owned(owned) => visitor.visit_string(owned),
            Self::Slice(slice) => visitor.visit_str(slice),
            Self::Input(input) => visitor.visit_borrowed_str(input),
        }
    }

    /// Calls [`Visitor::visit_bool`] with `true` or `false` if text contains
    /// [valid] boolean representation, otherwise calls [`Self::deserialize_str`].
    ///
    /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
    ///
    /// [valid]: https://www.w3.org/TR/xmlschema11-2/#boolean
    #[cfg(feature = "serialize")]
    pub fn deserialize_bool<V, E>(self, visitor: V) -> Result<V::Value, E>
    where
        V: Visitor<'i>,
        E: Error,
    {
        // Classify the text first so the borrow of `self` ends before it is
        // moved into the string fallback.
        let parsed = match &*self {
            "1" | "true" => Some(true),
            "0" | "false" => Some(false),
            _ => None,
        };
        match parsed {
            Some(flag) => visitor.visit_bool(flag),
            None => self.deserialize_str(visitor),
        }
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Wrapper around `Vec<u8>` that has a human-readable debug representation:
/// printable ASCII symbols output as is, all other output in HEX notation.
///
/// Also, when [`serialize`] feature is on, this type deserialized using
/// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead
/// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
///
/// [`serialize`]: ../index.html#serialize
#[derive(PartialEq, Eq)]
pub struct ByteBuf(pub Vec<u8>);

impl Debug for ByteBuf {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write_byte_string(f, self.0.as_slice())
    }
}

#[cfg(feature = "serialize")]
impl<'de> Deserialize<'de> for ByteBuf {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Stateless visitor that accepts borrowed or owned byte data.
        struct ByteBufVisitor;

        impl<'de> Visitor<'de> for ByteBufVisitor {
            type Value = ByteBuf;

            fn expecting(&self, f: &mut Formatter) -> fmt::Result {
                f.write_str("byte data")
            }

            // Borrowed bytes have to be copied because `ByteBuf` owns its storage
            fn visit_bytes<E: Error>(self, bytes: &[u8]) -> Result<Self::Value, E> {
                Ok(ByteBuf(Vec::from(bytes)))
            }

            // An owned buffer is taken over as is, without copying
            fn visit_byte_buf<E: Error>(self, bytes: Vec<u8>) -> Result<Self::Value, E> {
                Ok(ByteBuf(bytes))
            }
        }

        deserializer.deserialize_byte_buf(ByteBufVisitor)
    }
}

#[cfg(feature = "serialize")]
impl Serialize for ByteBuf {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_bytes(self.0.as_slice())
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Wrapper around `&[u8]` that has a human-readable debug representation:
/// printable ASCII symbols output as is, all other output in HEX notation.
/// /// Also, when [`serialize`] feature is on, this type deserialized using /// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) /// /// [`serialize`]: ../index.html#serialize #[derive(PartialEq, Eq)] pub struct Bytes<'de>(pub &'de [u8]); impl<'de> Debug for Bytes<'de> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write_byte_string(f, self.0) } } #[cfg(feature = "serialize")] impl<'de> Deserialize<'de> for Bytes<'de> { fn deserialize(d: D) -> Result where D: Deserializer<'de>, { struct ValueVisitor; impl<'de> Visitor<'de> for ValueVisitor { type Value = Bytes<'de>; fn expecting(&self, f: &mut Formatter) -> fmt::Result { f.write_str("borrowed bytes") } fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result { Ok(Bytes(v)) } } d.deserialize_bytes(ValueVisitor) } } #[cfg(feature = "serialize")] impl<'de> Serialize for Bytes<'de> { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_bytes(self.0) } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A simple producer of infinite stream of bytes, useful in tests. /// /// Will repeat `chunk` field indefinitely. pub struct Fountain<'a> { /// That piece of data repeated infinitely... 
pub chunk: &'a [u8], /// Part of `chunk` that was consumed by BufRead impl pub consumed: usize, /// The overall count of read bytes pub overall_read: u64, } impl<'a> io::Read for Fountain<'a> { fn read(&mut self, buf: &mut [u8]) -> io::Result { let available = &self.chunk[self.consumed..]; let len = buf.len().min(available.len()); let (portion, _) = available.split_at(len); buf.copy_from_slice(portion); Ok(len) } } impl<'a> io::BufRead for Fountain<'a> { #[inline] fn fill_buf(&mut self) -> io::Result<&[u8]> { Ok(&self.chunk[self.consumed..]) } fn consume(&mut self, amt: usize) { self.consumed += amt; if self.consumed == self.chunk.len() { self.consumed = 0; } self.overall_read += amt as u64; } } #[cfg(feature = "async-tokio")] impl<'a> tokio::io::AsyncRead for Fountain<'a> { fn poll_read( self: Pin<&mut Self>, _cx: &mut Context<'_>, buf: &mut tokio::io::ReadBuf<'_>, ) -> Poll> { let available = &self.chunk[self.consumed..]; let len = buf.remaining().min(available.len()); let (portion, _) = available.split_at(len); buf.put_slice(portion); Poll::Ready(Ok(())) } } #[cfg(feature = "async-tokio")] impl<'a> tokio::io::AsyncBufRead for Fountain<'a> { #[inline] fn poll_fill_buf(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { Poll::Ready(io::BufRead::fill_buf(self.get_mut())) } #[inline] fn consume(self: Pin<&mut Self>, amt: usize) { io::BufRead::consume(self.get_mut(), amt); } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab). #[inline] pub const fn is_whitespace(b: u8) -> bool { matches!(b, b' ' | b'\r' | b'\n' | b'\t') } /// Calculates name from an element-like content. Name is the first word in `content`, /// where word boundaries is XML whitespace characters. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. 
#[inline] pub const fn name_len(mut bytes: &[u8]) -> usize { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. let mut len = 0; while let [first, rest @ ..] = bytes { if is_whitespace(*first) { break; } len += 1; bytes = rest; } len } /// Returns a byte slice with leading XML whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. #[inline] pub const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. while let [first, rest @ ..] = bytes { if is_whitespace(*first) { bytes = rest; } else { break; } } bytes } /// Returns a byte slice with trailing XML whitespace bytes removed. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. #[inline] pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] { // Note: A pattern matching based approach (instead of indexing) allows // making the function const. while let [rest @ .., last] = bytes { if is_whitespace(*last) { bytes = rest; } else { break; } } bytes } /// Returns a string slice with XML whitespace characters removed from both sides. /// /// 'Whitespace' refers to the definition used by [`is_whitespace`]. #[inline] pub fn trim_xml_spaces(text: &str) -> &str { let bytes = trim_xml_end(trim_xml_start(text.as_bytes())); match core::str::from_utf8(bytes) { Ok(s) => s, // SAFETY: Removing XML space characters (subset of ASCII) from a `&str` does not invalidate UTF-8. _ => unreachable!(), } } //////////////////////////////////////////////////////////////////////////////////////////////////// /// Splits string into pieces which can be part of a single `CDATA` section. /// /// Because CDATA cannot contain the `]]>` sequence, split the string between /// `]]` and `>`. #[derive(Debug, Clone)] pub(crate) struct CDataIterator<'a> { /// The unprocessed data which should be emitted as `BytesCData` events. 
/// At each iteration, the processed data is cut from this slice. unprocessed: &'a str, finished: bool, } impl<'a> CDataIterator<'a> { pub fn new(value: &'a str) -> Self { Self { unprocessed: value, finished: false, } } } impl<'a> Iterator for CDataIterator<'a> { type Item = &'a str; fn next(&mut self) -> Option<&'a str> { if self.finished { return None; } for gt in memchr::memchr_iter(b'>', self.unprocessed.as_bytes()) { let (slice, rest) = self.unprocessed.split_at(gt); if slice.ends_with("]]") { self.unprocessed = rest; return Some(slice); } } self.finished = true; Some(self.unprocessed) } } impl FusedIterator for CDataIterator<'_> {} //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod tests { use super::*; use pretty_assertions::assert_eq; #[test] fn write_byte_string0() { let bytes = ByteBuf(vec![10, 32, 32, 32, 32, 32, 32, 32, 32]); assert_eq!(format!("{:?}", bytes), "\"0xA \""); } #[test] fn write_byte_string1() { let bytes = ByteBuf(vec![ 104, 116, 116, 112, 58, 47, 47, 119, 119, 119, 46, 119, 51, 46, 111, 114, 103, 47, 50, 48, 48, 50, 47, 48, 55, 47, 111, 119, 108, 35, ]); assert_eq!( format!("{:?}", bytes), r##""http://www.w3.org/2002/07/owl#""## ); } #[test] fn write_byte_string3() { let bytes = ByteBuf(vec![ 67, 108, 97, 115, 115, 32, 73, 82, 73, 61, 34, 35, 66, 34, ]); assert_eq!(format!("{:?}", bytes), r##""Class IRI=\"#B\"""##); } #[test] fn name_len() { assert_eq!(super::name_len(b""), 0); assert_eq!(super::name_len(b" abc"), 0); assert_eq!(super::name_len(b" \t\r\n"), 0); assert_eq!(super::name_len(b"abc"), 3); assert_eq!(super::name_len(b"abc "), 3); assert_eq!(super::name_len(b"a bc"), 1); assert_eq!(super::name_len(b"ab\tc"), 2); assert_eq!(super::name_len(b"ab\rc"), 2); assert_eq!(super::name_len(b"ab\nc"), 2); } #[test] fn trim_xml_start() { assert_eq!(Bytes(super::trim_xml_start(b"")), Bytes(b"")); assert_eq!(Bytes(super::trim_xml_start(b"abc")), Bytes(b"abc")); assert_eq!( 
Bytes(super::trim_xml_start(b"\r\n\t ab \t\r\nc \t\r\n")), Bytes(b"ab \t\r\nc \t\r\n") ); } #[test] fn trim_xml_end() { assert_eq!(Bytes(super::trim_xml_end(b"")), Bytes(b"")); assert_eq!(Bytes(super::trim_xml_end(b"abc")), Bytes(b"abc")); assert_eq!( Bytes(super::trim_xml_end(b"\r\n\t ab \t\r\nc \t\r\n")), Bytes(b"\r\n\t ab \t\r\nc") ); } } quick-xml-0.38.4/src/writer/async_tokio.rs000064400000000000000000000465511046102023000166400ustar 00000000000000use std::future::Future; use std::result::Result as StdResult; use tokio::io::{AsyncWrite, AsyncWriteExt}; use crate::errors::{Error, Result}; use crate::events::{BytesCData, BytesPI, BytesText, Event}; use crate::{ElementWriter, Writer}; impl Writer { /// Writes the given event to the underlying writer. Async version of [`Writer::write_event`]. pub async fn write_event_async<'a, E: Into>>(&mut self, event: E) -> Result<()> { let mut next_should_line_break = true; let result = match event.into() { Event::Start(e) => { let result = self.write_wrapped_async(b"<", &e, b">").await; if let Some(i) = self.indent.as_mut() { i.grow(); } result } Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } self.write_wrapped_async(b"").await } Event::Empty(e) => self.write_wrapped_async(b"<", &e, b"/>").await, Event::Text(e) => { next_should_line_break = false; self.write_async(&e).await } Event::Comment(e) => self.write_wrapped_async(b"").await, Event::CData(e) => { next_should_line_break = false; self.write_async(b"").await } Event::Decl(e) => self.write_wrapped_async(b"").await, Event::PI(e) => self.write_wrapped_async(b"").await, Event::DocType(e) => self.write_wrapped_async(b"").await, Event::GeneralRef(e) => self.write_wrapped_async(b"&", &e, b";").await, Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { i.should_line_break = next_should_line_break; } result } /// Manually write a newline and indentation at the proper level. Async version of /// [`Writer::write_indent`]. 
/// /// This method will do nothing if `Writer` was not constructed with [`Writer::new_with_indent`]. pub async fn write_indent_async(&mut self) -> Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n").await?; self.writer.write_all(i.current()).await?; } Ok(()) } #[inline] async fn write_async(&mut self, value: &[u8]) -> Result<()> { self.writer.write_all(value).await.map_err(Into::into) } #[inline] async fn write_wrapped_async( &mut self, before: &[u8], value: &[u8], after: &[u8], ) -> Result<()> { if let Some(ref i) = self.indent { if i.should_line_break { self.writer.write_all(b"\n").await?; self.writer.write_all(i.current()).await?; } } self.write_async(before).await?; self.write_async(value).await?; self.write_async(after).await?; Ok(()) } } impl<'a, W: AsyncWrite + Unpin> ElementWriter<'a, W> { /// Write some text inside the current element. /// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_text_content_async(BytesText::new("text")) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#"text"# /// ); /// # } pub async fn write_text_content_async(self, text: BytesText<'_>) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::Text(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write a CData event `` 
inside the current element. /// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesCData; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_cdata_content_async(BytesCData::new("text & content")) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#""# /// ); /// # } pub async fn write_cdata_content_async( self, text: BytesCData<'_>, ) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::CData(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write a processing instruction `` inside the current element. 
/// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesPI; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("paired") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_pi_content_async(BytesPI::new(r#"xml-stylesheet href="style.css""#)) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#" /// /// "# /// ); /// # } pub async fn write_pi_content_async(self, text: BytesPI<'_>) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer.write_event_async(Event::PI(text)).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } /// Write an empty (self-closing) tag. 
/// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("empty") /// .with_attribute(("attr1", "value1")) /// .with_attribute(("attr2", "value2")) /// .write_empty_async() /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#""# /// ); /// # } pub async fn write_empty_async(self) -> Result<&'a mut Writer> { self.writer .write_event_async(Event::Empty(self.start_tag)) .await?; Ok(self.writer) } /// Create a new scope for writing XML inside the current element. /// /// # Example /// /// ``` /// # use quick_xml::writer::Writer; /// # use quick_xml::events::BytesText; /// # use tokio::io::AsyncWriteExt; /// use quick_xml::Error; /// /// # #[tokio::main(flavor = "current_thread")] async fn main() { /// let mut buffer = Vec::new(); /// let mut tokio_buffer = tokio::io::BufWriter::new(&mut buffer); /// let mut writer = Writer::new_with_indent(&mut tokio_buffer, b' ', 4); /// /// writer /// .create_element("outer") /// .with_attributes([("attr1", "value1"), ("attr2", "value2")]) /// // We need to provide error type, because it is not named somewhere explicitly /// .write_inner_content_async::<_, _, Error>(|writer| async move { /// let fruits = ["apple", "orange", "banana"]; /// for (quant, item) in fruits.iter().enumerate() { /// writer /// .create_element("fruit") /// .with_attributes([("quantity", quant.to_string().as_str())]) /// .write_text_content_async(BytesText::new(item)) /// .await?; /// } /// writer /// .create_element("inner") /// .write_inner_content_async(|writer| async move { /// 
writer.create_element("empty").write_empty_async().await /// }) /// .await?; /// /// Ok(writer) /// }) /// .await /// .expect("cannot write content"); /// /// tokio_buffer.flush().await.expect("flush failed"); /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// r#" /// apple /// orange /// banana /// /// /// /// "# /// ); /// # } pub async fn write_inner_content_async( mut self, closure: F, ) -> StdResult<&'a mut Writer, E> where F: FnOnce(&'a mut Writer) -> Fut, Fut: Future, E>>, E: From, { self.writer .write_event_async(Event::Start(self.start_tag.borrow())) .await?; self.writer = closure(self.writer).await?; self.writer .write_event_async(Event::End(self.start_tag.to_end())) .await?; Ok(self.writer) } } #[cfg(test)] mod tests { use super::*; use crate::events::*; use pretty_assertions::assert_eq; macro_rules! test { ($name: ident, $event: expr, $expected: expr) => { #[tokio::test] async fn $name() { let mut buffer = Vec::new(); let mut writer = Writer::new(&mut buffer); writer .write_event_async($event) .await .expect("write event failed"); assert_eq!(std::str::from_utf8(&buffer).unwrap(), $expected,); } }; } test!( xml_header, Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), Some("no"))), r#""# ); test!(empty_tag, Event::Empty(BytesStart::new("tag")), r#""#); test!( comment, Event::Comment(BytesText::new("this is a comment")), r#""# ); test!( cdata, Event::CData(BytesCData::new("this is a cdata")), r#""# ); test!( pi, Event::PI(BytesPI::new("this is a processing instruction")), r#""# ); test!( doctype, Event::DocType(BytesText::new("this is a doctype")), r#""# ); #[tokio::test] async fn full_tag() { let mut buffer = Vec::new(); let mut writer = Writer::new(&mut buffer); let start = Event::Start(BytesStart::new("tag")); let text = Event::Text(BytesText::new("inner text")); let end = Event::End(BytesEnd::new("tag")); for i in [start, text, end] { writer.write_event_async(i).await.expect("write tag failed"); } assert_eq!( 
std::str::from_utf8(&buffer).unwrap(), r#"inner text"# ); } } #[cfg(test)] mod indentation_async { use super::*; use crate::events::*; use pretty_assertions::assert_eq; #[tokio::test] async fn self_closed() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let tag = BytesStart::new("self-closed") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); writer .write_event_async(Event::Empty(tag)) .await .expect("write tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#""# ); } #[tokio::test] async fn empty_paired() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[tokio::test] async fn paired_with_inner() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } #[tokio::test] async fn paired_with_text() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = 
start.to_end(); let text = BytesText::new("text"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Text(text)) .await .expect("write text failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text"# ); } #[tokio::test] async fn mixed_content() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let text = BytesText::new("text"); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start tag failed"); writer .write_event_async(Event::Text(text)) .await .expect("write text failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#"text "# ); } #[tokio::test] async fn nested() { let mut buffer = Vec::new(); let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); let start = BytesStart::new("paired") .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()); let end = start.to_end(); let inner = BytesStart::new("inner"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start 1 tag failed"); writer .write_event_async(Event::Start(start.clone())) .await .expect("write start 2 tag failed"); writer .write_event_async(Event::Empty(inner)) .await .expect("write inner tag failed"); writer .write_event_async(Event::End(end.clone())) .await .expect("write end tag 2 failed"); writer .write_event_async(Event::End(end)) .await .expect("write end tag 1 failed"); assert_eq!( std::str::from_utf8(&buffer).unwrap(), r#" "# ); } } 
quick-xml-0.38.4/src/writer.rs000064400000000000000000000561761046102023000143220ustar 00000000000000//! Contains high-level interface for an events-based XML emitter. use std::borrow::Cow; use std::io::{self, Write}; use crate::encoding::UTF8_BOM; use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event}; #[cfg(feature = "async-tokio")] mod async_tokio; /// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] or [`tokio::io::AsyncWrite`] implementor. #[cfg(feature = "serialize")] use {crate::se::SeError, serde::Serialize}; /// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{Event, BytesEnd, BytesStart}; /// use quick_xml::reader::Reader; /// use quick_xml::writer::Writer; /// use std::io::Cursor; /// /// let xml = r#"text"#; /// let mut reader = Reader::from_str(xml); /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// loop { /// match reader.read_event() { /// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => { /// /// // creates a new element ... 
alternatively we could reuse `e` by calling /// // `e.into_owned()` /// let mut elem = BytesStart::new("my_elem"); /// /// // collect existing attributes /// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap())); /// /// // copy existing attributes, adds a new my-key="some value" attribute /// elem.push_attribute(("my-key", "some value")); /// /// // writes the event to the writer /// assert!(writer.write_event(Event::Start(elem)).is_ok()); /// }, /// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => { /// assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok()); /// }, /// Ok(Event::Eof) => break, /// // we can either move or borrow the event to write, depending on your use-case /// Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()), /// Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e), /// } /// } /// /// let result = writer.into_inner().into_inner(); /// let expected = r#"text"#; /// assert_eq!(result, expected.as_bytes()); /// ``` #[derive(Clone)] pub struct Writer { /// underlying writer writer: W, indent: Option, } impl Writer { /// Creates a `Writer` from a generic writer. pub const fn new(inner: W) -> Writer { Writer { writer: inner, indent: None, } } /// Creates a `Writer` with configured indents from a generic writer. pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer { Writer { writer: inner, indent: Some(Indentation::new(indent_char, indent_size)), } } /// Consumes this `Writer`, returning the underlying writer. pub fn into_inner(self) -> W { self.writer } /// Get a mutable reference to the underlying writer. pub fn get_mut(&mut self) -> &mut W { &mut self.writer } /// Get a reference to the underlying writer. pub const fn get_ref(&self) -> &W { &self.writer } /// Provides a simple, high-level API for writing XML elements. /// /// Returns an [`ElementWriter`] that simplifies setting attributes and writing /// content inside the element. 
/// /// # Example /// /// ``` /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut writer = Writer::new(Cursor::new(Vec::new())); /// /// // writes /// writer.create_element("tag") /// .with_attribute(("attr1", "value1")) // chain `with_attribute()` calls to add many attributes /// .write_empty()?; /// /// // writes with some text inside /// writer.create_element("tag") /// .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()) // or add attributes from an iterator /// .write_text_content(BytesText::new("with some text inside"))?; /// /// // writes appleorange /// writer.create_element("tag") /// // We need to provide error type, because it is not named somewhere explicitly /// .write_inner_content(|writer| { /// let fruits = ["apple", "orange"]; /// for (quant, item) in fruits.iter().enumerate() { /// writer /// .create_element("fruit") /// .with_attribute(("quantity", quant.to_string().as_str())) /// .write_text_content(BytesText::new(item))?; /// } /// Ok(()) /// })?; /// # Ok(()) /// # } /// ``` #[must_use] pub fn create_element<'a, N>(&'a mut self, name: N) -> ElementWriter<'a, W> where N: Into>, { ElementWriter { writer: self, start_tag: BytesStart::new(name), state: AttributeIndent::NoneAttributesWritten, spaces: Vec::new(), } } } impl Writer { /// Write a [Byte-Order-Mark] character to the document. 
/// /// # Example /// /// ```rust /// # use quick_xml::Result; /// # fn main() -> Result<()> { /// use quick_xml::events::{BytesStart, BytesText, Event}; /// use quick_xml::writer::Writer; /// use quick_xml::Error; /// use std::io::Cursor; /// /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); /// /// writer.write_bom()?; /// writer /// .create_element("empty") /// .with_attribute(("attr1", "value1")) /// .write_empty() /// .expect("failure"); /// /// assert_eq!( /// std::str::from_utf8(&buffer).unwrap(), /// "\u{FEFF}" /// ); /// # Ok(()) /// # } /// ``` /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM pub fn write_bom(&mut self) -> io::Result<()> { self.write(UTF8_BOM) } /// Writes the given event to the underlying writer. pub fn write_event<'a, E: Into>>(&mut self, event: E) -> io::Result<()> { let mut next_should_line_break = true; let result = match event.into() { Event::Start(e) => { let result = self.write_wrapped(b"<", &e, b">"); if let Some(i) = self.indent.as_mut() { i.grow(); } result } Event::End(e) => { if let Some(i) = self.indent.as_mut() { i.shrink(); } self.write_wrapped(b"") } Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"), Event::Text(e) => { next_should_line_break = false; self.write(&e) } Event::Comment(e) => self.write_wrapped(b""), Event::CData(e) => { next_should_line_break = false; self.write(b"") } Event::Decl(e) => self.write_wrapped(b""), Event::PI(e) => self.write_wrapped(b""), Event::DocType(e) => self.write_wrapped(b""), Event::GeneralRef(e) => self.write_wrapped(b"&", &e, b";"), Event::Eof => Ok(()), }; if let Some(i) = self.indent.as_mut() { i.should_line_break = next_should_line_break; } result } /// Writes bytes #[inline] pub(crate) fn write(&mut self, value: &[u8]) -> io::Result<()> { self.writer.write_all(value) } #[inline] fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> io::Result<()> { if let Some(ref i) = self.indent { if 
i.should_line_break { self.writer.write_all(b"\n")?; self.writer.write_all(i.current())?; } } self.write(before)?; self.write(value)?; self.write(after)?; Ok(()) } /// Manually write a newline and indentation at the proper level. /// /// This can be used when the heuristic to line break and indent after any /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly /// after [`Text`]. /// /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`]. /// /// [`Text`]: Event::Text /// [`Start`]: Event::Start /// [`new_with_indent`]: Self::new_with_indent pub fn write_indent(&mut self) -> io::Result<()> { if let Some(ref i) = self.indent { self.writer.write_all(b"\n")?; self.writer.write_all(i.current())?; } Ok(()) } /// Write an arbitrary serializable type /// /// Note: If you are attempting to write XML in a non-UTF-8 encoding, this may not /// be safe to use. Rust basic types assume UTF-8 encodings. /// /// ```rust /// # use pretty_assertions::assert_eq; /// # use serde::Serialize; /// # use quick_xml::events::{BytesStart, Event}; /// # use quick_xml::writer::Writer; /// # use quick_xml::se::SeError; /// # fn main() -> Result<(), SeError> { /// #[derive(Debug, PartialEq, Serialize)] /// struct MyData { /// question: String, /// answer: u32, /// } /// /// let data = MyData { /// question: "The Ultimate Question of Life, the Universe, and Everything".into(), /// answer: 42, /// }; /// /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4); /// /// let start = BytesStart::new("root"); /// let end = start.to_end(); /// /// writer.write_event(Event::Start(start.clone()))?; /// writer.write_serializable("my_data", &data)?; /// writer.write_event(Event::End(end))?; /// /// assert_eq!( /// std::str::from_utf8(&buffer)?, /// r#" /// /// The Ultimate Question of Life, the Universe, and Everything /// 42 /// /// "# /// ); /// # Ok(()) /// # } /// ``` #[cfg(feature = "serialize")] 
pub fn write_serializable( &mut self, tag_name: &str, content: &T, ) -> Result<(), SeError> { use crate::se::{Indent, Serializer}; self.write_indent()?; let mut fmt = ToFmtWrite(&mut self.writer); let mut serializer = Serializer::with_root(&mut fmt, Some(tag_name))?; if let Some(indent) = &mut self.indent { serializer.set_indent(Indent::Borrow(indent)); } content.serialize(serializer)?; Ok(()) } } /// Track indent inside elements state /// /// ```mermaid /// stateDiagram-v2 /// [*] --> NoneAttributesWritten /// NoneAttributesWritten --> Spaces : .with_attribute() /// NoneAttributesWritten --> WriteConfigured : .new_line() /// /// Spaces --> Spaces : .with_attribute() /// Spaces --> WriteSpaces : .new_line() /// /// WriteSpaces --> Spaces : .with_attribute() /// WriteSpaces --> WriteSpaces : .new_line() /// /// Configured --> Configured : .with_attribute() /// Configured --> WriteConfigured : .new_line() /// /// WriteConfigured --> Configured : .with_attribute() /// WriteConfigured --> WriteConfigured : .new_line() /// ``` #[derive(Debug)] enum AttributeIndent { /// Initial state. `ElementWriter` was just created and no attributes written yet NoneAttributesWritten, /// Write specified count of spaces to indent before writing attribute in `with_attribute()` WriteSpaces(usize), /// Keep space indent that should be used if `new_line()` would be called Spaces(usize), /// Write specified count of indent characters before writing attribute in `with_attribute()` WriteConfigured(usize), /// Keep indent that should be used if `new_line()` would be called Configured(usize), } /// A struct to write an element. Contains methods to add attributes and inner /// elements to the element pub struct ElementWriter<'a, W> { writer: &'a mut Writer, start_tag: BytesStart<'a>, state: AttributeIndent, /// Contains spaces used to write space indents of attributes spaces: Vec, } impl<'a, W> ElementWriter<'a, W> { /// Adds an attribute to this element. 
pub fn with_attribute<'b, I>(mut self, attr: I) -> Self where I: Into>, { self.write_attr(attr.into()); self } /// Add additional attributes to this element using an iterator. /// /// The yielded items must be convertible to [`Attribute`] using `Into`. pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self where I: IntoIterator, I::Item: Into>, { let mut iter = attributes.into_iter(); if let Some(attr) = iter.next() { self.write_attr(attr.into()); self.start_tag.extend_attributes(iter); } self } /// Push a new line inside an element between attributes. Note, that this /// method does nothing if [`Writer`] was created without indentation support. /// /// # Examples /// /// The following code /// /// ``` /// # use quick_xml::writer::Writer; /// let mut buffer = Vec::new(); /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2); /// writer /// .create_element("element") /// //.new_line() (1) /// .with_attribute(("first", "1")) /// .with_attribute(("second", "2")) /// .new_line() /// .with_attributes([ /// ("third", "3"), /// ("fourth", "4"), /// ]) /// //.new_line() (2) /// .write_empty(); /// ``` /// will produce the following XMLs: /// ```xml /// /// /// /// /// /// /// /// /// ``` pub fn new_line(mut self) -> Self { if let Some(i) = self.writer.indent.as_mut() { match self.state { // .new_line() called just after .create_element(). // Use element indent to additionally indent attributes AttributeIndent::NoneAttributesWritten => { self.state = AttributeIndent::WriteConfigured(i.indent_size) } AttributeIndent::WriteSpaces(_) => {} // .new_line() called when .with_attribute() was called at least once. // The spaces should be used to indent // Plan saved indent AttributeIndent::Spaces(indent) => { self.state = AttributeIndent::WriteSpaces(indent) } AttributeIndent::WriteConfigured(_) => {} // .new_line() called when .with_attribute() was called at least once. 
// The configured indent characters should be used to indent // Plan saved indent AttributeIndent::Configured(indent) => { self.state = AttributeIndent::WriteConfigured(indent) } } self.start_tag.push_newline(); }; self } /// Writes attribute and maintain indentation state fn write_attr<'b>(&mut self, attr: Attribute<'b>) { if let Some(i) = self.writer.indent.as_mut() { // Save the indent that we should use next time when .new_line() be called self.state = match self.state { // Neither .new_line() or .with_attribute() yet called // If newline inside attributes will be requested, we should indent them // by the length of tag name and +1 for `<` and +1 for one space AttributeIndent::NoneAttributesWritten => { self.start_tag.push_attribute(attr); AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2) } // Indent was requested by previous call to .new_line(), write it // New line was already written AttributeIndent::WriteSpaces(indent) => { if self.spaces.len() < indent { self.spaces.resize(indent, b' '); } self.start_tag.push_indent(&self.spaces[..indent]); self.start_tag.push_attr(attr); AttributeIndent::Spaces(indent) } // .new_line() was not called, but .with_attribute() was. // use the previously calculated indent AttributeIndent::Spaces(indent) => { self.start_tag.push_attribute(attr); AttributeIndent::Spaces(indent) } // Indent was requested by previous call to .new_line(), write it // New line was already written AttributeIndent::WriteConfigured(indent) => { self.start_tag.push_indent(i.additional(indent)); self.start_tag.push_attr(attr); AttributeIndent::Configured(indent) } // .new_line() was not called, but .with_attribute() was. // use the previously calculated indent AttributeIndent::Configured(indent) => { self.start_tag.push_attribute(attr); AttributeIndent::Configured(indent) } }; } else { self.start_tag.push_attribute(attr); } } } impl<'a, W: Write> ElementWriter<'a, W> { /// Write some text inside the current element. 
pub fn write_text_content(self, text: BytesText) -> io::Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::Text(text))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write a CData event `` inside the current element. pub fn write_cdata_content(self, text: BytesCData) -> io::Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::CData(text))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write a processing instruction `` inside the current element. pub fn write_pi_content(self, pi: BytesPI) -> io::Result<&'a mut Writer> { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; self.writer.write_event(Event::PI(pi))?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } /// Write an empty (self-closing) tag. pub fn write_empty(self) -> io::Result<&'a mut Writer> { self.writer.write_event(Event::Empty(self.start_tag))?; Ok(self.writer) } /// Create a new scope for writing XML inside the current element. pub fn write_inner_content(self, closure: F) -> io::Result<&'a mut Writer> where F: FnOnce(&mut Writer) -> io::Result<()>, { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; closure(self.writer)?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) } } #[cfg(feature = "serialize")] pub(crate) struct ToFmtWrite(pub T); #[cfg(feature = "serialize")] impl std::fmt::Write for ToFmtWrite where T: std::io::Write, { fn write_str(&mut self, s: &str) -> std::fmt::Result { self.0.write_all(s.as_bytes()).map_err(|_| std::fmt::Error) } } #[derive(Clone)] pub(crate) struct Indentation { /// todo: this is an awkward fit as it has no impact on indentation logic, but it is /// only applicable when an indentation exists. 
Potentially refactor later should_line_break: bool, /// The character code to be used for indentations (e.g. ` ` or `\t`) indent_char: u8, /// How many instances of the indent character ought to be used for each level of indentation indent_size: usize, /// Used as a cache for the bytes used for indentation indents: Vec, /// The current amount of indentation current_indent_len: usize, } impl Indentation { pub fn new(indent_char: u8, indent_size: usize) -> Self { Self { should_line_break: false, indent_char, indent_size, indents: vec![indent_char; 128], current_indent_len: 0, // invariant - needs to remain less than indents.len() } } /// Increase indentation by one level pub fn grow(&mut self) { self.current_indent_len += self.indent_size; self.ensure(self.current_indent_len); } /// Decrease indentation by one level. Do nothing, if level already zero pub fn shrink(&mut self) { self.current_indent_len = self.current_indent_len.saturating_sub(self.indent_size); } /// Returns indent string for current level pub fn current(&self) -> &[u8] { &self.indents[..self.current_indent_len] } /// Returns indent with current indent plus additional indent pub fn additional(&mut self, additional_indent: usize) -> &[u8] { let new_len = self.current_indent_len + additional_indent; self.ensure(new_len); &self.indents[..new_len] } fn ensure(&mut self, new_len: usize) { if self.indents.len() < new_len { self.indents.resize(new_len, self.indent_char); } } } quick-xml-0.38.4/tests/README.md000064400000000000000000000023761046102023000142630ustar 00000000000000# Document descriptions document.xml medium length, mostly empty tags, a few short attributes per element, no escaping html5.html html5.txt libreoffice_document.fodt long, mix of attributes and text, not much escaping, lots of non-ascii characters, lots of namespaces linescore.xml medium length, lots of attributes, short attributes, few escapes opennews_all.rss players.xml long, lots of attributes, short attributes, no text, no escapes 
rpm_filelists.xml long, mostly medium-length text elements, not much escaping rpm_other.xml long, mix of attributes and text, lots of escaping (both entity and char literal), long attributes rpm_primary.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces rpm_primary2.xml long, mix of attributes and text, not much escaping, mix of attribute lengths, some namespaces sample_1.xml short, mix of attributes and text, lots of escapes sample_ns.xml short, lots of namespaces, no escapes sample_rss.xml long, few attributes, mix of attribute lengths, escapes in text content test_writer_indent_cdata.xml test_writer_indent.xml medium length, lots of namespaces, no escaping test_writer.xml utf16be.xml utf16le.xml

(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error> where P: Parser; /// Read input until comment or CDATA is finished. /// /// This method expect that `<` already was read. /// /// Returns a slice of data read up to end of comment or CDATA (`>`), /// which does not include into result. /// /// If input (`Self`) is exhausted and nothing was read, returns `None`. /// /// # Parameters /// - `buf`: Buffer that could be filled from an input (`Self`) and /// from which [events] could borrow their data /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event fn read_bang_element( &mut self, buf: B, position: &mut u64, ) -> Result<(BangType, &'r [u8]), Error>; /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. /// /// # Parameters /// - `position`: Will be increased by amount of bytes consumed fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>; /// Return one character without consuming it, so that future `read_*` calls /// will still include it. On EOF, return `None`. fn peek_one(&mut self) -> io::Result>; } /// Possible elements started with ` CData, /// Comment, /// . 
Contains balance of '<' (+1) and '>' (-1) DocType(i32), } impl BangType { #[inline(always)] const fn new(byte: Option) -> Result { Ok(match byte { Some(b'[') => Self::CData, Some(b'-') => Self::Comment, Some(b'D') | Some(b'd') => Self::DocType(0), _ => return Err(SyntaxError::InvalidBangMarkup), }) } /// If element is finished, returns its content up to `>` symbol and /// an index of this symbol, otherwise returns `None` /// /// # Parameters /// - `buf`: buffer with data consumed on previous iterations /// - `chunk`: data read on current iteration and not yet consumed from reader #[inline(always)] fn parse<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> { match self { Self::Comment => { for i in memchr::memchr_iter(b'>', chunk) { // Need to read at least 6 symbols (`!---->`) for properly finished comment // - XML comment // 012345 - i if buf.len() + i > 4 { if chunk[..i].ends_with(b"--") { // We cannot strip last `--` from the buffer because we need it in case of // check_comments enabled option. 
XML standard requires that comment // will not end with `--->` sequence because this is a special case of // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments) return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `-|->` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `--|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"--") { return Some((&[], i + 1)); // +1 for `>` } } } } Self::CData => { for i in memchr::memchr_iter(b'>', chunk) { if chunk[..i].ends_with(b"]]") { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]|]>` was splitted at | // buf --/ \-- chunk if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' { return Some((&chunk[..i], i + 1)); // +1 for `>` } // End sequence `]]|>` was splitted at | // buf --/ \-- chunk if i == 0 && buf.ends_with(b"]]") { return Some((&[], i + 1)); // +1 for `>` } } } Self::DocType(ref mut balance) => { for i in memchr::memchr2_iter(b'<', b'>', chunk) { if chunk[i] == b'<' { *balance += 1; } else { if *balance == 0 { return Some((&chunk[..i], i + 1)); // +1 for `>` } *balance -= 1; } } } } None } #[inline] const fn to_err(&self) -> SyntaxError { match self { Self::CData => SyntaxError::UnclosedCData, Self::Comment => SyntaxError::UnclosedComment, Self::DocType(_) => SyntaxError::UnclosedDoctype, } } } //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)] mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { ( #[$test:meta] $read_event:ident, $read_until_close:ident, // constructor of the XML source on which internal functions will be called $source:path, // constructor of the buffer to which read data will stored $buf:expr $(, $async:ident, $await:ident)? 
) => { mod read_bang_element { use super::*; use crate::errors::{Error, SyntaxError}; use crate::reader::BangType; use crate::utils::Bytes; /// Checks that reading CDATA content works correctly mod cdata { use super::*; use pretty_assertions::assert_eq; /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"![]]>other content".as_ref(); // ^= 1 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[other content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } /// Checks that CDATA element without content inside parsed successfully #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[]]>other content".as_ref(); // ^= 1 ^= 12 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::CData, Bytes(b"![CDATA[]]")) ); assert_eq!(position, 12); } /// Checks that CDATA element with content parsed successfully. /// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing #[$test] $($async)? 
fn with_content() { let buf = $buf; let mut position = 1; let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); // ^= 1 ^= 29 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")) ); assert_eq!(position, 29); } } /// Checks that reading XML comments works correctly. According to the [specification], /// comment data can contain any sequence except `--`: /// /// ```peg /// comment = '<--' (!'--' char)* '-->'; /// char = [#x1-#x2C] /// / [#x2E-#xD7FF] /// / [#xE000-#xFFFD] /// / [#x10000-#x10FFFF] /// ``` /// /// The presence of this limitation, however, is simply a poorly designed specification /// (maybe for purpose of building of LL(1) XML parser) and quick-xml does not check for /// presence of these sequences by default. This tests allow such content. /// /// [specification]: https://www.w3.org/TR/xml11/#dt-comment mod comment { use super::*; use pretty_assertions::assert_eq; #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!- -->other content".as_ref(); // ^= 1 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } #[$test] $($async)? fn not_properly_end() { let buf = $buf; let mut position = 1; let mut input = b"!->other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? 
fn not_closed1() { let buf = $buf; let mut position = 1; let mut input = b"!--other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn not_closed2() { let buf = $buf; let mut position = 1; let mut input = b"!-->other content".as_ref(); // ^= 1 ^= 18 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 18); } #[$test] $($async)? fn not_closed3() { let buf = $buf; let mut position = 1; let mut input = b"!--->other content".as_ref(); // ^= 1 ^= 19 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 19); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!---->other content".as_ref(); // ^= 1 ^= 7 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::Comment, Bytes(b"!----")) ); assert_eq!(position, 7); } #[$test] $($async)? fn with_content() { let buf = $buf; let mut position = 1; let mut input = b"!--->comment<--->other content".as_ref(); // ^= 1 ^= 18 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? 
.unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::Comment, Bytes(b"!--->comment<---")) ); assert_eq!(position, 18); } } /// Checks that reading DOCTYPE definition works correctly mod doctype { use super::*; mod uppercase { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!D other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPEother content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPE>other content".as_ref(); // ^= 1 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::DocType(0), Bytes(b"!DOCTYPE")) ); assert_eq!(position, 10); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"!DOCTYPE other content".as_ref(); // ^= 1 ^23 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 23); } } mod lowercase { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? 
fn not_properly_start() { let buf = $buf; let mut position = 1; let mut input = b"!d other content".as_ref(); // ^= 1 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 17); } #[$test] $($async)? fn without_space() { let buf = $buf; let mut position = 1; let mut input = b"!doctypeother content".as_ref(); // ^= 1 ^= 22 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 22); } #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"!doctype>other content".as_ref(); // ^= 1 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) $(.$await)? .unwrap(); assert_eq!( (ty, Bytes(bytes)), (BangType::DocType(0), Bytes(b"!doctype")) ); assert_eq!(position, 10); } #[$test] $($async)? fn not_closed() { let buf = $buf; let mut position = 1; let mut input = b"!doctype other content".as_ref(); // ^= 1 ^= 23 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 23); } } } } mod read_text { use super::*; use crate::reader::ReadTextResult; use crate::utils::Bytes; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty() { let buf = $buf; let mut position = 1; let mut input = b"".as_ref(); // ^= 1 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")), x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x), } assert_eq!(position, 1); } #[$test] $($async)? 
fn markup() { let buf = $buf; let mut position = 1; let mut input = b"<".as_ref(); // ^= 2 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::Markup(b) => assert_eq!(b, $buf), x => panic!("Expected `Markup(_)`, but got `{:?}`", x), } assert_eq!(position, 2); } #[$test] $($async)? fn ref_() { let buf = $buf; let mut position = 1; let mut input = b"&".as_ref(); // ^= 1 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::Ref(b) => assert_eq!(b, $buf), x => panic!("Expected `Ref(_)`, but got `{:?}`", x), } assert_eq!(position, 1); } #[$test] $($async)? fn up_to_markup() { let buf = $buf; let mut position = 1; let mut input = b"a<".as_ref(); // 1 ^= 3 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")), x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x), } assert_eq!(position, 3); } #[$test] $($async)? fn up_to_ref() { let buf = $buf; let mut position = 1; let mut input = b"a&".as_ref(); // ^= 2 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")), x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x), } assert_eq!(position, 2); } #[$test] $($async)? fn up_to_eof() { let buf = $buf; let mut position = 1; let mut input = b"a".as_ref(); // ^= 2 match $source(&mut input).read_text(buf, &mut position) $(.$await)? { ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")), x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x), } assert_eq!(position, 2); } } mod read_ref { use super::*; use crate::reader::ReadRefResult; use crate::utils::Bytes; use pretty_assertions::assert_eq; // Empty input is not allowed for `read_ref` so not tested. // Borrowed source triggers debug assertion, // buffered do nothing due to implementation details. #[$test] $($async)? 
fn up_to_eof() { let buf = $buf; let mut position = 1; let mut input = b"&".as_ref(); // ^= 2 match $source(&mut input).read_ref(buf, &mut position) $(.$await)? { ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")), x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x), } assert_eq!(position, 2); } #[$test] $($async)? fn up_to_ref() { let buf = $buf; let mut position = 1; let mut input = b"&&".as_ref(); // ^= 2 match $source(&mut input).read_ref(buf, &mut position) $(.$await)? { ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")), x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x), } assert_eq!(position, 2); } #[$test] $($async)? fn up_to_markup() { let buf = $buf; let mut position = 1; let mut input = b"&<".as_ref(); // ^= 3 match $source(&mut input).read_ref(buf, &mut position) $(.$await)? { ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")), x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x), } assert_eq!(position, 3); } #[$test] $($async)? fn empty_ref() { let buf = $buf; let mut position = 1; let mut input = b"&;".as_ref(); // ^= 3 match $source(&mut input).read_ref(buf, &mut position) $(.$await)? { ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")), x => panic!("Expected `Ref(_)`, but got `{:?}`", x), } assert_eq!(position, 3); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"<".as_ref(); // ^= 5 match $source(&mut input).read_ref(buf, &mut position) $(.$await)? { ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"<")), x => panic!("Expected `Ref(_)`, but got `{:?}`", x), } assert_eq!(position, 5); } } mod read_element { use super::*; use crate::errors::{Error, SyntaxError}; use crate::parser::ElementParser; use crate::utils::Bytes; use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer #[$test] $($async)? 
fn empty() { let buf = $buf; let mut position = 1; let mut input = b"".as_ref(); // ^= 1 match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag), x => panic!( "Expected `Err(Syntax(_))`, but got `{:?}`", x ), } assert_eq!(position, 1); } mod open { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b">".as_ref(); // ^= 2 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"") ); assert_eq!(position, 2); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"tag>".as_ref(); // ^= 5 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"tag") ); assert_eq!(position, 5); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b":>".as_ref(); // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":") ); assert_eq!(position, 3); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b":tag>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":tag") ); assert_eq!(position, 6); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 39 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#) ); assert_eq!(position, 39); } } mod self_closed { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? 
fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/>".as_ref(); // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/") ); assert_eq!(position, 3); } #[$test] $($async)? fn normal() { let buf = $buf; let mut position = 1; let mut input = b"tag/>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"tag/") ); assert_eq!(position, 6); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b":/>".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":/") ); assert_eq!(position, 4); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b":tag/>".as_ref(); // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b":tag/") ); assert_eq!(position, 7); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); // ^= 42 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#) ); assert_eq!(position, 42); } } mod close { use super::*; use pretty_assertions::assert_eq; #[$test] $($async)? fn empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/ >".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/ ") ); assert_eq!(position, 4); } #[$test] $($async)? 
fn normal() { let buf = $buf; let mut position = 1; let mut input = b"/tag>".as_ref(); // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/tag") ); assert_eq!(position, 6); } #[$test] $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 1; let mut input = b"/:>".as_ref(); // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/:") ); assert_eq!(position, 4); } #[$test] $($async)? fn empty_ns() { let buf = $buf; let mut position = 1; let mut input = b"/:tag>".as_ref(); // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(b"/:tag") ); assert_eq!(position, 7); } #[$test] $($async)? fn with_attributes() { let buf = $buf; let mut position = 1; let mut input = br#"/tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 40 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), Bytes(br#"/tag attr-1=">" attr2 = '>' 3attr"#) ); assert_eq!(position, 40); } } } /// Ensures, that no empty `Text` events are generated mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; /// When `encoding` feature is enabled, encoding should be detected /// from BOM (UTF-8) and BOM should be stripped. /// /// When `encoding` feature is disabled, UTF-8 is assumed and BOM /// character should be stripped for consistency #[$test] $($async)? fn bom_from_reader() { let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes()); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? 
.unwrap(), Event::Eof ); } /// When parsing from &str, encoding is fixed (UTF-8), so /// - when `encoding` feature is disabled, the behavior the /// same as in `bom_from_reader` text /// - when `encoding` feature is enabled, the behavior should /// stay consistent, so the first BOM character is stripped #[$test] $($async)? fn bom_from_str() { let mut reader = Reader::from_str("\u{feff}\u{feff}"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("\u{feff}")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } #[$test] $($async)? fn declaration() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } #[$test] $($async)? fn doctype() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::DocType(BytesText::from_escaped("x")) ); } #[$test] $($async)? fn processing_instruction() { let mut reader = Reader::from_str("\" ?>"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::PI(BytesPI::new("xml-stylesheet '? >\" ")) ); } /// Lone closing tags are not allowed, so testing it together with start tag #[$test] $($async)? fn start_and_end() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Start(BytesStart::new("tag")) ); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::End(BytesEnd::new("tag")) ); } #[$test] $($async)? fn empty() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); } #[$test] $($async)? fn text() { let mut reader = Reader::from_str("text"); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("text")) ); } #[$test] $($async)? 
fn cdata() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("")) ); } #[$test] $($async)? fn comment() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Comment(BytesText::from_escaped("")) ); } #[$test] $($async)? fn eof() { let mut reader = Reader::from_str(""); assert_eq!( reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } } }; } // Export macros for the child modules: // - buffered_reader // - slice_reader pub(super) use check; } quick-xml-0.38.4/src/reader/ns_reader.rs000064400000000000000000001077761046102023000162150ustar 00000000000000//! A reader that manages namespace declarations found in the input and able //! to resolve [qualified names] to [expanded names]. //! //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::ops::Deref; use std::path::Path; use crate::errors::Result; use crate::events::Event; use crate::name::{LocalName, NamespaceBindingsIter, NamespaceResolver, QName, ResolveResult}; use crate::reader::{Config, Reader, Span, XmlSource}; /// A low level encoding-agnostic XML event reader that performs namespace resolution. /// /// Consumes a [`BufRead`] and streams XML `Event`s. #[derive(Debug, Clone)] pub struct NsReader { /// An XML reader pub(super) reader: Reader, /// A buffer to manage namespaces ns_resolver: NamespaceResolver, /// We cannot pop data from the namespace stack until returned `Empty` or `End` /// event will be processed by the user, so we only mark that we should that /// in the next [`Self::read_event_impl()`] call. pending_pop: bool, } /// Builder methods impl NsReader { /// Creates a `NsReader` that reads from a reader. 
#[inline]
    pub fn from_reader(reader: R) -> Self {
        Self::new(Reader::from_reader(reader))
    }

    /// Returns reference to the parser configuration
    #[inline]
    pub const fn config(&self) -> &Config {
        self.reader.config()
    }

    /// Returns mutable reference to the parser configuration
    #[inline]
    pub fn config_mut(&mut self) -> &mut Config {
        self.reader.config_mut()
    }

    /// Returns all the prefixes currently declared except the default `xml` and `xmlns` namespaces.
    ///
    /// # Examples
    ///
    /// This example shows what results the returned iterator would return after
    /// reading each event of a simple XML.
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::name::{Namespace, PrefixDeclaration};
    /// use quick_xml::NsReader;
    ///
    /// let src = "<root>
    ///   <a xmlns='a1' xmlns:a='a2'>
    ///     <b xmlns='b1' xmlns:b='b2'>
    ///       <c/>
    ///     </b>
    ///     <d/>
    ///   </a>
    /// </root>";
    /// let mut reader = NsReader::from_str(src);
    /// reader.config_mut().trim_text(true);
    /// // No prefixes at the beginning
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
    ///
    /// reader.read_resolved_event()?; // <root>
    /// // No prefixes declared on root
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![]);
    ///
    /// reader.read_resolved_event()?; // <a>
    /// // Two prefixes declared on "a"
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Default, Namespace(b"a1")),
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // <b>
    /// // The default prefix got overridden and new "b" prefix
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // <c/>
    /// // Still the same
    /// assert_eq!(reader.prefixes().collect::<Vec<_>>(), vec![
    ///     (PrefixDeclaration::Named(b"a"), Namespace(b"a2")),
    ///     (PrefixDeclaration::Default, Namespace(b"b1")),
    ///     (PrefixDeclaration::Named(b"b"), Namespace(b"b2"))
    /// ]);
    ///
    /// reader.read_resolved_event()?; // </b>
    ///
// Still the same /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")), /// (PrefixDeclaration::Default, Namespace(b"b1")), /// (PrefixDeclaration::Named(b"b"), Namespace(b"b2")) /// ]); /// /// reader.read_resolved_event()?; // /// // got closed so back to the prefixes declared on /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Default, Namespace(b"a1")), /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) /// ]); /// /// reader.read_resolved_event()?; // /// // Still the same /// assert_eq!(reader.prefixes().collect::>(), vec![ /// (PrefixDeclaration::Default, Namespace(b"a1")), /// (PrefixDeclaration::Named(b"a"), Namespace(b"a2")) /// ]); /// /// reader.read_resolved_event()?; // /// // got closed /// assert_eq!(reader.prefixes().collect::>(), vec![]); /// # quick_xml::Result::Ok(()) /// ``` #[inline] pub const fn prefixes(&self) -> NamespaceBindingsIter<'_> { self.ns_resolver.bindings() } } /// Private methods impl NsReader { #[inline] fn new(reader: Reader) -> Self { Self { reader, ns_resolver: NamespaceResolver::default(), pending_pop: false, } } fn read_event_impl<'i, B>(&mut self, buf: B) -> Result> where R: XmlSource<'i, B>, { self.pop(); let event = self.reader.read_event_impl(buf); self.process_event(event) } pub(super) fn pop(&mut self) { if self.pending_pop { self.ns_resolver.pop(); self.pending_pop = false; } } pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { self.ns_resolver.push(&e)?; Ok(Event::Start(e)) } Ok(Event::Empty(e)) => { self.ns_resolver.push(&e)?; // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true; Ok(Event::Empty(e)) } Ok(Event::End(e)) => { // notify next `read_event_impl()` invocation that it needs to pop this // namespace scope self.pending_pop = true; Ok(Event::End(e)) } e => e, } } } /// Getters impl NsReader { /// 
Consumes `NsReader` returning the underlying reader /// /// See the [`Reader::into_inner`] for examples #[inline] pub fn into_inner(self) -> R { self.reader.into_inner() } /// Gets a mutable reference to the underlying reader. pub fn get_mut(&mut self) -> &mut R { self.reader.get_mut() } /// Returns a storage of namespace bindings associated with this reader. #[inline] pub const fn resolver(&self) -> &NamespaceResolver { &self.ns_resolver } /// Resolves a potentially qualified **element name** or **attribute name** /// into _(namespace name, local name)_. /// /// _Qualified_ names have the form `local-name` or `prefix:local-name` where the `prefix` /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the name in question. /// /// The method returns following results depending on the `name` shape, `attribute` flag /// and the presence of the default namespace on element or any of its parents: /// /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName /// |---------|-------------|-------------------|-----------------------|------------ /// |`true` |_(any)_ |`local-name` |[`Unbound`] |`local-name` /// |`true` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name` /// |`false` |Defined |`local-name` |[`Bound`] (to `xmlns`) |`local-name` /// |`false` |_(any)_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` /// /// If you want to clearly indicate that name that you resolve is an element /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`] /// methods. /// /// # Lifetimes /// /// - `'n`: lifetime of a name. Returned local name will be bound to the same /// lifetime as the name in question. 
/// - returned namespace name will be bound to the reader itself
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`resolve_attribute()`]: Self::resolve_attribute()
    /// [`resolve_element()`]: Self::resolve_element()
    #[inline]
    pub fn resolve<'n>(
        &self,
        name: QName<'n>,
        attribute: bool,
    ) -> (ResolveResult<'_>, LocalName<'n>) {
        // The resolver takes the inverse of `attribute`; presumably its flag means
        // "apply the default namespace to unprefixed names" — confirm against
        // `NamespaceResolver::resolve`.
        self.ns_resolver.resolve(name, !attribute)
    }

    /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ element names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the element
    /// in question.
    ///
    /// _Unqualified_ elements inherit the current _default namespace_.
    ///
    /// The method returns following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Bound`] (default)    |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an element name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
    /// - returned namespace name will be bound to the reader itself
    ///
    /// # Examples
    ///
    /// This example shows how you can resolve qualified name into a namespace.
    /// Note, that in the code like this you do not need to do that manually,
    /// because the namespace resolution result returned by the [`read_resolved_event()`].
///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, QName, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str("<tag xmlns='root namespace'/>");
    ///
    /// match reader.read_event().unwrap() {
    ///     Event::Empty(e) => assert_eq!(
    ///         reader.resolve_element(e.name()),
    ///         (Bound(Namespace(b"root namespace")), QName(b"tag").into())
    ///     ),
    ///     _ => unreachable!(),
    /// }
    /// ```
    ///
    /// [`Bound`]: ResolveResult::Bound
    /// [`Unbound`]: ResolveResult::Unbound
    /// [`Unknown`]: ResolveResult::Unknown
    /// [`read_resolved_event()`]: Self::read_resolved_event
    #[inline]
    pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) {
        self.ns_resolver.resolve_element(name)
    }

    /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_.
    ///
    /// _Qualified_ attribute names have the form `prefix:local-name` where the
    /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`.
    /// The namespace prefix can be defined on the same element as the attribute
    /// in question.
    ///
    /// _Unqualified_ attribute names do *not* inherit the current _default namespace_.
    ///
    /// The method returns following results depending on the `name` shape and
    /// the presence of the default namespace:
    ///
    /// |`xmlns="..."`|QName              |ResolveResult          |LocalName
    /// |-------------|-------------------|-----------------------|------------
    /// |Not defined  |`local-name`       |[`Unbound`]            |`local-name`
    /// |Defined      |`local-name`       |[`Unbound`]            |`local-name`
    /// |_any_        |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name`
    ///
    /// # Lifetimes
    ///
    /// - `'n`: lifetime of an attribute name. Returned local name will be bound
    ///   to the same lifetime as the name in question.
/// - returned namespace name will be bound to the reader itself /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(" /// /// "); /// reader.config_mut().trim_text(true); /// /// match reader.read_event().unwrap() { /// Event::Empty(e) => { /// let mut iter = e.attributes(); /// /// // Unlike elements, attributes without explicit namespace /// // not bound to any namespace /// let one = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(one.key), /// (Unbound, QName(b"one").into()) /// ); /// /// let two = iter.next().unwrap().unwrap(); /// assert_eq!( /// reader.resolve_attribute(two.key), /// (Bound(Namespace(b"other namespace")), QName(b"two").into()) /// ); /// } /// _ => unreachable!(), /// } /// ``` /// /// [`Bound`]: ResolveResult::Bound /// [`Unbound`]: ResolveResult::Unbound /// [`Unknown`]: ResolveResult::Unknown #[inline] pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult<'_>, LocalName<'n>) { self.ns_resolver.resolve_attribute(name) } } impl NsReader { /// Reads the next event into given buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolver().resolve_element()`] if you want to get a namespace. /// /// You also can use [`read_resolved_event_into()`] instead if you want to resolve /// namespace as soon as you get an event. 
///
    /// # Examples
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::Event;
    /// use quick_xml::name::{Namespace, ResolveResult::*};
    /// use quick_xml::reader::NsReader;
    ///
    /// let mut reader = NsReader::from_str(r#"
    ///     <x:tag1 xmlns:x="www.xxxx" xmlns:y="www.yyyy" att1 = "test">
    ///        <y:tag2>Test</y:tag2>
    ///        <y:tag2>Test 2</y:tag2>
    ///     </x:tag1>
    /// "#);
    /// reader.config_mut().trim_text(true);
    ///
    /// let mut count = 0;
    /// let mut buf = Vec::new();
    /// let mut txt = Vec::new();
    /// loop {
    ///     match reader.read_event_into(&mut buf).unwrap() {
    ///         Event::Start(e) => {
    ///             count += 1;
    ///             let (ns, local) = reader.resolver().resolve_element(e.name());
    ///             match local.as_ref() {
    ///                 b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))),
    ///                 b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))),
    ///                 _ => unreachable!(),
    ///             }
    ///         }
    ///         Event::Text(e) => {
    ///             txt.push(e.decode().unwrap().into_owned())
    ///         }
    ///         Event::Eof => break,
    ///         _ => (),
    ///     }
    ///     buf.clear();
    /// }
    /// assert_eq!(count, 3);
    /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]);
    /// ```
    ///
    /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element
    /// [`read_resolved_event_into()`]: Self::read_resolved_event_into
    #[inline]
    pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec<u8>) -> Result<Event<'b>> {
        self.read_event_impl(buf)
    }

    /// Reads the next event into given buffer and resolves its namespace (if applicable).
    ///
    /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events.
    /// For all other events the concept of namespace is not defined, so
    /// a [`ResolveResult::Unbound`] is returned.
    ///
    /// If you are not interested in namespaces, you can use [`read_event_into()`]
    /// which will not automatically resolve namespaces for you.
/// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut buf = Vec::new(); /// let mut txt = Vec::new(); /// loop { /// match reader.read_resolved_event_into(&mut buf).unwrap() { /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag1").into()); /// } /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag2").into()); /// } /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), /// } /// buf.clear(); /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` /// /// [`Start`]: Event::Start /// [`Empty`]: Event::Empty /// [`End`]: Event::End /// [`read_event_into()`]: Self::read_event_into #[inline] pub fn read_resolved_event_into<'b>( &mut self, buf: &'b mut Vec, ) -> Result<(ResolveResult<'_>, Event<'b>)> { let event = self.read_event_impl(buf)?; Ok(self.ns_resolver.resolve_event(event)) } /// Reads until the end element is found, using the provided buffer as intermediate /// storage for event content. This function is supposed to be called after /// you have already read a [`Start`] event. /// /// Returns a span that covers the content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading an expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have _literally_ the /// same name.
/// /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`] /// will be returned. In particular, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// If your reader was created from a string slice or byte array slice, it is /// better to use the [`read_to_end()`] method, because it will not copy bytes /// into an intermediate buffer. /// /// The provided `buf` buffer will hold the content of only one event at a time. /// Before each event is read, the buffer will be cleared. If you know an /// appropriate size for each event, you can preallocate the buffer to reduce /// the number of reallocations. /// /// The `end` parameter should contain the name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using the /// [`BytesStart::to_end()`] method. /// /// # Namespaces /// /// While the `NsReader` does namespace resolution, namespaces do not /// change the algorithm for comparing names. Although the names `a:name` /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace, /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because /// according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows how you can skip XML content after you read the /// start event.
/// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// let mut buf = Vec::new(); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then we can skip all events up to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); /// /// // At the end we should get an Eof event, because we consumed the whole XML /// assert_eq!( /// reader.read_resolved_event_into(&mut buf).unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`IllFormed`]: crate::errors::Error::IllFormed /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Config::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to https://www.w3.org/TR/xml11/#dt-etag, the end name must // literally match the start name. See the `Config::check_end_names` documentation self.reader.read_to_end_into(end, buf) } } impl NsReader> { /// Creates an XML reader from a file path.
pub fn from_file>(path: P) -> Result { Ok(Self::new(Reader::from_file(path)?)) } } impl<'i> NsReader<&'i [u8]> { /// Creates an XML reader from a string slice. #[inline] #[allow(clippy::should_implement_trait)] pub fn from_str(s: &'i str) -> Self { Self::new(Reader::from_str(s)) } /// Reads the next event, borrowing its content from the input buffer. /// /// This method manages namespaces but doesn't resolve them automatically. /// You should call [`resolver().resolve_element()`] if you want to get a namespace. /// /// You can also use [`read_resolved_event()`] instead if you want to resolve the namespace /// as soon as you get an event. /// /// There is no asynchronous `read_event_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// match reader.read_event().unwrap() { /// Event::Start(e) => { /// count += 1; /// let (ns, local) = reader.resolver().resolve_element(e.name()); /// match local.as_ref() { /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), /// _ => unreachable!(), /// } /// } /// Event::Text(e) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// Event::Eof => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` /// /// [`resolver().resolve_element()`]: NamespaceResolver::resolve_element /// [`read_resolved_event()`]: Self::read_resolved_event #[inline] pub fn read_event(&mut self) ->
Result> { self.read_event_impl(()) } /// Reads the next event, borrowing its content from the input buffer, and resolves /// its namespace (if applicable). /// /// The namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. /// For all other events the concept of a namespace is not defined, so /// a [`ResolveResult::Unbound`] is returned. /// /// If you are not interested in namespaces, you can use [`read_event()`] /// which will not automatically resolve namespaces for you. /// /// There is no asynchronous `read_resolved_event_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Examples /// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::Event; /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// Test /// Test 2 /// /// "#); /// reader.config_mut().trim_text(true); /// /// let mut count = 0; /// let mut txt = Vec::new(); /// loop { /// match reader.read_resolved_event().unwrap() { /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag1").into()); /// } /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { /// count += 1; /// assert_eq!(e.local_name(), QName(b"tag2").into()); /// } /// (_, Event::Start(_)) => unreachable!(), /// /// (_, Event::Text(e)) => { /// txt.push(e.decode().unwrap().into_owned()) /// } /// (_, Event::Eof) => break, /// _ => (), /// } /// } /// assert_eq!(count, 3); /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` /// /// [`Start`]: Event::Start /// [`Empty`]: Event::Empty /// [`End`]: Event::End /// [`read_event()`]: Self::read_event #[inline] pub fn read_resolved_event(&mut self) -> Result<(ResolveResult<'_>, Event<'i>)> { let event = self.read_event_impl(())?;
Ok(self.ns_resolver.resolve_event(event)) } /// Reads until the end element is found. This function is supposed to be called /// after you have already read a [`Start`] event. /// /// Returns a span that covers the content between `>` of an opening tag and `<` of /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading an expanded [`Start`] event. /// /// Manages nested cases where parent and child elements have _literally_ the /// same name. /// /// If a corresponding [`End`] event is not found, an error of type [`IllFormed`] /// will be returned. In particular, that error will be returned if you call /// this method without consuming the corresponding [`Start`] event first. /// /// The `end` parameter should contain the name of the end element _in the reader /// encoding_. It is good practice to always get that parameter using the /// [`BytesStart::to_end()`] method. /// /// There is no asynchronous `read_to_end_async()` version of this function, /// because it is not necessary -- the contents are already in memory and no IO /// is needed, therefore there is no potential for blocking. /// /// # Namespaces /// /// While the `NsReader` does namespace resolution, namespaces do not /// change the algorithm for comparing names. Although the names `a:name` /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace, /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because /// according to [the specification] /// /// > The end of every element that begins with a **start-tag** MUST be marked /// > by an **end-tag** containing a name that echoes the element's type as /// > given in the **start-tag** /// /// # Examples /// /// This example shows how you can skip XML content after you read the /// start event.
/// /// ``` /// # use pretty_assertions::assert_eq; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::name::{Namespace, ResolveResult}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// /// /// /// /// /// /// /// /// /// /// /// "#); /// reader.config_mut().trim_text(true); /// /// let ns = Namespace(b"namespace 1"); /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); /// let end = start.to_end().into_owned(); /// /// // First, we read a start event... /// assert_eq!( /// reader.read_resolved_event().unwrap(), /// (ResolveResult::Bound(ns), Event::Start(start)) /// ); /// /// // ...then we can skip all events up to the corresponding end event. /// // This call will correctly handle nested elements. /// // Note, however, that this method does not handle namespaces. /// reader.read_to_end(end.name()).unwrap(); /// /// // At the end we should get an Eof event, because we consumed the whole XML /// assert_eq!( /// reader.read_resolved_event().unwrap(), /// (ResolveResult::Unbound, Event::Eof) /// ); /// ``` /// /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`IllFormed`]: crate::errors::Error::IllFormed /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Config::expand_empty_elements /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end(&mut self, end: QName) -> Result { // According to https://www.w3.org/TR/xml11/#dt-etag, the end name must // literally match the start name. See the `Config::check_end_names` documentation self.reader.read_to_end(end) } /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you have already read a [`Start`] event. /// /// Manages nested cases where parent and child elements have _literally_ the /// same name.
/// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text /// it reads, and if, for example, it contains CDATA section, attempt to /// unescape it content will spoil data. /// /// Any text will be decoded using the XML current [`decoder()`]. /// /// Actually, this method perform the following code: /// /// ```ignore /// let span = reader.read_to_end(end)?; /// let text = reader.decoder().decode(&reader.inner_slice[span]); /// ``` /// /// # Examples /// /// This example shows, how you can read a HTML content from your XML document. /// /// ``` /// # use pretty_assertions::assert_eq; /// # use std::borrow::Cow; /// use quick_xml::events::{BytesStart, Event}; /// use quick_xml::reader::NsReader; /// /// let mut reader = NsReader::from_str(r#" /// /// This is a HTML text ///