toml_parser-1.0.6+spec-1.1.0/.cargo_vcs_info.json0000644000000001600000000000100147450ustar { "git": { "sha1": "e32c7a2f9b126d42fab0705e9783fec42b88e861" }, "path_in_vcs": "crates/toml_parser" }toml_parser-1.0.6+spec-1.1.0/Cargo.lock0000644000000137440000000000100127340ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "anstream" version = "0.6.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" dependencies = [ "anstyle", "once_cell_polyfill", "windows-sys", ] [[package]] name = "colorchoice" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "memchr" version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "normalize-line-endings" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "once_cell_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] name = "similar" version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" [[package]] name = "snapbox" version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96dcfc4581e3355d70ac2ee14cfdf81dce3d85c85f1ed9e2c1d3013f53b3436b" dependencies = [ "anstream", "anstyle", "normalize-line-endings", "similar", "snapbox-macros", ] [[package]] name = "snapbox-macros" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16569f53ca23a41bb6f62e0a5084aa1661f4814a67fa33696a79073e03a664af" dependencies = [ "anstream", ] [[package]] name = "toml_parser" version = "1.0.6+spec-1.1.0" dependencies = [ "anstream", "anstyle", "snapbox", "winnow", ] [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "windows-link" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-sys" version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ "windows-link", "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" [[package]] name = "windows_aarch64_msvc" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" [[package]] name = "windows_i686_gnu" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" [[package]] name = "windows_i686_gnullvm" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" [[package]] name = "windows_i686_msvc" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" [[package]] name = "windows_x86_64_gnu" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" [[package]] name = "windows_x86_64_gnullvm" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" [[package]] name = 
"windows_x86_64_msvc" version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" dependencies = [ "memchr", ] toml_parser-1.0.6+spec-1.1.0/Cargo.toml0000644000000106510000000000100127510ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.76" name = "toml_parser" version = "1.0.6+spec-1.1.0" build = false include = [ "build.rs", "src/**/*", "Cargo.toml", "Cargo.lock", "LICENSE*", "README.md", "examples/**/*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Yet another format-preserving TOML parser." 
readme = "README.md" keywords = [ "encoding", "toml", "no_std", ] categories = [ "encoding", "parser-implementations", "parsing", "config", ] license = "MIT OR Apache-2.0" repository = "https://github.com/toml-rs/toml" [package.metadata.docs.rs] all-features = true rustdoc-args = ["--generate-link-to-definition"] [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" search = "Unreleased" replace = "{{version}}" min = 1 [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" search = '\.\.\.HEAD' replace = "...{{tag_name}}" exactly = 1 [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" search = "ReleaseDate" replace = "{{date}}" min = 1 [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" search = "" replace = """ ## [Unreleased] - ReleaseDate """ exactly = 1 [[package.metadata.release.pre-release-replacements]] file = "CHANGELOG.md" search = "" replace = """ [Unreleased]: https://github.com/toml-rs/toml/compare/{{tag_name}}...HEAD""" exactly = 1 [features] alloc = [] debug = [ "std", "dep:anstream", "dep:anstyle", ] default = ["std"] simd = ["winnow/simd"] std = ["alloc"] unsafe = [] [lib] name = "toml_parser" path = "src/lib.rs" [dependencies.anstream] version = "0.6.20" optional = true [dependencies.anstyle] version = "1.0.11" optional = true [dependencies.winnow] version = "0.7.13" default-features = false [dev-dependencies.anstream] version = "0.6.20" features = ["test"] [dev-dependencies.snapbox] version = "0.6.21" [lints.clippy] bool_assert_comparison = "allow" branches_sharing_code = "allow" checked_conversions = "warn" collapsible_else_if = "allow" create_dir = "warn" dbg_macro = "warn" debug_assert_with_mut_call = "warn" doc_markdown = "warn" empty_enum = "warn" enum_glob_use = "warn" expl_impl_clone_on_copy = "warn" explicit_deref_methods = "warn" explicit_into_iter_loop = "warn" fallible_impl_from = "warn" filter_map_next = "warn" flat_map_option = "warn" 
float_cmp_const = "warn" fn_params_excessive_bools = "warn" from_iter_instead_of_collect = "warn" get_first = "allow" if_same_then_else = "allow" implicit_clone = "warn" imprecise_flops = "warn" inconsistent_struct_constructor = "warn" inefficient_to_string = "warn" infinite_loop = "warn" invalid_upcast_comparisons = "warn" large_digit_groups = "warn" large_stack_arrays = "warn" large_types_passed_by_value = "warn" let_and_return = "allow" linkedlist = "warn" lossy_float_literal = "warn" macro_use_imports = "warn" mem_forget = "warn" mutex_integer = "warn" needless_bool = "allow" needless_continue = "allow" needless_for_each = "warn" negative_feature_names = "warn" path_buf_push_overwrite = "warn" ptr_as_ptr = "warn" rc_mutex = "warn" redundant_feature_names = "warn" ref_option_ref = "warn" rest_pat_in_fully_bound_structs = "warn" result_large_err = "allow" same_functions_in_if_condition = "warn" self_named_module_files = "warn" semicolon_if_nothing_returned = "warn" str_to_string = "warn" string_add = "warn" string_add_assign = "warn" string_lit_as_bytes = "warn" string_to_string = "warn" todo = "warn" trait_duplication_in_bounds = "warn" uninlined_format_args = "warn" use_self = "warn" verbose_file_reads = "warn" wildcard_imports = "warn" zero_sized_map_values = "warn" [lints.rust] unnameable_types = "allow" unreachable_pub = "warn" unsafe_op_in_unsafe_fn = "warn" unused_lifetimes = "warn" unused_macro_rules = "warn" unused_qualifications = "warn" [lints.rust.rust_2018_idioms] level = "warn" priority = -1 toml_parser-1.0.6+spec-1.1.0/Cargo.toml.orig000064400000000000000000000030051046102023000164250ustar 00000000000000[package] name = "toml_parser" version = "1.0.6+spec-1.1.0" description = "Yet another format-preserving TOML parser." 
categories = ["encoding", "parser-implementations", "parsing", "config"] keywords = ["encoding", "toml", "no_std"] repository.workspace = true license.workspace = true edition.workspace = true rust-version.workspace = true include.workspace = true [package.metadata.docs.rs] all-features = true rustdoc-args = ["--generate-link-to-definition"] [package.metadata.release] pre-release-replacements = [ {file="CHANGELOG.md", search="Unreleased", replace="{{version}}", min=1}, {file="CHANGELOG.md", search="\\.\\.\\.HEAD", replace="...{{tag_name}}", exactly=1}, {file="CHANGELOG.md", search="ReleaseDate", replace="{{date}}", min=1}, {file="CHANGELOG.md", search="", replace="\n## [Unreleased] - ReleaseDate\n", exactly=1}, {file="CHANGELOG.md", search="", replace="\n[Unreleased]: https://github.com/toml-rs/toml/compare/{{tag_name}}...HEAD", exactly=1}, ] [features] default = ["std"] alloc = [] std = ["alloc"] simd = ["winnow/simd"] unsafe = [] debug = ["std", "dep:anstream", "dep:anstyle"] [dependencies] anstream = { version = "0.6.20", optional = true } anstyle = { version = "1.0.11", optional = true } winnow = { version = "0.7.13", default-features = false } [dev-dependencies] anstream = { version = "0.6.20", features = ["test"] } snapbox = "0.6.21" toml_datetime = { path = "../toml_datetime" } [lints] workspace = true toml_parser-1.0.6+spec-1.1.0/LICENSE-APACHE000064400000000000000000000261361046102023000154740ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. toml_parser-1.0.6+spec-1.1.0/LICENSE-MIT000064400000000000000000000020461046102023000151760ustar 00000000000000Copyright (c) Individual contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. toml_parser-1.0.6+spec-1.1.0/README.md000064400000000000000000000014521046102023000150210ustar 00000000000000# toml_parser [![Documentation](https://img.shields.io/badge/docs-master-blue.svg)](https://docs.rs/toml_parser) ![License](https://img.shields.io/crates/l/toml_parser.svg) [![Crates Status](https://img.shields.io/crates/v/toml_parser.svg)](https://crates.io/crates/toml_parser) Zero-copy parsing of TOML ## License Licensed under either of * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or ) * MIT license ([LICENSE-MIT](LICENSE-MIT) or ) at your option. ### Contribution Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. 
toml_parser-1.0.6+spec-1.1.0/src/debug.rs000064400000000000000000000130761046102023000157720ustar 00000000000000use crate::decoder::Encoding; use crate::ErrorSink; use crate::Span; pub(crate) struct DebugDepth(core::sync::atomic::AtomicUsize); impl DebugDepth { pub(crate) fn enter_unchecked(&self) -> usize { self.0.fetch_add(1, core::sync::atomic::Ordering::SeqCst) } pub(crate) fn exit_unchecked(&self) { let _ = self.0.fetch_sub(1, core::sync::atomic::Ordering::SeqCst); } pub(crate) fn depth(&self) -> usize { self.0.load(core::sync::atomic::Ordering::SeqCst) } } static DEBUG_DEPTH: DebugDepth = DebugDepth(core::sync::atomic::AtomicUsize::new(0)); fn render_event(span: impl Into>, text: &str, style: anstyle::Style) { #![allow(unexpected_cfgs)] // HACK: fixed in newer versions let span = span.into(); let depth = DEBUG_DEPTH.depth().min(20); anstream::eprintln!("{:depth$}{style}{text}: {span:?}{style:#}", ""); } pub(crate) struct DebugErrorSink<'s> { sink: &'s mut dyn ErrorSink, } impl<'s> DebugErrorSink<'s> { pub(crate) fn new(sink: &'s mut dyn ErrorSink) -> Self { Self { sink } } } impl ErrorSink for DebugErrorSink<'_> { fn report_error(&mut self, error: crate::ParseError) { render_event( error.unexpected(), &format!("{error:?}"), anstyle::AnsiColor::Red.on_default(), ); self.sink.report_error(error); } } pub(crate) struct DebugEventReceiver<'r> { receiver: &'r mut dyn crate::parser::EventReceiver, } impl<'r> DebugEventReceiver<'r> { pub(crate) fn new(receiver: &'r mut dyn crate::parser::EventReceiver) -> Self { Self { receiver } } } impl crate::parser::EventReceiver for DebugEventReceiver<'_> { fn std_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.std_table_open(span, error); render_event(span, "[", anstyle::Style::new() | anstyle::Effects::DIMMED); DEBUG_DEPTH.enter_unchecked(); } fn std_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.std_table_close(span, error); DEBUG_DEPTH.exit_unchecked(); 
render_event(span, "]", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn array_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_table_open(span, error); render_event(span, "[[", anstyle::Style::new() | anstyle::Effects::DIMMED); DEBUG_DEPTH.enter_unchecked(); } fn array_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_table_close(span, error); DEBUG_DEPTH.exit_unchecked(); render_event(span, "]]", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn inline_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { let allowed = self.receiver.inline_table_open(span, error); render_event(span, "{", anstyle::Style::new() | anstyle::Effects::DIMMED); DEBUG_DEPTH.enter_unchecked(); allowed } fn inline_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.inline_table_close(span, error); DEBUG_DEPTH.exit_unchecked(); render_event(span, "}", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn array_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { let allowed = self.receiver.array_open(span, error); render_event(span, "[", anstyle::Style::new() | anstyle::Effects::DIMMED); DEBUG_DEPTH.enter_unchecked(); allowed } fn array_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_close(span, error); DEBUG_DEPTH.exit_unchecked(); render_event(span, "]", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn simple_key(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.simple_key(span, encoding, error); render_event(span, "", anstyle::AnsiColor::Magenta.on_default()); } fn key_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_sep(span, error); render_event(span, ".", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn key_val_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_val_sep(span, error); render_event(span, "=", 
anstyle::Style::new() | anstyle::Effects::DIMMED); } fn scalar(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.scalar(span, encoding, error); render_event(span, "", anstyle::AnsiColor::Green.on_default()); } fn value_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.value_sep(span, error); render_event(span, ",", anstyle::Style::new() | anstyle::Effects::DIMMED); } fn whitespace(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.whitespace(span, error); render_event(span, "", anstyle::AnsiColor::Cyan.on_default()); } fn comment(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.comment(span, error); render_event(span, "", anstyle::AnsiColor::Cyan.on_default()); } fn newline(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.newline(span, error); render_event(span, "", anstyle::AnsiColor::Cyan.on_default()); } fn error(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.error(span, error); render_event(span, "", anstyle::AnsiColor::Red.on_default()); } } toml_parser-1.0.6+spec-1.1.0/src/decoder/mod.rs000064400000000000000000000057621046102023000170730ustar 00000000000000//! Decode [raw][crate::Raw] TOML values into Rust native types //! //! See //! - [`Raw::decode_key`][crate::Raw::decode_key] //! - [`Raw::decode_scalar`][crate::Raw::decode_scalar] //! - [`Raw::decode_whitespace`][crate::Raw::decode_whitespace] //! - [`Raw::decode_comment`][crate::Raw::decode_comment] //! 
- [`Raw::decode_newline`][crate::Raw::decode_newline] #[cfg(feature = "alloc")] use alloc::borrow::Cow; #[cfg(feature = "alloc")] use alloc::string::String; pub(crate) mod scalar; pub(crate) mod string; pub(crate) mod ws; pub use scalar::IntegerRadix; pub use scalar::ScalarKind; #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] #[repr(u8)] pub enum Encoding { LiteralString = crate::lexer::APOSTROPHE, BasicString = crate::lexer::QUOTATION_MARK, MlLiteralString = 1, MlBasicString, } impl Encoding { pub const fn description(&self) -> &'static str { match self { Self::LiteralString => crate::lexer::TokenKind::LiteralString.description(), Self::BasicString => crate::lexer::TokenKind::BasicString.description(), Self::MlLiteralString => crate::lexer::TokenKind::MlLiteralString.description(), Self::MlBasicString => crate::lexer::TokenKind::MlBasicString.description(), } } } pub trait StringBuilder<'s> { fn clear(&mut self); #[must_use] fn push_str(&mut self, append: &'s str) -> bool; #[must_use] fn push_char(&mut self, append: char) -> bool; } impl<'s> StringBuilder<'s> for () { fn clear(&mut self) {} fn push_str(&mut self, _append: &'s str) -> bool { true } fn push_char(&mut self, _append: char) -> bool { true } } impl<'s> StringBuilder<'s> for &'s str { fn clear(&mut self) { *self = &self[0..0]; } fn push_str(&mut self, append: &'s str) -> bool { if self.is_empty() { *self = append; true } else { false } } fn push_char(&mut self, _append: char) -> bool { false } } #[cfg(feature = "alloc")] impl<'s> StringBuilder<'s> for Cow<'s, str> { fn clear(&mut self) { match self { Cow::Borrowed(s) => { s.clear(); } Cow::Owned(s) => s.clear(), } } fn push_str(&mut self, append: &'s str) -> bool { match self { Cow::Borrowed(s) => { if !s.push_str(append) { self.to_mut().push_str(append); } } Cow::Owned(s) => s.push_str(append), } true } fn push_char(&mut self, append: char) -> bool { self.to_mut().push(append); true } } #[cfg(feature = "alloc")] impl<'s> 
StringBuilder<'s> for String { fn clear(&mut self) { self.clear(); } fn push_str(&mut self, append: &'s str) -> bool { self.push_str(append); true } fn push_char(&mut self, append: char) -> bool { self.push(append); true } } toml_parser-1.0.6+spec-1.1.0/src/decoder/scalar.rs000064400000000000000000000631751046102023000175630ustar 00000000000000use winnow::stream::ContainsToken as _; use winnow::stream::FindSlice as _; use winnow::stream::Offset as _; use winnow::stream::Stream as _; use crate::decoder::StringBuilder; use crate::ErrorSink; use crate::Expected; use crate::ParseError; use crate::Raw; use crate::Span; const ALLOCATION_ERROR: &str = "could not allocate for string"; #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub enum ScalarKind { String, Boolean(bool), DateTime, Float, Integer(IntegerRadix), } impl ScalarKind { pub fn description(&self) -> &'static str { match self { Self::String => "string", Self::Boolean(_) => "boolean", Self::DateTime => "date-time", Self::Float => "float", Self::Integer(radix) => radix.description(), } } pub fn invalid_description(&self) -> &'static str { match self { Self::String => "invalid string", Self::Boolean(_) => "invalid boolean", Self::DateTime => "invalid date-time", Self::Float => "invalid float", Self::Integer(radix) => radix.invalid_description(), } } } #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub enum IntegerRadix { #[default] Dec, Hex, Oct, Bin, } impl IntegerRadix { pub fn description(&self) -> &'static str { match self { Self::Dec => "integer", Self::Hex => "hexadecimal", Self::Oct => "octal", Self::Bin => "binary", } } pub fn value(&self) -> u32 { match self { Self::Dec => 10, Self::Hex => 16, Self::Oct => 8, Self::Bin => 2, } } pub fn invalid_description(&self) -> &'static str { match self { Self::Dec => "invalid integer number", Self::Hex => "invalid hexadecimal number", Self::Oct => "invalid octal number", Self::Bin => "invalid binary number", } } fn 
validator(&self) -> fn(char) -> bool {
        match self {
            Self::Dec => |c| c.is_ascii_digit(),
            Self::Hex => |c| c.is_ascii_hexdigit(),
            Self::Oct => |c| matches!(c, '0'..='7'),
            Self::Bin => |c| matches!(c, '0'..='1'),
        }
    }
}

/// Decode an unquoted scalar, picking the decoder from the first byte of `raw`.
///
/// Empty input and any unrecognised leading byte are handed to `decode_invalid`.
/// The returned [`ScalarKind`] is whatever the selected decoder reports.
pub(crate) fn decode_unquoted_scalar<'i>(
    raw: Raw<'i>,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    const TRUE: &str = "true";
    const FALSE: &str = "false";
    const INF: &str = "inf";
    const NAN: &str = "nan";

    let text = raw.as_str();
    let Some(&lead) = text.as_bytes().first() else {
        return decode_invalid(raw, output, error);
    };

    match lead {
        // Signed number: continue with everything after the sign
        b'+' | b'-' => decode_sign_prefix(raw, &text[1..], output, error),
        // A leading zero may introduce a radix prefix
        b'0' => decode_zero_prefix(text, false, raw, output, error),
        // Date/number starts; a leading `_` is reported as if it were a number because it's
        // most likely a typo
        b'_' | b'1'..=b'9' => decode_datetime_or_float_or_integer(text, raw, output, error),
        // Reported as if it were a float because it's most likely a typo
        b'.' => {
            ensure_float(text, raw, error);
            decode_float_or_integer(text, raw, ScalarKind::Float, output, error)
        }
        // Keywords, matched on either case of their first letter
        b't' | b'T' => decode_symbol(
            raw,
            TRUE,
            ScalarKind::Boolean(true),
            &[Expected::Literal(TRUE)],
            output,
            error,
        ),
        b'f' | b'F' => decode_symbol(
            raw,
            FALSE,
            ScalarKind::Boolean(false),
            &[Expected::Literal(FALSE)],
            output,
            error,
        ),
        b'i' | b'I' => decode_symbol(
            raw,
            INF,
            ScalarKind::Float,
            &[Expected::Literal(INF)],
            output,
            error,
        ),
        b'n' | b'N' => decode_symbol(
            raw,
            NAN,
            ScalarKind::Float,
            &[Expected::Literal(NAN)],
            output,
            error,
        ),
        _ => decode_invalid(raw, output, error),
    }
}

pub(crate) fn decode_sign_prefix<'i>(
    raw: Raw<'i>,
    value: &'i str,
output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    // `value` is the part of `raw` that follows the sign(s) consumed so far.
    let Some(first) = value.as_bytes().first() else {
        return decode_invalid(raw, output, error);
    };
    match first {
        // A further sign: report it, skip it and keep decoding what follows
        b'+' | b'-' => {
            let start = value.offset_from(&raw.as_str());
            let end = start + 1;
            error.report_error(
                ParseError::new("redundant numeric sign")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(start, end)),
            );

            let value = &value[1..];
            decode_sign_prefix(raw, value, output, error)
        }
        // Report as if they were numbers because it's most likely a typo
        b'_' => decode_datetime_or_float_or_integer(value, raw, output, error),
        // Date/number starts
        b'0' => decode_zero_prefix(value, true, raw, output, error),
        b'1'..=b'9' => decode_datetime_or_float_or_integer(value, raw, output, error),
        // Report as if they were numbers because it's most likely a typo
        b'.' => {
            let kind = ScalarKind::Float;
            // Validate only the part after the sign, so the missing mantissa is reported right
            // after the sign instead of the sign itself being flagged as an invalid float.
            ensure_float(value, raw, error);
            // Decode from the start of `raw` so the sign is kept in the output.
            decode_float_or_integer(raw.as_str(), raw, kind, output, error)
        }
        b'i' | b'I' => {
            const SYMBOL: &str = "inf";
            let kind = ScalarKind::Float;
            if value != SYMBOL {
                let expected = &[Expected::Literal(SYMBOL)];
                let start = value.offset_from(&raw.as_str());
                let end = start + value.len();
                error.report_error(
                    ParseError::new(kind.invalid_description())
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(expected)
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
                decode_as(raw, SYMBOL, kind, output, error)
            } else {
                decode_as_is(raw, kind, output, error)
            }
        }
        b'n' | b'N' => {
            const SYMBOL: &str = "nan";
            let kind = ScalarKind::Float;
            if value != SYMBOL {
                let expected = &[Expected::Literal(SYMBOL)];
                let start = value.offset_from(&raw.as_str());
                let end = start + value.len();
                error.report_error(
                    ParseError::new(kind.invalid_description())
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(expected)
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
decode_as(raw, SYMBOL, kind, output, error) } else { decode_as_is(raw, kind, output, error) } } _ => decode_invalid(raw, output, error), } } pub(crate) fn decode_zero_prefix<'i>( value: &'i str, signed: bool, raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) -> ScalarKind { debug_assert_eq!(value.as_bytes()[0], b'0'); if value.len() == 1 { let kind = ScalarKind::Integer(IntegerRadix::Dec); // No extra validation needed decode_float_or_integer(raw.as_str(), raw, kind, output, error) } else { let radix = value.as_bytes()[1]; match radix { b'x' | b'X' => { if signed { error.report_error( ParseError::new("integers with a radix cannot be signed") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(0, 1)), ); } if radix == b'X' { let start = value.offset_from(&raw.as_str()); let end = start + 2; error.report_error( ParseError::new("radix must be lowercase") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("0x")]) .with_unexpected(Span::new_unchecked(start, end)), ); } let radix = IntegerRadix::Hex; let kind = ScalarKind::Integer(radix); let stream = &value[2..]; ensure_radixed_value(stream, raw, radix, error); decode_float_or_integer(stream, raw, kind, output, error) } b'o' | b'O' => { if signed { error.report_error( ParseError::new("integers with a radix cannot be signed") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(0, 1)), ); } if radix == b'O' { let start = value.offset_from(&raw.as_str()); let end = start + 2; error.report_error( ParseError::new("radix must be lowercase") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("0o")]) .with_unexpected(Span::new_unchecked(start, end)), ); } let radix = IntegerRadix::Oct; let kind = ScalarKind::Integer(radix); let stream = &value[2..]; ensure_radixed_value(stream, raw, radix, error); decode_float_or_integer(stream, 
raw, kind, output, error) } b'b' | b'B' => { if signed { error.report_error( ParseError::new("integers with a radix cannot be signed") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(0, 1)), ); } if radix == b'B' { let start = value.offset_from(&raw.as_str()); let end = start + 2; error.report_error( ParseError::new("radix must be lowercase") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("0b")]) .with_unexpected(Span::new_unchecked(start, end)), ); } let radix = IntegerRadix::Bin; let kind = ScalarKind::Integer(radix); let stream = &value[2..]; ensure_radixed_value(stream, raw, radix, error); decode_float_or_integer(stream, raw, kind, output, error) } b'd' | b'D' => { if signed { error.report_error( ParseError::new("integers with a radix cannot be signed") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(0, 1)), ); } let radix = IntegerRadix::Dec; let kind = ScalarKind::Integer(radix); let stream = &value[2..]; error.report_error( ParseError::new("redundant integer number prefix") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(0, 2)), ); ensure_radixed_value(stream, raw, radix, error); decode_float_or_integer(stream, raw, kind, output, error) } _ => decode_datetime_or_float_or_integer(value, raw, output, error), } } } pub(crate) fn decode_datetime_or_float_or_integer<'i>( value: &'i str, raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) -> ScalarKind { let Some(digit_end) = value .as_bytes() .offset_for(|b| !(b'0'..=b'9').contains_token(b)) else { let kind = ScalarKind::Integer(IntegerRadix::Dec); let stream = raw.as_str(); ensure_no_leading_zero(value, raw, error); return decode_float_or_integer(stream, raw, kind, output, error); }; #[cfg(feature = "unsafe")] // SAFETY: ascii digits ensures UTF-8 boundary let rest = unsafe { 
&value.get_unchecked(digit_end..) }; #[cfg(not(feature = "unsafe"))] let rest = &value[digit_end..]; if rest.starts_with("-") || rest.starts_with(":") { decode_as_is(raw, ScalarKind::DateTime, output, error) } else if rest.contains(" ") { decode_invalid(raw, output, error) } else if is_float(rest) { let kind = ScalarKind::Float; let stream = raw.as_str(); ensure_float(value, raw, error); decode_float_or_integer(stream, raw, kind, output, error) } else if rest.starts_with("_") { let kind = ScalarKind::Integer(IntegerRadix::Dec); let stream = raw.as_str(); ensure_no_leading_zero(value, raw, error); decode_float_or_integer(stream, raw, kind, output, error) } else { decode_invalid(raw, output, error) } } /// ```abnf /// ;; Float /// /// float = float-int-part ( exp / frac [ exp ] ) /// /// float-int-part = dec-int /// frac = decimal-point zero-prefixable-int /// decimal-point = %x2E ; . /// zero-prefixable-int = DIGIT *( DIGIT / underscore DIGIT ) /// /// exp = "e" float-exp-part /// float-exp-part = [ minus / plus ] zero-prefixable-int /// ``` pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) { ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error); if value.starts_with(".") { let _ = value.next_token(); ensure_dec_uint(&mut value, raw, true, "invalid fraction", error); } if value.starts_with(['e', 'E']) { let _ = value.next_token(); if value.starts_with(['+', '-']) { let _ = value.next_token(); } ensure_dec_uint(&mut value, raw, true, "invalid exponent", error); } if !value.is_empty() { let start = value.offset_from(&raw.as_str()); let end = raw.len(); error.report_error( ParseError::new(ScalarKind::Float.invalid_description()) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[]) .with_unexpected(Span::new_unchecked(start, end)), ); } } pub(crate) fn ensure_dec_uint<'i>( value: &mut &'i str, raw: Raw<'i>, zero_prefix: bool, invalid_description: &'static str, error: &mut dyn ErrorSink, ) { let start 
= *value;
    let mut digit_count = 0;
    while let Some(current) = value.chars().next() {
        if current.is_ascii_digit() {
            digit_count += 1;
        } else if current == '_' {
            // Underscores are consumed but do not count as digits
        } else {
            break;
        }
        let _ = value.next_token();
    }

    match digit_count {
        0 => {
            let start = start.offset_from(&raw.as_str());
            let end = start;
            error.report_error(
                ParseError::new(invalid_description)
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[Expected::Description("digits")])
                    .with_unexpected(Span::new_unchecked(start, end)),
            );
        }
        1 => {}
        _ if start.starts_with("0") && !zero_prefix => {
            let start = start.offset_from(&raw.as_str());
            let end = start + 1;
            error.report_error(
                ParseError::new("unexpected leading zero")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(start, end)),
            );
        }
        _ => {}
    }
}

/// Report an "unexpected leading zero" error when `value` starts with `0`.
///
/// `value` must be a sub-slice of `raw`; the reported span covers the zero itself.
pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
    if value.starts_with("0") {
        let start = value.offset_from(&raw.as_str());
        let end = start + 1;
        error.report_error(
            ParseError::new("unexpected leading zero")
                .with_context(Span::new_unchecked(0, raw.len()))
                .with_expected(&[])
                .with_unexpected(Span::new_unchecked(start, end)),
        );
    }
}

/// Validate the digits that follow a radix prefix.
///
/// `value` must be a sub-slice of `raw`. A sign at the start of `value` is reported as
/// "unexpected sign" and skipped. Every remaining character that is neither a digit of `radix`
/// nor `_` is reported individually with an empty span at its position.
pub(crate) fn ensure_radixed_value(
    value: &str,
    raw: Raw<'_>,
    radix: IntegerRadix,
    error: &mut dyn ErrorSink,
) {
    let invalid = ['+', '-'];
    let value = if let Some(unsigned) = value.strip_prefix(invalid) {
        // The offending sign is the first byte of `value`. Searching `raw` for a sign would
        // instead find one that precedes the radix prefix (e.g. the `+` in `+0x-1`).
        let pos = value.offset_from(&raw.as_str());
        error.report_error(
            ParseError::new("unexpected sign")
                .with_context(Span::new_unchecked(0, raw.len()))
                .with_expected(&[])
                .with_unexpected(Span::new_unchecked(pos, pos + 1)),
        );
        unsigned
    } else {
        value
    };

    let valid = radix.validator();
    for (index, c) in value.char_indices() {
        if !valid(c) && c != '_' {
            let pos = value.offset_from(&raw.as_str()) + index;
            error.report_error(
                ParseError::new(radix.invalid_description())
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_unexpected(Span::new_unchecked(pos, pos)),
            );
        }
    }
}
/// Copy `stream` into `output` with every `_` separator removed.
///
/// `stream` must be a sub-slice of `raw` that runs to the end of `raw`; it may start after a
/// radix prefix. Underscores that are not placed between two digits are reported to `error`,
/// with spans expressed relative to `raw`. `kind` is returned unchanged and selects which bytes
/// count as digits next to an underscore.
pub(crate) fn decode_float_or_integer<'i>(
    stream: &'i str,
    raw: Raw<'i>,
    kind: ScalarKind,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    output.clear();

    let underscore = "_";

    if has_underscore(stream) {
        // `stream` does not necessarily start at offset 0 of `raw` (e.g. after `0x`), so every
        // position below is measured from where the stream begins.
        let stream_start = stream.offset_from(&raw.as_str());

        if stream.starts_with(underscore) {
            error.report_error(
                ParseError::new("`_` may only go between digits")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(
                        stream_start,
                        stream_start + underscore.len(),
                    )),
            );
        }
        if 1 < stream.len() && stream.ends_with(underscore) {
            let end = stream_start + stream.len();
            error.report_error(
                ParseError::new("`_` may only go between digits")
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[])
                    .with_unexpected(Span::new_unchecked(end - underscore.len(), end)),
            );
        }

        for part in stream.split(underscore) {
            let part_start = part.offset_from(&raw.as_str());
            let part_end = part_start + part.len();

            // Only parts after the first one are preceded by an underscore; the byte before the
            // first part belongs to whatever precedes the stream (such as a radix prefix).
            if stream_start < part_start {
                let first = part.as_bytes().first().copied().unwrap_or(b'0');
                if !is_any_digit(first, kind) {
                    let start = part_start - 1;
                    let end = part_start;
                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
                    error.report_error(
                        ParseError::new("`_` may only go between digits")
                            .with_context(Span::new_unchecked(0, raw.len()))
                            .with_unexpected(Span::new_unchecked(start, end)),
                    );
                }
            }
            // A part that stops short of the end of `raw` is followed by an underscore.
            if 1 < part.len() && part_end < raw.len() {
                let last = part.as_bytes().last().copied().unwrap_or(b'0');
                if !is_any_digit(last, kind) {
                    let start = part_end;
                    let end = start + underscore.len();
                    debug_assert_eq!(&raw.as_str()[start..end], underscore);
                    error.report_error(
                        ParseError::new("`_` may only go between digits")
                            .with_context(Span::new_unchecked(0, raw.len()))
                            .with_unexpected(Span::new_unchecked(start, end)),
                    );
                }
            }

            // An empty part in the middle means two adjacent underscores; the leading and
            // trailing cases were already reported above.
            if part.is_empty() && part_start != stream_start && part_end != raw.len() {
                let start = part_start;
                let end = start + 1;
                error.report_error(
                    ParseError::new("`_` may only go between digits")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
            }

            if !part.is_empty() && !output.push_str(part) {
                error.report_error(
                    ParseError::new(ALLOCATION_ERROR)
                        .with_unexpected(Span::new_unchecked(part_start, part_end)),
                );
            }
        }
    } else {
        if !output.push_str(stream) {
            error.report_error(
                ParseError::new(ALLOCATION_ERROR)
                    .with_unexpected(Span::new_unchecked(0, raw.len())),
            );
        }
    }

    kind
}

/// Whether `b` counts as a digit next to an underscore for a scalar of the given `kind`.
///
/// Floats accept decimal digits only; every other kind also accepts hexadecimal letters.
fn is_any_digit(b: u8, kind: ScalarKind) -> bool {
    if kind == ScalarKind::Float {
        is_dec_integer_digit(b)
    } else {
        is_any_integer_digit(b)
    }
}

/// Whether `b` is an ASCII decimal or hexadecimal digit (either case).
fn is_any_integer_digit(b: u8) -> bool {
    (b'0'..=b'9', b'a'..=b'f', b'A'..=b'F').contains_token(b)
}

/// Whether `b` is an ASCII decimal digit.
fn is_dec_integer_digit(b: u8) -> bool {
    (b'0'..=b'9').contains_token(b)
}

/// Whether `raw` contains at least one `_`.
fn has_underscore(raw: &str) -> bool {
    raw.as_bytes().find_slice(b'_').is_some()
}

/// Whether `raw` contains a `.`, `e` or `E`, i.e. a fraction or exponent marker.
fn is_float(raw: &str) -> bool {
    raw.as_bytes().find_slice((b'.', b'e', b'E')).is_some()
}

/// Write `raw` to `output` verbatim and return `kind`.
pub(crate) fn decode_as_is<'i>(
    raw: Raw<'i>,
    kind: ScalarKind,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    decode_as(raw, raw.as_str(), kind, output, error)
}

/// Replace the content of `output` with `symbol` and return `kind`.
///
/// A rejected write is reported as an allocation error spanning all of `raw`.
pub(crate) fn decode_as<'i>(
    raw: Raw<'i>,
    symbol: &'i str,
    kind: ScalarKind,
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    output.clear();
    if !output.push_str(symbol) {
        error.report_error(
            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
        );
    }
    kind
}

/// Decode `raw` as the keyword `symbol`.
///
/// When `raw` is not exactly `symbol`, input containing a space is treated as an unquoted
/// string via `decode_invalid`; anything else is reported as an invalid `kind` and then decoded
/// as if it had been `symbol`.
pub(crate) fn decode_symbol<'i>(
    raw: Raw<'i>,
    symbol: &'static str,
    kind: ScalarKind,
    expected: &'static [Expected],
    output: &mut dyn StringBuilder<'i>,
    error: &mut dyn ErrorSink,
) -> ScalarKind {
    if raw.as_str() != symbol {
        if raw.as_str().contains(" ") {
            return decode_invalid(raw, output, error);
        } else {
            error.report_error(
                ParseError::new(kind.invalid_description())
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(expected)
                    .with_unexpected(Span::new_unchecked(0, raw.len())),
            );
        }
    }

    decode_as(raw, symbol, kind, output, error)
}

pub(crate) fn
decode_invalid<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) -> ScalarKind { if raw.as_str().ends_with("'''") { error.report_error( ParseError::new("missing opening quote") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal(r#"'''"#)]) .with_unexpected(Span::new_unchecked(0, 0)), ); } else if raw.as_str().ends_with(r#"""""#) { error.report_error( ParseError::new("missing opening quote") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("multi-line basic string")]) .with_expected(&[Expected::Literal(r#"""""#)]) .with_unexpected(Span::new_unchecked(0, 0)), ); } else if raw.as_str().ends_with("'") { error.report_error( ParseError::new("missing opening quote") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal(r#"'"#)]) .with_unexpected(Span::new_unchecked(0, 0)), ); } else if raw.as_str().ends_with(r#"""#) { error.report_error( ParseError::new("missing opening quote") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal(r#"""#)]) .with_unexpected(Span::new_unchecked(0, 0)), ); } else { error.report_error( ParseError::new("string values must be quoted") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("literal string")]) .with_unexpected(Span::new_unchecked(0, raw.len())), ); } output.clear(); if !output.push_str(raw.as_str()) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } ScalarKind::String } toml_parser-1.0.6+spec-1.1.0/src/decoder/string.rs000064400000000000000000001073601046102023000176170ustar 00000000000000use core::ops::RangeInclusive; use winnow::stream::ContainsToken as _; use winnow::stream::Offset as _; use winnow::stream::Stream as _; use crate::decoder::StringBuilder; use crate::lexer::APOSTROPHE; use crate::lexer::ML_BASIC_STRING_DELIM; use crate::lexer::ML_LITERAL_STRING_DELIM; use 
crate::lexer::QUOTATION_MARK; use crate::lexer::WSCHAR; use crate::ErrorSink; use crate::Expected; use crate::ParseError; use crate::Raw; use crate::Span; const ALLOCATION_ERROR: &str = "could not allocate for string"; /// Parse literal string /// /// ```abnf /// ;; Literal String /// /// literal-string = apostrophe *literal-char apostrophe /// /// apostrophe = %x27 ; ' apostrophe /// /// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii /// ``` pub(crate) fn decode_literal_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid literal string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(APOSTROPHE as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = if let Some(stripped) = s.strip_suffix(APOSTROPHE as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; for (i, b) in s.as_bytes().iter().enumerate() { if !LITERAL_CHAR.contains_token(b) { let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("non-single-quote visible characters")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// ```abnf /// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii /// ``` const LITERAL_CHAR: ( u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); /// ```abnf /// 
non-ascii = %x80-D7FF / %xE000-10FFFF /// ``` /// - ASCII is 0xxxxxxx /// - First byte for UTF-8 is 11xxxxxx /// - Subsequent UTF-8 bytes are 10xxxxxx const NON_ASCII: RangeInclusive = 0x80..=0xff; /// Parse multi-line literal string /// /// ```abnf /// ;; Multiline Literal String /// /// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body /// ml-literal-string-delim /// ml-literal-string-delim = 3apostrophe /// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] /// /// mll-content = literal-char / newline /// mll-quotes = 1*2apostrophe /// ``` pub(crate) fn decode_ml_literal_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid multi-line literal string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(ML_LITERAL_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = strip_start_newline(s); let s = if let Some(stripped) = s.strip_suffix(ML_LITERAL_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s.trim_end_matches('\'') }; for (i, b) in s.as_bytes().iter().enumerate() { if *b == b'\'' || *b == b'\n' { } else if *b == b'\r' { if s.as_bytes().get(i + 1) != Some(&b'\n') { let offset = (&s.as_bytes()[i + 1..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } else if !LITERAL_CHAR.contains_token(b) { let offset = 
(&s.as_bytes()[i..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("non-single-quote characters")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// Parse basic string /// /// ```abnf /// ;; Basic String /// /// basic-string = quotation-mark *basic-char quotation-mark /// /// basic-char = basic-unescaped / escaped /// /// escaped = escape escape-seq-char /// ``` pub(crate) fn decode_basic_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid basic string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(QUOTATION_MARK as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let mut s = if let Some(stripped) = s.strip_suffix(QUOTATION_MARK as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; let segment = basic_unescaped(&mut s); if !output.push_str(segment) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } while !s.is_empty() { if s.starts_with("\\") { let _ = s.next_token(); let c = escape_seq_char(&mut s, raw, error); if !output.push_char(c) { error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(0, raw.len())), ); } } else { let invalid = basic_invalid(&mut s); let start = invalid.offset_from(&raw.as_str()); let end = start + 
invalid.len(); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[ Expected::Description("non-double-quote visible characters"), Expected::Literal("\\"), ]) .with_unexpected(Span::new_unchecked(start, end)), ); let _ = output.push_str(invalid); } let segment = basic_unescaped(&mut s); if !output.push_str(segment) { let start = segment.offset_from(&raw.as_str()); let end = start + segment.len(); error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)), ); } } } /// ```abnf /// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii /// ``` fn basic_unescaped<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| !BASIC_UNESCAPED.contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } fn basic_invalid<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| (BASIC_UNESCAPED, ESCAPE).contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } /// ```abnf /// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii /// ``` #[allow(clippy::type_complexity)] const BASIC_UNESCAPED: ( (u8, u8), u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); /// ```abnf /// escape = %x5C ; \ /// ``` const ESCAPE: u8 = b'\\'; /// ```abnf /// escape-seq-char = %x22 ; " quotation mark U+0022 /// escape-seq-char =/ %x5C ; \ reverse solidus U+005C /// escape-seq-char =/ %x62 ; b backspace U+0008 /// escape-seq-char =/ %x65 ; e escape U+001B /// escape-seq-char =/ %x66 ; f form 
feed U+000C /// escape-seq-char =/ %x6E ; n line feed U+000A /// escape-seq-char =/ %x72 ; r carriage return U+000D /// escape-seq-char =/ %x74 ; t tab U+0009 /// escape-seq-char =/ %x78 2HEXDIG ; xHH U+00HH /// escape-seq-char =/ %x75 4HEXDIG ; uHHHH U+HHHH /// escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH U+HHHHHHHH /// ``` fn escape_seq_char(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) -> char { const EXPECTED_ESCAPES: &[Expected] = &[ Expected::Literal("b"), Expected::Literal("e"), Expected::Literal("f"), Expected::Literal("n"), Expected::Literal("r"), Expected::Literal("\\"), Expected::Literal("\""), Expected::Literal("x"), Expected::Literal("u"), Expected::Literal("U"), ]; let start = stream.checkpoint(); let Some(id) = stream.next_token() else { let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("missing escaped value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(EXPECTED_ESCAPES) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '\\'; }; match id { 'b' => '\u{8}', 'e' => '\u{1b}', 'f' => '\u{c}', 'n' => '\n', 'r' => '\r', 't' => '\t', 'x' => hexescape(stream, 2, raw, error), 'u' => hexescape(stream, 4, raw, error), 'U' => hexescape(stream, 8, raw, error), '\\' => '\\', '"' => '"', _ => { stream.reset(&start); let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("missing escaped value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(EXPECTED_ESCAPES) .with_unexpected(Span::new_unchecked(offset, offset)), ); '\\' } } } fn hexescape( stream: &mut &str, num_digits: usize, raw: Raw<'_>, error: &mut dyn ErrorSink, ) -> char { let offset = stream .as_bytes() .offset_for(|b| !HEXDIG.contains_token(b)) .unwrap_or_else(|| stream.eof_offset()) .min(num_digits); #[cfg(feature = "unsafe")] // SAFETY: HEXDIG ensure `offset` is along UTF-8 boundary let value = unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let 
value = stream.next_slice(offset); if value.len() != num_digits { let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("too few unicode value digits") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("unicode hexadecimal value")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '�'; } let Some(value) = u32::from_str_radix(value, 16).ok().and_then(char::from_u32) else { let offset = value.offset_from(&raw.as_str()); error.report_error( ParseError::new("invalid value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("unicode hexadecimal value")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '�'; }; value } /// ```abnf /// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" /// ``` const HEXDIG: (RangeInclusive, RangeInclusive, RangeInclusive) = (DIGIT, b'A'..=b'F', b'a'..=b'f'); /// ```abnf /// DIGIT = %x30-39 ; 0-9 /// ``` const DIGIT: RangeInclusive = b'0'..=b'9'; fn strip_start_newline(s: &str) -> &str { s.strip_prefix('\n') .or_else(|| s.strip_prefix("\r\n")) .unwrap_or(s) } /// Parse multi-line basic string /// /// ```abnf /// ;; Multiline Basic String /// /// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body /// ml-basic-string-delim /// ml-basic-string-delim = 3quotation-mark /// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] /// /// mlb-content = basic-char / newline / mlb-escaped-nl /// mlb-quotes = 1*2quotation-mark /// ``` pub(crate) fn decode_ml_basic_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid multi-line basic string"; let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(ML_BASIC_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) 
.with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = strip_start_newline(s); let mut s = if let Some(stripped) = s.strip_suffix(ML_BASIC_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; let segment = mlb_unescaped(&mut s); if !output.push_str(segment) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } while !s.is_empty() { if s.starts_with("\\") { let _ = s.next_token(); if s.as_bytes() .first() .map(|b| (WSCHAR, b'\r', b'\n').contains_token(b)) .unwrap_or(false) { mlb_escaped_nl(&mut s, raw, error); } else { let c = escape_seq_char(&mut s, raw, error); if !output.push_char(c) { error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(0, raw.len())), ); } } } else if s.starts_with("\r") { let offset = if s.starts_with("\r\n") { "\r\n".len() } else { let start = s.offset_from(&raw.as_str()) + 1; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, start)), ); "\r".len() }; #[cfg(feature = "unsafe")] // SAFETY: Newlines ensure `offset` is along UTF-8 boundary let newline = unsafe { s.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let newline = s.next_slice(offset); if !output.push_str(newline) { let start = newline.offset_from(&raw.as_str()); let end = start + newline.len(); error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(start, end)), ); } } else { let invalid = mlb_invalid(&mut s); let start = invalid.offset_from(&raw.as_str()); let end = start + invalid.len(); error.report_error( ParseError::new(INVALID_STRING) 
.with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\\"), Expected::Description("characters")]) .with_unexpected(Span::new_unchecked(start, end)), ); let _ = output.push_str(invalid); } let segment = mlb_unescaped(&mut s); if !output.push_str(segment) { let start = segment.offset_from(&raw.as_str()); let end = start + segment.len(); error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)), ); } } } /// ```abnf /// mlb-escaped-nl = escape ws newline *( wschar / newline ) /// ``` fn mlb_escaped_nl(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) { const INVALID_STRING: &str = "invalid multi-line basic string"; let ws_offset = stream .as_bytes() .offset_for(|b| !WSCHAR.contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(ws_offset); } #[cfg(not(feature = "unsafe"))] stream.next_slice(ws_offset); let start = stream.checkpoint(); match stream.next_token() { Some('\n') => {} Some('\r') => { if stream.as_bytes().first() == Some(&b'\n') { let _ = stream.next_token(); } else { let start = stream.offset_from(&raw.as_str()); let end = start; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, end)), ); } } _ => { stream.reset(&start); let start = stream.offset_from(&raw.as_str()); let end = start; error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, end)), ); } } loop { let start_offset = stream.offset_from(&raw.as_str()); let offset = stream .as_bytes() .offset_for(|b| !(WSCHAR, b'\n').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: WSCHAR 
ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset); } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); if stream.starts_with("\r") { let offset = if stream.starts_with("\r\n") { "\r\n".len() } else { let start = stream.offset_from(&raw.as_str()) + 1; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, start)), ); "\r".len() }; #[cfg(feature = "unsafe")] // SAFETY: Newlines ensure `offset` is along UTF-8 boundary let _ = unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let _ = stream.next_slice(offset); } let end_offset = stream.offset_from(&raw.as_str()); if start_offset == end_offset { break; } } } /// `mlb-unescaped` extended with `mlb-quotes` and `LF` /// /// This is a specialization of [`basic_unescaped`] to help with multi-line basic strings /// /// **warning:** `newline` is not validated /// /// ```abnf /// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] /// /// mlb-content = basic-cha / newline / mlb-escaped-nl /// mlb-quotes = 1*2quotation-mark /// ``` fn mlb_unescaped<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| !(BASIC_UNESCAPED, b'"', b'\n').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } fn mlb_invalid<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| (BASIC_UNESCAPED, b'"', b'\n', ESCAPE, '\r').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] 
stream.next_slice(offset) } /// Parse unquoted key /// /// ```abnf /// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ /// ``` pub(crate) fn decode_unquoted_key<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { let s = raw.as_str(); if s.is_empty() { error.report_error( ParseError::new("unquoted keys cannot be empty") .with_context(Span::new_unchecked(0, s.len())) .with_expected(&[ Expected::Description("letters"), Expected::Description("numbers"), Expected::Literal("-"), Expected::Literal("_"), ]) .with_unexpected(Span::new_unchecked(0, s.len())), ); } for (i, b) in s.as_bytes().iter().enumerate() { if !UNQUOTED_CHAR.contains_token(b) { error.report_error( ParseError::new("invalid unquoted key") .with_context(Span::new_unchecked(0, s.len())) .with_expected(&[ Expected::Description("letters"), Expected::Description("numbers"), Expected::Literal("-"), Expected::Literal("_"), ]) .with_unexpected(Span::new_unchecked(i, i)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// ```abnf /// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ /// ``` const UNQUOTED_CHAR: ( RangeInclusive, RangeInclusive, RangeInclusive, u8, u8, ) = (b'A'..=b'Z', b'a'..=b'z', b'0'..=b'9', b'-', b'_'); #[cfg(test)] #[cfg(feature = "std")] mod test { use super::*; use crate::decoder::Encoding; use alloc::borrow::Cow; use snapbox::assert_data_eq; use snapbox::prelude::*; use snapbox::str; #[test] fn literal_string() { let cases = [ ( r"'C:\Users\nodejs\templates'", str![[r#"C:\Users\nodejs\templates"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r"'\\ServerX\admin$\system32\'", str![[r#"\\ServerX\admin$\system32\"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"'Tom "Dubs" Preston-Werner'"#, str![[r#"Tom "Dubs" Preston-Werner"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r"'<\i\c*\s*>'", str![[r#"<\i\c*\s*>"#]].raw(), 
str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_literal_string( Raw::new_unchecked(input, Some(Encoding::LiteralString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn ml_literal_string() { let cases = [ ( r"'''I [dw]on't need \d{2} apples'''", str![[r#"I [dw]on't need \d{2} apples"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"''''one_quote''''"#, str!["'one_quote'"].raw(), str![[r#" [] "#]] .raw(), ), ( r#"''' The first newline is trimmed in raw strings. All other whitespace is preserved. '''"#, str![[r#" The first newline is trimmed in raw strings. All other whitespace is preserved. "#]] .raw(), str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_ml_literal_string( Raw::new_unchecked(input, Some(Encoding::MlLiteralString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn basic_string() { let cases = [ ( r#""""#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#""content\"trailing""#, str![[r#"content"trailing"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#""content\""#, str![[r#"content\"#]].raw(), str![[r#" [ ParseError { context: Some( 0..10, ), description: "missing escaped value", expected: Some( [ Literal( "b", ), Literal( "e", ), Literal( "f", ), Literal( "n", ), Literal( "r", ), Literal( "\\", ), Literal( "\"", ), Literal( "x", ), Literal( "u", ), Literal( "U", ), ], ), unexpected: Some( 9..9, ), }, ] "#]] .raw(), ), ( r#""content trailing""#, str![[r#" content trailing "#]] .raw(), str![[r#" [ ParseError { context: Some( 0..18, ), description: "invalid basic string", expected: Some( [ Description( "non-double-quote visible characters", ), 
Literal( "\\", ), ], ), unexpected: Some( 8..9, ), }, ] "#]] .raw(), ), ( r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#, str![[r#" I'm a string. "You can quote me". Name José Location SF. 𠜎 "#]] .raw(), str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_basic_string( Raw::new_unchecked(input, Some(Encoding::BasicString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn ml_basic_string() { let cases = [ ( r#"""" Roses are red Violets are blue""""#, str![[r#" Roses are red Violets are blue "#]] .raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \""" """"#, str![[r#" """ "#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \\""""#, str![[r#" \"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" The quick brown \ fox jumps over \ the lazy dog.""""#, str!["The quick brown fox jumps over the lazy dog."].raw(), str![[r#" [] "#]] .raw(), ), ( r#""""\ The quick brown \ fox jumps over \ the lazy dog.\ """"#, str!["The quick brown fox jumps over the lazy dog."].raw(), str![[r#" [] "#]] .raw(), ), ( r#""""\ """"#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \ \ """"#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" """#, str![[r#" """#]].raw(), str![[r#" [ ParseError { context: Some( 0..7, ), description: "invalid multi-line basic string", expected: Some( [ Literal( "\"", ), ], ), unexpected: Some( 7..7, ), }, ] "#]] .raw(), ), ( r#"""" \""""#, str![[r#" \"#]].raw(), str![[r#" [ ParseError { context: Some( 0..9, ), description: "missing escaped value", expected: Some( [ Literal( "b", ), Literal( "e", ), Literal( "f", ), Literal( "n", ), Literal( "r", ), Literal( "\\", ), Literal( "\"", ), Literal( "x", ), Literal( "u", ), Literal( "U", ), ], ), unexpected: Some( 6..6, ), }, ] "#]] .raw(), ), ]; for (input, expected, 
expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_ml_basic_string( Raw::new_unchecked(input, Some(Encoding::MlBasicString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn unquoted_keys() { let cases = [ ( "a", str!["a"].raw(), str![[r#" [] "#]] .raw(), ), ( "hello", str!["hello"].raw(), str![[r#" [] "#]] .raw(), ), ( "-", str!["-"].raw(), str![[r#" [] "#]] .raw(), ), ( "_", str!["_"].raw(), str![[r#" [] "#]] .raw(), ), ( "-hello-world-", str!["-hello-world-"].raw(), str![[r#" [] "#]] .raw(), ), ( "_hello_world_", str!["_hello_world_"].raw(), str![[r#" [] "#]] .raw(), ), ( "", str![""].raw(), str![[r#" [ ParseError { context: Some( 0..0, ), description: "unquoted keys cannot be empty", expected: Some( [ Description( "letters", ), Description( "numbers", ), Literal( "-", ), Literal( "_", ), ], ), unexpected: Some( 0..0, ), }, ] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_unquoted_key( Raw::new_unchecked(input, None, Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } } toml_parser-1.0.6+spec-1.1.0/src/decoder/ws.rs000064400000000000000000000042641046102023000167410ustar 00000000000000use core::ops::RangeInclusive; use winnow::stream::ContainsToken as _; use crate::lexer::COMMENT_START_SYMBOL; use crate::ErrorSink; use crate::Expected; use crate::ParseError; use crate::Raw; use crate::Span; /// Parse comment /// /// ```abnf /// ;; Comment /// /// comment-start-symbol = %x23 ; # /// non-ascii = %x80-D7FF / %xE000-10FFFF /// non-eol = %x09 / %x20-7E / non-ascii /// /// comment = comment-start-symbol *non-eol /// ``` pub(crate) fn decode_comment(raw: Raw<'_>, error: &mut dyn ErrorSink) { let s = raw.as_bytes(); if s.first() != 
Some(&COMMENT_START_SYMBOL) { error.report_error( ParseError::new("missing comment start") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("#")]) .with_unexpected(Span::new_unchecked(0, 0)), ); } for (i, b) in s.iter().copied().enumerate() { if !NON_EOL.contains_token(b) { error.report_error( ParseError::new("invalid comment character") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("printable characters")]) .with_unexpected(Span::new_unchecked(i, i)), ); } } } // non-ascii = %x80-D7FF / %xE000-10FFFF // - ASCII is 0xxxxxxx // - First byte for UTF-8 is 11xxxxxx // - Subsequent UTF-8 bytes are 10xxxxxx pub(crate) const NON_ASCII: RangeInclusive = 0x80..=0xff; // non-eol = %x09 / %x20-7E / non-ascii pub(crate) const NON_EOL: (u8, RangeInclusive, RangeInclusive) = (0x09, 0x20..=0x7E, NON_ASCII); /// Parse newline /// /// ```abnf ///;; Newline /// /// newline = %x0A ; LF /// newline =/ %x0D.0A ; CRLF /// ``` pub(crate) fn decode_newline(raw: Raw<'_>, error: &mut dyn ErrorSink) { let s = raw.as_str(); if s == "\r" { error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); } } toml_parser-1.0.6+spec-1.1.0/src/error.rs000064400000000000000000000044701046102023000160330ustar 00000000000000use crate::Span; pub trait ErrorSink { fn report_error(&mut self, error: ParseError); } impl ErrorSink for F where F: FnMut(ParseError), { fn report_error(&mut self, error: ParseError) { (self)(error); } } impl ErrorSink for () { fn report_error(&mut self, _error: ParseError) {} } impl ErrorSink for Option { fn report_error(&mut self, error: ParseError) { self.get_or_insert(error); } } #[cfg(feature = "alloc")] #[allow(unused_qualifications)] impl ErrorSink for alloc::vec::Vec { fn report_error(&mut self, error: ParseError) { 
self.push(error); } } #[derive(Clone, PartialEq, Eq, Debug)] #[non_exhaustive] pub struct ParseError { context: Option, description: ErrorStr, expected: Option<&'static [Expected]>, unexpected: Option, } impl ParseError { pub fn new(description: impl Into) -> Self { Self { context: None, description: description.into(), expected: None, unexpected: None, } } pub fn with_context(mut self, context: Span) -> Self { self.context = Some(context); self } pub fn with_expected(mut self, expected: &'static [Expected]) -> Self { self.expected = Some(expected); self } pub fn with_unexpected(mut self, unexpected: Span) -> Self { self.unexpected = Some(unexpected); self } pub fn context(&self) -> Option { self.context } pub fn description(&self) -> &str { &self.description } pub fn expected(&self) -> Option<&'static [Expected]> { self.expected } pub fn unexpected(&self) -> Option { self.unexpected } pub(crate) fn rebase_spans(mut self, offset: usize) -> Self { if let Some(context) = self.context.as_mut() { *context += offset; } if let Some(unexpected) = self.unexpected.as_mut() { *unexpected += offset; } self } } #[cfg(feature = "alloc")] type ErrorStr = alloc::borrow::Cow<'static, str>; #[cfg(not(feature = "alloc"))] type ErrorStr = &'static str; #[derive(Copy, Clone, PartialEq, Eq, Debug)] #[non_exhaustive] pub enum Expected { Literal(&'static str), Description(&'static str), } toml_parser-1.0.6+spec-1.1.0/src/lexer/mod.rs000064400000000000000000000470501046102023000166010ustar 00000000000000//! Lex TOML tokens //! //! 
To get started, see [`Source::lex`][crate::Source::lex] #[cfg(test)] #[cfg(feature = "std")] mod test; mod token; #[cfg(feature = "alloc")] use alloc::vec::Vec; use winnow::stream::AsBStr as _; use winnow::stream::ContainsToken as _; use winnow::stream::FindSlice as _; use winnow::stream::Location; use winnow::stream::Stream as _; use crate::Span; pub use token::Token; pub use token::TokenKind; /// Lex TOML [tokens][Token] /// /// To get started, see [`Source::lex`][crate::Source::lex] pub struct Lexer<'i> { stream: Stream<'i>, eof: bool, } impl<'i> Lexer<'i> { pub(crate) fn new(input: &'i str) -> Self { let mut stream = Stream::new(input); if input.as_bytes().starts_with(BOM) { let offset = BOM.len(); #[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); } Lexer { stream, eof: false } } #[cfg(feature = "alloc")] pub fn into_vec(self) -> Vec { #![allow(unused_qualifications)] // due to MSRV of 1.66 let capacity = core::cmp::min( self.stream.len(), usize::MAX / core::mem::size_of::(), ); let mut vec = Vec::with_capacity(capacity); vec.extend(self); vec } } impl Iterator for Lexer<'_> { type Item = Token; fn next(&mut self) -> Option { let Some(peek_byte) = self.stream.as_bstr().first() else { if self.eof { return None; } else { self.eof = true; let start = self.stream.current_token_start(); let span = Span::new_unchecked(start, start); return Some(Token::new(TokenKind::Eof, span)); } }; Some(process_token(*peek_byte, &mut self.stream)) } } const BOM: &[u8] = b"\xEF\xBB\xBF"; pub(crate) type Stream<'i> = winnow::stream::LocatingSlice<&'i str>; fn process_token(peek_byte: u8, stream: &mut Stream<'_>) -> Token { let token = match peek_byte { b'.' 
=> lex_ascii_char(stream, TokenKind::Dot), b'=' => lex_ascii_char(stream, TokenKind::Equals), b',' => lex_ascii_char(stream, TokenKind::Comma), b'[' => lex_ascii_char(stream, TokenKind::LeftSquareBracket), b']' => lex_ascii_char(stream, TokenKind::RightSquareBracket), b'{' => lex_ascii_char(stream, TokenKind::LeftCurlyBracket), b'}' => lex_ascii_char(stream, TokenKind::RightCurlyBracket), b' ' => lex_whitespace(stream), b'\t' => lex_whitespace(stream), b'#' => lex_comment(stream), b'\r' => lex_crlf(stream), b'\n' => lex_ascii_char(stream, TokenKind::Newline), b'\'' => { if stream.starts_with(ML_LITERAL_STRING_DELIM) { lex_ml_literal_string(stream) } else { lex_literal_string(stream) } } b'"' => { if stream.starts_with(ML_BASIC_STRING_DELIM) { lex_ml_basic_string(stream) } else { lex_basic_string(stream) } } _ => lex_atom(stream), }; token } /// Process an ASCII character token /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream` must be non-empty /// - `stream[0]` must be ASCII fn lex_ascii_char(stream: &mut Stream<'_>, kind: TokenKind) -> Token { debug_assert!(!stream.is_empty()); let start = stream.current_token_start(); let offset = 1; // an ascii character #[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(kind, span) } /// Process Whitespace /// /// ```abnf /// ;; Whitespace /// /// ws = *wschar /// wschar = %x20 ; Space /// wschar =/ %x09 ; Horizontal tab /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream` must be non-empty fn lex_whitespace(stream: &mut Stream<'_>) -> Token { debug_assert!(!stream.is_empty()); let start = stream.current_token_start(); let offset = stream .as_bstr() .offset_for(|b| !WSCHAR.contains_token(b)) .unwrap_or(stream.eof_offset()); #[cfg(feature = "unsafe")] // SAFETY: WSCHAR 
ensures `offset` will be at UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::Whitespace, span) } /// ```abnf /// wschar = %x20 ; Space /// wschar =/ %x09 ; Horizontal tab /// ``` pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t'); /// Process Comment /// /// ```abnf /// ;; Comment /// /// comment-start-symbol = %x23 ; # /// non-ascii = %x80-D7FF / %xE000-10FFFF /// non-eol = %x09 / %x20-7E / non-ascii /// /// comment = comment-start-symbol *non-eol /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream[0] == b'#'` fn lex_comment(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let offset = stream .as_bytes() .find_slice((b'\r', b'\n')) .map(|s| s.start) .unwrap_or_else(|| stream.eof_offset()); #[cfg(feature = "unsafe")] // SAFETY: newlines ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::Comment, span) } /// ```abnf /// comment-start-symbol = %x23 ; # /// ``` pub(crate) const COMMENT_START_SYMBOL: u8 = b'#'; /// Process Newline /// /// ```abnf /// ;; Newline /// /// newline = %x0A ; LF /// newline =/ %x0D.0A ; CRLF /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream[0] == b'\r'` fn lex_crlf(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let mut offset = '\r'.len_utf8(); let has_lf = stream.as_bstr().get(1) == Some(&b'\n'); if has_lf { offset += '\n'.len_utf8(); } #[cfg(feature = "unsafe")] // SAFETY: newlines ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let 
span = Span::new_unchecked(start, end); Token::new(TokenKind::Newline, span) } /// Process literal string /// /// ```abnf /// ;; Literal String /// /// literal-string = apostrophe *literal-char apostrophe /// /// apostrophe = %x27 ; ' apostrophe /// /// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream[0] == b'\''` fn lex_literal_string(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let offset = 1; // APOSTROPHE #[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let offset = match stream.as_bstr().find_slice((APOSTROPHE, b'\n')) { Some(span) => { if stream.as_bstr()[span.start] == APOSTROPHE { span.end } else { span.start } } None => stream.eof_offset(), }; #[cfg(feature = "unsafe")] // SAFETY: `APOSTROPHE`/newline ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::LiteralString, span) } /// ```abnf /// apostrophe = %x27 ; ' apostrophe /// ``` pub(crate) const APOSTROPHE: u8 = b'\''; /// Process multi-line literal string /// /// ```abnf /// ;; Multiline Literal String /// /// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body /// ml-literal-string-delim /// ml-literal-string-delim = 3apostrophe /// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] /// /// mll-content = literal-char / newline /// mll-quotes = 1*2apostrophe /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream.starts_with(ML_LITERAL_STRING_DELIM)` fn lex_ml_literal_string(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let offset = ML_LITERAL_STRING_DELIM.len(); 
#[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let offset = match stream.as_bstr().find_slice(ML_LITERAL_STRING_DELIM) { Some(span) => span.end, None => stream.eof_offset(), }; #[cfg(feature = "unsafe")] // SAFETY: `ML_LITERAL_STRING_DELIM` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); if stream.as_bstr().peek_token() == Some(APOSTROPHE) { let offset = 1; #[cfg(feature = "unsafe")] // SAFETY: `APOSTROPHE` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); if stream.as_bstr().peek_token() == Some(APOSTROPHE) { let offset = 1; #[cfg(feature = "unsafe")] // SAFETY: `APOSTROPHE` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); } } let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::MlLiteralString, span) } /// ```abnf /// ml-literal-string-delim = 3apostrophe /// ``` pub(crate) const ML_LITERAL_STRING_DELIM: &str = "'''"; /// Process basic string /// /// ```abnf /// ;; Basic String /// /// basic-string = quotation-mark *basic-char quotation-mark /// /// quotation-mark = %x22 ; " /// /// basic-char = basic-unescaped / escaped /// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii /// escaped = escape escape-seq-char /// /// escape = %x5C ; \ /// escape-seq-char = %x22 ; " quotation mark U+0022 /// escape-seq-char =/ %x5C ; \ reverse solidus U+005C /// escape-seq-char =/ %x62 ; b backspace U+0008 /// escape-seq-char =/ %x65 ; e escape U+001B /// escape-seq-char =/ %x66 ; f form feed U+000C /// escape-seq-char =/ %x6E ; n line feed U+000A /// escape-seq-char =/ %x72 ; r carriage 
return U+000D /// escape-seq-char =/ %x74 ; t tab U+0009 /// escape-seq-char =/ %x78 2HEXDIG ; xHH U+00HH /// escape-seq-char =/ %x75 4HEXDIG ; uHHHH U+HHHH /// escape-seq-char =/ %x55 8HEXDIG ; UHHHHHHHH U+HHHHHHHH /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream[0] == b'"'` fn lex_basic_string(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let offset = 1; // QUOTATION_MARK #[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); loop { // newline is present for error recovery match stream.as_bstr().find_slice((QUOTATION_MARK, ESCAPE, b'\n')) { Some(span) => { let found = stream.as_bstr()[span.start]; if found == QUOTATION_MARK { let offset = span.end; #[cfg(feature = "unsafe")] // SAFETY: `QUOTATION_MARK` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); break; } else if found == ESCAPE { let offset = span.end; #[cfg(feature = "unsafe")] // SAFETY: `ESCAPE` / newline ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let peek = stream.as_bstr().peek_token(); match peek { Some(ESCAPE) | Some(QUOTATION_MARK) => { let offset = 1; // ESCAPE / QUOTATION_MARK #[cfg(feature = "unsafe")] #[cfg(feature = "unsafe")] // SAFETY: `ESCAPE` / newline ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); } _ => {} } continue; } else if found == b'\n' { let offset = span.start; #[cfg(feature = "unsafe")] // SAFETY: newline ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); break; } else { unreachable!("found `{found}`"); } } 
None => { stream.finish(); break; } } } let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::BasicString, span) } /// ```abnf /// quotation-mark = %x22 ; " /// ``` pub(crate) const QUOTATION_MARK: u8 = b'"'; /// ```abnf /// escape = %x5C ; \ /// ``` pub(crate) const ESCAPE: u8 = b'\\'; /// Process multi-line basic string /// /// ```abnf /// ;; Multiline Basic String /// /// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body /// ml-basic-string-delim /// ml-basic-string-delim = 3quotation-mark /// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] /// /// mlb-content = basic-char / newline / mlb-escaped-nl /// mlb-quotes = 1*2quotation-mark /// mlb-escaped-nl = escape ws newline *( wschar / newline ) /// ``` /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream.starts_with(ML_BASIC_STRING_DELIM)` fn lex_ml_basic_string(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); let offset = ML_BASIC_STRING_DELIM.len(); #[cfg(feature = "unsafe")] // SAFETY: only called when next character is ASCII unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); loop { // newline is present for error recovery match stream.as_bstr().find_slice((ML_BASIC_STRING_DELIM, "\\")) { Some(span) => { let found = stream.as_bstr()[span.start]; if found == QUOTATION_MARK { let offset = span.end; #[cfg(feature = "unsafe")] // SAFETY: `QUOTATION_MARK` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); break; } else if found == ESCAPE { let offset = span.end; #[cfg(feature = "unsafe")] // SAFETY: `ESCAPE` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let peek = stream.as_bstr().peek_token(); match peek { Some(ESCAPE) | 
Some(QUOTATION_MARK) => { let offset = 1; // ESCAPE / QUOTATION_MARK #[cfg(feature = "unsafe")] // SAFETY: `QUOTATION_MARK`/`ESCAPE` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); } _ => {} } continue; } else { unreachable!("found `{found}`"); } } None => { stream.finish(); break; } } } if stream.as_bstr().peek_token() == Some(QUOTATION_MARK) { let offset = 1; #[cfg(feature = "unsafe")] // SAFETY: `QUOTATION_MARK` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); if stream.as_bstr().peek_token() == Some(QUOTATION_MARK) { let offset = 1; #[cfg(feature = "unsafe")] // SAFETY: `QUOTATION_MARK` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); } } let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::MlBasicString, span) } /// ```abnf /// ml-basic-string-delim = 3quotation-mark /// ``` pub(crate) const ML_BASIC_STRING_DELIM: &str = "\"\"\""; /// Process Atom /// /// This is everything else /// /// # Safety /// /// - `stream` must be UTF-8 /// - `stream` must be non-empty fn lex_atom(stream: &mut Stream<'_>) -> Token { let start = stream.current_token_start(); // Intentionally leaves off quotes in case the opening quote was missing const TOKEN_START: &[u8] = b".=,[]{} \t#\r\n"; let offset = stream .as_bstr() .offset_for(|b| TOKEN_START.contains_token(b)) .unwrap_or_else(|| stream.eof_offset()); #[cfg(feature = "unsafe")] // SAFETY: `TOKEN_START` ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); let end = stream.previous_token_end(); let span = Span::new_unchecked(start, end); Token::new(TokenKind::Atom, span) } 
toml_parser-1.0.6+spec-1.1.0/src/lexer/test.rs000064400000000000000000000576071046102023000170120ustar 00000000000000use super::*; use snapbox::assert_data_eq; use snapbox::prelude::*; use snapbox::str; #[test] fn test_lex_ascii_char() { let cases = [( ".trailing", str![[r#" Token { kind: Dot, span: 0..1, } "#]] .raw(), str!["trailing"].raw(), )]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_ascii_char(&mut stream, TokenKind::Dot); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_whitespace() { let cases = [ ( " ", str![[r#" Token { kind: Whitespace, span: 0..1, } "#]] .raw(), str![].raw(), ), ( " \t \t \t ", str![[r#" Token { kind: Whitespace, span: 0..9, } "#]] .raw(), str![].raw(), ), ( " \n", str![[r#" Token { kind: Whitespace, span: 0..1, } "#]] .raw(), str![[r#" "#]] .raw(), ), ( " #", str![[r#" Token { kind: Whitespace, span: 0..1, } "#]] .raw(), str!["#"].raw(), ), ( " a", str![[r#" Token { kind: Whitespace, span: 0..1, } "#]] .raw(), str!["a"].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_whitespace(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_comment() { let cases = [ ( "#", str![[r#" Token { kind: Comment, span: 0..1, } "#]] .raw(), str![""].raw(), ), ( "# content", str![[r#" Token { kind: Comment, span: 0..9, } "#]] .raw(), str![""].raw(), ), ( "# content \ntrailing", str![[r#" Token { kind: Comment, span: 0..10, } "#]] .raw(), str![[r#" trailing "#]] .raw(), ), ( "# content \r\ntrailing", str![[r#" Token { kind: Comment, span: 0..10, } "#]] .raw(), str![[r#" trailing "#]] .raw(), ), ( "# content \0continue", str![[r#" Token { 
kind: Comment, span: 0..19, } "#]] .raw(), str![""].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_comment(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_crlf() { let cases = [ ( "\r\ntrailing", str![[r#" Token { kind: Newline, span: 0..2, } "#]] .raw(), str!["trailing"].raw(), ), ( "\rtrailing", str![[r#" Token { kind: Newline, span: 0..1, } "#]] .raw(), str!["trailing"].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_crlf(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_literal_string() { let cases = [ ( "''", str![[r#" Token { kind: LiteralString, span: 0..2, } "#]] .raw(), str![""].raw(), ), ( "''trailing", str![[r#" Token { kind: LiteralString, span: 0..2, } "#]] .raw(), str!["trailing"].raw(), ), ( "'content'trailing", str![[r#" Token { kind: LiteralString, span: 0..9, } "#]] .raw(), str!["trailing"].raw(), ), ( "'content", str![[r#" Token { kind: LiteralString, span: 0..8, } "#]] .raw(), str![""].raw(), ), ( "'content\ntrailing", str![[r#" Token { kind: LiteralString, span: 0..8, } "#]] .raw(), str![[r#" trailing "#]] .raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_literal_string(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_ml_literal_string() { let cases = [ ( "''''''", str![[r#" Token { kind: MlLiteralString, span: 0..6, } "#]] .raw(), str![""].raw(), ), ( "''''''trailing", str![[r#" Token 
{ kind: MlLiteralString, span: 0..6, } "#]] .raw(), str!["trailing"].raw(), ), ( "'''content'''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..13, } "#]] .raw(), str!["trailing"].raw(), ), ( "'''content", str![[r#" Token { kind: MlLiteralString, span: 0..10, } "#]] .raw(), str![""].raw(), ), ( "'''content'", str![[r#" Token { kind: MlLiteralString, span: 0..11, } "#]] .raw(), str![""].raw(), ), ( "'''content''", str![[r#" Token { kind: MlLiteralString, span: 0..12, } "#]] .raw(), str![""].raw(), ), ( "'''content\ntrailing", str![[r#" Token { kind: MlLiteralString, span: 0..19, } "#]] .raw(), str![""].raw(), ), ( "'''''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..7, } "#]] .raw(), str!["trailing"].raw(), ), ( "''''''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..8, } "#]] .raw(), str!["trailing"].raw(), ), ( "'''''''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..8, } "#]] .raw(), str!["'trailing"].raw(), ), ( "'''''content''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..16, } "#]] .raw(), str!["trailing"].raw(), ), ( "'''''content'''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..17, } "#]] .raw(), str!["trailing"].raw(), ), ( "'''''content''''''trailing", str![[r#" Token { kind: MlLiteralString, span: 0..17, } "#]] .raw(), str!["'trailing"].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_ml_literal_string(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_basic_string() { let cases = [ ( r#""""#, str![[r#" Token { kind: BasicString, span: 0..2, } "#]] .raw(), str![].raw(), ), ( r#"""trailing"#, str![[r#" Token { kind: BasicString, span: 0..2, } "#]] .raw(), str!["trailing"].raw(), ), ( r#""content"trailing"#, str![[r#" Token { kind: BasicString, span: 
0..9, } "#]] .raw(), str!["trailing"].raw(), ), ( r#""content"#, str![[r#" Token { kind: BasicString, span: 0..8, } "#]] .raw(), str![].raw(), ), ( r#""content\ntrailing"#, str![[r#" Token { kind: BasicString, span: 0..18, } "#]] .raw(), str![].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_basic_string(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[test] fn test_lex_atom() { let cases = [ ( "hello", str![[r#" Token { kind: Atom, span: 0..5, } "#]] .raw(), str![""].raw(), ), ( "hello = world", str![[r#" Token { kind: Atom, span: 0..5, } "#]] .raw(), str![" = world"].raw(), ), ( "1.100e100 ]", str![[r#" Token { kind: Atom, span: 0..1, } "#]] .raw(), str![".100e100 ]"].raw(), ), ( "a.b.c = 5", str![[r#" Token { kind: Atom, span: 0..1, } "#]] .raw(), str![".b.c = 5"].raw(), ), ( "true ]", str![[r#" Token { kind: Atom, span: 0..4, } "#]] .raw(), str![" ]"].raw(), ), ]; for (stream, expected_tokens, expected_stream) in cases { dbg!(stream); let mut stream = Stream::new(stream); let actual_tokens = lex_atom(&mut stream); assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw()); let stream = *stream; assert_data_eq!(stream, expected_stream.raw()); } } #[track_caller] fn t(input: &str, expected: impl IntoData) { let source = crate::Source::new(input); let actual = source.lex().into_vec(); assert_data_eq!(actual.to_debug(), expected); if !actual.is_empty() { let spans = actual.iter().map(|t| t.span()).collect::>(); assert_eq!(spans.first().unwrap().start(), 0); assert_eq!(spans.last().unwrap().end(), input.len()); for i in 0..(spans.len() - 1) { let current = &spans[i]; let next = &spans[i + 1]; assert_eq!(current.end(), next.start()); } } } #[test] fn literal_strings() { t( "''", str![[r#" [ Token { kind: LiteralString, span: 0..2, }, Token { kind: Eof, span: 
2..2, }, ] "#]] .raw(), ); t( "''''''", str![[r#" [ Token { kind: MlLiteralString, span: 0..6, }, Token { kind: Eof, span: 6..6, }, ] "#]] .raw(), ); t( "'''\n'''", str![[r#" [ Token { kind: MlLiteralString, span: 0..7, }, Token { kind: Eof, span: 7..7, }, ] "#]] .raw(), ); t( "'a'", str![[r#" [ Token { kind: LiteralString, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "'\"a'", str![[r#" [ Token { kind: LiteralString, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( "''''a'''", str![[r#" [ Token { kind: MlLiteralString, span: 0..8, }, Token { kind: Eof, span: 8..8, }, ] "#]] .raw(), ); t( "'''\n'a\n'''", str![[r#" [ Token { kind: MlLiteralString, span: 0..10, }, Token { kind: Eof, span: 10..10, }, ] "#]] .raw(), ); t( "'''a\n'a\r\n'''", str![[r#" [ Token { kind: MlLiteralString, span: 0..12, }, Token { kind: Eof, span: 12..12, }, ] "#]] .raw(), ); } #[test] fn basic_strings() { t( r#""""#, str![[r#" [ Token { kind: BasicString, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( r#""""""""#, str![[r#" [ Token { kind: MlBasicString, span: 0..6, }, Token { kind: Eof, span: 6..6, }, ] "#]] .raw(), ); t( r#""a""#, str![[r#" [ Token { kind: BasicString, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( r#""""a""""#, str![[r#" [ Token { kind: MlBasicString, span: 0..7, }, Token { kind: Eof, span: 7..7, }, ] "#]] .raw(), ); t( r#""\t""#, str![[r#" [ Token { kind: BasicString, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( r#""\u0000""#, str![[r#" [ Token { kind: BasicString, span: 0..8, }, Token { kind: Eof, span: 8..8, }, ] "#]] .raw(), ); t( r#""\U00000000""#, str![[r#" [ Token { kind: BasicString, span: 0..12, }, Token { kind: Eof, span: 12..12, }, ] "#]] .raw(), ); t( r#""\U000A0000""#, str![[r#" [ Token { kind: BasicString, span: 0..12, }, Token { kind: Eof, span: 12..12, }, ] "#]] .raw(), ); t( r#""\\t""#, str![[r#" [ Token { kind: BasicString, span: 0..5, }, 
Token { kind: Eof, span: 5..5, }, ] "#]] .raw(), ); t( "\"\t\"", str![[r#" [ Token { kind: BasicString, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "\"\"\"\n\t\"\"\"", str![[r#" [ Token { kind: MlBasicString, span: 0..8, }, Token { kind: Eof, span: 8..8, }, ] "#]] .raw(), ); t( "\"\"\"\\\n\"\"\"", str![[r#" [ Token { kind: MlBasicString, span: 0..8, }, Token { kind: Eof, span: 8..8, }, ] "#]] .raw(), ); t( "\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"", str![[r#" [ Token { kind: MlBasicString, span: 0..34, }, Token { kind: Eof, span: 34..34, }, ] "#]] .raw(), ); t( r#""\r""#, str![[r#" [ Token { kind: BasicString, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( r#""\n""#, str![[r#" [ Token { kind: BasicString, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( r#""\b""#, str![[r#" [ Token { kind: BasicString, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( r#""a\fa""#, str![[r#" [ Token { kind: BasicString, span: 0..6, }, Token { kind: Eof, span: 6..6, }, ] "#]] .raw(), ); t( r#""\"a""#, str![[r#" [ Token { kind: BasicString, span: 0..5, }, Token { kind: Eof, span: 5..5, }, ] "#]] .raw(), ); t( "\"\"\"\na\"\"\"", str![[r#" [ Token { kind: MlBasicString, span: 0..8, }, Token { kind: Eof, span: 8..8, }, ] "#]] .raw(), ); t( "\"\"\"\n\"\"\"", str![[r#" [ Token { kind: MlBasicString, span: 0..7, }, Token { kind: Eof, span: 7..7, }, ] "#]] .raw(), ); t( r#""""a\"""b""""#, str![[r#" [ Token { kind: MlBasicString, span: 0..12, }, Token { kind: Eof, span: 12..12, }, ] "#]] .raw(), ); t( r#""\a"#, str![[r#" [ Token { kind: BasicString, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "\"\\\n", str![[r#" [ Token { kind: BasicString, span: 0..2, }, Token { kind: Newline, span: 2..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "\"\\\r\n", str![[r#" [ Token { kind: BasicString, span: 0..3, }, Token { kind: Newline, span: 3..4, }, Token { kind: Eof, span: 
4..4, }, ] "#]] .raw(), ); t( "\"\\", str![[r#" [ Token { kind: BasicString, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( "\"\u{0}", str![[r#" [ Token { kind: BasicString, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( r#""\U00""#, str![[r#" [ Token { kind: BasicString, span: 0..6, }, Token { kind: Eof, span: 6..6, }, ] "#]] .raw(), ); t( r#""\U00"#, str![[r#" [ Token { kind: BasicString, span: 0..5, }, Token { kind: Eof, span: 5..5, }, ] "#]] .raw(), ); t( r#""\uD800"#, str![[r#" [ Token { kind: BasicString, span: 0..7, }, Token { kind: Eof, span: 7..7, }, ] "#]] .raw(), ); t( r#""\UFFFFFFFF"#, str![[r#" [ Token { kind: BasicString, span: 0..11, }, Token { kind: Eof, span: 11..11, }, ] "#]] .raw(), ); } #[test] fn keylike() { t( "foo", str![[r#" [ Token { kind: Atom, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "0bar", str![[r#" [ Token { kind: Atom, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( "bar0", str![[r#" [ Token { kind: Atom, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( "1234", str![[r#" [ Token { kind: Atom, span: 0..4, }, Token { kind: Eof, span: 4..4, }, ] "#]] .raw(), ); t( "a-b", str![[r#" [ Token { kind: Atom, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "a_B", str![[r#" [ Token { kind: Atom, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "-_-", str![[r#" [ Token { kind: Atom, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( "___", str![[r#" [ Token { kind: Atom, span: 0..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); } #[test] fn all() { t( " a ", str![[r#" [ Token { kind: Whitespace, span: 0..1, }, Token { kind: Atom, span: 1..2, }, Token { kind: Whitespace, span: 2..3, }, Token { kind: Eof, span: 3..3, }, ] "#]] .raw(), ); t( " a\t [[]] \t [] {} , . 
=\n# foo \r\n#foo \n ", str![[r#" [ Token { kind: Whitespace, span: 0..1, }, Token { kind: Atom, span: 1..2, }, Token { kind: Whitespace, span: 2..4, }, Token { kind: LeftSquareBracket, span: 4..5, }, Token { kind: LeftSquareBracket, span: 5..6, }, Token { kind: RightSquareBracket, span: 6..7, }, Token { kind: RightSquareBracket, span: 7..8, }, Token { kind: Whitespace, span: 8..11, }, Token { kind: LeftSquareBracket, span: 11..12, }, Token { kind: RightSquareBracket, span: 12..13, }, Token { kind: Whitespace, span: 13..14, }, Token { kind: LeftCurlyBracket, span: 14..15, }, Token { kind: RightCurlyBracket, span: 15..16, }, Token { kind: Whitespace, span: 16..17, }, Token { kind: Comma, span: 17..18, }, Token { kind: Whitespace, span: 18..19, }, Token { kind: Dot, span: 19..20, }, Token { kind: Whitespace, span: 20..21, }, Token { kind: Equals, span: 21..22, }, Token { kind: Newline, span: 22..23, }, Token { kind: Comment, span: 23..29, }, Token { kind: Newline, span: 29..31, }, Token { kind: Comment, span: 31..36, }, Token { kind: Newline, span: 36..37, }, Token { kind: Whitespace, span: 37..38, }, Token { kind: Eof, span: 38..38, }, ] "#]] .raw(), ); } #[test] fn bare_cr_bad() { t( "\r", str![[r#" [ Token { kind: Newline, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); t( "'\n", str![[r#" [ Token { kind: LiteralString, span: 0..1, }, Token { kind: Newline, span: 1..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( "'\u{0}", str![[r#" [ Token { kind: LiteralString, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( "'", str![[r#" [ Token { kind: LiteralString, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); t( "\u{0}", str![[r#" [ Token { kind: Atom, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); } #[test] fn bad_comment() { t( "#\u{0}", str![[r#" [ Token { kind: Comment, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); } 
toml_parser-1.0.6+spec-1.1.0/src/lexer/token.rs000064400000000000000000000055531046102023000171440ustar 00000000000000//! Lexed TOML tokens use super::Span; use super::APOSTROPHE; use super::COMMENT_START_SYMBOL; use super::QUOTATION_MARK; use super::WSCHAR; use crate::decoder::Encoding; /// An unvalidated TOML Token #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Token { pub(super) kind: TokenKind, pub(super) span: Span, } impl Token { pub(super) fn new(kind: TokenKind, span: Span) -> Self { Self { kind, span } } #[inline(always)] pub fn kind(&self) -> TokenKind { self.kind } #[inline(always)] pub fn span(&self) -> Span { self.span } } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] #[repr(u8)] pub enum TokenKind { /// Either for dotted-key or float Dot = b'.', /// Key-value separator Equals = b'=', /// Value separator Comma = b',', /// Either array or standard-table start LeftSquareBracket = b'[', /// Either array or standard-table end RightSquareBracket = b']', /// Inline table start LeftCurlyBracket = b'{', /// Inline table end RightCurlyBracket = b'}', Whitespace = WSCHAR.0, Comment = COMMENT_START_SYMBOL, Newline = b'\n', LiteralString = APOSTROPHE, BasicString = QUOTATION_MARK, MlLiteralString = 1, MlBasicString, /// Anything else Atom, Eof, } impl TokenKind { pub const fn description(&self) -> &'static str { match self { Self::Dot => "`.`", Self::Equals => "`=`", Self::Comma => "`,`", Self::LeftSquareBracket => "`[`", Self::RightSquareBracket => "`]`", Self::LeftCurlyBracket => "`{`", Self::RightCurlyBracket => "`}`", Self::Whitespace => "whitespace", Self::Comment => "comment", Self::Newline => "newline", Self::LiteralString => "literal string", Self::BasicString => "basic string", Self::MlLiteralString => "multi-line literal string", Self::MlBasicString => "multi-line basic string", Self::Atom => "token", Self::Eof => "end-of-input", } } pub fn encoding(&self) -> Option { match self { Self::LiteralString 
=> Some(Encoding::LiteralString), Self::BasicString => Some(Encoding::BasicString), Self::MlLiteralString => Some(Encoding::MlLiteralString), Self::MlBasicString => Some(Encoding::MlBasicString), Self::Atom | Self::LeftSquareBracket | Self::RightSquareBracket | Self::Dot | Self::Equals | Self::Comma | Self::RightCurlyBracket | Self::LeftCurlyBracket | Self::Whitespace | Self::Newline | Self::Comment | Self::Eof => None, } } } toml_parser-1.0.6+spec-1.1.0/src/lib.rs000064400000000000000000000023711046102023000154460ustar 00000000000000//! TOML lexer and parser //! //! Characteristics: //! - Error recovery //! - Lazy validation //! - `forbid(unsafe)` by default, requiring the `unsafe` feature otherwise //! - `no_std` support, including putting users in charge of allocation choices (including not //! allocating) //! //! Full parsing is broken into three phases: //! 1. [Lexing tokens][lexer] //! 2. [Parsing tokens][parser] (push parser) //! 3. Organizing the physical layout into the logical layout, //! 
including [decoding keys and values][decoder] #![cfg_attr(all(not(feature = "std"), not(test)), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(not(feature = "unsafe"), forbid(unsafe_code))] #![warn(clippy::std_instead_of_core)] #![warn(clippy::std_instead_of_alloc)] #![warn(clippy::print_stderr)] #![warn(clippy::print_stdout)] #[cfg(feature = "alloc")] extern crate alloc; #[macro_use] mod macros; #[cfg(feature = "debug")] pub(crate) mod debug; mod error; mod source; pub mod decoder; pub mod lexer; pub mod parser; pub use error::ErrorSink; pub use error::Expected; pub use error::ParseError; pub use source::Raw; pub use source::Source; pub use source::SourceIndex; pub use source::Span; #[doc = include_str!("../README.md")] #[cfg(doctest)] pub struct ReadmeDoctests; toml_parser-1.0.6+spec-1.1.0/src/macros.rs000064400000000000000000000000011046102023000161500ustar 00000000000000 toml_parser-1.0.6+spec-1.1.0/src/parser/document.rs000064400000000000000000001565371046102023000200300ustar 00000000000000use winnow::stream::Offset as _; use winnow::stream::Stream as _; use winnow::stream::TokenSlice; use super::EventReceiver; #[cfg(feature = "debug")] use crate::debug::DebugErrorSink; #[cfg(feature = "debug")] use crate::debug::DebugEventReceiver; use crate::decoder::Encoding; use crate::lexer::Token; use crate::lexer::TokenKind; use crate::ErrorSink; use crate::Expected; use crate::ParseError; /// Parse lexed tokens into [`Event`][super::Event]s pub fn parse_document( tokens: &[Token], receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { let mut tokens = TokenSlice::new(tokens); #[cfg(feature = "debug")] let mut receiver = DebugEventReceiver::new(receiver); #[cfg(feature = "debug")] let receiver = &mut receiver; #[cfg(feature = "debug")] let mut error = DebugErrorSink::new(error); #[cfg(feature = "debug")] let error = &mut error; document(&mut tokens, receiver, error); eof(&mut tokens, receiver, error); } /// Parse lexed tokens into 
[`Event`][super::Event]s pub fn parse_key(tokens: &[Token], receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink) { let mut tokens = TokenSlice::new(tokens); #[cfg(feature = "debug")] let mut receiver = DebugEventReceiver::new(receiver); #[cfg(feature = "debug")] let receiver = &mut receiver; #[cfg(feature = "debug")] let mut error = DebugErrorSink::new(error); #[cfg(feature = "debug")] let error = &mut error; key(&mut tokens, "invalid key", receiver, error); eof(&mut tokens, receiver, error); } /// Parse lexed tokens into [`Event`][super::Event]s pub fn parse_simple_key( tokens: &[Token], receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { let mut tokens = TokenSlice::new(tokens); #[cfg(feature = "debug")] let mut receiver = DebugEventReceiver::new(receiver); #[cfg(feature = "debug")] let receiver = &mut receiver; #[cfg(feature = "debug")] let mut error = DebugErrorSink::new(error); #[cfg(feature = "debug")] let error = &mut error; simple_key(&mut tokens, "invalid key", receiver, error); eof(&mut tokens, receiver, error); } /// Parse lexed tokens into [`Event`][super::Event]s pub fn parse_value(tokens: &[Token], receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink) { let mut tokens = TokenSlice::new(tokens); #[cfg(feature = "debug")] let mut receiver = DebugEventReceiver::new(receiver); #[cfg(feature = "debug")] let receiver = &mut receiver; #[cfg(feature = "debug")] let mut error = DebugErrorSink::new(error); #[cfg(feature = "debug")] let error = &mut error; value(&mut tokens, receiver, error); eof(&mut tokens, receiver, error); } type Stream<'i> = TokenSlice<'i, Token>; /// Parse a TOML Document /// /// Only the order of [`Event`][super::Event]s is validated and not [`Event`][super::Event] content nor semantics like duplicate /// keys. 
/// /// ```abnf /// toml = expression *( newline expression ) /// /// expression = ws [ comment ] /// expression =/ ws keyval ws [ comment ] /// expression =/ ws table ws [ comment ] /// /// ;; Key-Value pairs /// /// keyval = key keyval-sep val /// key = simple-key / dotted-key /// val = string / boolean / array / inline-table / date-time / float / integer /// /// simple-key = quoted-key / unquoted-key /// /// ;; Quoted and dotted key /// /// quoted-key = basic-string / literal-string /// dotted-key = simple-key 1*( dot-sep simple-key ) /// /// dot-sep = ws %x2E ws ; . Period /// keyval-sep = ws %x3D ws ; = /// /// ;; Array /// /// array = array-open [ array-values ] ws-comment-newline array-close /// /// array-open = %x5B ; [ /// array-close = %x5D ; ] /// /// array-values = ws-comment-newline val ws-comment-newline array-sep array-values /// array-values =/ ws-comment-newline val ws-comment-newline [ array-sep ] /// /// array-sep = %x2C ; , Comma /// /// ;; Table /// /// table = std-table / array-table /// /// ;; Standard Table /// /// std-table = std-table-open key std-table-close /// /// ;; Inline Table /// /// inline-table = inline-table-open [ inline-table-keyvals ] ws-comment-newline inline-table-close /// /// inline-table-keyvals = ws-comment-newline keyval ws-comment-newline inline-table-sep inline-table-keyvals /// inline-table-keyvals =/ ws-comment-newline keyval ws-comment-newline [ inline-table-sep ] /// /// ;; Array Table /// /// array-table = array-table-open key array-table-close /// ``` fn document(tokens: &mut Stream<'_>, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink) { while let Some(current_token) = tokens.next_token() { match current_token.kind() { TokenKind::LeftSquareBracket => on_table(tokens, current_token, receiver, error), TokenKind::RightSquareBracket => { on_missing_std_table(tokens, current_token, receiver, error); } TokenKind::LiteralString => on_expression_key( tokens, current_token, Some(Encoding::LiteralString), 
receiver, error, ), TokenKind::BasicString => on_expression_key( tokens, current_token, Some(Encoding::BasicString), receiver, error, ), TokenKind::MlLiteralString => on_expression_key( tokens, current_token, Some(Encoding::MlLiteralString), receiver, error, ), TokenKind::MlBasicString => on_expression_key( tokens, current_token, Some(Encoding::MlBasicString), receiver, error, ), TokenKind::Atom => on_expression_key(tokens, current_token, None, receiver, error), TokenKind::Equals => { let fake_key = current_token.span().before(); let encoding = None; receiver.simple_key(fake_key, encoding, error); on_expression_key_val_sep(tokens, current_token, receiver, error); } TokenKind::Dot => { on_expression_dot(tokens, current_token, receiver, error); } TokenKind::Comma | TokenKind::RightCurlyBracket | TokenKind::LeftCurlyBracket => { on_missing_expression_key(tokens, current_token, receiver, error); } TokenKind::Whitespace => receiver.whitespace(current_token.span(), error), TokenKind::Newline => receiver.newline(current_token.span(), error), TokenKind::Comment => on_comment(tokens, current_token, receiver, error), TokenKind::Eof => { break; } } } } /// Start a table from the open token /// /// This eats to EOL /// /// ```abnf /// ;; Table /// /// table = std-table / array-table /// /// ;; Standard Table /// /// std-table = std-table-open key std-table-close /// /// ;; Array Table /// /// array-table = array-table-open key array-table-close /// ``` fn on_table( tokens: &mut Stream<'_>, open_token: &Token, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { let is_array_table = if let Some(second_open_token) = next_token_if(tokens, |k| matches!(k, TokenKind::LeftSquareBracket)) { let span = open_token.span().append(second_open_token.span()); receiver.array_table_open(span, error); true } else { let span = open_token.span(); receiver.std_table_open(span, error); false }; opt_whitespace(tokens, receiver, error); let valid_key = key(tokens, "invalid table", 
receiver, error); opt_whitespace(tokens, receiver, error); let mut success = false; if let Some(close_token) = next_token_if(tokens, |k| matches!(k, TokenKind::RightSquareBracket)) { if is_array_table { if let Some(second_close_token) = next_token_if(tokens, |k| matches!(k, TokenKind::RightSquareBracket)) { let span = close_token.span().append(second_close_token.span()); receiver.array_table_close(span, error); success = true; } else { let context = open_token.span().append(close_token.span()); error.report_error( ParseError::new("unclosed array table") .with_context(context) .with_expected(&[Expected::Literal("]")]) .with_unexpected(close_token.span().after()), ); } } else { receiver.std_table_close(close_token.span(), error); success = true; } } else if valid_key { let last_key_token = tokens .previous_tokens() .find(|t| t.kind() != TokenKind::Whitespace) .unwrap_or(open_token); let context = open_token.span().append(last_key_token.span()); if is_array_table { error.report_error( ParseError::new("unclosed array table") .with_context(context) .with_expected(&[Expected::Literal("]]")]) .with_unexpected(last_key_token.span().after()), ); } else { error.report_error( ParseError::new("unclosed table") .with_context(context) .with_expected(&[Expected::Literal("]")]) .with_unexpected(last_key_token.span().after()), ); } } if success { ws_comment_newline(tokens, receiver, error); } else { ignore_to_newline(tokens, receiver, error); } } /// Parse a TOML key /// /// ```abnf /// ;; Key-Value pairs /// /// key = simple-key / dotted-key /// /// simple-key = quoted-key / unquoted-key /// /// ;; Quoted and dotted key /// /// quoted-key = basic-string / literal-string /// dotted-key = simple-key 1*( dot-sep simple-key ) /// /// dot-sep = ws %x2E ws ; . 
Period /// ``` fn key( tokens: &mut Stream<'_>, invalid_description: &'static str, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) -> bool { while let Some(current_token) = tokens.next_token() { let encoding = match current_token.kind() { TokenKind::RightSquareBracket | TokenKind::Comment | TokenKind::Equals | TokenKind::Comma | TokenKind::LeftSquareBracket | TokenKind::LeftCurlyBracket | TokenKind::RightCurlyBracket | TokenKind::Newline | TokenKind::Eof => { let fake_key = current_token.span().before(); let encoding = None; receiver.simple_key(fake_key, encoding, error); seek(tokens, -1); return false; } TokenKind::Whitespace => { receiver.whitespace(current_token.span(), error); continue; } TokenKind::Dot => { let fake_key = current_token.span().before(); let encoding = None; receiver.simple_key(fake_key, encoding, error); receiver.key_sep(current_token.span(), error); continue; } TokenKind::LiteralString => Some(Encoding::LiteralString), TokenKind::BasicString => Some(Encoding::BasicString), TokenKind::MlLiteralString => Some(Encoding::MlLiteralString), TokenKind::MlBasicString => Some(Encoding::MlBasicString), TokenKind::Atom => None, }; receiver.simple_key(current_token.span(), encoding, error); return opt_dot_keys(tokens, receiver, error); } let previous_span = tokens .previous_tokens() .find(|t| { !matches!( t.kind(), TokenKind::Whitespace | TokenKind::Comment | TokenKind::Newline | TokenKind::Eof ) }) .map(|t| t.span()) .unwrap_or_default(); error.report_error( ParseError::new(invalid_description) .with_context(previous_span) .with_expected(&[Expected::Description("key")]) .with_unexpected(previous_span.after()), ); false } /// Start an expression from a key compatible token type /// /// ```abnf /// expression = ws [ comment ] /// expression =/ ws keyval ws [ comment ] /// expression =/ ws table ws [ comment ] /// /// ;; Key-Value pairs /// /// keyval = key keyval-sep val /// ``` fn on_expression_key<'i>( tokens: &mut Stream<'i>, key_token: 
&'i Token, encoding: Option, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { receiver.simple_key(key_token.span(), encoding, error); opt_dot_keys(tokens, receiver, error); opt_whitespace(tokens, receiver, error); let Some(eq_token) = next_token_if(tokens, |k| matches!(k, TokenKind::Equals)) else { if let Some(peek_token) = tokens.first() { let span = peek_token.span().before(); error.report_error( ParseError::new("key with no value") .with_context(span) .with_expected(&[Expected::Literal("=")]) .with_unexpected(span), ); } ignore_to_newline(tokens, receiver, error); return; }; on_expression_key_val_sep(tokens, eq_token, receiver, error); } fn on_expression_dot<'i>( tokens: &mut Stream<'i>, dot_token: &'i Token, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { receiver.simple_key(dot_token.span().before(), None, error); seek(tokens, -1); opt_dot_keys(tokens, receiver, error); opt_whitespace(tokens, receiver, error); let Some(eq_token) = next_token_if(tokens, |k| matches!(k, TokenKind::Equals)) else { if let Some(peek_token) = tokens.first() { let span = peek_token.span().before(); error.report_error( ParseError::new("missing value for key") .with_context(span) .with_expected(&[Expected::Literal("=")]) .with_unexpected(span), ); } ignore_to_newline(tokens, receiver, error); return; }; on_expression_key_val_sep(tokens, eq_token, receiver, error); } fn on_expression_key_val_sep<'i>( tokens: &mut Stream<'i>, eq_token: &'i Token, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { receiver.key_val_sep(eq_token.span(), error); opt_whitespace(tokens, receiver, error); value(tokens, receiver, error); ws_comment_newline(tokens, receiver, error); } /// Parse a TOML simple key /// /// ```abnf /// ;; Key-Value pairs /// /// simple-key = quoted-key / unquoted-key /// /// ;; Quoted and dotted key /// /// quoted-key = basic-string / literal-string /// ``` fn simple_key( tokens: &mut Stream<'_>, invalid_description: &'static str, 
receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { let Some(current_token) = tokens.next_token() else { let previous_span = tokens .previous_tokens() .find(|t| { !matches!( t.kind(), TokenKind::Whitespace | TokenKind::Comment | TokenKind::Newline | TokenKind::Eof ) }) .map(|t| t.span()) .unwrap_or_default(); error.report_error( ParseError::new(invalid_description) .with_context(previous_span) .with_expected(&[Expected::Description("key")]) .with_unexpected(previous_span.after()), ); return; }; const EXPECTED_KEYS: [Expected; 3] = [ Expected::Description(Encoding::LiteralString.description()), Expected::Description(Encoding::BasicString.description()), Expected::Description(UNQUOTED_STRING), ]; let kind = match current_token.kind() { TokenKind::Dot | TokenKind::RightSquareBracket | TokenKind::Comment | TokenKind::Equals | TokenKind::Comma | TokenKind::LeftSquareBracket | TokenKind::LeftCurlyBracket | TokenKind::RightCurlyBracket | TokenKind::Newline | TokenKind::Eof | TokenKind::Whitespace => { on_missing_key(tokens, current_token, invalid_description, receiver, error); return; } TokenKind::LiteralString => Some(Encoding::LiteralString), TokenKind::BasicString => Some(Encoding::BasicString), TokenKind::MlLiteralString => { error.report_error( ParseError::new(invalid_description) .with_context(current_token.span()) .with_expected(&EXPECTED_KEYS) .with_unexpected(current_token.span()), ); Some(Encoding::MlLiteralString) } TokenKind::MlBasicString => { error.report_error( ParseError::new(invalid_description) .with_context(current_token.span()) .with_expected(&EXPECTED_KEYS) .with_unexpected(current_token.span()), ); Some(Encoding::MlBasicString) } TokenKind::Atom => None, }; receiver.simple_key(current_token.span(), kind, error); } /// Start a key from the first key compatible token type /// /// Returns the last key on success /// /// This will swallow the trailing [`TokenKind::Whitespace`] /// /// ```abnf /// key = simple-key / dotted-key /// /// 
simple-key = quoted-key / unquoted-key /// /// ;; Quoted and dotted key /// /// quoted-key = basic-string / literal-string /// dotted-key = simple-key 1*( dot-sep simple-key ) /// /// dot-sep = ws %x2E ws ; . Period /// ``` fn opt_dot_keys( tokens: &mut Stream<'_>, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) -> bool { opt_whitespace(tokens, receiver, error); let mut success = true; 'dot: while let Some(dot_token) = next_token_if(tokens, |k| matches!(k, TokenKind::Dot)) { receiver.key_sep(dot_token.span(), error); while let Some(current_token) = tokens.next_token() { let kind = match current_token.kind() { TokenKind::Equals | TokenKind::Comma | TokenKind::LeftSquareBracket | TokenKind::RightSquareBracket | TokenKind::LeftCurlyBracket | TokenKind::RightCurlyBracket | TokenKind::Comment | TokenKind::Newline | TokenKind::Eof => { let fake_key = current_token.span().before(); let encoding = None; receiver.simple_key(fake_key, encoding, error); seek(tokens, -1); success = false; break 'dot; } TokenKind::Whitespace => { receiver.whitespace(current_token.span(), error); continue; } TokenKind::Dot => { let fake_key = current_token.span().before(); let encoding = None; receiver.simple_key(fake_key, encoding, error); receiver.key_sep(current_token.span(), error); continue; } TokenKind::LiteralString => Some(Encoding::LiteralString), TokenKind::BasicString => Some(Encoding::BasicString), TokenKind::MlLiteralString => Some(Encoding::MlLiteralString), TokenKind::MlBasicString => Some(Encoding::MlBasicString), TokenKind::Atom => None, }; receiver.simple_key(current_token.span(), kind, error); opt_whitespace(tokens, receiver, error); continue 'dot; } let fake_key = dot_token.span().after(); let encoding = None; receiver.simple_key(fake_key, encoding, error); } success } /// Parse a value /// /// ```abnf /// val = string / boolean / array / inline-table / date-time / float / integer /// ``` fn value(tokens: &mut Stream<'_>, receiver: &mut dyn EventReceiver, 
error: &mut dyn ErrorSink) { let Some(current_token) = tokens.next_token() else { let previous_span = tokens .previous_tokens() .find(|t| { !matches!( t.kind(), TokenKind::Whitespace | TokenKind::Comment | TokenKind::Newline | TokenKind::Eof ) }) .map(|t| t.span()) .unwrap_or_default(); error.report_error( ParseError::new("missing value") .with_context(previous_span) .with_expected(&[Expected::Description("value")]) .with_unexpected(previous_span.after()), ); return; }; match current_token.kind() { TokenKind::Comment | TokenKind::Comma | TokenKind::Newline | TokenKind::Eof | TokenKind::Whitespace => { let fake_key = current_token.span().before(); let encoding = None; receiver.scalar(fake_key, encoding, error); seek(tokens, -1); } TokenKind::Equals => { error.report_error( ParseError::new("extra `=`") .with_context(current_token.span()) .with_expected(&[]) .with_unexpected(current_token.span()), ); receiver.error(current_token.span(), error); value(tokens, receiver, error); } TokenKind::LeftCurlyBracket => { on_inline_table_open(tokens, current_token, receiver, error); } TokenKind::RightCurlyBracket => { error.report_error( ParseError::new("missing inline table opening") .with_context(current_token.span()) .with_expected(&[Expected::Literal("{")]) .with_unexpected(current_token.span().before()), ); let _ = receiver.inline_table_open(current_token.span().before(), error); receiver.inline_table_close(current_token.span(), error); } TokenKind::LeftSquareBracket => { on_array_open(tokens, current_token, receiver, error); } TokenKind::RightSquareBracket => { error.report_error( ParseError::new("missing array opening") .with_context(current_token.span()) .with_expected(&[Expected::Literal("[")]) .with_unexpected(current_token.span().before()), ); let _ = receiver.array_open(current_token.span().before(), error); receiver.array_close(current_token.span(), error); } TokenKind::LiteralString | TokenKind::BasicString | TokenKind::MlLiteralString | TokenKind::MlBasicString | 
TokenKind::Dot | TokenKind::Atom => { on_scalar(tokens, current_token, receiver, error); } } } /// Parse a scalar value /// /// ```abnf /// val = string / boolean / array / inline-table / date-time / float / integer /// ``` fn on_scalar( tokens: &mut Stream<'_>, scalar: &Token, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { let mut span = scalar.span(); let encoding = match scalar.kind() { TokenKind::Comment | TokenKind::Comma | TokenKind::Newline | TokenKind::Eof | TokenKind::Whitespace | TokenKind::Equals | TokenKind::LeftCurlyBracket | TokenKind::RightCurlyBracket | TokenKind::LeftSquareBracket | TokenKind::RightSquareBracket => { unreachable!() } TokenKind::LiteralString => Some(Encoding::LiteralString), TokenKind::BasicString => Some(Encoding::BasicString), TokenKind::MlLiteralString => Some(Encoding::MlLiteralString), TokenKind::MlBasicString => Some(Encoding::MlBasicString), TokenKind::Dot | TokenKind::Atom => { while let Some(next_token) = tokens.first() { match next_token.kind() { TokenKind::Comment | TokenKind::Comma | TokenKind::Newline | TokenKind::Eof | TokenKind::Equals | TokenKind::LeftCurlyBracket | TokenKind::RightCurlyBracket | TokenKind::LeftSquareBracket | TokenKind::RightSquareBracket | TokenKind::LiteralString | TokenKind::BasicString | TokenKind::MlLiteralString | TokenKind::MlBasicString => { break; } TokenKind::Whitespace => { if let Some(second) = tokens.get(1) { if second.kind() == TokenKind::Atom { span = span.append(second.span()); let _ = tokens.next_slice(2); continue; } } break; } TokenKind::Dot | TokenKind::Atom => { span = span.append(next_token.span()); let _ = tokens.next_token(); } } } None } }; receiver.scalar(span, encoding, error); } /// Parse an array /// /// ```abnf /// ;; Array /// /// array = array-open [ array-values ] ws-comment-newline array-close /// /// array-values = ws-comment-newline val ws-comment-newline array-sep array-values /// array-values =/ ws-comment-newline val ws-comment-newline [ 
array-sep ] /// ``` fn on_array_open( tokens: &mut Stream<'_>, array_open: &Token, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink, ) { if !receiver.array_open(array_open.span(), error) { ignore_to_value_close(tokens, TokenKind::RightSquareBracket, receiver, error); return; } enum State { NeedsValue, NeedsComma, } let mut state = State::NeedsValue; while let Some(current_token) = tokens.next_token() { match current_token.kind() { TokenKind::Comment => { on_comment(tokens, current_token, receiver, error); } TokenKind::Whitespace => { receiver.whitespace(current_token.span(), error); } TokenKind::Newline => { receiver.newline(current_token.span(), error); } TokenKind::Eof => { error.report_error( ParseError::new("unclosed array") .with_context(array_open.span()) .with_expected(&[Expected::Literal("]")]) .with_unexpected(current_token.span()), ); receiver.array_close(current_token.span().before(), error); return; } TokenKind::Comma => match state { State::NeedsValue => { error.report_error( ParseError::new("extra comma in array") .with_context(array_open.span()) .with_expected(&[Expected::Description("value")]) .with_unexpected(current_token.span()), ); receiver.error(current_token.span(), error); } State::NeedsComma => { receiver.value_sep(current_token.span(), error); state = State::NeedsValue; } }, TokenKind::Equals => { error.report_error( ParseError::new("unexpected `=` in array") .with_context(array_open.span()) .with_expected(&[Expected::Description("value"), Expected::Literal("]")]) .with_unexpected(current_token.span()), ); receiver.error(current_token.span(), error); } TokenKind::LeftCurlyBracket => { if !matches!(state, State::NeedsValue) { error.report_error( ParseError::new("missing comma between array elements") .with_context(array_open.span()) .with_expected(&[Expected::Literal(",")]) .with_unexpected(current_token.span().before()), ); receiver.value_sep(current_token.span().before(), error); } on_inline_table_open(tokens, current_token, 
receiver, error); state = State::NeedsComma; } TokenKind::RightCurlyBracket => { if !matches!(state, State::NeedsValue) { error.report_error( ParseError::new("missing comma between array elements") .with_context(array_open.span()) .with_expected(&[Expected::Literal(",")]) .with_unexpected(current_token.span().before()), ); receiver.value_sep(current_token.span().before(), error); } error.report_error( ParseError::new("missing inline table opening") .with_context(current_token.span()) .with_expected(&[Expected::Literal("{")]) .with_unexpected(current_token.span().before()), ); let _ = receiver.inline_table_open(current_token.span().before(), error); receiver.inline_table_close(current_token.span(), error); state = State::NeedsComma; } TokenKind::LeftSquareBracket => { if !matches!(state, State::NeedsValue) { error.report_error( ParseError::new("missing comma between array elements") .with_context(array_open.span()) .with_expected(&[Expected::Literal(",")]) .with_unexpected(current_token.span().before()), ); receiver.value_sep(current_token.span().before(), error); } on_array_open(tokens, current_token, receiver, error); state = State::NeedsComma; } TokenKind::RightSquareBracket => { receiver.array_close(current_token.span(), error); return; } TokenKind::LiteralString | TokenKind::BasicString | TokenKind::MlLiteralString | TokenKind::MlBasicString | TokenKind::Dot | TokenKind::Atom => { if !matches!(state, State::NeedsValue) { error.report_error( ParseError::new("missing comma between array elements") .with_context(array_open.span()) .with_expected(&[Expected::Literal(",")]) .with_unexpected(current_token.span().before()), ); receiver.value_sep(current_token.span().before(), error); } on_scalar(tokens, current_token, receiver, error); state = State::NeedsComma; } } } let previous_span = tokens .previous_tokens() .find(|t| { !matches!( t.kind(), TokenKind::Whitespace | TokenKind::Comment | TokenKind::Newline | TokenKind::Eof ) }) .map(|t| t.span()) 
.unwrap_or_default();
    error.report_error(
        ParseError::new("unclosed array")
            .with_context(array_open.span())
            .with_expected(&[Expected::Literal("]")])
            .with_unexpected(previous_span.after()),
    );
    receiver.array_close(previous_span.after(), error);
}

/// Parse an inline table, starting just after its opening `{`
///
/// Walks the remaining tokens with a small state machine
/// (`NeedsKey` -> `NeedsEquals` -> `NeedsValue` -> `NeedsComma` -> `NeedsKey` ...), forwarding
/// events to `receiver`.  Malformed input is reported to `error` and parsing continues, so one
/// mistake does not hide later ones.
///
/// - `tokens`: stream positioned right after `inline_table_open`
/// - `inline_table_open`: the `{` token; its span is the context for most errors reported here
/// - `receiver`: when it declines [`EventReceiver::inline_table_open`], everything up to the
///   matching `}` is skipped via `ignore_to_value_close`
/// - `error`: sink for recoverable parse errors
///
/// ```abnf
/// ;; Inline Table
///
/// inline-table = inline-table-open [ inline-table-keyvals ] ws-comment-newline inline-table-close
///
/// inline-table-keyvals =  ws-comment-newline keyval ws-comment-newline inline-table-sep inline-table-keyvals
/// inline-table-keyvals =/ ws-comment-newline keyval ws-comment-newline [ inline-table-sep ]
/// ```
fn on_inline_table_open(
    tokens: &mut Stream<'_>,
    inline_table_open: &Token,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    if !receiver.inline_table_open(inline_table_open.span(), error) {
        ignore_to_value_close(tokens, TokenKind::RightCurlyBracket, receiver, error);
        return;
    }

    /// What the parser needs next inside the inline table
    #[allow(clippy::enum_variant_names)]
    #[derive(Debug)]
    enum State {
        NeedsKey,
        NeedsEquals,
        NeedsValue,
        NeedsComma,
    }

    impl State {
        /// The "expected" hint attached to errors raised while in this state
        fn expected(&self) -> &'static [Expected] {
            match self {
                Self::NeedsKey => &[Expected::Description("key")],
                Self::NeedsEquals => &[Expected::Literal("=")],
                Self::NeedsValue => &[Expected::Description("value")],
                Self::NeedsComma => &[Expected::Literal(",")],
            }
        }
    }

    let mut state = State::NeedsKey;
    while let Some(current_token) = tokens.next_token() {
        match current_token.kind() {
            TokenKind::Comment => {
                on_comment(tokens, current_token, receiver, error);
            }
            TokenKind::Whitespace => {
                receiver.whitespace(current_token.span(), error);
            }
            TokenKind::Newline => {
                receiver.newline(current_token.span(), error);
            }
            TokenKind::Eof => {
                error.report_error(
                    ParseError::new("unclosed inline table")
                        .with_context(inline_table_open.span())
                        .with_expected(&[Expected::Literal("}")])
                        .with_unexpected(current_token.span()),
                );

                // Synthesize the close just before EOF so open/close events stay balanced
                receiver.inline_table_close(current_token.span().before(), error);
                return;
            }
            TokenKind::Comma => match state {
                State::NeedsKey | State::NeedsEquals | State::NeedsValue => {
                    error.report_error(
                        ParseError::new("extra comma in inline table")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );
                    receiver.error(current_token.span(), error);
                }
                State::NeedsComma => {
                    receiver.value_sep(current_token.span(), error);

                    state = State::NeedsKey;
                }
            },
            TokenKind::Equals => match state {
                State::NeedsKey => {
                    // `=` without a key: emit an empty key positioned just before the `=`
                    let fake_key = current_token.span().before();
                    let encoding = None;
                    receiver.simple_key(fake_key, encoding, error);

                    receiver.key_val_sep(current_token.span(), error);

                    state = State::NeedsValue;
                }
                State::NeedsEquals => {
                    receiver.key_val_sep(current_token.span(), error);

                    state = State::NeedsValue;
                }
                State::NeedsValue | State::NeedsComma => {
                    error.report_error(
                        ParseError::new("extra assignment between key-value pairs")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );
                    receiver.error(current_token.span(), error);
                }
            },
            TokenKind::LeftCurlyBracket => match state {
                State::NeedsKey | State::NeedsComma => {
                    error.report_error(
                        ParseError::new("missing key for inline table element")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );
                    receiver.error(current_token.span(), error);
                    ignore_to_value_close(tokens, TokenKind::RightCurlyBracket, receiver, error);
                }
                State::NeedsEquals => {
                    error.report_error(
                        ParseError::new("missing assignment between key-value pairs")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );

                    on_inline_table_open(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
                State::NeedsValue => {
                    on_inline_table_open(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
            },
            TokenKind::RightCurlyBracket => {
                receiver.inline_table_close(current_token.span(), error);

                return;
            }
            TokenKind::LeftSquareBracket => match state {
                State::NeedsKey | State::NeedsComma => {
                    error.report_error(
                        ParseError::new("missing key for inline table element")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );
                    receiver.error(current_token.span(), error);
                    ignore_to_value_close(tokens, TokenKind::RightSquareBracket, receiver, error);
                }
                State::NeedsEquals => {
                    error.report_error(
                        ParseError::new("missing assignment between key-value pairs")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );

                    on_array_open(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
                State::NeedsValue => {
                    on_array_open(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
            },
            TokenKind::RightSquareBracket => match state {
                State::NeedsKey | State::NeedsEquals | State::NeedsComma => {
                    error.report_error(
                        ParseError::new("invalid inline table element")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );
                    receiver.error(current_token.span(), error);
                }
                State::NeedsValue => {
                    error.report_error(
                        ParseError::new("missing array opening")
                            .with_context(current_token.span())
                            .with_expected(&[Expected::Literal("[")])
                            .with_unexpected(current_token.span().before()),
                    );

                    // Recover by treating the stray `]` as an empty array
                    let _ = receiver.array_open(current_token.span().before(), error);
                    receiver.array_close(current_token.span(), error);

                    state = State::NeedsComma;
                }
            },
            TokenKind::LiteralString
            | TokenKind::BasicString
            | TokenKind::MlLiteralString
            | TokenKind::MlBasicString
            | TokenKind::Dot
            | TokenKind::Atom => match state {
                State::NeedsKey | State::NeedsComma => {
                    // A key where a `,` was required: report it, then parse the key anyway
                    if matches!(state, State::NeedsComma) {
                        error.report_error(
                            ParseError::new("missing comma between key-value pairs")
                                .with_context(inline_table_open.span())
                                .with_expected(state.expected())
                                .with_unexpected(current_token.span().before()),
                        );
                    }

                    if current_token.kind() == TokenKind::Dot {
                        // Leading `.`: emit an empty key before it and let `opt_dot_keys`
                        // re-read the dot itself
                        receiver.simple_key(
                            current_token.span().before(),
                            current_token.kind().encoding(),
                            error,
                        );
                        seek(tokens, -1);
                    } else {
                        receiver.simple_key(
                            current_token.span(),
                            current_token.kind().encoding(),
                            error,
                        );
                    }
                    opt_dot_keys(tokens, receiver, error);

                    state = State::NeedsEquals;
                }
                State::NeedsEquals => {
                    error.report_error(
                        ParseError::new("missing assignment between key-value pairs")
                            .with_context(inline_table_open.span())
                            .with_expected(state.expected())
                            .with_unexpected(current_token.span().before()),
                    );

                    on_scalar(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
                State::NeedsValue => {
                    on_scalar(tokens, current_token, receiver, error);

                    state = State::NeedsComma;
                }
            },
        }
    }

    // The stream ended without an EOF token: anchor the error after the last significant token
    let previous_span = tokens
        .previous_tokens()
        .find(|t| {
            !matches!(
                t.kind(),
                TokenKind::Whitespace
                    | TokenKind::Comment
                    | TokenKind::Newline
                    | TokenKind::Eof
            )
        })
        .map(|t| t.span())
        .unwrap_or_default();
    error.report_error(
        ParseError::new("unclosed inline table")
            .with_context(inline_table_open.span())
            .with_expected(&[Expected::Literal("}")])
            .with_unexpected(previous_span.after()),
    );
    // Close the inline table that was opened above (not an array), matching the `Eof` arm
    receiver.inline_table_close(previous_span.after(), error);
}

/// Parse whitespace, if present
///
/// ```abnf
/// ws = *wschar
/// ```
fn opt_whitespace(
    tokens: &mut Stream<'_>,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    if let Some(ws_token) = next_token_if(tokens, |k| matches!(k, TokenKind::Whitespace)) {
        receiver.whitespace(ws_token.span(), error);
    }
}

/// Parse EOL decor, if present
///
/// ```abnf
/// toml =
expression *( newline expression )
///
/// expression =  ws [ comment ]
/// expression =/ ws keyval ws [ comment ]
/// expression =/ ws table ws [ comment ]
///
/// ;; Whitespace
///
/// ws = *wschar
/// wschar =  %x20  ; Space
/// wschar =/ %x09  ; Horizontal tab
///
/// ;; Newline
///
/// newline =  %x0A     ; LF
/// newline =/ %x0D.0A  ; CRLF
///
/// ;; Comment
///
/// comment = comment-start-symbol *non-eol
/// ```
fn ws_comment_newline(
    tokens: &mut Stream<'_>,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    // Span of the first token seen here; start of the context for an "unexpected" error
    let mut first = None;
    while let Some(current_token) = tokens.next_token() {
        let first = first.get_or_insert(current_token.span());
        match current_token.kind() {
            TokenKind::Dot
            | TokenKind::Equals
            | TokenKind::Comma
            | TokenKind::LeftSquareBracket
            | TokenKind::RightSquareBracket
            | TokenKind::LeftCurlyBracket
            | TokenKind::RightCurlyBracket
            | TokenKind::LiteralString
            | TokenKind::BasicString
            | TokenKind::MlLiteralString
            | TokenKind::MlBasicString
            | TokenKind::Atom => {
                let context = first.append(current_token.span());
                error.report_error(
                    ParseError::new("unexpected key or value")
                        .with_context(context)
                        .with_expected(&[Expected::Literal("\n"), Expected::Literal("#")])
                        .with_unexpected(current_token.span().before()),
                );

                receiver.error(current_token.span(), error);
                ignore_to_newline(tokens, receiver, error);
                break;
            }
            TokenKind::Comment => {
                on_comment(tokens, current_token, receiver, error);
                break;
            }
            TokenKind::Whitespace => {
                receiver.whitespace(current_token.span(), error);
                continue;
            }
            TokenKind::Newline => {
                receiver.newline(current_token.span(), error);
                break;
            }
            TokenKind::Eof => {
                break;
            }
        }
    }
}

/// Start EOL from [`TokenKind::Comment`]
///
/// Emits the comment event, then consumes one more token: a newline is forwarded, EOF is
/// accepted silently, and anything else is reported and skipped up to the end of the line.
fn on_comment(
    tokens: &mut Stream<'_>,
    comment_token: &Token,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    receiver.comment(comment_token.span(), error);

    let Some(current_token) = tokens.next_token() else {
        return;
    };
    match current_token.kind() {
        TokenKind::Dot
        | TokenKind::Equals
        | TokenKind::Comma
        | TokenKind::LeftSquareBracket
        | TokenKind::RightSquareBracket
        | TokenKind::LeftCurlyBracket
        | TokenKind::RightCurlyBracket
        | TokenKind::Whitespace
        | TokenKind::Comment
        | TokenKind::LiteralString
        | TokenKind::BasicString
        | TokenKind::MlLiteralString
        | TokenKind::MlBasicString
        | TokenKind::Atom => {
            let context = comment_token.span().append(current_token.span());
            error.report_error(
                ParseError::new("unexpected content between comment and newline")
                    .with_context(context)
                    .with_expected(&[Expected::Literal("\n")])
                    .with_unexpected(current_token.span().before()),
            );

            receiver.error(current_token.span(), error);
            ignore_to_newline(tokens, receiver, error);
        }
        TokenKind::Newline => {
            receiver.newline(current_token.span(), error);
        }
        TokenKind::Eof => {}
    }
}

/// Consume the end of input
///
/// An EOF token (or an already exhausted stream) is accepted silently.  Any other token is
/// reported as "unexpected content" and it, plus every remaining non-EOF token, is emitted as
/// an error event.
fn eof(tokens: &mut Stream<'_>, receiver: &mut dyn EventReceiver, error: &mut dyn ErrorSink) {
    let Some(current_token) = tokens.next_token() else {
        return;
    };

    match current_token.kind() {
        TokenKind::Dot
        | TokenKind::Equals
        | TokenKind::Comma
        | TokenKind::LeftSquareBracket
        | TokenKind::RightSquareBracket
        | TokenKind::LeftCurlyBracket
        | TokenKind::RightCurlyBracket
        | TokenKind::LiteralString
        | TokenKind::BasicString
        | TokenKind::MlLiteralString
        | TokenKind::MlBasicString
        | TokenKind::Atom
        | TokenKind::Comment
        | TokenKind::Whitespace
        | TokenKind::Newline => {
            error.report_error(
                ParseError::new("unexpected content")
                    .with_context(current_token.span())
                    .with_expected(&[])
                    .with_unexpected(current_token.span().before()),
            );

            receiver.error(current_token.span(), error);
            // Drain the rest of the stream; only one error is reported for all of it
            while let Some(current_token) = tokens.next_token() {
                if current_token.kind() == TokenKind::Eof {
                    continue;
                }
                receiver.error(current_token.span(), error);
            }
        }
        TokenKind::Eof => {}
    }
}

/// Don't bother recovering until [`TokenKind::Newline`]
///
/// Content tokens are emitted as error events and whitespace is forwarded as-is.  Stops after
/// the first comment (handled by `on_comment`) or newline, or at EOF.
#[cold]
fn ignore_to_newline(
    tokens: &mut Stream<'_>,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    while let Some(current_token) = tokens.next_token() {
        match current_token.kind() {
            TokenKind::Dot
            | TokenKind::Equals
            | TokenKind::Comma
            | TokenKind::LeftSquareBracket
            | TokenKind::RightSquareBracket
            | TokenKind::LeftCurlyBracket
            | TokenKind::RightCurlyBracket
            | TokenKind::LiteralString
            | TokenKind::BasicString
            | TokenKind::MlLiteralString
            | TokenKind::MlBasicString
            | TokenKind::Atom => {
                receiver.error(current_token.span(), error);
            }
            TokenKind::Comment => {
                on_comment(tokens, current_token, receiver, error);
                break;
            }
            TokenKind::Whitespace => {
                receiver.whitespace(current_token.span(), error);
            }
            TokenKind::Newline => {
                receiver.newline(current_token.span(), error);
                break;
            }
            TokenKind::Eof => {
                break;
            }
        }
    }
}

/// Don't bother recovering until the matching [`TokenKind`]
///
/// Attempts to ignore nested `[]`, `{}`.
///
/// `closing_kind` is the bracket kind that ends the skipped value.  When it is found at nesting
/// depth zero the corresponding close event is emitted and skipping stops; skipping also stops
/// at EOF.  Other brackets and content tokens are emitted as error events.
#[cold]
fn ignore_to_value_close(
    tokens: &mut Stream<'_>,
    closing_kind: TokenKind,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    // Depth of `[` / `{` opened (and not yet closed) while skipping
    let mut array_count: usize = 0;
    let mut inline_table_count: usize = 0;
    while let Some(current_token) = tokens.next_token() {
        match current_token.kind() {
            TokenKind::Dot
            | TokenKind::Equals
            | TokenKind::Comma
            | TokenKind::LiteralString
            | TokenKind::BasicString
            | TokenKind::MlLiteralString
            | TokenKind::MlBasicString
            | TokenKind::Atom => {
                receiver.error(current_token.span(), error);
            }
            TokenKind::Comment => {
                on_comment(tokens, current_token, receiver, error);
            }
            TokenKind::Whitespace => {
                receiver.whitespace(current_token.span(), error);
            }
            TokenKind::Newline => {
                receiver.newline(current_token.span(), error);
            }
            TokenKind::LeftSquareBracket => {
                receiver.error(current_token.span(), error);
                array_count += 1;
            }
            TokenKind::RightSquareBracket => {
                if array_count == 0 && current_token.kind() == closing_kind {
                    receiver.array_close(current_token.span(), error);
                    break;
                } else {
                    receiver.error(current_token.span(), error);
                    // Saturate so an unmatched `]` cannot underflow the depth
                    array_count = array_count.saturating_sub(1);
                }
            }
            TokenKind::LeftCurlyBracket => {
                receiver.error(current_token.span(), error);
                inline_table_count += 1;
            }
            TokenKind::RightCurlyBracket => {
                if inline_table_count == 0 && current_token.kind() == closing_kind {
                    receiver.inline_table_close(current_token.span(), error);
                    break;
                } else {
                    receiver.error(current_token.span(), error);
                    // Saturate so an unmatched `}` cannot underflow the depth
                    inline_table_count = inline_table_count.saturating_sub(1);
                }
            }
            TokenKind::Eof => {
                break;
            }
        }
    }
}

/// Report that a key was expected at `token` and emit a fitting event for that token
///
/// The error message is `invalid_description`.  EOF produces no event, a newline or comment is
/// forwarded as such, and any other token is emitted as an error event.
#[cold]
fn on_missing_key(
    tokens: &mut Stream<'_>,
    token: &Token,
    invalid_description: &'static str,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    error.report_error(
        ParseError::new(invalid_description)
            .with_context(token.span())
            .with_expected(&[Expected::Description("key")])
            .with_unexpected(token.span().before()),
    );

    if token.kind() == TokenKind::Eof {
        // Nothing to emit for EOF
    } else if token.kind() == TokenKind::Newline {
        receiver.newline(token.span(), error);
    } else if token.kind() == TokenKind::Comment {
        on_comment(tokens, token, receiver, error);
    } else {
        receiver.error(token.span(), error);
    }
}

/// Report "invalid key-value pair" at `token`, emit it as an error event and skip the rest of
/// the line
#[cold]
fn on_missing_expression_key(
    tokens: &mut Stream<'_>,
    token: &Token,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    error.report_error(
        ParseError::new("invalid key-value pair")
            .with_context(token.span())
            .with_expected(&[Expected::Description("key")])
            .with_unexpected(token.span().before()),
    );

    receiver.error(token.span(), error);
    ignore_to_newline(tokens, receiver, error);
}

/// Report "missing table open" at `token`, emit it as an error event and skip the rest of the
/// line
#[cold]
fn on_missing_std_table(
    tokens: &mut Stream<'_>,
    token: &Token,
    receiver: &mut dyn EventReceiver,
    error: &mut dyn ErrorSink,
) {
    error.report_error(
        ParseError::new("missing table open")
            .with_context(token.span())
            .with_expected(&[Expected::Literal("[")])
            .with_unexpected(token.span().before()),
    );

    receiver.error(token.span(), error);
    ignore_to_newline(tokens, receiver, error);
}

/// Consume and return the next token only when `pred` accepts its kind
///
/// Returns `None` without calling `next_token` when the stream has no first token or `pred`
/// rejects it.
fn next_token_if<'i, F: Fn(TokenKind) -> bool>(
    tokens: &mut Stream<'i>,
    pred: F,
) -> Option<&'i Token> {
    match tokens.first() {
        Some(next) if pred(next.kind()) => tokens.next_token(),
        _ => None,
    }
}

/// Reposition `stream` by `offset` relative to its current position
///
/// NOTE(review): every caller visible in this file passes `-1` to un-read one token.
fn seek(stream: &mut Stream<'_>, offset: isize) {
    let current = stream.checkpoint();
stream.reset_to_start(); let start = stream.checkpoint(); let old_offset = current.offset_from(&start); let new_offset = (old_offset as isize).saturating_add(offset) as usize; if new_offset < stream.eof_offset() { #[cfg(feature = "unsafe")] // SAFETY: bounds were checked unsafe { stream.next_slice_unchecked(new_offset) }; #[cfg(not(feature = "unsafe"))] stream.next_slice(new_offset); } else { stream.finish(); } } const UNQUOTED_STRING: &str = "unquoted string"; toml_parser-1.0.6+spec-1.1.0/src/parser/event.rs000064400000000000000000000423741046102023000173240ustar 00000000000000use crate::decoder::Encoding; use crate::ErrorSink; use crate::ParseError; use crate::Source; use crate::Span; pub trait EventReceiver { fn std_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn std_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn array_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn array_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} /// Returns if entering the inline table is allowed #[must_use] fn inline_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool { true } fn inline_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} /// Returns if entering the array is allowed #[must_use] fn array_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool { true } fn array_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn simple_key(&mut self, _span: Span, _kind: Option, _error: &mut dyn ErrorSink) {} fn key_sep(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn key_val_sep(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn scalar(&mut self, _span: Span, _kind: Option, _error: &mut dyn ErrorSink) {} fn value_sep(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn whitespace(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn comment(&mut self, _span: Span, _error: &mut dyn ErrorSink) {} fn newline(&mut self, _span: Span, 
_error: &mut dyn ErrorSink) {}
    fn error(&mut self, _span: Span, _error: &mut dyn ErrorSink) {}
}

/// Forward every parse event to a closure as an [`Event`]
///
/// Container opens are always allowed (`inline_table_open` / `array_open` return `true`).
impl<F> EventReceiver for F
where
    F: FnMut(Event),
{
    fn std_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::StdTableOpen,
            encoding: None,
            span,
        });
    }
    fn std_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::StdTableClose,
            encoding: None,
            span,
        });
    }
    fn array_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::ArrayTableOpen,
            encoding: None,
            span,
        });
    }
    fn array_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::ArrayTableClose,
            encoding: None,
            span,
        });
    }
    fn inline_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) -> bool {
        (self)(Event {
            kind: EventKind::InlineTableOpen,
            encoding: None,
            span,
        });
        true
    }
    fn inline_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::InlineTableClose,
            encoding: None,
            span,
        });
    }
    fn array_open(&mut self, span: Span, _error: &mut dyn ErrorSink) -> bool {
        (self)(Event {
            kind: EventKind::ArrayOpen,
            encoding: None,
            span,
        });
        true
    }
    fn array_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::ArrayClose,
            encoding: None,
            span,
        });
    }
    fn simple_key(&mut self, span: Span, encoding: Option<Encoding>, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::SimpleKey,
            encoding,
            span,
        });
    }
    fn key_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::KeySep,
            encoding: None,
            span,
        });
    }
    fn key_val_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::KeyValSep,
            encoding: None,
            span,
        });
    }
    fn scalar(&mut self, span: Span, encoding: Option<Encoding>, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::Scalar,
            encoding,
            span,
        });
    }
    fn value_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::ValueSep,
            encoding: None,
            span,
        });
    }
    fn whitespace(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::Whitespace,
            encoding: None,
            span,
        });
    }
    fn comment(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::Comment,
            encoding: None,
            span,
        });
    }
    fn newline(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::Newline,
            encoding: None,
            span,
        });
    }
    fn error(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        (self)(Event {
            kind: EventKind::Error,
            encoding: None,
            span,
        });
    }
}

/// Collect every parse event, in order, by pushing an [`Event`] onto the vector
///
/// Container opens are always allowed (`inline_table_open` / `array_open` return `true`).
#[cfg(feature = "alloc")]
#[allow(unused_qualifications)]
impl EventReceiver for alloc::vec::Vec<Event> {
    fn std_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::StdTableOpen,
            encoding: None,
            span,
        });
    }
    fn std_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::StdTableClose,
            encoding: None,
            span,
        });
    }
    fn array_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::ArrayTableOpen,
            encoding: None,
            span,
        });
    }
    fn array_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::ArrayTableClose,
            encoding: None,
            span,
        });
    }
    fn inline_table_open(&mut self, span: Span, _error: &mut dyn ErrorSink) -> bool {
        self.push(Event {
            kind: EventKind::InlineTableOpen,
            encoding: None,
            span,
        });
        true
    }
    fn inline_table_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::InlineTableClose,
            encoding: None,
            span,
        });
    }
    fn array_open(&mut self, span: Span, _error: &mut dyn ErrorSink) -> bool {
        self.push(Event {
            kind: EventKind::ArrayOpen,
            encoding: None,
            span,
        });
        true
    }
    fn array_close(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::ArrayClose,
            encoding: None,
            span,
        });
    }
    fn simple_key(&mut self, span: Span, encoding: Option<Encoding>, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::SimpleKey,
            encoding,
            span,
        });
    }
    fn key_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::KeySep,
            encoding: None,
            span,
        });
    }
    fn key_val_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::KeyValSep,
            encoding: None,
            span,
        });
    }
    fn scalar(&mut self, span: Span, encoding: Option<Encoding>, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::Scalar,
            encoding,
            span,
        });
    }
    fn value_sep(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::ValueSep,
            encoding: None,
            span,
        });
    }
    fn whitespace(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::Whitespace,
            encoding: None,
            span,
        });
    }
    fn comment(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::Comment,
            encoding: None,
            span,
        });
    }
    fn newline(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::Newline,
            encoding: None,
            span,
        });
    }
    fn error(&mut self, span: Span, _error: &mut dyn ErrorSink) {
        self.push(Event {
            kind: EventKind::Error,
            encoding: None,
            span,
        });
    }
}

/// Discard all events: every method keeps the trait's default body
impl EventReceiver for () {}

/// Centralize validation for all whitespace-like content
pub struct ValidateWhitespace<'r, 's> {
    receiver: &'r mut dyn EventReceiver,
    source: Source<'s>,
}

impl<'r, 's> ValidateWhitespace<'r, 's> {
    /// Wrap `receiver`, keeping `source` alongside it
    pub fn new(receiver: &'r mut dyn EventReceiver, source: Source<'s>) -> Self {
        Self { receiver, source }
    }
}

impl EventReceiver for ValidateWhitespace<'_, '_> {
    fn std_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) {
        self.receiver.std_table_open(span, error);
    }
    fn std_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) {
        self.receiver.std_table_close(span, error);
    }
    fn array_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) {
        self.receiver.array_table_open(span, error);
    }
    fn array_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) {
self.receiver.array_table_close(span, error); } fn inline_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { self.receiver.inline_table_open(span, error) } fn inline_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.inline_table_close(span, error); } fn array_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { self.receiver.array_open(span, error) } fn array_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_close(span, error); } fn simple_key(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.simple_key(span, encoding, error); } fn key_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_sep(span, error); } fn key_val_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_val_sep(span, error); } fn scalar(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.scalar(span, encoding, error); } fn value_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.value_sep(span, error); } fn whitespace(&mut self, span: Span, error: &mut dyn ErrorSink) { #[cfg(feature = "unsafe")] // SAFETY: callers must use valid span let raw = unsafe { self.source.get_unchecked(span) }; #[cfg(not(feature = "unsafe"))] let raw = self.source.get(span).expect("token spans are valid"); raw.decode_whitespace(error); self.receiver.whitespace(span, error); } fn comment(&mut self, span: Span, error: &mut dyn ErrorSink) { #[cfg(feature = "unsafe")] // SAFETY: callers must use valid span let raw = unsafe { self.source.get_unchecked(span) }; #[cfg(not(feature = "unsafe"))] let raw = self.source.get(span).expect("token spans are valid"); raw.decode_comment(error); self.receiver.comment(span, error); } fn newline(&mut self, span: Span, error: &mut dyn ErrorSink) { #[cfg(feature = "unsafe")] // SAFETY: callers must use valid span let raw = unsafe { self.source.get_unchecked(span) }; #[cfg(not(feature 
= "unsafe"))] let raw = self.source.get(span).expect("token spans are valid"); raw.decode_newline(error); self.receiver.newline(span, error); } fn error(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.error(span, error); } } pub struct RecursionGuard<'r> { receiver: &'r mut dyn EventReceiver, max_depth: u32, depth: i64, } impl<'r> RecursionGuard<'r> { pub fn new(receiver: &'r mut dyn EventReceiver, max_depth: u32) -> Self { Self { receiver, max_depth, depth: 0, } } fn within_depth(&self) -> bool { self.depth <= self.max_depth as i64 } } impl EventReceiver for RecursionGuard<'_> { fn std_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.std_table_open(span, error); } fn std_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.std_table_close(span, error); } fn array_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_table_open(span, error); } fn array_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.array_table_close(span, error); } fn inline_table_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { let allowed = self.receiver.inline_table_open(span, error); self.depth += 1; let within_depth = self.within_depth(); if allowed && !within_depth { error.report_error( ParseError::new("cannot recurse further; max recursion depth met") .with_unexpected(span), ); } allowed && within_depth } fn inline_table_close(&mut self, span: Span, error: &mut dyn ErrorSink) { self.depth -= 1; self.receiver.inline_table_close(span, error); } fn array_open(&mut self, span: Span, error: &mut dyn ErrorSink) -> bool { let allowed = self.receiver.array_open(span, error); self.depth += 1; let within_depth = self.within_depth(); if allowed && !within_depth { error.report_error( ParseError::new("cannot recurse further; max recursion depth met") .with_unexpected(span), ); } allowed && within_depth } fn array_close(&mut self, span: Span, error: &mut dyn 
ErrorSink) { self.depth -= 1; self.receiver.array_close(span, error); } fn simple_key(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.simple_key(span, encoding, error); } fn key_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_sep(span, error); } fn key_val_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.key_val_sep(span, error); } fn scalar(&mut self, span: Span, encoding: Option, error: &mut dyn ErrorSink) { self.receiver.scalar(span, encoding, error); } fn value_sep(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.value_sep(span, error); } fn whitespace(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.whitespace(span, error); } fn comment(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.comment(span, error); } fn newline(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.newline(span, error); } fn error(&mut self, span: Span, error: &mut dyn ErrorSink) { self.receiver.error(span, error); } } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct Event { kind: EventKind, encoding: Option, span: Span, } impl Event { pub fn new_unchecked(kind: EventKind, encoding: Option, span: Span) -> Self { Self { kind, encoding, span, } } #[inline(always)] pub fn kind(&self) -> EventKind { self.kind } #[inline(always)] pub fn encoding(&self) -> Option { self.encoding } #[inline(always)] pub fn span(&self) -> Span { self.span } } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub enum EventKind { StdTableOpen, StdTableClose, ArrayTableOpen, ArrayTableClose, InlineTableOpen, InlineTableClose, ArrayOpen, ArrayClose, SimpleKey, KeySep, KeyValSep, Scalar, ValueSep, Whitespace, Comment, Newline, Error, } impl EventKind { pub const fn description(&self) -> &'static str { match self { Self::StdTableOpen => "std-table open", Self::StdTableClose => "std-table close", Self::ArrayTableOpen => 
"array-table open", Self::ArrayTableClose => "array-table close", Self::InlineTableOpen => "inline-table open", Self::InlineTableClose => "inline-table close", Self::ArrayOpen => "array open", Self::ArrayClose => "array close", Self::SimpleKey => "key", Self::KeySep => "key separator", Self::KeyValSep => "key-value separator", Self::Scalar => "value", Self::ValueSep => "value separator", Self::Whitespace => "whitespace", Self::Comment => "comment", Self::Newline => "newline", Self::Error => "error", } } } toml_parser-1.0.6+spec-1.1.0/src/parser/mod.rs000064400000000000000000000006701046102023000167530ustar 00000000000000//! A TOML push [parser][parse_document] //! //! This takes TOML [tokens][crate::lexer::Token] and [emits][EventReceiver] [events][Event]. mod document; mod event; pub use document::parse_document; pub use document::parse_key; pub use document::parse_simple_key; pub use document::parse_value; pub use event::Event; pub use event::EventKind; pub use event::EventReceiver; pub use event::RecursionGuard; pub use event::ValidateWhitespace; toml_parser-1.0.6+spec-1.1.0/src/source.rs000064400000000000000000000303401046102023000161750ustar 00000000000000use crate::decoder::Encoding; use crate::decoder::StringBuilder; use crate::lexer::Lexer; use crate::ErrorSink; use crate::Expected; /// Data encoded as TOML #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct Source<'i> { input: &'i str, } impl<'i> Source<'i> { pub fn new(input: &'i str) -> Self { Self { input } } /// Start lexing the TOML encoded data pub fn lex(&self) -> Lexer<'i> { Lexer::new(self.input) } /// Access the TOML encoded `&str` pub fn input(&self) -> &'i str { self.input } /// Return a subslice of the input pub fn get(&self, span: impl SourceIndex) -> Option> { span.get(self) } /// Return an unchecked subslice of the input /// /// ## Safety /// /// Callers of this function are responsible that these preconditions are satisfied: /// - The starting index must not exceed the ending index; 
/// - Indexes must be within bounds of the original slice; /// - Indexes must lie on UTF-8 sequence boundaries. /// /// Or one of: /// - `span` came from [`Source::lex`] /// /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type. #[cfg(feature = "unsafe")] pub unsafe fn get_unchecked(&self, span: impl SourceIndex) -> Raw<'i> { // SAFETY: Same safety guarantees are required unsafe { span.get_unchecked(self) } } /// Return a subslice of the input fn get_raw_str(&self, span: Span) -> Option<&'i str> { let index = span.start()..span.end(); self.input.get(index) } /// Return an unchecked subslice of the input /// /// ## Safety /// /// Callers of this function are responsible that these preconditions are satisfied: /// - The starting index must not exceed the ending index; /// - Indexes must be within bounds of the original slice; /// - Indexes must lie on UTF-8 sequence boundaries. /// /// Or one of: /// - `span` came from [`Source::lex`] /// /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type. 
#[cfg(feature = "unsafe")] unsafe fn get_raw_str_unchecked(&self, span: Span) -> &'i str { let index = span.start()..span.end(); // SAFETY: Same safety guarantees are required unsafe { self.input.get_unchecked(index) } } } /// A slice of [`Source`] #[derive(Copy, Clone, Debug)] pub struct Raw<'i> { raw: &'i str, encoding: Option, span: Span, } impl<'i> Raw<'i> { pub fn new_unchecked(raw: &'i str, encoding: Option, span: Span) -> Self { Self { raw, encoding, span, } } pub fn decode_key(&self, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink) { let mut error = |err: crate::ParseError| { error.report_error(err.rebase_spans(self.span.start)); }; match self.encoding { Some(Encoding::LiteralString) => { crate::decoder::string::decode_literal_string(*self, output, &mut error); } Some(Encoding::BasicString) => { crate::decoder::string::decode_basic_string(*self, output, &mut error); } Some(Encoding::MlLiteralString) => { error.report_error( crate::ParseError::new("keys cannot be multi-line literal strings") .with_expected(&[ Expected::Description("basic string"), Expected::Description("literal string"), ]) .with_unexpected(Span::new_unchecked(0, self.len())), ); crate::decoder::string::decode_ml_literal_string(*self, output, &mut error); } Some(Encoding::MlBasicString) => { error.report_error( crate::ParseError::new("keys cannot be multi-line basic strings") .with_expected(&[ Expected::Description("basic string"), Expected::Description("literal string"), ]) .with_unexpected(Span::new_unchecked(0, self.len())), ); crate::decoder::string::decode_ml_basic_string(*self, output, &mut error); } None => crate::decoder::string::decode_unquoted_key(*self, output, &mut error), } } #[must_use] pub fn decode_scalar( &self, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) -> crate::decoder::scalar::ScalarKind { let mut error = |err: crate::ParseError| { error.report_error(err.rebase_spans(self.span.start)); }; match self.encoding { 
Some(Encoding::LiteralString) => { crate::decoder::string::decode_literal_string(*self, output, &mut error); crate::decoder::scalar::ScalarKind::String } Some(Encoding::BasicString) => { crate::decoder::string::decode_basic_string(*self, output, &mut error); crate::decoder::scalar::ScalarKind::String } Some(Encoding::MlLiteralString) => { crate::decoder::string::decode_ml_literal_string(*self, output, &mut error); crate::decoder::scalar::ScalarKind::String } Some(Encoding::MlBasicString) => { crate::decoder::string::decode_ml_basic_string(*self, output, &mut error); crate::decoder::scalar::ScalarKind::String } None => crate::decoder::scalar::decode_unquoted_scalar(*self, output, &mut error), } } pub fn decode_whitespace(&self, _error: &mut dyn ErrorSink) { // whitespace is always valid } pub fn decode_comment(&self, error: &mut dyn ErrorSink) { let mut error = |err: crate::ParseError| { error.report_error(err.rebase_spans(self.span.start)); }; crate::decoder::ws::decode_comment(*self, &mut error); } pub fn decode_newline(&self, error: &mut dyn ErrorSink) { let mut error = |err: crate::ParseError| { error.report_error(err.rebase_spans(self.span.start)); }; crate::decoder::ws::decode_newline(*self, &mut error); } pub fn as_str(&self) -> &'i str { self.raw } pub fn as_bytes(&self) -> &'i [u8] { self.raw.as_bytes() } pub fn len(&self) -> usize { self.raw.len() } pub fn is_empty(&self) -> bool { self.raw.is_empty() } } /// Location within the [`Source`] #[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Span { start: usize, end: usize, } impl Span { pub fn new_unchecked(start: usize, end: usize) -> Self { Self { start, end } } pub fn is_empty(&self) -> bool { self.end <= self.start } pub fn len(&self) -> usize { self.end - self.start } pub fn start(&self) -> usize { self.start } pub fn end(&self) -> usize { self.end } pub fn before(&self) -> Self { Self::new_unchecked(self.start, self.start) } pub fn after(&self) -> Self { 
Self::new_unchecked(self.end, self.end) } /// Extend this `Raw` to the end of `after` #[must_use] pub fn append(&self, after: Self) -> Self { Self::new_unchecked(self.start, after.end) } } impl core::fmt::Debug for Span { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { (self.start..self.end).fmt(f) } } impl core::ops::Add for Span { type Output = Self; fn add(self, offset: usize) -> Self::Output { Self::Output { start: self.start + offset, end: self.end + offset, } } } impl core::ops::Add for usize { type Output = Span; fn add(self, span: Span) -> Self::Output { Self::Output { start: span.start + self, end: span.end + self, } } } impl core::ops::AddAssign for Span { fn add_assign(&mut self, rhs: usize) { self.start += rhs; self.end += rhs; } } /// A helper trait used for indexing operations on [`Source`] pub trait SourceIndex: sealed::Sealed { /// Return a subslice of the input fn get<'i>(self, source: &Source<'i>) -> Option>; /// Return an unchecked subslice of the input /// /// ## Safety /// /// Callers of this function are responsible that these preconditions are satisfied: /// - The starting index must not exceed the ending index; /// - Indexes must be within bounds of the original slice; /// - Indexes must lie on UTF-8 sequence boundaries. /// /// Or one of: /// - `span` came from [`Source::lex`] /// /// Failing any of those, the returned string slice may reference invalid memory or violate the invariants communicated by `str` type. 
#[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i>; } impl SourceIndex for Span { fn get<'i>(self, source: &Source<'i>) -> Option> { (&self).get(source) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { // SAFETY: Same safety guarantees are required unsafe { (&self).get_unchecked(source) } } } impl SourceIndex for &Span { fn get<'i>(self, source: &Source<'i>) -> Option> { let encoding = None; source .get_raw_str(*self) .map(|s| Raw::new_unchecked(s, encoding, *self)) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { let encoding = None; // SAFETY: Same safety guarantees are required let raw = unsafe { source.get_raw_str_unchecked(*self) }; Raw::new_unchecked(raw, encoding, *self) } } impl SourceIndex for crate::lexer::Token { fn get<'i>(self, source: &Source<'i>) -> Option> { (&self).get(source) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { // SAFETY: Same safety guarantees are required unsafe { (&self).get_unchecked(source) } } } impl SourceIndex for &crate::lexer::Token { fn get<'i>(self, source: &Source<'i>) -> Option> { let encoding = self.kind().encoding(); source .get_raw_str(self.span()) .map(|s| Raw::new_unchecked(s, encoding, self.span())) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { let encoding = self.kind().encoding(); // SAFETY: Same safety guarantees are required let raw = unsafe { source.get_raw_str_unchecked(self.span()) }; Raw::new_unchecked(raw, encoding, self.span()) } } impl SourceIndex for crate::parser::Event { fn get<'i>(self, source: &Source<'i>) -> Option> { (&self).get(source) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { // SAFETY: Same safety guarantees are required unsafe { (&self).get_unchecked(source) } } } impl SourceIndex for &crate::parser::Event 
{ fn get<'i>(self, source: &Source<'i>) -> Option> { let encoding = self.encoding(); source .get_raw_str(self.span()) .map(|s| Raw::new_unchecked(s, encoding, self.span())) } #[cfg(feature = "unsafe")] unsafe fn get_unchecked<'i>(self, source: &Source<'i>) -> Raw<'i> { let encoding = self.encoding(); // SAFETY: Same safety guarantees are required let raw = unsafe { source.get_raw_str_unchecked(self.span()) }; Raw::new_unchecked(raw, encoding, self.span()) } } mod sealed { pub trait Sealed {} impl Sealed for crate::Span {} impl Sealed for &crate::Span {} impl Sealed for crate::lexer::Token {} impl Sealed for &crate::lexer::Token {} impl Sealed for crate::parser::Event {} impl Sealed for &crate::parser::Event {} }