arf-strings-0.7.3/.cargo_vcs_info.json0000644000000001360000000000100133020ustar { "git": { "sha1": "fc670bdaf91780e090f3ab7b01d212be8b21c98e" }, "path_in_vcs": "" }arf-strings-0.7.3/.gitignore000064400000000000000000000000221046102023000140540ustar 00000000000000target Cargo.lock arf-strings-0.7.3/.rustfmt.toml000064400000000000000000000001051046102023000145450ustar 00000000000000# This file tells tools we use rustfmt. We use the default settings. arf-strings-0.7.3/COPYRIGHT000064400000000000000000000015511046102023000133670ustar 00000000000000Short version for non-lawyers: `arf-strings` is triple-licensed under Apache 2.0 with the LLVM Exception, Apache 2.0, and MIT terms. Longer version: Copyrights in the `arf-strings` project are retained by their contributors. No copyright assignment is required to contribute to the `arf-strings` project. Some files include code derived from Rust's `libstd`; see the comments in the code for details. Except as otherwise noted (below and/or in individual files), `arf-strings` is licensed under: - the Apache License, Version 2.0, with the LLVM Exception or - the Apache License, Version 2.0 or , - or the MIT license or , at your option. arf-strings-0.7.3/Cargo.lock0000644000000067450000000000100112710ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 4 [[package]] name = "arf-strings" version = "0.7.3" dependencies = [ "rustix", ] [[package]] name = "bitflags" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "errno" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", "windows-sys", ] [[package]] name = "libc" version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" [[package]] name = "linux-raw-sys" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" [[package]] name = "rustix" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7178faa4b75a30e269c71e61c353ce2748cf3d76f0c44c393f4e60abf49b825" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" arf-strings-0.7.3/Cargo.toml0000644000000020640000000000100113020ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). 
# See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "arf-strings" version = "0.7.3" authors = ["Dan Gohman "] build = false exclude = ["/.github"] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Encoding and decoding for ARF strings" documentation = "https://docs.rs/arf-strings" readme = "README.md" license = "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT" repository = "https://github.com/bytecodealliance/arf-strings" [lib] name = "arf_strings" path = "src/lib.rs" [target."cfg(not(windows))".dependencies.rustix] version = "1.0.0" arf-strings-0.7.3/Cargo.toml.orig000064400000000000000000000006521046102023000147640ustar 00000000000000[package] name = "arf-strings" version = "0.7.3" authors = ["Dan Gohman "] edition = "2021" description = "Encoding and decoding for ARF strings" license = "Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT" documentation = "https://docs.rs/arf-strings" repository = "https://github.com/bytecodealliance/arf-strings" exclude = ["/.github"] [target.'cfg(not(windows))'.dependencies] rustix = "1.0.0" arf-strings-0.7.3/LICENSE-APACHE000064400000000000000000000251371046102023000140260ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. arf-strings-0.7.3/LICENSE-Apache-2.0_WITH_LLVM-exception000064400000000000000000000277231046102023000202470ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --- LLVM Exceptions to the Apache 2.0 License ---- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into an Object form of such source code, you may redistribute such embedded portions in such Object form without complying with the conditions of Sections 4(a), 4(b) and 4(d) of the License. In addition, if you combine or link compiled forms of this Software with software that is licensed under the GPLv2 ("Combined Software") and if a court of competent jurisdiction determines that the patent provision (Section 3), the indemnity provision (Section 9) or other Section of the License conflicts with the conditions of the GPLv2, you may retroactively and prospectively choose to deem waived or otherwise exclude such Section(s) of the License, but only in their entirety and only with respect to the Combined Software. 
arf-strings-0.7.3/LICENSE-MIT000064400000000000000000000017771046102023000135420ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. arf-strings-0.7.3/ORG_CODE_OF_CONDUCT.md000064400000000000000000000160721046102023000154060ustar 00000000000000# Bytecode Alliance Organizational Code of Conduct (OCoC) *Note*: this Code of Conduct pertains to organizations' behavior. Please also see the [Individual Code of Conduct](CODE_OF_CONDUCT.md). ## Preamble The Bytecode Alliance (BA) welcomes involvement from organizations, including commercial organizations. This document is an *organizational* code of conduct, intended particularly to provide guidance to commercial organizations. It is distinct from the [Individual Code of Conduct (ICoC)](CODE_OF_CONDUCT.md), and does not replace the ICoC. This OCoC applies to any group of people acting in concert as a BA member or as a participant in BA activities, whether or not that group is formally incorporated in some jurisdiction. 
The code of conduct described below is not a set of rigid rules, and we did not write it to encompass every conceivable scenario that might arise. For example, it is theoretically possible there would be times when asserting patents is in the best interest of the BA community as a whole. In such instances, consult with the BA, strive for consensus, and interpret these rules with an intent that is generous to the community the BA serves. While we may revise these guidelines from time to time based on real-world experience, overall they are based on a simple principle: *Bytecode Alliance members should observe the distinction between public community functions and private functions — especially commercial ones — and should ensure that the latter support, or at least do not harm, the former.* ## Guidelines * **Do not cause confusion about Wasm standards or interoperability.** Having an interoperable WebAssembly core is a high priority for the BA, and members should strive to preserve that core. It is fine to develop additional non-standard features or APIs, but they should always be clearly distinguished from the core interoperable Wasm. Treat the WebAssembly name and any BA-associated names with respect, and follow BA trademark and branding guidelines. If you distribute a customized version of software originally produced by the BA, or if you build a product or service using BA-derived software, use names that clearly distinguish your work from the original. (You should still provide proper attribution to the original, of course, wherever such attribution would normally be given.) Further, do not use the WebAssembly name or BA-associated names in other public namespaces in ways that could cause confusion, e.g., in company names, names of commercial service offerings, domain names, publicly-visible social media accounts or online service accounts, etc. 
It may sometimes be reasonable, however, to register such a name in a new namespace and then immediately donate control of that account to the BA, because that would help the project maintain its identity. For further guidance, see the BA Trademark and Branding Policy [TODO: create policy, then insert link]. * **Do not restrict contributors.** If your company requires employees or contractors to sign non-compete agreements, those agreements must not prevent people from participating in the BA or contributing to related projects. This does not mean that all non-compete agreements are incompatible with this code of conduct. For example, a company may restrict an employee's ability to solicit the company's customers. However, an agreement must not block any form of technical or social participation in BA activities, including but not limited to the implementation of particular features. The accumulation of experience and expertise in individual persons, who are ultimately free to direct their energy and attention as they decide, is one of the most important drivers of progress in open source projects. A company that limits this freedom may hinder the success of the BA's efforts. * **Do not use patents as offensive weapons.** If any BA participant prevents the adoption or development of BA technologies by asserting its patents, that undermines the purpose of the coalition. The collaboration fostered by the BA cannot include members who act to undermine its work. * **Practice responsible disclosure** for security vulnerabilities. Use designated, non-public reporting channels to disclose technical vulnerabilities, and give the project a reasonable period to respond, remediate, and patch. [TODO: optionally include the security vulnerability reporting URL here.] Vulnerability reporters may patch their company's own offerings, as long as that patching does not significantly delay the reporting of the vulnerability. 
Vulnerability information should never be used for unilateral commercial advantage. Vendors may legitimately compete on the speed and reliability with which they deploy security fixes, but withholding vulnerability information damages everyone in the long run by risking harm to the BA project's reputation and to the security of all users. * **Respect the letter and spirit of open source practice.** While there is not space to list here all possible aspects of standard open source practice, some examples will help show what we mean: * Abide by all applicable open source license terms. Do not engage in copyright violation or misattribution of any kind. * Do not claim others' ideas or designs as your own. * When others engage in publicly visible work (e.g., an upcoming demo that is coordinated in a public issue tracker), do not unilaterally announce early releases or early demonstrations of that work ahead of their schedule in order to secure private advantage (such as marketplace advantage) for yourself. The BA reserves the right to determine what constitutes good open source practices and to take action as it deems appropriate to encourage, and if necessary enforce, such practices. ## Enforcement Instances of organizational behavior in violation of the OCoC may be reported by contacting the Bytecode Alliance CoC team at [report@bytecodealliance.org](mailto:report@bytecodealliance.org). The CoC team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The CoC team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. When the BA deems an organization in violation of this OCoC, the BA will, at its sole discretion, determine what action to take. 
The BA will decide what type, degree, and duration of corrective action is needed, if any, before a violating organization can be considered for membership (if it was not already a member) or can have its membership reinstated (if it was a member and the BA canceled its membership due to the violation). In practice, the BA's first approach will be to start a conversation, with punitive enforcement used only as a last resort. Violations often turn out to be unintentional and swiftly correctable with all parties acting in good faith. arf-strings-0.7.3/README.md000064400000000000000000000065721046102023000133630ustar 00000000000000# ARF strings ARF is the Alternative Representation for Filenames, an encoding for representing NUL-terminated non-UTF-8 strings as valid (and non-NUL-terminated) [UTF-8] strings. It's intended for use in environments that need a way to represent [POSIX-compatible] and Windows-compatible path names within UTF-8 string types. This is an experiment, and the Windows encoding scheme is particularly experimental. [UTF-8]: https://en.wikipedia.org/wiki/UTF-8 [POSIX-compatible]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271 ## Description ARF strings have the following form: ``` arf-string ::= U+FEFF lossy-portion U+0000 NUL-escaped-portion ``` `U+FEFF` is the Byte Order Mark (BOM) code point. The `lossy-portion` consists of the original string data (excluding the terminating NUL) with any unencodable bytes replaced by `U+FFFD`, the Unicode replacement character. `U+0000` is the NULL (NUL) code point. The `NUL-escaped-portion` of an ARF string consists of the original string data (again, excluding the terminating NUL) with any unencodable bytes replaced by `U+0000` followed by: - On POSIX-ish platforms, the invalid byte with the most significant bit set to 0. - On Windows, a Unicode scalar value between `U+0` and `U+7FF`, representing the offset in the surrogate codepoint space (`U+D800` through `U+DFFF`). 
## Example The ARF encoding of `"foo\xffbar"` on POSIX-ish platforms is `"\xef\xbb\xbffoo\xef\xbf\xbdbar\x00foo\x00\x7fbar"`: - `"\xef\xbb\xbf"` is the UTF-8 encoding of `U+FEFF`. - `"foo\xef\xbf\xbdbar"` is the string with the unencodable byte replaced by the UTF-8 encoding for `U+FFFD`. - `"\x00"` is the UTF-8 encoding for `U+0000`. - `"foo\x00\x7fbar"` is the string with the unencodable byte replaced by a `NUL` followed by the invalid byte with the most significant bit set to 0. ## Rationale Unencodable pathnames are very rare in practice, so this design doesn't attempt to make them efficient. In the worst cases, ARF strings may be several times the size of the corresponding input strings (though they're still O(n)). The redundancy is used to protect against accidental misuse by code not aware of ARF strings. C and POSIX code represent paths as NUL-terminated strings. When given an ARF string, such code will only see the BOM and the lossy portion containing replacement characters. In most cases, attempts to open such a pathname will produce `ENOENT` errors, since the leading UTF-8 BOM and UTF-8 replacement byte sequences are unlikely to appear in non-UTF-8 filenames. Typical application error messages will include the pathname, where the replacement characters will serve as a hint as to the nature of the problem. Consequently, by default, ARF-unaware C and POSIX code will not be able to open unencodable pathnames. For many applications, this limitation is worth the advantage of being able to assume that all pathnames are UTF-8. Applications that wish to work with unencodable pathnames can opt in by being explicitly aware of ARF strings, optionally with the help of the Rust and C libraries in this repository. Another tricky case is code which modifies paths. ARF-unaware code may modify ARF strings without being aware of the ARF encoding.
Such code won't know to update the NUL-escaped portion of the ARF string, and the resulting ARF string will subsequently be detected as invalid, leading to errors rather than surprising behavior. arf-strings-0.7.3/arf-proposal.md000064400000000000000000000046401046102023000150250ustar 00000000000000# Non-Unicode filenames, command-line args, and environment variables ## Status This document is not yet an official WASI proposal. It may be in the future, but for now it's just an experiment. ## Background POSIX-ish platforms don't provide a way to reliably determine the encoding for filenames, command-line arguments, and environment variables. And on Windows, filenames, command-line arguments, and environment variables aren't guaranteed to be well-formed UTF-16. However, applications often need to know the encoding to correctly display, compare, or serialize these names. POSIX provides a flock of locale environment variables, but relying on them for filenames assumes that the program is running with the same locale as the user who created the files, which isn't always true. There are also heuristics which can help guess at the encodings of filenames; however, they're not reliable. There isn't a great reason for imposing the resulting complexity on applications, other than POSIX simply predating Unicode and UTF-8. In practice, many applications end up assuming that the inputs are UTF-8 anyway. For WASI, rather than forever insisting that such applications are at fault, it's preferable to define APIs so that the things applications want to do are straightforward. ## Proposal All WASI filenames, command-line arguments, and environment variables are valid UTF-8 strings, using the same definition as the [wasm core spec](https://webassembly.github.io/spec/core/binary/values.html#binary-utf8).
When the host environment needs to pass in strings which aren't in a known, well-formed Unicode encoding, implementations may do any of the following: - Use the host-specific mechanisms to determine the encoding and transparently transcode the strings into UTF-8, - Fail with `EILSEQ`, or - Translate strings into *ARF strings*. Similarly, when Wasm code passes an *ARF string* into a WASI API, implementations may either: - Fail with `EILSEQ`, or - Translate *ARF strings* into host-specific strings. ## ARF strings ARF is the Alternative Representation for Filenames, an encoding for representing NUL-terminated strings of potentially non-UTF-8 or ill-formed UTF-16 data, as can occur within filesystem paths, as non-NUL-terminated and valid [UTF-8] strings. [UTF-8]: https://en.wikipedia.org/wiki/UTF-8 For a description of ARF strings, see [this page](https://github.com/bytecodealliance/arf-strings/blob/main/README.md). arf-strings-0.7.3/c/arf.c000064400000000000000000000201251046102023000132300ustar 00000000000000//! ARF library for converting to and from ARFs. #include "arf.h" #include #include #include #include // UTF-8 encoding for U+FEFF, which marks the beginning of an ARF string. static const uint8_t utf8_bom[] = { 0xef, 0xbb, 0xbf }; // UTF-8 encoding for U+FFFD, which is used in the lossy portion of an ARF // string to replace invalid bytes. static const uint8_t utf8_replacement[] = { 0xef, 0xbf, 0xbd }; // The number of bytes in a byte escape sequence. static const size_t sizeof_escaped_byte = 2; // Return a pointer to the first invalid byte, or a pointer to one past the end // if the entire string is valid UTF-8.
static const uint8_t *find_invalid_utf8(const uint8_t *ptr, size_t len) { const uint8_t *end = ptr + len; while (ptr != end) { if (ptr[0] < 0x80) { ptr += 1; } else if ((ptr[0] & 0xe0) == 0xc0) { if ((end - ptr) < 2 || (ptr[0] & 0xfe) == 0xc0 || (ptr[1] & 0xc0) != 0x80) { break; } ptr += 2; } else if ((ptr[0] & 0xf0) == 0xe0) { if ((end - ptr) < 3 || (ptr[0] == 0xe0 && (ptr[1] & 0xe0) == 0x80) || (ptr[0] == 0xed && (ptr[1] & 0xe0) == 0xa0) || (ptr[0] == 0xef && ptr[1] == 0xbf && (ptr[2] & 0xfe) == 0xbe) || (ptr[1] & 0xc0) != 0x80 || (ptr[2] & 0xc0) != 0x80) { break; } ptr += 3; } else if ((ptr[0] & 0xf8) == 0xf0) { if ((end - ptr) < 4 || ptr[0] > 0xf4 || (ptr[0] == 0xf0 && (ptr[1] & 0xf0) == 0x80) || (ptr[0] == 0xf4 && ptr[1] > 0x8f) || (ptr[1] & 0xc0) != 0x80 || (ptr[2] & 0xc0) != 0x80 || (ptr[3] & 0xc0) != 0x80) { break; } ptr += 4; } else { break; } } return ptr; } bool arf_is_valid_c_str(const char *c_str) { // Check that the C-string is all valid UTF-8. size_t c_str_len = strlen(c_str); return find_invalid_utf8((const uint8_t *)c_str, c_str_len) == (const uint8_t *)c_str + c_str_len; } bool arf_has_arf_magic(const uint8_t *ptr, size_t len) { // ARF strings start with a UTF-8 BOM. return len >= sizeof(utf8_bom) && memcmp(ptr, utf8_bom, sizeof(utf8_bom)) == 0; } bool arf_is_valid_arf(const uint8_t *ptr, size_t len) { // ARF strings begin and end with fixed bytes. if (!arf_has_arf_magic(ptr, len)) { return false; } ptr += sizeof(utf8_bom); len -= sizeof(utf8_bom); // ARF strings are valid UTF-8. if (find_invalid_utf8(ptr, len) != ptr + len) { return false; } // ARF strings contain a NUL byte separating the replacement portion from // the NUL-escaped portion. size_t first_len = strlen((const char *)ptr); if (first_len >= len) { return false; } // Check that the lossy portion translates to the NUL-escaped portion. 
bool any_invalid_bytes = false; size_t second_begin = first_len + 1; size_t i = 0, j = second_begin; while (i != first_len) { if (ptr[j] == 0) { // Check the NUL-escaped encoding. if (len - j < sizeof_escaped_byte || (int8_t)ptr[j + 1] < 0) { return false; } // Check that the escaped string contains a replacement character. if (first_len - i < sizeof(utf8_replacement) || memcmp(ptr + i, utf8_replacement, sizeof(utf8_replacement)) != 0) { return false; } i += sizeof(utf8_replacement); j += sizeof_escaped_byte; any_invalid_bytes = true; } else { // Check that the bytes match. if (ptr[i] != ptr[j]) { return false; } i += 1; j += 1; } } // If there weren't any invalid bytes, we shouldn't have an ARF string. if (!any_invalid_bytes) { return false; } // Arf! return true; } /// Like `arf_sizeof_c_str_arf`, but has `strlen(c_str)` passed in so that it /// doesn't need to be recomputed. static size_t arf_sizeof_c_str_arf_impl(const char *c_str, size_t c_str_len) { // Start with the length of the fixed-length parts of an ARF string. size_t len = sizeof(utf8_bom) + 1; // Add the size of both the lossy portion and the NUL-escaped portion. for (size_t i = 0; i != c_str_len; ) { const uint8_t *found = find_invalid_utf8((const uint8_t *)c_str + i, c_str_len - i); // Copy in valid UTF-8 bytes. size_t valid_len = (size_t)(found - ((const uint8_t *)c_str + i)); if (__builtin_add_overflow(len, valid_len * 2, &len)) return SIZE_MAX; i += valid_len; if (i == c_str_len) break; // Handle an invalid byte. 
size_t more = sizeof(utf8_replacement) + sizeof_escaped_byte; if (__builtin_add_overflow(len, more, &len)) return SIZE_MAX; i += 1; } return len; } bool arf_categorize_c_str(const char *c_str, size_t *restrict len) { size_t c_str_len = strlen(c_str); if (__builtin_expect(find_invalid_utf8((const uint8_t *)c_str, c_str_len) == (const uint8_t *)c_str + c_str_len, true)) { *len = c_str_len; return true; } *len = arf_sizeof_c_str_arf_impl(c_str, c_str_len); return false; } size_t arf_sizeof_c_str_arf(const char *c_str) { return arf_sizeof_c_str_arf_impl(c_str, strlen(c_str)); } void arf_c_str_arf(const char *c_str, uint8_t *ptr) { size_t c_str_len = strlen(c_str); memcpy(ptr, utf8_bom, sizeof(utf8_bom)); ptr += sizeof(utf8_bom); // Encode the replacement-encoded portion. const uint8_t *in = (const uint8_t*)c_str; for (size_t len = c_str_len; len != 0; ) { const uint8_t *invalid = find_invalid_utf8(in, len); // Copy in valid UTF-8 bytes. size_t valid_len = (size_t)(invalid - in); memcpy(ptr, in, valid_len); ptr += valid_len; in += valid_len; len -= valid_len; if (len == 0) break; // Handle an invalid byte. memcpy(ptr, utf8_replacement, sizeof(utf8_replacement)); ptr += sizeof(utf8_replacement); in += 1; len -= 1; } *ptr++ = '\0'; // Encode the full-encoded portion. in = (const uint8_t*)c_str; for (size_t len = c_str_len; len != 0; ) { const uint8_t *invalid = find_invalid_utf8(in, len); // Copy in valid UTF-8 bytes. size_t valid_len = (size_t)(invalid - in); memcpy(ptr, in, valid_len); ptr += valid_len; in += valid_len; len -= valid_len; if (len == 0) break; // Emit a NUL-escaped byte. *ptr++ = '\0'; *ptr++ = *in & INT8_MAX; in += 1; len -= 1; } } size_t arf_sizeof_arf_c_str(const uint8_t *ptr, size_t len) { assert(arf_is_valid_arf(ptr, len)); const uint8_t *end = ptr + len; // Examine the NUL-escaped portion, which is the non-lossy portion. 
ptr += sizeof(utf8_bom); ptr += strlen((const char *)ptr) + 1; size_t c_str_len = 0; while (ptr != end) { if (*ptr++ == '\0') ptr++; c_str_len += 1; } // Add one for the terminating NUL. c_str_len += 1; return c_str_len; } void arf_arf_c_str(const uint8_t *ptr, size_t len, char *__restrict__ c_str) { assert(arf_is_valid_arf(ptr, len)); const uint8_t *end = ptr + len; // Examine the NUL-escaped portion, which is the non-lossy portion. ptr += sizeof(utf8_bom); ptr += strlen((const char *)ptr) + 1; // Copy the string data, inverting any escaped bytes. while (ptr != end) { uint8_t b = *ptr++; if (b == '\0') b = *ptr++ | (uint8_t)INT8_MIN; *c_str++ = (char)b; } // Append the terminating NUL. *c_str = '\0'; } arf-strings-0.7.3/c/arf.h000064400000000000000000000036361046102023000132450ustar 00000000000000#ifndef ARF_H #define ARF_H #include #include #ifndef __cplusplus #include #endif #ifdef __cplusplus extern "C" { #endif /// Test whether the provided C-string is already valid UTF-8. bool arf_is_valid_c_str(const char *c_str) __attribute__((__pure__, __nonnull__(1), __nothrow__)); /// Quickly test whether the provided buffer looks like an ARF string. bool arf_has_arf_magic(const uint8_t *ptr, size_t len) __attribute__((__pure__, __nothrow__)); /// Test whether the provided buffer is a valid ARF string. bool arf_is_valid_arf(const uint8_t *ptr, size_t len) __attribute__((__pure__, __nothrow__)); /// Test whether the provided C-string is already valid UTF-8. If it is, store /// the length in *len and return true. If it isn't, store the length of the ARF /// string needed to represent it in *len and return false. bool arf_categorize_c_str(const char *c_str, size_t *__restrict__ len) __attribute__((__nonnull__(1, 2), __nothrow__)); /// Return the length of an ARF string for the given C-string. Returns /// `SIZE_MAX` on overflow. 
size_t arf_sizeof_c_str_arf(const char *c_str) __attribute__((__pure__, __nonnull__(1), __nothrow__)); /// Write the ARF string for the given C-string into the provided buffer. Use /// `arf_sizeof_c_str_arf` to determine the required buffer size. void arf_c_str_arf(const char *c_str, uint8_t *__restrict__ ptr) __attribute__((__nonnull__(1, 2), __nothrow__)); /// Return the length of a C-string for the given ARF. size_t arf_sizeof_arf_c_str(const uint8_t *ptr, size_t len) __attribute__((__pure__, __nonnull__(1), __nothrow__)); /// Write the C-string for the given ARF into the provided buffer. Use /// `arf_sizeof_arf_c_str` to determine the required buffer size. void arf_arf_c_str(const uint8_t *ptr, size_t len, char *__restrict__ c_str) __attribute__((__nonnull__(1, 3), __nothrow__)); #ifdef __cplusplus } #endif #endif arf-strings-0.7.3/c/test.c000064400000000000000000000340031046102023000134370ustar 00000000000000#undef NDEBUG #include "arf.h" #include #include #include #define UTF8_BOM "\xef\xbb\xbf" #define UTF8_REPLACEMENT "\xef\xbf\xbd" #define ptr_len(s) ((const uint8_t *)(s)), (sizeof(s) - 1) int main(void) { assert(arf_is_valid_c_str("")); assert(arf_is_valid_c_str("foo")); assert(arf_is_valid_c_str("%")); assert(arf_is_valid_c_str("%ff")); assert(arf_is_valid_c_str("🐰�")); assert(arf_is_valid_c_str("\2/yes/\3")); assert(arf_is_valid_c_str("\xc2\x80")); assert(arf_is_valid_c_str(UTF8_BOM)); assert(arf_is_valid_c_str(UTF8_REPLACEMENT)); assert(arf_is_valid_c_str("\xd0\x80")); assert(!arf_is_valid_c_str("\xc0")); assert(!arf_is_valid_c_str("\xf5")); assert(!arf_is_valid_c_str("\xf0\xd0\x80\x80")); assert(!arf_is_valid_c_str("\xf0\x80\xd0\x80")); assert(!arf_is_valid_c_str("\xf0\x80\x80\xd0")); assert(!arf_is_valid_c_str("\xef\xbf\xbe")); assert(!arf_is_valid_c_str("\xf0\x80")); assert(!arf_is_valid_c_str("\xf0\xf0")); assert(!arf_is_valid_c_str("\xf3\x90")); assert(!arf_is_valid_c_str("\xf4\xf4")); assert(!arf_is_valid_c_str("\xf4\x90")); 
assert(!arf_is_valid_c_str("\xf5\x90")); assert(!arf_is_valid_c_str("\x80")); assert(!arf_is_valid_c_str("\x8f")); assert(!arf_is_valid_c_str("\x90")); assert(!arf_is_valid_c_str("\x9f")); assert(!arf_is_valid_c_str("\xa0")); assert(!arf_is_valid_c_str("\xbf")); assert(!arf_is_valid_c_str("\xc0\x80")); assert(!arf_is_valid_c_str("\xc0\xbf")); assert(!arf_is_valid_c_str("\xc1\x80")); assert(!arf_is_valid_c_str("\xc1\xbf")); assert(!arf_is_valid_c_str("\xc2")); assert(!arf_is_valid_c_str("\xc2\x2e")); assert(!arf_is_valid_c_str("\xc2\x7f")); assert(!arf_is_valid_c_str("\xc2\x80\x80")); assert(!arf_is_valid_c_str("\xc2\xc0")); assert(!arf_is_valid_c_str("\xc2\xfd")); assert(!arf_is_valid_c_str("\xdf")); assert(!arf_is_valid_c_str("\xdf\x7f")); assert(!arf_is_valid_c_str("\xdf\xc0")); assert(!arf_is_valid_c_str("\xdf\xfd")); assert(!arf_is_valid_c_str("\xe0")); assert(!arf_is_valid_c_str("\xe0\x7f\xa0")); assert(!arf_is_valid_c_str("\xe0\x80\x80")); assert(!arf_is_valid_c_str("\xe0\x80\xa0")); assert(!arf_is_valid_c_str("\xe0\x9f\xa0")); assert(!arf_is_valid_c_str("\xe0\x9f\xbf")); assert(!arf_is_valid_c_str("\xe0\xa0")); assert(!arf_is_valid_c_str("\xe0\xa0\x7f")); assert(!arf_is_valid_c_str("\xe0\xa0\xc0")); assert(!arf_is_valid_c_str("\xe0\xa0\xfd")); assert(!arf_is_valid_c_str("\xe0\xc0\xa0")); assert(!arf_is_valid_c_str("\xe0\xfd\xa0")); assert(!arf_is_valid_c_str("\xe1")); assert(!arf_is_valid_c_str("\xe1\x2e")); assert(!arf_is_valid_c_str("\xe1\x7f\x80")); assert(!arf_is_valid_c_str("\xe1\x80")); assert(!arf_is_valid_c_str("\xe1\x80\x2e")); assert(!arf_is_valid_c_str("\xe1\x80\x7f")); assert(!arf_is_valid_c_str("\xe1\x80\x80\x80")); assert(!arf_is_valid_c_str("\xe1\x80\xc0")); assert(!arf_is_valid_c_str("\xe1\x80\xfd")); assert(!arf_is_valid_c_str("\xe1\xc0\x80")); assert(!arf_is_valid_c_str("\xe1\xfd\x80")); assert(!arf_is_valid_c_str("\xec")); assert(!arf_is_valid_c_str("\xec\x7f\x80")); assert(!arf_is_valid_c_str("\xec\x80")); 
assert(!arf_is_valid_c_str("\xec\x80\x7f")); assert(!arf_is_valid_c_str("\xec\x80\xc0")); assert(!arf_is_valid_c_str("\xec\x80\xfd")); assert(!arf_is_valid_c_str("\xec\xc0\x80")); assert(!arf_is_valid_c_str("\xec\xfd\x80")); assert(!arf_is_valid_c_str("\xed")); assert(!arf_is_valid_c_str("\xed\x7f\x80")); assert(!arf_is_valid_c_str("\xed\x80")); assert(!arf_is_valid_c_str("\xed\x80\x7f")); assert(!arf_is_valid_c_str("\xed\x80\xc0")); assert(!arf_is_valid_c_str("\xed\x80\xfd")); assert(!arf_is_valid_c_str("\xed\xa0\x80")); assert(!arf_is_valid_c_str("\xed\xa0\xbf")); assert(!arf_is_valid_c_str("\xed\xbf\x80")); assert(!arf_is_valid_c_str("\xed\xbf\xbf")); assert(!arf_is_valid_c_str("\xed\xc0\x80")); assert(!arf_is_valid_c_str("\xed\xfd\x80")); assert(!arf_is_valid_c_str("\xee")); assert(!arf_is_valid_c_str("\xee\x7f\x80")); assert(!arf_is_valid_c_str("\xee\x80")); assert(!arf_is_valid_c_str("\xee\x80\x7f")); assert(!arf_is_valid_c_str("\xee\x80\xc0")); assert(!arf_is_valid_c_str("\xee\x80\xfd")); assert(!arf_is_valid_c_str("\xee\xc0\x80")); assert(!arf_is_valid_c_str("\xee\xfd\x80")); assert(!arf_is_valid_c_str("\xef")); assert(!arf_is_valid_c_str("\xef\x7f\x80")); assert(!arf_is_valid_c_str("\xef\x80")); assert(!arf_is_valid_c_str("\xef\x80\x7f")); assert(!arf_is_valid_c_str("\xef\x80\xc0")); assert(!arf_is_valid_c_str("\xef\x80\xfd")); assert(!arf_is_valid_c_str("\xef\xc0\x80")); assert(!arf_is_valid_c_str("\xef\xfd\x80")); assert(!arf_is_valid_c_str("\xf0")); assert(!arf_is_valid_c_str("\xf0\x7f\x90\x90")); assert(!arf_is_valid_c_str("\xf0\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf0\x80\x90\x90")); assert(!arf_is_valid_c_str("\xf0\x8f\x90\x90")); assert(!arf_is_valid_c_str("\xf0\x8f\xbf\xbf")); assert(!arf_is_valid_c_str("\xf0\x90")); assert(!arf_is_valid_c_str("\xf0\x90\x7f\x90")); assert(!arf_is_valid_c_str("\xf0\x90\x90")); assert(!arf_is_valid_c_str("\xf0\x90\x90\x7f")); assert(!arf_is_valid_c_str("\xf0\x90\x90\xc0")); 
assert(!arf_is_valid_c_str("\xf0\x90\x90\xfd")); assert(!arf_is_valid_c_str("\xf0\x90\xc0\x90")); assert(!arf_is_valid_c_str("\xf0\x90\xfd\x90")); assert(!arf_is_valid_c_str("\xf0\xc0\x90\x90")); assert(!arf_is_valid_c_str("\xf0\xfd\x90\x90")); assert(!arf_is_valid_c_str("\xf1")); assert(!arf_is_valid_c_str("\xf1\x23")); assert(!arf_is_valid_c_str("\xf1\x7f\x80\x80")); assert(!arf_is_valid_c_str("\xf1\x80")); assert(!arf_is_valid_c_str("\xf1\x80\x23")); assert(!arf_is_valid_c_str("\xf1\x80\x7f\x80")); assert(!arf_is_valid_c_str("\xf1\x80\x80")); assert(!arf_is_valid_c_str("\xf1\x80\x80\x23")); assert(!arf_is_valid_c_str("\xf1\x80\x80\x7f")); assert(!arf_is_valid_c_str("\xf1\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf1\x80\x80\xc0")); assert(!arf_is_valid_c_str("\xf1\x80\x80\xfd")); assert(!arf_is_valid_c_str("\xf1\x80\xc0\x80")); assert(!arf_is_valid_c_str("\xf1\x80\xfd\x80")); assert(!arf_is_valid_c_str("\xf1\xc0\x80\x80")); assert(!arf_is_valid_c_str("\xf1\xfd\x80\x80")); assert(!arf_is_valid_c_str("\xf3")); assert(!arf_is_valid_c_str("\xf3\x7f\x80\x80")); assert(!arf_is_valid_c_str("\xf3\x80")); assert(!arf_is_valid_c_str("\xf3\x80\x7f\x80")); assert(!arf_is_valid_c_str("\xf3\x80\x80")); assert(!arf_is_valid_c_str("\xf3\x80\x80\x7f")); assert(!arf_is_valid_c_str("\xf3\x80\x80\xc0")); assert(!arf_is_valid_c_str("\xf3\x80\x80\xfd")); assert(!arf_is_valid_c_str("\xf3\x80\xc0\x80")); assert(!arf_is_valid_c_str("\xf3\x80\xfd\x80")); assert(!arf_is_valid_c_str("\xf3\xc0\x80\x80")); assert(!arf_is_valid_c_str("\xf3\xfd\x80\x80")); assert(!arf_is_valid_c_str("\xf4")); assert(!arf_is_valid_c_str("\xf4\x7f\x80\x80")); assert(!arf_is_valid_c_str("\xf4\x80")); assert(!arf_is_valid_c_str("\xf4\x80\x7f\x80")); assert(!arf_is_valid_c_str("\xf4\x80\x80")); assert(!arf_is_valid_c_str("\xf4\x80\x80\x7f")); assert(!arf_is_valid_c_str("\xf4\x80\x80\xc0")); assert(!arf_is_valid_c_str("\xf4\x80\x80\xfd")); assert(!arf_is_valid_c_str("\xf4\x80\xc0\x80")); 
assert(!arf_is_valid_c_str("\xf4\x80\xfd\x80")); assert(!arf_is_valid_c_str("\xf4\x90\x80\x80")); assert(!arf_is_valid_c_str("\xf4\xbf\x80\x80")); assert(!arf_is_valid_c_str("\xf4\xc0\x80\x80")); assert(!arf_is_valid_c_str("\xf4\xfd\x80\x80")); assert(!arf_is_valid_c_str("\xf5\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf7\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf7\xbf\xbf\xbf")); assert(!arf_is_valid_c_str("\xf8\x23")); assert(!arf_is_valid_c_str("\xf8\x80\x23")); assert(!arf_is_valid_c_str("\xf8\x80\x80\x23")); assert(!arf_is_valid_c_str("\xf8\x80\x80\x80\x23")); assert(!arf_is_valid_c_str("\xf8\x80\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf8\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf8\x80\x80\x80")); assert(!arf_is_valid_c_str("\xf8\x80\x80")); assert(!arf_is_valid_c_str("\xf8\x80")); assert(!arf_is_valid_c_str("\xf8")); assert(!arf_is_valid_c_str("\xfb\xbf\xbf\xbf\xbf")); assert(!arf_is_valid_c_str("\xfc\x23")); assert(!arf_is_valid_c_str("\xfc\x80\x23")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x23")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80\x23")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80\x80\x23")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80\x80")); assert(!arf_is_valid_c_str("\xfc\x80\x80\x80")); assert(!arf_is_valid_c_str("\xfc\x80\x80")); assert(!arf_is_valid_c_str("\xfc\x80")); assert(!arf_is_valid_c_str("\xfc")); assert(!arf_is_valid_c_str("\xfd\xbf\xbf\xbf\xbf\xbf")); assert(!arf_is_valid_c_str("\xfe")); assert(!arf_is_valid_c_str("\xfe\xff")); assert(!arf_is_valid_c_str("\xff")); assert(!arf_is_valid_c_str("\xff\xfe")); assert(!arf_has_arf_magic(ptr_len(""))); assert(!arf_has_arf_magic(ptr_len("f"))); assert(!arf_has_arf_magic(ptr_len("foo"))); assert(!arf_has_arf_magic(ptr_len(UTF8_REPLACEMENT))); assert(!arf_has_arf_magic(ptr_len(UTF8_REPLACEMENT "foo"))); 
assert(arf_has_arf_magic(ptr_len(UTF8_BOM "foo"))); assert(!arf_is_valid_arf(ptr_len(""))); assert(!arf_is_valid_arf(ptr_len("foo"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "\xc0" "\0" "\xc0"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "\xc0" "\0" "\0\xc0"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "\xc0" "\0" "\0\x40"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo" "\0" "foo"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foox" "\0" "foo%FF"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo\xef\xbb\xbf" "\0" "foo%FF"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo�" "\0" "foo%ff"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo�" "\0" "goo\0\x7F"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo�" "\0" "foo\0"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo�" "\0" "foo\0\x80"))); assert(!arf_is_valid_arf(ptr_len(UTF8_BOM "foo\xef\xbf\xbc" "\0" "foo\0\x7F"))); assert(!arf_is_valid_arf(ptr_len(UTF8_REPLACEMENT "foo�" "\0" UTF8_REPLACEMENT "foo\0\x7F"))); assert(!arf_is_valid_arf(ptr_len(UTF8_REPLACEMENT UTF8_BOM "foo�" "\0" UTF8_REPLACEMENT "foo\0\x7F"))); assert(arf_is_valid_arf(ptr_len(UTF8_BOM "foo\xef\xbf\xbd" "\0" "foo\0\x7F"))); assert(arf_is_valid_arf(ptr_len(UTF8_BOM "foo�" "\0" "foo\0\x7F"))); assert(arf_is_valid_arf(ptr_len(UTF8_BOM "�foo���bar�" "\0" "\0\x7F" "foo\0\x7F\0\x7E\0\x7D" "bar\0\x7C"))); assert(arf_is_valid_arf(ptr_len(UTF8_BOM UTF8_BOM "foo�" "\0" UTF8_BOM "foo\0\x7F"))); assert(arf_is_valid_arf(ptr_len(UTF8_BOM UTF8_REPLACEMENT "foo�" "\0" UTF8_REPLACEMENT "foo\0\x7F"))); size_t len; assert(arf_categorize_c_str("", &len)); assert(len == 0); assert(arf_categorize_c_str("foo", &len)); assert(len == 3); assert(!arf_categorize_c_str("foo\xff", &len)); assert(len == 15); assert(arf_categorize_c_str(UTF8_BOM, &len)); assert(len == sizeof(UTF8_BOM) - 1); assert(arf_categorize_c_str(UTF8_REPLACEMENT, &len)); assert(len == sizeof(UTF8_REPLACEMENT) - 1); 
assert(arf_sizeof_c_str_arf("foo") == 10); assert(arf_sizeof_c_str_arf("\xff") == 9); assert(arf_sizeof_c_str_arf("foo\xff") == 15); assert(arf_sizeof_c_str_arf("foo\xff" "bar\xfe") == 26); assert(arf_sizeof_c_str_arf(UTF8_BOM) == 10); assert(arf_sizeof_c_str_arf(UTF8_REPLACEMENT) == 10); uint8_t buffer[1024]; arf_c_str_arf("foo", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "foo\0foo")) == 0); arf_c_str_arf("\xff", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "�\0" "\0\x7F")) == 0); arf_c_str_arf("foo\xff", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "foo�\0" "foo\0\x7F")) == 0); arf_c_str_arf("foo\xff" "bar\xfe", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "foo�bar�\0" "foo\0\x7F" "bar\0\x7E")) == 0); arf_c_str_arf("foo\xff" "bar\xfe" "end", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "foo�bar�end\0" "foo\0\x7F" "bar\0\x7E" "end")) == 0); arf_c_str_arf(UTF8_BOM "foo\xff" "bar\xfe" "end", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM UTF8_BOM "foo�bar�end\0" UTF8_BOM "foo\0\x7F" "bar\0\x7E" "end")) == 0); arf_c_str_arf(UTF8_REPLACEMENT "foo\xff" "bar\xfe" "end", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM UTF8_REPLACEMENT "foo�bar�end\0" UTF8_REPLACEMENT "foo\0\x7F" "bar\0\x7E" "end")) == 0); arf_c_str_arf("\xe6\x96", buffer); assert(memcmp(buffer, ptr_len(UTF8_BOM "��\0" "\0f" "\0\x16")) == 0); assert(arf_sizeof_arf_c_str(ptr_len(UTF8_BOM "�\0" "\0\x7F")) == 2); assert(arf_sizeof_arf_c_str(ptr_len(UTF8_BOM "foo�\0" "foo\0\x7F")) == 5); assert(arf_sizeof_arf_c_str(ptr_len(UTF8_BOM "foo�bar�\0" "foo\0\x7F" "bar\0\x7E")) == 9); assert(arf_sizeof_arf_c_str(ptr_len(UTF8_BOM "foo�bar�end\0" "foo\0\x7F" "bar\0\x7E" "end")) == 12); arf_arf_c_str(ptr_len(UTF8_BOM "�\0" "\0\x7F"), (char *)buffer); assert(strcmp((const char *)buffer, "\xff") == 0); arf_arf_c_str(ptr_len(UTF8_BOM "foo�\0" "foo\0\x7F"), (char *)buffer); assert(strcmp((const char *)buffer, "foo\xff") == 0); arf_arf_c_str(ptr_len(UTF8_BOM "foo�bar�\0" "foo\0\x7F" "bar\0\x7E"), (char 
*)buffer); assert(strcmp((const char *)buffer, "foo\xff" "bar\xfe") == 0); arf_arf_c_str(ptr_len(UTF8_BOM "foo�bar�end\0" "foo\0\x7F" "bar\0\x7E" "end"), (char *)buffer); assert(strcmp((const char *)buffer, "foo\xff" "bar\xfe" "end") == 0); return 0; } arf-strings-0.7.3/src/lib.rs000064400000000000000000000005011046102023000137710ustar 00000000000000#[cfg(not(windows))] mod rustix; #[cfg(windows)] mod windows; #[cfg(not(windows))] pub use crate::rustix::{ bytes_to_host, host_c_str_to_bytes, host_c_str_to_str, host_os_str_to_bytes, host_os_str_to_str, str_to_host, }; #[cfg(windows)] pub use windows::{bytes_to_host, host_to_bytes, host_to_str, str_to_host}; arf-strings-0.7.3/src/rustix.rs000064400000000000000000000266551046102023000146030ustar 00000000000000use std::borrow::Cow; use std::ffi::{CStr, CString, OsStr}; #[cfg(unix)] use std::os::unix::ffi::OsStrExt; #[cfg(target_os = "wasi")] use std::os::wasi::ffi::OsStrExt; use std::{io, str}; /// Convert a byte sequence which is either plain UTF-8 or an ARF encoding into /// a `CString` ready for use in POSIX-style APIs. pub fn bytes_to_host(bytes: &[u8]) -> io::Result { let s = str::from_utf8(bytes).map_err(|_| encoding_error())?; str_to_host(s) } /// Convert a `&str` which is either plain UTF-8 or an ARF encoding into a /// `CString` ready for use in POSIX-style APIs. pub fn str_to_host(s: &str) -> io::Result { match CString::new(s) { Ok(c_string) => Ok(c_string), Err(e) => from_arf(s, e.nul_position()), } } /// Convert an `&OsStr` produced by POSIX-style APIs into a `Cow` which /// is either plain UTF-8 or an ARF encoding. Returns an error if the input /// string contains NUL bytes. 
pub fn host_os_str_to_str(host: &OsStr) -> io::Result> { if host.as_bytes().contains(&b'\0') { return Err(encoding_error()); } Ok(if let Ok(s) = str::from_utf8(host.as_bytes()) { Cow::Borrowed(s) } else { Cow::Owned(to_arf(host.as_bytes())) }) } /// Convert an `&OsStr` produced by POSIX-style APIs into a `Cow<[u8]>` which /// is either plain UTF-8 or an ARF encoding. Returns an error if the input /// string contains NUL bytes. pub fn host_os_str_to_bytes(host: &OsStr) -> io::Result> { Ok(match host_os_str_to_str(host)? { Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), Cow::Owned(b) => Cow::Owned(b.into_bytes()), }) } /// Convert an `&CStr` produced by POSIX-style APIs into a `Cow` which /// is either plain UTF-8 or an ARF encoding. pub fn host_c_str_to_str(host: &CStr) -> Cow { if let Ok(s) = str::from_utf8(host.to_bytes()) { Cow::Borrowed(s) } else { Cow::Owned(to_arf(host.to_bytes())) } } /// Convert an `&CStr` produced by POSIX-style APIs into a `Cow<[u8]>` which /// is either plain UTF-8 or an ARF encoding. pub fn host_c_str_to_bytes(host: &CStr) -> Cow<[u8]> { let bytes = host_c_str_to_str(host); match bytes { Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), Cow::Owned(b) => Cow::Owned(b.into_bytes()), } } /// Slow path for `str_to_host`. #[cold] fn from_arf(s: &str, nul: usize) -> io::Result { if !s.starts_with('\u{feff}') { return Err(encoding_error()); } let mut lossy = s.bytes().skip('\u{feff}'.len_utf8()); let mut nul_escaped = s.bytes().skip(nul + 1); let mut any_invalid = false; let mut vec = Vec::new(); while let Some(b) = nul_escaped.next() { if b == b'\0' { let more = nul_escaped.next().ok_or_else(encoding_error)?; if (more & 0x80) != 0 { return Err(encoding_error()); } // Test for U+FFFD. 
let l0 = lossy.next().ok_or_else(encoding_error)?; let l1 = lossy.next().ok_or_else(encoding_error)?; let l2 = lossy.next().ok_or_else(encoding_error)?; if [l0, l1, l2] != [0xef, 0xbf, 0xbd] { return Err(encoding_error()); } any_invalid = true; vec.push(more | 0x80); } else { if lossy.next() != Some(b) { return Err(encoding_error()); } vec.push(b); } } if !any_invalid { return Err(encoding_error()); } if lossy.next() != Some(b'\0') { return Err(encoding_error()); } // Validation succeeded. Ok(unsafe { CString::from_vec_unchecked(vec) }) } /// Slow path for `host_to_bytes`. #[cold] fn to_arf(bytes: &[u8]) -> String { let mut data = String::new(); data.push('\u{feff}'); let mut input = bytes; // This loop and `unsafe` follow the example in the documentation: // loop { match std::str::from_utf8(input) { Ok(valid) => { data.push_str(valid); break; } Err(error) => { let (valid, after_valid) = input.split_at(error.valid_up_to()); unsafe { data.push_str(str::from_utf8_unchecked(valid)) } data.push('\u{FFFD}'); if let Some((_, remaining)) = after_valid.split_first() { input = remaining; } else { break; } } } } data.push('\0'); // This loop and `unsafe` follow the example in the documentation // mentioned above. 
let mut input = bytes; loop { match std::str::from_utf8(input) { Ok(valid) => { data.push_str(valid); break; } Err(error) => { let (valid, after_valid) = input.split_at(error.valid_up_to()); unsafe { data.push_str(str::from_utf8_unchecked(valid)) } if let Some((byte, remaining)) = after_valid.split_first() { data.push('\0'); data.push((byte & 0x7f) as char); input = remaining; } else { break; } } } } data } #[cold] fn encoding_error() -> io::Error { ::rustix::io::Errno::ILSEQ.into() } #[test] fn utf8_inputs() { assert_eq!(str_to_host("").unwrap().to_bytes(), b""); assert_eq!(str_to_host("f").unwrap().to_bytes(), b"f"); assert_eq!(str_to_host("foo").unwrap().to_bytes(), b"foo"); assert_eq!( str_to_host("\u{fffd}").unwrap().to_bytes(), "\u{fffd}".as_bytes() ); assert_eq!( str_to_host("\u{fffd}foo").unwrap().to_bytes(), "\u{fffd}foo".as_bytes() ); assert_eq!( str_to_host("\u{feff}foo").unwrap().to_bytes(), "\u{feff}foo".as_bytes() ); } #[test] fn arf_inputs() { assert_eq!( str_to_host("\u{feff}hello\u{fffd}world\0hello\0\x05world") .unwrap() .to_bytes(), b"hello\x85world" ); assert_eq!( str_to_host("\u{feff}hello\u{fffd}\0hello\0\x05") .unwrap() .to_bytes(), b"hello\x85" ); } #[test] fn errors_from_bytes() { assert!(bytes_to_host(b"\xfe").is_err()); assert!(bytes_to_host(b"\xc0\xff").is_err()); } #[test] fn errors_from_str() { assert!(str_to_host("\u{feff}hello world\0hello world").is_err()); assert!(str_to_host("\u{feff}hello world\0\0hello world\0").is_err()); assert!(str_to_host("\u{feff}hello\u{fffd}world\0\0hello\0\x05world\0").is_err()); assert!(str_to_host("\u{fffe}hello\u{fffd}world\0hello\0\x05world").is_err()); assert!(str_to_host("\u{feff}hello\u{fffd}\0hello\0").is_err()); } #[test] fn valid_utf8() { assert_eq!(host_os_str_to_str(OsStr::from_bytes(b"")).unwrap(), ""); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"foo")).unwrap(), "foo" ); // Same thing, now with `CStr`s. 
assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\0").unwrap()), "" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"foo\0").unwrap()), "foo" ); } #[test] fn not_utf8() { assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xfe")).unwrap(), "\u{feff}\u{fffd}\0\0\u{7e}" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xc0\xff")).unwrap(), "\u{feff}\u{fffd}\u{fffd}\0\0\u{40}\0\u{7f}" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xef\xbb\xbf")).unwrap(), "\u{feff}" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xef\xbb\xbf\xfd")).unwrap(), "\u{feff}\u{feff}\u{fffd}\0\u{feff}\0\x7d" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xe2\x98")).unwrap(), "\u{feff}\u{fffd}\u{fffd}\0\0\u{62}\0\u{18}" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xf0\x9f")).unwrap(), "\u{feff}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes(b"\xf0\x9f\x92")).unwrap(), "\u{feff}\u{fffd}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}\0\u{12}" ); // Same thing, now with `CStr`s. 
assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xfe\0").unwrap()), "\u{feff}\u{fffd}\0\0\u{7e}" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xc0\xff\0").unwrap()), "\u{feff}\u{fffd}\u{fffd}\0\0\u{40}\0\u{7f}" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xef\xbb\xbf\0").unwrap()), "\u{feff}" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xef\xbb\xbf\xfd\0").unwrap()), "\u{feff}\u{feff}\u{fffd}\0\u{feff}\0\x7d" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xe2\x98\0").unwrap()), "\u{feff}\u{fffd}\u{fffd}\0\0\u{62}\0\u{18}" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xf0\x9f\0").unwrap()), "\u{feff}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}" ); assert_eq!( host_c_str_to_str(CStr::from_bytes_with_nul(b"\xf0\x9f\x92\0").unwrap()), "\u{feff}\u{fffd}\u{fffd}\u{fffd}\0\0\u{70}\0\u{1f}\0\u{12}" ); } #[test] fn round_trip() { assert_eq!( host_os_str_to_str(OsStr::from_bytes(bytes_to_host(b"").unwrap().as_bytes())).unwrap(), "" ); assert_eq!( host_os_str_to_str(OsStr::from_bytes( bytes_to_host(b"hello").unwrap().as_bytes() )) .unwrap(), "hello" ); assert_eq!( str_to_host(&host_os_str_to_str(OsStr::from_bytes(b"hello")).unwrap()) .unwrap() .as_bytes(), b"hello" ); assert_eq!( str_to_host(&host_os_str_to_str(OsStr::from_bytes(b"h\xc0ello\xc1")).unwrap()) .unwrap() .as_bytes(), b"h\xc0ello\xc1" ); assert_eq!( str_to_host(&host_os_str_to_str(OsStr::from_bytes(b"\xf5\xff")).unwrap()) .unwrap() .as_bytes(), b"\xf5\xff" ); assert_eq!( str_to_host(&host_os_str_to_str(OsStr::from_bytes(b"")).unwrap()) .unwrap() .as_bytes(), b"" ); assert_eq!( str_to_host(&host_os_str_to_str(OsStr::from_bytes(b"\xe6\x96")).unwrap()) .unwrap() .as_bytes(), b"\xe6\x96" ); // Same thing, now with `CStr`s. 
assert_eq!( str_to_host(&host_c_str_to_str( CStr::from_bytes_with_nul(b"hello\0").unwrap() )) .unwrap() .as_bytes(), b"hello" ); assert_eq!( str_to_host(&host_c_str_to_str( CStr::from_bytes_with_nul(b"h\xc0ello\xc1\0").unwrap() )) .unwrap() .as_bytes(), b"h\xc0ello\xc1" ); assert_eq!( str_to_host(&host_c_str_to_str( CStr::from_bytes_with_nul(b"\xf5\xff\0").unwrap() )) .unwrap() .as_bytes(), b"\xf5\xff" ); assert_eq!( str_to_host(&host_c_str_to_str( CStr::from_bytes_with_nul(b"\0").unwrap() )) .unwrap() .as_bytes(), b"" ); assert_eq!( str_to_host(&host_c_str_to_str( CStr::from_bytes_with_nul(b"\xe6\x96\0").unwrap() )) .unwrap() .as_bytes(), b"\xe6\x96" ); } arf-strings-0.7.3/src/windows.rs000064400000000000000000000164121046102023000147250ustar 00000000000000use std::char::decode_utf16; use std::ffi::{OsStr, OsString}; use std::os::windows::ffi::{OsStrExt, OsStringExt}; use std::{io, str}; /// Convert a byte sequence which is either plain UTF-8 or an ARF encoding into /// a `OsString` ready for use in Windows-style APIs. pub fn bytes_to_host(bytes: &[u8]) -> io::Result { let s = str::from_utf8(bytes).map_err(|_| encoding_error())?; str_to_host(s) } /// Convert a `&str` which is either plain UTF-8 or an ARF encoding into a /// `OsString` ready for use in Windows-style APIs. pub fn str_to_host(s: &str) -> io::Result { if let Some(nul_position) = s.chars().position(|c| c == '\0') { from_arf(s, nul_position) } else { Ok(OsString::from_wide(&s.encode_utf16().collect::>())) } } /// Convert an `&OsStr` produced by Windows-style APIs into a `Cow` which /// is either plain UTF-8 or an ARF encoding. pub fn host_to_str(host: &OsStr) -> io::Result { let wide = host.encode_wide().collect::>(); if wide.contains(&0) { return Err(encoding_error()); } Ok(if let Ok(s) = String::from_utf16(&wide) { s } else { to_arf(&wide) }) } /// Convert an `&OsStr` produced by Windows-style APIs into a `Cow<[u8]>` which /// is either plain UTF-8 or an ARF encoding. 
pub fn host_to_bytes(host: &OsStr) -> io::Result> { host_to_str(host).map(String::into_bytes) } /// Slow path for `str_to_host`. #[cold] fn from_arf(s: &str, nul: usize) -> io::Result { let mut lossy = s.chars(); if lossy.next() != Some('\u{feff}') { return Err(encoding_error()); } let mut nul_escaped = s.chars().skip(nul + 1); let mut any_invalid = false; let mut vec = Vec::new(); while let Some(c) = nul_escaped.next() { if c == '\0' { let more = nul_escaped.next().ok_or_else(encoding_error)?; if more > '\u{7ff}' { return Err(encoding_error()); } // Test for U+FFFD. let l = lossy.next().ok_or_else(encoding_error)?; if l != '\u{fffd}' { return Err(encoding_error()); } any_invalid = true; let unit = u16::try_from((more as u16) + 0xd800).map_err(|_| encoding_error())?; vec.push(unit); } else { if lossy.next() != Some(c) { return Err(encoding_error()); } let mut buf = [0; 2]; let utf16 = c.encode_utf16(&mut buf); for unit in utf16 { vec.push(*unit); } } } if !any_invalid { return Err(encoding_error()); } if lossy.next() != Some('\0') { return Err(encoding_error()); } // Validation succeeded. Ok(OsString::from_wide(&vec)) } /// Slow path for `host_to_bytes`. 
///
/// Builds the ARF encoding of a UTF-16 sequence that contains unpaired
/// surrogates. The output is `U+FEFF`, the input with each unpaired surrogate
/// replaced by U+FFFD, a NUL, and then the input again with each unpaired
/// surrogate replaced by a NUL followed by the character whose scalar value is
/// the surrogate minus `0xd800`.
#[cold]
fn to_arf(units: &[u16]) -> String {
    let mut data = String::new();

    data.push('\u{feff}');

    // Lossy portion: one replacement character per unpaired surrogate.
    data.extend(decode_utf16(units.iter().copied()).map(|unit| unit.unwrap_or('\u{fffd}')));

    data.push('\0');

    // NUL-escaped portion.
    for unit in decode_utf16(units.iter().copied()) {
        match unit {
            Ok(c) => data.push(c),
            Err(e) => {
                let bad = e.unpaired_surrogate();
                assert!((0xd800..=0xdfff).contains(&bad));
                data.push('\0');
                data.push(
                    std::char::from_u32(u32::from(bad - 0xd800))
                        .expect("offsets 0..=0x7ff are valid scalar values"),
                );
            }
        }
    }

    data
}

/// Construct the error returned for every malformed input.
#[cold]
fn encoding_error() -> io::Error {
    io::Error::new(io::ErrorKind::InvalidData, "invalid path string")
}

#[test]
fn utf16_inputs() {
    assert_eq!(
        String::from_utf16(&str_to_host("").unwrap().encode_wide().collect::<Vec<u16>>()).unwrap(),
        ""
    );
    str_to_host("\0").unwrap_err();
    assert_eq!(
        String::from_utf16(&str_to_host("f").unwrap().encode_wide().collect::<Vec<u16>>()).unwrap(),
        "f"
    );
    assert_eq!(
        String::from_utf16(
            &str_to_host("foo")
                .unwrap()
                .encode_wide()
                .collect::<Vec<u16>>()
        )
        .unwrap(),
        "foo"
    );
    assert_eq!(
        String::from_utf16(
            &str_to_host("\u{fffd}")
                .unwrap()
                .encode_wide()
                .collect::<Vec<u16>>()
        )
        .unwrap(),
        "\u{fffd}"
    );
    assert_eq!(
        String::from_utf16(
            &str_to_host("\u{fffd}foo")
                .unwrap()
                .encode_wide()
                .collect::<Vec<u16>>()
        )
        .unwrap(),
        "\u{fffd}foo"
    );
    assert_eq!(
        String::from_utf16(
            &str_to_host("\u{feff}foo")
                .unwrap()
                .encode_wide()
                .collect::<Vec<u16>>()
        )
        .unwrap(),
        "\u{feff}foo"
    );
}

#[test]
fn arf_inputs() {
    assert_eq!(
        str_to_host("\u{feff}hello\u{fffd}world\0hello\0\x05world")
            .unwrap()
            .encode_wide()
            .collect::<Vec<u16>>(),
        [
            'h' as u16,
            'e' as u16,
            'l' as u16,
            'l' as u16,
            'o' as u16,
            0xd805_u16,
            'w' as u16,
            'o' as u16,
            'r' as u16,
            'l' as u16,
            'd' as u16
        ]
    );
    assert_eq!(
        str_to_host("\u{feff}hello\u{fffd}\0hello\0\x05")
            .unwrap()
            .encode_wide()
            .collect::<Vec<u16>>(),
        ['h' as u16, 'e' as u16, 'l' as u16, 'l' as u16, 'o' as u16, 0xd805_u16]
    );
}

#[test]
fn errors_from_bytes() {
    assert!(bytes_to_host(b"\xfe").is_err());
    assert!(bytes_to_host(b"\xc0\xff").is_err());
}

#[test]
fn errors_from_str() {
    assert!(str_to_host("\u{feff}hello world\0hello world").is_err());
    assert!(str_to_host("\u{feff}hello world\0\0hello world\0").is_err());
    assert!(str_to_host("\u{feff}hello\u{fffd}world\0\0hello\0\x05world\0").is_err());
    assert!(str_to_host("\u{fffe}hello\u{fffd}world\0hello\0\x05world").is_err());
    assert!(str_to_host("\u{feff}hello\u{fffd}\0hello\0").is_err());
}

#[test]
fn valid_utf16() {
    assert_eq!(host_to_str(OsStr::new("")).unwrap(), "");
    assert_eq!(host_to_str(OsStr::new("foo")).unwrap(), "foo");
}

#[test]
fn not_utf16() {
    assert_eq!(
        host_to_str(&OsString::from_wide(&[0xd800_u16])).unwrap(),
        "\u{feff}\u{fffd}\0\0\u{0}"
    );
    assert_eq!(
        host_to_str(&OsString::from_wide(&[0xdfff_u16])).unwrap(),
        "\u{feff}\u{fffd}\0\0\u{7ff}"
    );
}

#[test]
fn round_trip() {
    assert_eq!(host_to_str(&bytes_to_host(b"").unwrap()).unwrap(), "");
    assert_eq!(
        host_to_str(&bytes_to_host(b"hello").unwrap()).unwrap(),
        "hello"
    );
    assert_eq!(
        str_to_host(&host_to_str(OsStr::new("hello")).unwrap()).unwrap(),
        OsStr::new("hello")
    );
    assert_eq!(
        str_to_host(&host_to_str(&OsString::from_wide(&[0x47_u16, 0xd800_u16, 0x48_u16])).unwrap())
            .unwrap(),
        OsString::from_wide(&[0x47_u16, 0xd800_u16, 0x48_u16])
    );
    assert_eq!(
        str_to_host(&host_to_str(&OsString::from_wide(&[0x49_u16, 0xdfff_u16, 0x50_u16])).unwrap())
            .unwrap(),
        OsString::from_wide(&[0x49_u16, 0xdfff_u16, 0x50_u16])
    );
    assert_eq!(
        str_to_host(&host_to_str(OsStr::new("")).unwrap()).unwrap(),
        OsStr::new("")
    );
}