widestring-0.4.0/.gitattributes010064400017500001750000000001031333520214200147730ustar0000000000000000* text=auto *.rs whitespace=tab-in-indent,trailing-space,tabwidth=4widestring-0.4.0/.gitignore010064400017500001750000000001221333520214200140710ustar0000000000000000# Rust target/ Cargo.lock **/*.rs.bak # IntelliJ .idea/ *.iml # VS Code .vscode/widestring-0.4.0/.travis.yml010064400017500001750000000001431333520214200142150ustar0000000000000000sudo: false language: rust rust: - stable - 1.26.0 - beta - nightly script: - cargo test --verbose widestring-0.4.0/CHANGELOG.md010064400017500001750000000057321333573333700137450ustar0000000000000000# Changelog The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added - New `U32String`, `U32Str`, `U32CString`, and `U32CStr` types for dealing with UTF-32 FFI. These new types are roughly equivalent to the existing UTF-16 types. - `WideChar` is a type alias to `u16` on Windows but `u32` on non-Windows platforms. - The generic types `UString`, `UStr`, `UCString` and `UCStr` are used to implement the string types. ### Changed - **Breaking Change** Existing wide string types have been renamed to `U16String`, `U16Str`, `U16CString`, and `U16CStr` (previously `WideString`, `WideStr`, etc.). Some functions have also been renamed to reflect this change (`wide_str` to `u16_str`, etc.). - **Breaking Change** `WideString`, `WideStr`, `WideCString`, and `WideCStr` are now type aliases that vary between platforms. On Windows, these are aliases to the `U16` types and are equivalent to the previous version, but on non-Windows platforms these alias the new `U32` types instead. See crate documentation for more details. ## [0.3.0] - 2018-03-17 ### Added - Additional unchecked functions on `WideCString`. - All types now implement `Default`. 
- `WideString::shrink_to_fit` - `WideString::into_boxed_wide_str` and `Box::into_wide_string`. - `WideCString::into_boxed_wide_c_str` and `Box::into_wide_c_string`. - `From` and `Default` implementations for boxed `WideStr` and boxed `WideCStr`. ### Changed - Renamed `WideCString::from_vec` to replace `WideCString::new`. To create an empty string, use `WideCString::default()` now. - `WideCString` now implements `Drop`, which sets the string to an empty string to prevent invalid unsafe code from working correctly when it should otherwise break. Also see `Drop` implementation of `CString`. - Writing changelog manually. - Upgraded winapi dev dependency. - Now requires at least Rust 1.17+ to compile (previously, was Rust 1.8). ## [0.2.2] - 2016-09-09 ### Fixed - Make `WideCString::into_raw` correctly forget the original self. ## [0.2.1] - 2016-08-12 ### Added - `into_raw`/`from_raw` on `WideCString`. Closes [#2]. ## [0.2.0] - 2016-05-31 ### Added - `Default` trait to wide strings. - Traits for conversion of strings to `Cow`. ### Changed - Methods & traits to bring to parity with Rust 1.9 string APIs. ## 0.1.0 - 2016-02-06 ### Added - Initial release. [#2]: https://github.com/starkat99/widestring-rs/issues/2 [Unreleased]: https://github.com/starkat99/widestring-rs/compare/v0.3.0...HEAD [0.3.0]: https://github.com/starkat99/widestring-rs/compare/v0.2.2...v0.3.0 [0.2.2]: https://github.com/starkat99/widestring-rs/compare/v0.2.1...v0.2.2 [0.2.1]: https://github.com/starkat99/widestring-rs/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/starkat99/widestring-rs/compare/v0.1.0...v0.2.0 widestring-0.4.0/Cargo.toml.orig010064400017500001750000000013451333573651600150210ustar0000000000000000[package] name = "widestring" version = "0.4.0" authors = ["Kathryn Long "] description = "A wide string FFI library for converting to and from wide strings, such as those often used in Windows API or other FFI libraries. Both UTF-16 and UTF-32 types are provided." 
repository = "https://github.com/starkat99/widestring-rs.git" readme = "README.md" keywords = ["wide", "string", "win32", "utf-16", "utf-32"] categories = ["text-processing", "encoding"] license = "MIT/Apache-2.0" [badges] appveyor = { repository = "starkat99/widestring-rs" } travis-ci = { repository = "starkat99/widestring-rs" } maintenance = { status = "passively-maintained" } [dev-dependencies] winapi = { version = "0.3", features = ["winbase"] }widestring-0.4.0/Cargo.toml0000644000000023530000000000000112540ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "widestring" version = "0.4.0" authors = ["Kathryn Long "] description = "A wide string FFI library for converting to and from wide strings, such as those often used in Windows API or other FFI libaries. Both UTF-16 and UTF-32 types are provided." readme = "README.md" keywords = ["wide", "string", "win32", "utf-16", "utf-32"] categories = ["text-processing", "encoding"] license = "MIT/Apache-2.0" repository = "https://github.com/starkat99/widestring-rs.git" [dev-dependencies.winapi] version = "0.3" features = ["winbase"] [badges.appveyor] repository = "starkat99/widestring-rs" [badges.maintenance] status = "passively-maintained" [badges.travis-ci] repository = "starkat99/widestring-rs" widestring-0.4.0/LICENSE-APACHE010064400017500001750000000251411333520214200140350ustar0000000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. 
Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.widestring-0.4.0/LICENSE-MIT010064400017500001750000000020371333520214200135440ustar0000000000000000Copyright (c) 2016 Kathryn Long Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.widestring-0.4.0/README.md010064400017500001750000000026001333572334700134020ustar0000000000000000# widestring [![Crates.io](https://img.shields.io/crates/v/widestring.svg)](https://crates.io/crates/widestring/) [![docs.rs](https://docs.rs/widestring/badge.svg)](https://docs.rs/widestring/) [![Build status](https://ci.appveyor.com/api/projects/status/97pmbv6kk79bicww?svg=true)](https://ci.appveyor.com/project/starkat99/widestring-rs) [![Build Status](https://travis-ci.org/starkat99/widestring-rs.svg?branch=master)](https://travis-ci.org/starkat99/widestring-rs) A wide string Rust FFI library for converting to and from wide strings, such as those often used in Windows API or other FFI libraries. Both UTF-16 and UTF-32 types are provided, including support for malformed encoding. ## Documentation - [Crate API Reference](https://docs.rs/widestring/) - [Latest Changes](CHANGELOG.md) ## License This library is distributed under the terms of either of: * MIT license ([LICENSE-MIT](LICENSE-MIT) or [http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT)) * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)) at your option. 
### Contributing Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.widestring-0.4.0/appveyor.yml010064400017500001750000000077231333520214200145070ustar0000000000000000# Appveyor configuration template for Rust using rustup for Rust installation # https://github.com/starkat99/appveyor-rust ## Operating System (VM environment) ## # Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets. os: Visual Studio 2017 ## Build Matrix ## # This configuration will setup a build for each channel & target combination (12 windows # combinations in all). # # There are 3 channels: stable, beta, and nightly. # # Alternatively, the full version may be specified for the channel to build using that specific # version (e.g. channel: 1.5.0) # # The values for target are the set of windows Rust build targets. Each value is of the form # # ARCH-pc-windows-TOOLCHAIN # # Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker # toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for # a description of the toolchain differences. # See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of # toolchains and host triples. # # Comment out channel/target combos you do not wish to build in CI. # # You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands # and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly # channels to enable unstable features when building for nightly. Or you could add additional # matrix entries to test different combinations of features. 
environment: matrix: ### MSVC Toolchains ### # Stable 64-bit MSVC - channel: stable target: x86_64-pc-windows-msvc # Stable 32-bit MSVC - channel: stable target: i686-pc-windows-msvc # Beta 64-bit MSVC - channel: beta target: x86_64-pc-windows-msvc # Beta 32-bit MSVC - channel: beta target: i686-pc-windows-msvc # Nightly 64-bit MSVC - channel: nightly target: x86_64-pc-windows-msvc # Nightly 32-bit MSVC - channel: nightly target: i686-pc-windows-msvc ### GNU Toolchains ### # Stable 64-bit GNU - channel: stable target: x86_64-pc-windows-gnu # Stable 32-bit GNU - channel: stable target: i686-pc-windows-gnu # Beta 64-bit GNU - channel: beta target: x86_64-pc-windows-gnu # Beta 32-bit GNU - channel: beta target: i686-pc-windows-gnu # Nightly 64-bit GNU - channel: nightly target: x86_64-pc-windows-gnu # Nightly 32-bit GNU - channel: nightly target: i686-pc-windows-gnu ### Allowed failures ### # See Appveyor documentation for specific details. In short, place any channel or targets you wish # to allow build failures on (usually nightly at least is a wise choice). This will prevent a build # or test failure in the matching channels/targets from failing the entire build. #matrix: # allow_failures: # - channel: nightly # If you only care about stable channel build failures, uncomment the following line: #- channel: beta ## Install Script ## # This is the most important part of the Appveyor configuration. This installs the version of Rust # specified by the 'channel' and 'target' environment variables from the build matrix. This uses # rustup to install Rust. # # For simple configurations, instead of using the build matrix, you can simply set the # default-toolchain and default-host manually here. 
install: - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe - rustup-init -yv --default-toolchain %channel% --default-host %target% - set PATH=%PATH%;%USERPROFILE%\.cargo\bin - rustc -vV - cargo -vV ## Build Script ## # 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents # the "directory does not contain a project or solution file" error. build: false # Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs #directly or perform other testing commands. Rust will automatically be placed in the PATH # environment variable. test_script: - cargo test --verbose widestring-0.4.0/src/lib.rs010064400017500001750000000454321333572632200140340ustar0000000000000000//! A wide string FFI module for converting to and from wide string variants. //! //! This module provides multiple types of wide strings: `U16String`, `U16CString`, `U32String`, //! and `U32CString`. These types are backed by two generic implementations parameterized by //! element size: `UString` and `UCString`. The `UCString` types are analogous to the //! standard `CString` FFI type, while the `UString` types are analogous to `OsString`. Otherwise, //! `U16` and `U32` types differ only in character width and encoding methods. //! //! For `U16String` and `U32String`, no guarantees are made about the underlying string data; they //! are simply a sequence of UTF-16 *code units* or UTF-32 code points, both of which may be //! ill-formed or contain nul values. `U16CString` and `U32CString`, on the other hand, are aware //! of nul values and are guaranteed to be terminated with a nul value (unless unchecked methods //! are used to construct the strings). Because `U16CString` and `U32CString` are C-style, //! nul-terminated strings, they will have no interior nul values. All four string types may still //! have unpaired UTF-16 surrogates or invalid UTF-32 code points; ill-formed data is preserved //! 
until conversion to a basic Rust `String`. //! //! Use `U16String` or `U32String` when you simply need to pass-through strings, or when you know //! or don't care if you're not dealing with a nul-terminated string, such as when string lengths //! are provided and you are only reading strings from FFI, not writing them out to a FFI. //! //! Use `U16CString` or `U32CString` when you must properly handle nul values, and must deal with //! nul-terminated C-style wide strings, such as when you pass strings into FFI functions. //! //! # Relationship to other Rust Strings //! //! Standard Rust strings `String` and `str` are well-formed Unicode data encoded as UTF-8. The //! standard strings provide proper handling of Unicode and ensure strong safety guarantees. //! //! `CString` and `CStr` are strings used for C FFI. They handle nul-terminated C-style strings. //! However, they do not have a builtin encoding, and conversions between C-style and other Rust //! strings must specifically encode and decode the strings, and handle possibly invalid encoding //! data. They are safe to use only in passing string-like data back and forth from C APIs but do //! not provide any other guarantees, so may not be well-formed. //! //! `OsString` and `OsStr` are also strings for use with FFI. Unlike `CString`, they do no special //! handling of nul values, but instead have an OS-specified encoding. While, for example, on Linux //! systems this is usually the UTF-8 encoding, this is not the case for every platform. The //! encoding may not even be 8-bit: on Windows, `OsString` uses a malformed encoding sometimes //! referred to as "WTF-8". In any case, like `CString`, `OsString` has no additional guarantees //! and may not be well-formed. //! //! Due to the loss of safety of these other string types, conversion to standard Rust `String` is //! lossy, and may require knowledge of the underlying encoding, including platform-specific //! quirks. //! //! 
The wide strings in this crate are roughly based on the principles of the string types in //! `std::ffi`, though there are differences. `U16String`, `U32String`, `U16Str`, and `U32Str` are //! roughly similar in role to `OsString` and `OsStr`, while `U16CString`, `U32CString`, `U16CStr`, //! and `U32CStr` are roughly similar in role to `CString` and `CStr`. Conversion to FFI string //! types is generally very straight forward and safe, while conversion directly between standard //! Rust `String` is a lossy conversion just as `OsString` is. //! //! `U16String` and `U16CString` are treated as though they use UTF-16 encoding, even if they may //! contain unpaired surrogates. `U32String` and `U32CString` are treated as though they use UTF-32 //! encoding, even if they may contain values outside the valid Unicode character range. //! //! # Remarks on UTF-16 Code Units //! //! *Code units* are the 16-bit units that comprise UTF-16 sequences. Code units //! can specify Unicode code points either as single units or in *surrogate pairs*. Because every //! code unit might be part of a surrogate pair, many regular string operations, including //! indexing into a wide string, writing to a wide string, or even iterating a wide string should //! be handled with care and are greatly discouraged. Some operations have safer alternatives //! provided, such as Unicode code point iteration instead of code unit iteration. Always keep in //! mind that the number of code units (`len()`) of a wide string is **not** equivalent to the //! number of Unicode characters in the string, merely the length of the UTF-16 encoding sequence. //! In fact, Unicode code points do not even have a one-to-one mapping with characters! //! //! UTF-32 simply encodes Unicode code points as-is in 32-bit values, but Unicode character code //! points are reserved only for 21-bits. Again, Unicode code points do not have a one-to-one //! mapping with the concept of a visual character glyph. //! //! 
# FFI with C/C++ `wchar_t` //! //! C/C++'s `wchar_t` (and C++'s corresponding `wstring`) varies in size depending on compiler //! and platform. Typically, `wchar_t` is 16-bits on Windows and 32-bits on most Unix-based //! platforms. For convenience when using `wchar_t`-based FFIs, type aliases for the corresponding //! string types are provided: `WideString` aliases `U16String` on Windows or `U32String` //! elsewhere, `WideCString` aliases `U16CString` or `U32CString`, etc. The `WideChar` alias //! is also provided, aliasing `u16` or `u32`. //! //! When not interacting with a FFI using `wchar_t`, it is recommended to use the string types //! directly rather than via the wide alias. //! //! # Examples //! //! The following example uses `U16String` to get Windows error messages, since `FormatMessageW` //! returns a string length for us and we don't need to pass error messages into other FFI //! functions so we don't need to worry about nul values. //! //! ```rust //! # #[cfg(not(windows))] //! # fn main() {} //! # extern crate winapi; //! # extern crate widestring; //! # #[cfg(windows)] //! # fn main() { //! use winapi::um::winbase::{FormatMessageW, LocalFree, FORMAT_MESSAGE_FROM_SYSTEM, //! FORMAT_MESSAGE_ALLOCATE_BUFFER, FORMAT_MESSAGE_IGNORE_INSERTS}; //! use winapi::shared::ntdef::LPWSTR; //! use winapi::shared::minwindef::HLOCAL; //! use std::ptr; //! use widestring::U16String; //! # use winapi::shared::minwindef::DWORD; //! # let error_code: DWORD = 0; //! //! let U16Str: U16String; //! unsafe { //! // First, get a string buffer from some windows api such as FormatMessageW... //! let mut buffer: LPWSTR = ptr::null_mut(); //! let strlen = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | //! FORMAT_MESSAGE_ALLOCATE_BUFFER | //! FORMAT_MESSAGE_IGNORE_INSERTS, //! ptr::null(), //! error_code, // error code from GetLastError() //! 0, //! (&mut buffer as *mut LPWSTR) as LPWSTR, //! 0, //! ptr::null_mut()); //! //! // Get the buffer as a wide string //! 
U16Str = U16String::from_ptr(buffer, strlen as usize); //! // Since U16String creates an owned copy, it's safe to free original buffer now //! // If you didn't want an owned copy, you could use &U16Str. //! LocalFree(buffer as HLOCAL); //! } //! // Convert to a regular Rust String and use it to your heart's desire! //! let message = U16Str.to_string_lossy(); //! # assert_eq!(message, "The operation completed successfully.\r\n"); //! # } //! ``` //! //! The following example is functionally the same, only using `U16CString` instead. //! //! ```rust //! # #[cfg(not(windows))] //! # fn main() {} //! # extern crate winapi; //! # extern crate widestring; //! # #[cfg(windows)] //! # fn main() { //! use winapi::um::winbase::{FormatMessageW, LocalFree, FORMAT_MESSAGE_FROM_SYSTEM, //! FORMAT_MESSAGE_ALLOCATE_BUFFER, FORMAT_MESSAGE_IGNORE_INSERTS}; //! use winapi::shared::ntdef::LPWSTR; //! use winapi::shared::minwindef::HLOCAL; //! use std::ptr; //! use widestring::U16CString; //! # use winapi::shared::minwindef::DWORD; //! # let error_code: DWORD = 0; //! //! let U16Str: U16CString; //! unsafe { //! // First, get a string buffer from some windows api such as FormatMessageW... //! let mut buffer: LPWSTR = ptr::null_mut(); //! FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM | //! FORMAT_MESSAGE_ALLOCATE_BUFFER | //! FORMAT_MESSAGE_IGNORE_INSERTS, //! ptr::null(), //! error_code, // error code from GetLastError() //! 0, //! (&mut buffer as *mut LPWSTR) as LPWSTR, //! 0, //! ptr::null_mut()); //! //! // Get the buffer as a wide string //! U16Str = U16CString::from_ptr_str(buffer); //! // Since U16CString creates an owned copy, it's safe to free original buffer now //! // If you didn't want an owned copy, you could use &U16CStr. //! LocalFree(buffer as HLOCAL); //! } //! // Convert to a regular Rust String and use it to your heart's desire! //! let message = U16Str.to_string_lossy(); //! # assert_eq!(message, "The operation completed successfully.\r\n"); //! # } //! 
``` #![deny(future_incompatible)] #![warn( unused, anonymous_parameters, missing_docs, missing_copy_implementations, missing_debug_implementations, trivial_casts, trivial_numeric_casts )] use std::fmt::Debug; mod platform; mod ucstring; mod ustring; pub use ucstring::*; pub use ustring::*; /// Marker trait for primitive types used to represent UTF character data. Should not be used /// directly. pub trait UChar: Debug + Sized + Copy + Ord + Eq { /// NUL character value const NUL: Self; } impl UChar for u16 { const NUL: u16 = 0; } impl UChar for u32 { const NUL: u32 = 0; } /// String slice reference for `U16String`. /// /// `U16Str` is to `U16String` as `str` is to `String`. /// /// `U16Str` is not aware of nul values. Strings may or may not be nul-terminated, and may /// contain invalid and ill-formed UTF-16 data. These strings are intended to be used with /// FFI functions that directly use string length, where the strings are known to have proper /// nul-termination already, or where strings are merely being passed through without modification. /// /// `WideCStr` should be used instead of nul-aware strings are required. /// /// `U16Str` can be converted to many other string types, including `OsString` and `String`, making /// proper Unicode FFI safe and easy. pub type U16Str = UStr; /// An owned, mutable "wide" string for FFI that is **not** nul-aware. /// /// `U16String` is not aware of nul values. Strings may or may not be nul-terminated, and may /// contain invalid and ill-formed UTF-16 data. These strings are intended to be used with /// FFI functions that directly use string length, where the strings are known to have proper /// nul-termination already, or where strings are merely being passed through without modification. /// /// `WideCString` should be used instead if nul-aware strings are required. 
/// /// `U16String` can be converted to and from many other standard Rust string types, including /// `OsString` and `String`, making proper Unicode FFI safe and easy. /// /// # Examples /// /// The following example constructs a `U16String` and shows how to convert a `U16String` to a /// regular Rust `String`. /// /// ```rust /// use widestring::U16String; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U16String::from_str(s); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` pub type U16String = UString; /// C-style wide string reference for `U16CString`. /// /// `U16CStr` is aware of nul values. Unless unchecked conversions are used, all `U16CStr` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-16 data. These strings are intended to /// be used with FFI functions such as Windows API that may require nul-terminated strings. /// /// `U16CStr` can be converted to and from many other string types, including `U16String`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. pub type U16CStr = UCStr; /// An owned, mutable C-style "wide" string for FFI that is nul-aware and nul-terminated. /// /// `U16CString` is aware of nul values. Unless unchecked conversions are used, all `U16CString` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-16 data. These strings are intended to /// be used with FFI functions such as Windows API that may require nul-terminated strings. /// /// `U16CString` can be converted to and from many other string types, including `U16String`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. 
/// /// # Examples /// /// The following example constructs a `U16CString` and shows how to convert a `U16CString` to a /// regular Rust `String`. /// /// ```rust /// use widestring::U16CString; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U16CString::from_str(s).unwrap(); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` pub type U16CString = UCString; /// String slice reference for `U32String`. /// /// `U32Str` is to `U32String` as `str` is to `String`. /// /// `U32Str` is not aware of nul values. Strings may or may not be nul-terminated, and may /// contain invalid and ill-formed UTF-32 data. These strings are intended to be used with /// FFI functions that directly use string length, where the strings are known to have proper /// nul-termination already, or where strings are merely being passed through without modification. /// /// `WideCStr` should be used instead of nul-aware strings are required. /// /// `U32Str` can be converted to many other string types, including `OsString` and `String`, making /// proper Unicode FFI safe and easy. pub type U32Str = UStr; /// An owned, mutable 32-bit wide string for FFI that is **not** nul-aware. /// /// `U32String` is not aware of nul values. Strings may or may not be nul-terminated, and may /// contain invalid and ill-formed UTF-32 data. These strings are intended to be used with /// FFI functions that directly use string length, where the strings are known to have proper /// nul-termination already, or where strings are merely being passed through without modification. /// /// `U32CString` should be used instead if nul-aware 32-bit strings are required. /// /// `U32String` can be converted to and from many other standard Rust string types, including /// `OsString` and `String`, making proper Unicode FFI safe and easy. 
/// /// # Examples /// /// The following example constructs a `U32String` and shows how to convert a `U32String` to a /// regular Rust `String`. /// /// ```rust /// use widestring::U32String; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U32String::from_str(s); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` pub type U32String = UString; /// C-style wide string reference for `U32CString`. /// /// `U32CStr` is aware of nul values. Unless unchecked conversions are used, all `U32CStr` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-32 data. These strings are intended to /// be used with FFI functions such as Windows API that may require nul-terminated strings. /// /// `U32CStr` can be converted to and from many other string types, including `U32String`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. pub type U32CStr = UCStr; /// An owned, mutable C-style wide string for FFI that is nul-aware and nul-terminated. /// /// `U32CString` is aware of nul values. Unless unchecked conversions are used, all `U32CString` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-32 data. These strings are intended to /// be used with FFI functions such as Windows API that may require nul-terminated strings. /// /// `U32CString` can be converted to and from many other string types, including `U32String`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. /// /// # Examples /// /// The following example constructs a `U32CString` and shows how to convert a `U32CString` to a /// regular Rust `String`. 
/// /// ```rust /// use widestring::U32CString; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U32CString::from_str(s).unwrap(); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` pub type U32CString = UCString; #[cfg(not(windows))] /// Alias for `U16String` or `U32String` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideString = U32String; #[cfg(not(windows))] /// Alias for `U16CString` or `U32CString` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideCString = U32CString; #[cfg(not(windows))] /// Alias for `U16Str` or `U32Str` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideStr = U32Str; #[cfg(not(windows))] /// Alias for `U16CStr` or `U32CStr` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideCStr = U32CStr; #[cfg(not(windows))] /// Alias for `u16` or `u32` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideChar = u32; #[cfg(windows)] /// Alias for `U16String` or `U32String` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideString = U16String; #[cfg(windows)] /// Alias for `U16CString` or `U32CString` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideCString = U16CString; #[cfg(windows)] /// Alias for `U16Str` or `U32Str` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideStr = U16Str; #[cfg(windows)] /// Alias for `U16CStr` or `U32CStr` depending on platform. Intended to match typical C `wchar_t` size on platform. pub type WideCStr = U16CStr; #[cfg(windows)] /// Alias for `u16` or `u32` depending on platform. Intended to match typical C `wchar_t` size on platform. 
pub type WideChar = u16; widestring-0.4.0/src/platform/mod.rs010064400017500001750000000002431333520214200156450ustar0000000000000000#[cfg(windows)] mod windows; #[cfg(windows)] pub(crate) use self::windows::*; #[cfg(not(windows))] mod other; #[cfg(not(windows))] pub(crate) use self::other::*; widestring-0.4.0/src/platform/other.rs010064400017500001750000000003561333520214200162140ustar0000000000000000use std::ffi::{OsStr, OsString}; pub(crate) fn os_to_wide(s: &OsStr) -> Vec { s.to_string_lossy().encode_utf16().collect() } pub(crate) fn os_from_wide(s: &[u16]) -> OsString { OsString::from(String::from_utf16_lossy(s)) } widestring-0.4.0/src/platform/windows.rs010064400017500001750000000004131333520214200165570ustar0000000000000000#![cfg(windows)] use std::ffi::{OsStr, OsString}; use std::os::windows::ffi::{OsStrExt, OsStringExt}; pub(crate) fn os_to_wide(s: &OsStr) -> Vec { s.encode_wide().collect() } pub(crate) fn os_from_wide(s: &[u16]) -> OsString { OsString::from_wide(s) } widestring-0.4.0/src/ucstring.rs010066400017500001750000002065051333573313400151250ustar0000000000000000use super::platform; use super::{FromUtf32Error, UChar, UStr, UString}; use std; use std::ffi::{OsStr, OsString}; use std::mem; /// An owned, mutable C-style "wide" string for FFI that is nul-aware and nul-terminated. /// /// `UCString` is aware of nul values. Unless unchecked conversions are used, all `UCString` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-16 or UTF-32 data. These strings are /// intended to be used with FFI functions such as Windows API that may require nul-terminated /// strings. /// /// `UCString` can be converted to and from many other string types, including `UString`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. 
/// /// Please prefer using the type aliases `U16CString` or `U32CString` or `WideCString` to using /// this type directly. /// /// # Examples /// /// The following example constructs a `U16CString` and shows how to convert a `U16CString` to a /// regular Rust `String`. /// /// ```rust /// use widestring::U16CString; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U16CString::from_str(s).unwrap(); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` /// /// The same example using `U32CString`: /// /// ```rust /// use widestring::U32CString; /// let s = "Test"; /// // Create a wide string from the rust string /// let wstr = U32CString::from_str(s).unwrap(); /// // Convert back to a rust string /// let rust_str = wstr.to_string_lossy(); /// assert_eq!(rust_str, "Test"); /// ``` #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct UCString { inner: Box<[C]>, } /// C-style wide string reference for `UCString`. /// /// `UCStr` is aware of nul values. Unless unchecked conversions are used, all `UCStr` /// strings end with a nul-terminator in the underlying buffer and contain no internal nul values. /// The strings may still contain invalid or ill-formed UTF-16 or UTF-32 data. These strings are /// intended to be used with FFI functions such as Windows API that may require nul-terminated /// strings. /// /// `UCStr` can be converted to and from many other string types, including `UString`, /// `OsString`, and `String`, making proper Unicode FFI safe and easy. /// /// Please prefer using the type aliases `U16CStr` or `U32CStr` or `WideCStr` to using /// this type directly. #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct UCStr { inner: [C], } /// An error returned from `UCString` to indicate that an invalid nul value was found. 
/// /// The error indicates the position in the vector where the nul value was found, as well as /// returning the ownership of the invalid vector. #[derive(Debug, Clone, PartialEq, Eq)] pub struct NulError(usize, Vec); /// An error returned from `UCString` and `UCStr` to indicate that a terminating nul value /// was missing. /// /// The error optionally returns the ownership of the invalid vector whenever a vector was owned. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MissingNulError(Option>); impl UCString { /// Constructs a `UCString` from a container of wide character data. /// /// This method will consume the provided data and use the underlying elements to construct a /// new string. The data will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain the `Vec` as well as the position of the nul value. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e' /// # let cloned = v.clone(); /// // Create a wide string from the vector /// let wcstr = U16CString::new(v).unwrap(); /// # assert_eq!(wcstr.into_vec(), cloned); /// ``` /// /// ```rust /// use widestring::U32CString; /// let v = vec![84u32, 104u32, 101u32]; // 'T' 'h' 'e' /// # let cloned = v.clone(); /// // Create a wide string from the vector /// let wcstr = U32CString::new(v).unwrap(); /// # assert_eq!(wcstr.into_vec(), cloned); /// ``` /// /// The following example demonstrates errors from nul values in a vector. 
///
    /// ```rust
    /// use widestring::U16CString;
    /// let v = vec![84u16, 0u16, 104u16, 101u16]; // 'T' NUL 'h' 'e'
    /// // Create a wide string from the vector
    /// let res = U16CString::new(v);
    /// assert!(res.is_err());
    /// assert_eq!(res.err().unwrap().nul_position(), 1);
    /// ```
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v = vec![84u32, 0u32, 104u32, 101u32]; // 'T' NUL 'h' 'e'
    /// // Create a wide string from the vector
    /// let res = U32CString::new(v);
    /// assert!(res.is_err());
    /// assert_eq!(res.err().unwrap().nul_position(), 1);
    /// ```
    pub fn new(v: impl Into<Vec<C>>) -> Result<Self, NulError<C>> {
        let v = v.into();
        // Any nul value is an error here; the index of the first one is reported to the caller.
        match v.iter().position(|&val| val == UChar::NUL) {
            // No nul was found by the scan above, so the unchecked constructor's precondition
            // holds.
            None => Ok(unsafe { UCString::from_vec_unchecked(v) }),
            Some(pos) => Err(NulError(pos, v)),
        }
    }

    /// Constructs a `UCString` from a nul-terminated container of UTF-16 or UTF-32 data.
    ///
    /// This method will consume the provided data and use the underlying elements to construct a
    /// new string. The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<C>`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16CString;
    /// let v = vec![84u16, 104u16, 101u16, 0u16]; // 'T' 'h' 'e' NUL
    /// # let cloned = v[..3].to_owned();
    /// // Create a wide string from the vector
    /// let wcstr = U16CString::from_vec_with_nul(v).unwrap();
    /// # assert_eq!(wcstr.into_vec(), cloned);
    /// ```
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v = vec![84u32, 104u32, 101u32, 0u32]; // 'T' 'h' 'e' NUL
    /// # let cloned = v[..3].to_owned();
    /// // Create a wide string from the vector
    /// let wcstr = U32CString::from_vec_with_nul(v).unwrap();
    /// # assert_eq!(wcstr.into_vec(), cloned);
    /// ```
    ///
    /// The following example demonstrates errors from missing nul values in a vector.
///
    /// ```rust
    /// use widestring::U16CString;
    /// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e'
    /// // Create a wide string from the vector
    /// let res = U16CString::from_vec_with_nul(v);
    /// assert!(res.is_err());
    /// ```
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v = vec![84u32, 104u32, 101u32]; // 'T' 'h' 'e'
    /// // Create a wide string from the vector
    /// let res = U32CString::from_vec_with_nul(v);
    /// assert!(res.is_err());
    /// ```
    pub fn from_vec_with_nul(v: impl Into<Vec<C>>) -> Result<Self, MissingNulError<C>> {
        let mut v = v.into();
        // The first nul becomes the terminator; everything after it is dropped.
        match v.iter().position(|&val| val == UChar::NUL) {
            None => Err(MissingNulError(Some(v))),
            Some(pos) => {
                v.truncate(pos + 1);
                // After truncation the only nul is the last element.
                Ok(unsafe { UCString::from_vec_with_nul_unchecked(v) })
            }
        }
    }

    /// Creates a `UCString` from a vector without checking for interior nul values.
    ///
    /// A terminating nul value will be appended if the vector does not already have a terminating
    /// nul.
    ///
    /// # Safety
    ///
    /// This method is equivalent to `new` except that no runtime assertion is made that `v`
    /// contains no nul values. Providing a vector with nul values will result in an invalid
    /// `UCString`.
    pub unsafe fn from_vec_unchecked(v: impl Into<Vec<C>>) -> Self {
        let mut v = v.into();
        // Append a terminator unless the last element already is one (this also covers the
        // empty vector, where `last()` is `None`).
        if v.last() != Some(&C::NUL) {
            v.push(C::NUL);
        }
        UCString::from_vec_with_nul_unchecked(v)
    }

    /// Creates a `UCString` from a vector that should have a nul terminator, without checking
    /// for any nul values.
    ///
    /// # Safety
    ///
    /// This method is equivalent to `from_vec_with_nul` except that no runtime assertion is made
    /// that `v` contains no nul values. Providing a vector with interior nul values or without a
    /// terminating nul value will result in an invalid `UCString`.
    pub unsafe fn from_vec_with_nul_unchecked(v: impl Into<Vec<C>>) -> Self {
        UCString {
            inner: v.into().into_boxed_slice(),
        }
    }

    /// Constructs a `UCString` from anything that can be converted to a `UStr`.
///
    /// The string will be scanned for invalid nul values.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data contains a nul value.
    /// The returned error will contain a `Vec<C>` as well as the position of the nul value.
    pub fn from_ustr(s: impl AsRef<UStr<C>>) -> Result<Self, NulError<C>> {
        UCString::new(s.as_ref().as_slice())
    }

    /// Constructs a `UCString` from anything that can be converted to a `UStr`, without
    /// scanning for invalid nul values.
    ///
    /// # Safety
    ///
    /// This method is equivalent to `from_ustr` except that no runtime assertion is made that
    /// `s` contains no nul values. Providing a string with nul values will result in an invalid
    /// `UCString`.
    pub unsafe fn from_ustr_unchecked(s: impl AsRef<UStr<C>>) -> Self {
        UCString::from_vec_unchecked(s.as_ref().as_slice())
    }

    /// Constructs a `UCString` from anything that can be converted to a `UStr` with a nul
    /// terminator.
    ///
    /// The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<C>`.
    pub fn from_ustr_with_nul(s: impl AsRef<UStr<C>>) -> Result<Self, MissingNulError<C>> {
        UCString::from_vec_with_nul(s.as_ref().as_slice())
    }

    /// Constructs a `UCString` from anything that can be converted to a `UStr` with a nul
    /// terminator, without checking the string for any invalid interior nul values.
    ///
    /// # Safety
    ///
    /// This method is equivalent to `from_ustr_with_nul` except that no runtime assertion is
    /// made that `s` contains no nul values. Providing a vector with interior nul values or
    /// without a terminating nul value will result in an invalid `UCString`.
    pub unsafe fn from_ustr_with_nul_unchecked(s: impl AsRef<UStr<C>>) -> Self {
        UCString::from_vec_with_nul_unchecked(s.as_ref().as_slice())
    }

    /// Constructs a new `UCString` copied from a nul-terminated string pointer.
    ///
    /// This will scan for nul values beginning with `p`.
The first nul value will be used as the /// nul terminator for the string, similar to how libc string functions such as `strlen` work. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid or has a /// nul terminator, and the function could scan past the underlying buffer. /// /// `p` must be non-null. /// /// # Panics /// /// This function panics if `p` is null. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_ptr_str(p: *const C) -> Self { assert!(!p.is_null()); let mut i: isize = 0; while *p.offset(i) != UChar::NUL { i = i + 1; } let slice = std::slice::from_raw_parts(p, i as usize + 1); UCString::from_vec_with_nul_unchecked(slice) } /// Converts to a `UCStr` reference. pub fn as_ucstr(&self) -> &UCStr { self } /// Converts the wide string into a `Vec` without a nul terminator, consuming the string in /// the process. /// /// The resulting vector will **not** contain a nul-terminator, and will contain no other nul /// values. pub fn into_vec(self) -> Vec { let mut v = self.into_inner().into_vec(); v.pop(); v } /// Converts the wide string into a `Vec`, consuming the string in the process. /// /// The resulting vector will contain a nul-terminator and no interior nul values. pub fn into_vec_with_nul(self) -> Vec { self.into_inner().into_vec() } /// Transfers ownership of the wide string to a C caller. /// /// # Safety /// /// The pointer must be returned to Rust and reconstituted using `from_raw` to be properly /// deallocated. Specifically, one should _not_ use the standard C `free` function to /// deallocate this string. /// /// Failure to call `from_raw` will lead to a memory leak. 
pub fn into_raw(self) -> *mut C { Box::into_raw(self.into_inner()) as *mut C } /// Retakes ownership of a `UCString` that was transferred to C. /// /// # Safety /// /// This should only ever be called with a pointer that was earlier obtained by calling /// `into_raw` on a `UCString`. Additionally, the length of the string will be recalculated /// from the pointer. pub unsafe fn from_raw(p: *mut C) -> Self { assert!(!p.is_null()); let mut i: isize = 0; while *p.offset(i) != UChar::NUL { i += 1; } let slice = std::slice::from_raw_parts_mut(p, i as usize + 1); UCString { inner: mem::transmute(slice), } } /// Converts this `UCString` into a boxed `UCStr`. /// /// # Examples /// /// ``` /// use widestring::{U16CString, U16CStr}; /// /// let mut v = vec![102u16, 111u16, 111u16]; // "foo" /// let c_string = U16CString::new(v.clone()).unwrap(); /// let boxed = c_string.into_boxed_ucstr(); /// v.push(0); /// assert_eq!(&*boxed, U16CStr::from_slice_with_nul(&v).unwrap()); /// ``` /// /// ``` /// use widestring::{U32CString, U32CStr}; /// /// let mut v = vec![102u32, 111u32, 111u32]; // "foo" /// let c_string = U32CString::new(v.clone()).unwrap(); /// let boxed = c_string.into_boxed_ucstr(); /// v.push(0); /// assert_eq!(&*boxed, U32CStr::from_slice_with_nul(&v).unwrap()); /// ``` pub fn into_boxed_ucstr(self) -> Box> { unsafe { Box::from_raw(Box::into_raw(self.into_inner()) as *mut UCStr) } } /// Bypass "move out of struct which implements [`Drop`] trait" restriction. /// /// [`Drop`]: ../ops/trait.Drop.html fn into_inner(self) -> Box<[C]> { unsafe { let result = std::ptr::read(&self.inner); mem::forget(self); result } } } impl UCString { /// Constructs a `U16CString` from a `str`. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. 
/// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = U16CString::from_str(s).unwrap(); /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` /// /// The following example demonstrates errors from nul values in a vector. /// /// ```rust /// use widestring::U16CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let res = U16CString::from_str(s); /// assert!(res.is_err()); /// assert_eq!(res.err().unwrap().nul_position(), 2); /// ``` pub fn from_str(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().encode_utf16().collect(); UCString::new(v) } /// Constructs a `U16CString` from a `str`, without checking for interior nul values. /// /// # Safety /// /// This method is equivalent to `from_str` except that no runtime assertion is made that `s` /// contains no nul values. Providing a string with nul values will result in an invalid /// `U16CString`. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = unsafe { U16CString::from_str_unchecked(s) }; /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` pub unsafe fn from_str_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().encode_utf16().collect(); UCString::from_vec_unchecked(v) } /// Constructs a `U16CString` from a `str` with a nul terminator. /// /// The string will be truncated at the first nul value in the string. /// /// # Failures /// /// This function will return an error if the data does not contain a nul to terminate the /// string. The returned error will contain the consumed `Vec`. 
/// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let wcstr = U16CString::from_str_with_nul(s).unwrap(); /// assert_eq!(wcstr.to_string_lossy(), "My"); /// ``` /// /// The following example demonstrates errors from missing nul values in a vector. /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let res = U16CString::from_str_with_nul(s); /// assert!(res.is_err()); /// ``` pub fn from_str_with_nul(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().encode_utf16().collect(); UCString::from_vec_with_nul(v) } /// Constructs a `U16CString` from str `str` that should have a terminating nul, but without /// checking for any nul values. /// /// # Safety /// /// This method is equivalent to `from_str_with_nul` except that no runtime assertion is made /// that `s` contains no nul values. Providing a vector with interior nul values or without a /// terminating nul value will result in an invalid `U16CString`. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "My String\u{0}"; /// // Create a wide string from the string /// let wcstr = unsafe { U16CString::from_str_with_nul_unchecked(s) }; /// assert_eq!(wcstr.to_string_lossy(), "My String"); /// ``` pub unsafe fn from_str_with_nul_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().encode_utf16().collect(); UCString::from_vec_with_nul_unchecked(v) } /// Constructs a new `U16CString` copied from a `u16` pointer and a length. /// /// The `len` argument is the number of `u16` elements, **not** the number of bytes. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. 
///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
    pub unsafe fn from_ptr(p: *const u16, len: usize) -> Result<Self, NulError<u16>> {
        // A zero length never dereferences `p`, so a null pointer is accepted in that case.
        if len == 0 {
            return Ok(UCString::default());
        }
        assert!(!p.is_null());
        let slice = std::slice::from_raw_parts(p, len);
        UCString::new(slice)
    }

    /// Constructs a new `U16CString` copied from a `u16` pointer and a length.
    ///
    /// The `len` argument is the number of `u16` elements, **not** the number of bytes.
    ///
    /// The string will **not** be checked for invalid nul values.
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements. In addition, no checking for invalid nul values is performed, so if any elements
    /// of `p` are a nul value, the resulting `U16CString` will be invalid.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
    pub unsafe fn from_ptr_unchecked(p: *const u16, len: usize) -> Self {
        if len == 0 {
            return UCString::default();
        }
        assert!(!p.is_null());
        let slice = std::slice::from_raw_parts(p, len);
        UCString::from_vec_unchecked(slice)
    }

    /// Constructs a new `U16CString` copied from a `u16` pointer and a length.
    ///
    /// The `len` argument is the number of `u16` elements, **not** the number of bytes.
    ///
    /// The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<u16>`. A `len` of 0 is treated
    /// as an empty string and does not produce an error.
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
pub unsafe fn from_ptr_with_nul( p: *const u16, len: usize, ) -> Result> { if len == 0 { return Ok(UCString::default()); } assert!(!p.is_null()); let slice = std::slice::from_raw_parts(p, len); UCString::from_vec_with_nul(slice) } /// Constructs a new `U16String` copied from a `u16` pointer and a length. /// /// The `len` argument is the number of `u16` elements, **not** the number of bytes. /// /// The data should end with a nul terminator, but no checking is done on whether the data /// actually ends with a nul terminator, or if the data contains any interior nul values. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. In addition, no checking for nul values is performed, so if there data does not /// end with a nul terminator, or if there are any interior nul values, the resulting /// `U16CString` will be invalid. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_ptr_with_nul_unchecked(p: *const u16, len: usize) -> Self { if len == 0 { return UCString::default(); } assert!(!p.is_null()); let slice = std::slice::from_raw_parts(p, len); UCString::from_vec_with_nul_unchecked(slice) } /// Constructs a `U16CString` from anything that can be converted to an `OsStr`. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = U16CString::from_os_str(s).unwrap(); /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` /// /// The following example demonstrates errors from nul values in a vector. 
///
    /// ```rust
    /// use widestring::U16CString;
    /// let s = "My\u{0}String";
    /// // Create a wide string from the string
    /// let res = U16CString::from_os_str(s);
    /// assert!(res.is_err());
    /// assert_eq!(res.err().unwrap().nul_position(), 2);
    /// ```
    pub fn from_os_str(s: impl AsRef<OsStr>) -> Result<Self, NulError<u16>> {
        let v = platform::os_to_wide(s.as_ref());
        UCString::new(v)
    }

    /// Constructs a `U16CString` from anything that can be converted to an `OsStr`, without
    /// checking for interior nul values.
    ///
    /// # Safety
    ///
    /// This method is equivalent to `from_os_str` except that no runtime assertion is made that
    /// `s` contains no nul values. Providing a string with nul values will result in an invalid
    /// `U16CString`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16CString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wcstr = unsafe { U16CString::from_os_str_unchecked(s) };
    /// # assert_eq!(wcstr.to_string_lossy(), s);
    /// ```
    pub unsafe fn from_os_str_unchecked(s: impl AsRef<OsStr>) -> Self {
        let v = platform::os_to_wide(s.as_ref());
        UCString::from_vec_unchecked(v)
    }

    /// Constructs a `U16CString` from anything that can be converted to an `OsStr` with a nul
    /// terminator.
    ///
    /// The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<u16>`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16CString;
    /// let s = "My\u{0}String";
    /// // Create a wide string from the string
    /// let wcstr = U16CString::from_os_str_with_nul(s).unwrap();
    /// assert_eq!(wcstr.to_string_lossy(), "My");
    /// ```
    ///
    /// The following example demonstrates errors from missing nul values in a vector.
/// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let res = U16CString::from_os_str_with_nul(s); /// assert!(res.is_err()); /// ``` pub fn from_os_str_with_nul(s: impl AsRef) -> Result> { let v = platform::os_to_wide(s.as_ref()); UCString::from_vec_with_nul(v) } /// Constructs a `U16CString` from anything that can be converted to an `OsStr` that should /// have a terminating nul, but without checking for any nul values. /// /// # Safety /// /// This method is equivalent to `from_os_str_with_nul` except that no runtime assertion is /// made that `s` contains no nul values. Providing a vector with interior nul values or /// without a terminating nul value will result in an invalid `U16CString`. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "My String\u{0}"; /// // Create a wide string from the string /// let wcstr = unsafe { U16CString::from_os_str_with_nul_unchecked(s) }; /// assert_eq!(wcstr.to_string_lossy(), "My String"); /// ``` pub unsafe fn from_os_str_with_nul_unchecked(s: impl AsRef) -> Self { let v = platform::os_to_wide(s.as_ref()); UCString::from_vec_with_nul_unchecked(v) } } impl UCString { /// Constructs a `U32CString` from a container of wide character data. /// /// This method will consume the provided data and use the underlying elements to construct a /// new string. The data will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain the `Vec` as well as the position of the nul value. 
///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v: Vec<char> = "Test".chars().collect();
    /// # let cloned: Vec<u32> = v.iter().map(|&c| c as u32).collect();
    /// // Create a wide string from the vector
    /// let wcstr = U32CString::from_chars(v).unwrap();
    /// # assert_eq!(wcstr.into_vec(), cloned);
    /// ```
    ///
    /// The following example demonstrates errors from nul values in a vector.
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v: Vec<char> = "T\u{0}est".chars().collect();
    /// // Create a wide string from the vector
    /// let res = U32CString::from_chars(v);
    /// assert!(res.is_err());
    /// assert_eq!(res.err().unwrap().nul_position(), 1);
    /// ```
    pub fn from_chars(v: impl Into<Vec<char>>) -> Result<Self, NulError<u32>> {
        // Convert element by element instead of transmuting the `Vec` itself: the layout of
        // `Vec<T>` is unspecified, so reinterpreting `Vec<char>` as `Vec<u32>` is not guaranteed
        // to be sound. Every `char` is a valid `u32`, so the cast is lossless.
        let chars: Vec<char> = v.into();
        let v: Vec<u32> = chars.into_iter().map(|c| c as u32).collect();
        UCString::new(v)
    }

    /// Constructs a `U32CString` from a nul-terminated container of UTF-32 data.
    ///
    /// This method will consume the provided data and use the underlying elements to construct a
    /// new string. The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<u32>`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let v: Vec<char> = "Test\u{0}".chars().collect();
    /// # let cloned: Vec<u32> = v[..4].iter().map(|&c| c as u32).collect();
    /// // Create a wide string from the vector
    /// let wcstr = U32CString::from_chars_with_nul(v).unwrap();
    /// # assert_eq!(wcstr.into_vec(), cloned);
    /// ```
    ///
    /// The following example demonstrates errors from missing nul values in a vector.
/// /// ```rust /// use widestring::U32CString; /// let v: Vec = "Test".chars().collect(); /// // Create a wide string from the vector /// let res = U32CString::from_chars_with_nul(v); /// assert!(res.is_err()); /// ``` pub fn from_chars_with_nul(v: impl Into>) -> Result> { let v: Vec = unsafe { mem::transmute(v.into()) }; UCString::from_vec_with_nul(v) } /// Creates a `U32CString` from a vector without checking for interior nul values. /// /// A terminating nul value will be appended if the vector does not already have a terminating /// nul. /// /// # Safety /// /// This method is equivalent to `new` except that no runtime assertion is made that `v` /// contains no nul values. Providing a vector with nul values will result in an invalid /// `U32CString`. pub unsafe fn from_chars_unchecked(v: impl Into>) -> Self { let v: Vec = mem::transmute(v.into()); UCString::from_vec_unchecked(v) } /// Creates a `U32CString` from a vector that should have a nul terminator, without checking /// for any nul values. /// /// # Safety /// /// This method is equivalent to `from_vec_with_nul` except that no runtime assertion is made /// that `v` contains no nul values. Providing a vector with interior nul values or without a /// terminating nul value will result in an invalid `U32CString`. pub unsafe fn from_chars_with_nul_unchecked(v: impl Into>) -> Self { let v: Vec = mem::transmute(v.into()); UCString::from_vec_with_nul_unchecked(v) } /// Constructs a `U32CString` from a `str`. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. 
/// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = U32CString::from_str(s).unwrap(); /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` /// /// The following example demonstrates errors from nul values in a vector. /// /// ```rust /// use widestring::U32CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let res = U32CString::from_str(s); /// assert!(res.is_err()); /// assert_eq!(res.err().unwrap().nul_position(), 2); /// ``` pub fn from_str(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().chars().collect(); UCString::from_chars(v) } /// Constructs a `U32CString` from a `str`, without checking for interior nul values. /// /// # Safety /// /// This method is equivalent to `from_str` except that no runtime assertion is made that `s` /// contains no nul values. Providing a string with nul values will result in an invalid /// `U32CString`. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = unsafe { U32CString::from_str_unchecked(s) }; /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` pub unsafe fn from_str_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().chars().collect(); UCString::from_chars_unchecked(v) } /// Constructs a `U32CString` from a `str` with a nul terminator. /// /// The string will be truncated at the first nul value in the string. /// /// # Failures /// /// This function will return an error if the data does not contain a nul to terminate the /// string. The returned error will contain the consumed `Vec`. 
/// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let wcstr = U32CString::from_str_with_nul(s).unwrap(); /// assert_eq!(wcstr.to_string_lossy(), "My"); /// ``` /// /// The following example demonstrates errors from missing nul values in a vector. /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let res = U32CString::from_str_with_nul(s); /// assert!(res.is_err()); /// ``` pub fn from_str_with_nul(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().chars().collect(); UCString::from_chars_with_nul(v) } /// Constructs a `U32CString` from a `str` that should have a terminating nul, but without /// checking for any nul values. /// /// # Safety /// /// This method is equivalent to `from_str_with_nul` except that no runtime assertion is made /// that `s` contains no nul values. Providing a vector with interior nul values or without a /// terminating nul value will result in an invalid `U32CString`. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "My String\u{0}"; /// // Create a wide string from the string /// let wcstr = unsafe { U32CString::from_str_with_nul_unchecked(s) }; /// assert_eq!(wcstr.to_string_lossy(), "My String"); /// ``` pub unsafe fn from_str_with_nul_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().chars().collect(); UCString::from_chars_with_nul_unchecked(v) } /// Constructs a new `U32CString` copied from a `u32` pointer and a length. /// /// The `len` argument is the number of `u32` elements, **not** the number of bytes. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. 
///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
    pub unsafe fn from_ptr(p: *const u32, len: usize) -> Result<Self, NulError<u32>> {
        // A zero length never reads through `p`, so a null pointer is accepted in that case.
        if len == 0 {
            return Ok(UCString::default());
        }
        assert!(!p.is_null());
        let slice = std::slice::from_raw_parts(p, len);
        UCString::new(slice)
    }

    /// Constructs a new `U32CString` copied from a `u32` pointer and a length.
    ///
    /// The `len` argument is the number of `u32` elements, **not** the number of bytes.
    ///
    /// The string will **not** be checked for invalid nul values.
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements. In addition, no checking for invalid nul values is performed, so if any elements
    /// of `p` are a nul value, the resulting `U32CString` will be invalid.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
    pub unsafe fn from_ptr_unchecked(p: *const u32, len: usize) -> Self {
        // A zero length never reads through `p`, so a null pointer is accepted in that case.
        if len == 0 {
            return UCString::default();
        }
        assert!(!p.is_null());
        let slice = std::slice::from_raw_parts(p, len);
        UCString::from_vec_unchecked(slice)
    }

    /// Constructs a new `U32CString` copied from a `u32` pointer and a length.
    ///
    /// The `len` argument is the number of `u32` elements, **not** the number of bytes.
    ///
    /// The string will be truncated at the first nul value in the string.
    ///
    /// # Failures
    ///
    /// This function will return an error if the data does not contain a nul to terminate the
    /// string. The returned error will contain the consumed `Vec<u32>`.
    ///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements.
    ///
    /// # Panics
    ///
    /// Panics if `len` is greater than 0 but `p` is a null pointer.
pub unsafe fn from_ptr_with_nul( p: *const u32, len: usize, ) -> Result> { if len == 0 { return Ok(UCString::default()); } assert!(!p.is_null()); let slice = std::slice::from_raw_parts(p, len); UCString::from_vec_with_nul(slice) } /// Constructs a new `U32String` copied from a `u32` pointer and a length. /// /// The `len` argument is the number of `u32` elements, **not** the number of bytes. /// /// The data should end with a nul terminator, but no checking is done on whether the data /// actually ends with a nul terminator, or if the data contains any interior nul values. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. In addition, no checking for nul values is performed, so if there data does not /// end with a nul terminator, or if there are any interior nul values, the resulting /// `U32CString` will be invalid. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_ptr_with_nul_unchecked(p: *const u32, len: usize) -> Self { if len == 0 { return UCString::default(); } assert!(!p.is_null()); let slice = std::slice::from_raw_parts(p, len); UCString::from_vec_with_nul_unchecked(slice) } /// Constructs a new `U32CString` copied from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. 
pub unsafe fn from_char_ptr(p: *const char, len: usize) -> Result> { UCString::::from_ptr(p as *const u32, len) } /// Constructs a new `U32CString` copied from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// The string will **not** be checked for invalid nul values. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. In addition, no checking for invalid nul values is performed, so if any elements /// of `p` are a nul value, the resulting `U32CString` will be invalid. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_char_ptr_unchecked(p: *const char, len: usize) -> Self { UCString::::from_ptr_unchecked(p as *const u32, len) } /// Constructs a new `U32String` copied from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// The string will be truncated at the first nul value in the string. /// /// # Failures /// /// This function will return an error if the data does not contain a nul to terminate the /// string. The returned error will contain the consumed `Vec`. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_char_ptr_with_nul( p: *const char, len: usize, ) -> Result> { UCString::::from_ptr_with_nul(p as *const u32, len) } /// Constructs a new `U32String` copied from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// The data should end with a nul terminator, but no checking is done on whether the data /// actually ends with a nul terminator, or if the data contains any interior nul values. 
/// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. In addition, no checking for nul values is performed, so if there data does not /// end with a nul terminator, or if there are any interior nul values, the resulting /// `U32CString` will be invalid. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_char_ptr_with_nul_unchecked(p: *const char, len: usize) -> Self { UCString::::from_ptr_with_nul_unchecked(p as *const u32, len) } /// Constructs a `U32CString` from anything that can be converted to an `OsStr`. /// /// The string will be scanned for invalid nul values. /// /// # Failures /// /// This function will return an error if the data contains a nul value. /// The returned error will contain a `Vec` as well as the position of the nul value. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = U32CString::from_os_str(s).unwrap(); /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` /// /// The following example demonstrates errors from nul values in a vector. /// /// ```rust /// use widestring::U32CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let res = U32CString::from_os_str(s); /// assert!(res.is_err()); /// assert_eq!(res.err().unwrap().nul_position(), 2); /// ``` pub fn from_os_str(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().to_string_lossy().chars().collect(); UCString::from_chars(v) } /// Constructs a `U32CString` from anything that can be converted to an `OsStr`, without /// checking for interior nul values. /// /// # Safety /// /// This method is equivalent to `from_os_str` except that no runtime assertion is made that /// `s` contains no nul values. Providing a string with nul values will result in an invalid /// `U32CString`. 
/// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wcstr = unsafe { U32CString::from_os_str_unchecked(s) }; /// # assert_eq!(wcstr.to_string_lossy(), s); /// ``` pub unsafe fn from_os_str_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().to_string_lossy().chars().collect(); UCString::from_chars_unchecked(v) } /// Constructs a `U32CString` from anything that can be converted to an `OsStr` with a nul /// terminator. /// /// The string will be truncated at the first nul value in the string. /// /// # Failures /// /// This function will return an error if the data does not contain a nul to terminate the /// string. The returned error will contain the consumed `Vec`. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "My\u{0}String"; /// // Create a wide string from the string /// let wcstr = U32CString::from_os_str_with_nul(s).unwrap(); /// assert_eq!(wcstr.to_string_lossy(), "My"); /// ``` /// /// The following example demonstrates errors from missing nul values in a vector. /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let res = U32CString::from_os_str_with_nul(s); /// assert!(res.is_err()); /// ``` pub fn from_os_str_with_nul(s: impl AsRef) -> Result> { let v: Vec = s.as_ref().to_string_lossy().chars().collect(); UCString::from_chars_with_nul(v) } /// Constructs a `U32CString` from anything that can be converted to an `OsStr` that should /// have a terminating nul, but without checking for any nul values. /// /// # Safety /// /// This method is equivalent to `from_os_str_with_nul` except that no runtime assertion is /// made that `s` contains no nul values. Providing a vector with interior nul values or /// without a terminating nul value will result in an invalid `U32CString`. 
/// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "My String\u{0}"; /// // Create a wide string from the string /// let wcstr = unsafe { U32CString::from_os_str_with_nul_unchecked(s) }; /// assert_eq!(wcstr.to_string_lossy(), "My String"); /// ``` pub unsafe fn from_os_str_with_nul_unchecked(s: impl AsRef) -> Self { let v: Vec = s.as_ref().to_string_lossy().chars().collect(); UCString::from_chars_with_nul_unchecked(v) } } impl UCStr { /// Coerces a value into a `UCStr`. pub fn new> + ?Sized>(s: &S) -> &Self { s.as_ref() } /// Constructs a `UStr` from a nul-terminated string pointer. /// /// This will scan for nul values beginning with `p`. The first nul value will be used as the /// nul terminator for the string, similar to how libc string functions such as `strlen` work. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid or has a /// nul terminator, and the function could scan past the underlying buffer. /// /// `p` must be non-null. /// /// # Panics /// /// This function panics if `p` is null. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_ptr_str<'a>(p: *const C) -> &'a Self { assert!(!p.is_null()); let mut i: isize = 0; while *p.offset(i) != UChar::NUL { i = i + 1; } mem::transmute(std::slice::from_raw_parts(p, i as usize + 1)) } /// Constructs a `UStr` from a pointer and a length. /// /// The `len` argument is the number of elements, **not** the number of bytes, and does /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that /// `p` is a pointer directly to the nul terminator of the string. 
///
    /// # Safety
    ///
    /// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
    /// elements.
    ///
    /// `p` must be non-null, even for zero `len`.
    ///
    /// The interior values of the pointer are not scanned for nul. Any interior nul values will
    /// result in an invalid `UCStr`.
    ///
    /// # Panics
    ///
    /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`.
    /// Only pointers with a nul terminator are valid.
    ///
    /// # Caveat
    ///
    /// The lifetime for the returned string is inferred from its usage. To prevent accidental
    /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the
    /// context, such as by providing a helper function taking the lifetime of a host value for the
    /// string, or by explicit annotation.
    pub unsafe fn from_ptr_with_nul<'a>(p: *const C, len: usize) -> &'a Self {
        // Enforce the documented panic on null before `p` is dereferenced below.
        assert!(!p.is_null());
        assert!(*p.offset(len as isize) == UChar::NUL);
        // `len + 1` so that the slice includes the nul terminator just verified.
        let with_nul = std::slice::from_raw_parts(p, len + 1);
        &*(with_nul as *const [C] as *const Self)
    }

    /// Constructs a `UCStr` from a slice of values that has a nul terminator.
    ///
    /// The slice will be scanned for nul values. When a nul value is found, it is treated as the
    /// terminator for the string, and the `UCStr` slice will be truncated to that nul.
    ///
    /// # Failure
    ///
    /// If there are no nul values in the slice, an error is returned.
    pub fn from_slice_with_nul(slice: &[C]) -> Result<&Self, MissingNulError<C>> {
        match slice.iter().position(|x| *x == UChar::NUL) {
            None => Err(MissingNulError(None)),
            // Keep everything up to and including the first nul.
            Some(i) => Ok(unsafe { UCStr::from_slice_with_nul_unchecked(&slice[..i + 1]) }),
        }
    }

    /// Constructs a `UCStr` from a slice of values that has a nul terminator. No
    /// checking for nul values is performed.
    ///
    /// # Safety
    ///
    /// This function is unsafe because it can lead to invalid `UCStr` values when the slice
    /// is missing a terminating nul value or there are non-terminating interior nul values
    /// in the slice.
pub unsafe fn from_slice_with_nul_unchecked(slice: &[C]) -> &Self {
        // Reinterpret the slice via a pointer cast rather than `transmute`; the returned
        // reference keeps the lifetime of `slice` through the signature.
        &*(slice as *const [C] as *const Self)
    }

    /// Copies the wide string to a new owned `UCString`.
    pub fn to_ucstring(&self) -> UCString<C> {
        unsafe { UCString::from_vec_with_nul_unchecked(self.inner.to_owned()) }
    }

    /// Copies the wide string to a new owned `UString`.
    ///
    /// The `UString` will **not** have a nul terminator.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16CString;
    /// let wcstr = U16CString::from_str("MyString").unwrap();
    /// // Convert U16CString to a U16String
    /// let wstr = wcstr.to_ustring();
    ///
    /// // U16CString will have a terminating nul
    /// let wcvec = wcstr.into_vec_with_nul();
    /// assert_eq!(wcvec[wcvec.len()-1], 0);
    /// // The resulting U16String will not have the terminating nul
    /// let wvec = wstr.into_vec();
    /// assert_ne!(wvec[wvec.len()-1], 0);
    /// ```
    ///
    /// ```rust
    /// use widestring::U32CString;
    /// let wcstr = U32CString::from_str("MyString").unwrap();
    /// // Convert U32CString to a U32String
    /// let wstr = wcstr.to_ustring();
    ///
    /// // U32CString will have a terminating nul
    /// let wcvec = wcstr.into_vec_with_nul();
    /// assert_eq!(wcvec[wcvec.len()-1], 0);
    /// // The resulting U32String will not have the terminating nul
    /// let wvec = wstr.into_vec();
    /// assert_ne!(wvec[wvec.len()-1], 0);
    /// ```
    pub fn to_ustring(&self) -> UString<C> {
        UString::from_vec(self.as_slice())
    }

    /// Converts to a slice of the wide string.
    ///
    /// The slice will **not** include the nul terminator.
    pub fn as_slice(&self) -> &[C] {
        &self.inner[..self.len()]
    }

    /// Converts to a slice of the wide string, including the nul terminator.
    pub fn as_slice_with_nul(&self) -> &[C] {
        &self.inner
    }

    /// Returns a raw pointer to the wide string.
    ///
    /// The pointer is valid only as long as the lifetime of this reference.
    pub fn as_ptr(&self) -> *const C {
        self.inner.as_ptr()
    }

    /// Returns the length of the wide string as number of elements (**not** number of bytes)
    /// **not** including nul terminator.
pub fn len(&self) -> usize { self.inner.len() - 1 } /// Returns whether this wide string contains no data (i.e. is only the nul terminator). pub fn is_empty(&self) -> bool { self.len() == 0 } /// Converts a `Box` into a `UCString` without copying or allocating. /// /// # Examples /// /// ``` /// use widestring::U16CString; /// /// let v = vec![102u16, 111u16, 111u16]; // "foo" /// let c_string = U16CString::new(v.clone()).unwrap(); /// let boxed = c_string.into_boxed_ucstr(); /// assert_eq!(boxed.into_ucstring(), U16CString::new(v).unwrap()); /// ``` /// /// ``` /// use widestring::U32CString; /// /// let v = vec![102u32, 111u32, 111u32]; // "foo" /// let c_string = U32CString::new(v.clone()).unwrap(); /// let boxed = c_string.into_boxed_ucstr(); /// assert_eq!(boxed.into_ucstring(), U32CString::new(v).unwrap()); /// ``` pub fn into_ucstring(self: Box) -> UCString { let raw = Box::into_raw(self) as *mut [C]; UCString { inner: unsafe { Box::from_raw(raw) }, } } fn from_inner(slice: &[C]) -> &UCStr { unsafe { mem::transmute(slice) } } } impl UCStr { /// Decodes a wide string to an owned `OsString`. /// /// This makes a string copy of the `U16CStr`. Since `U16CStr` makes no guarantees that it is /// valid UTF-16, there is no guarantee that the resulting `OsString` will be valid data. The /// `OsString` will **not** have a nul terminator. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// use std::ffi::OsString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16CString::from_str(s).unwrap(); /// // Create an OsString from the wide string /// let osstr = wstr.to_os_string(); /// /// assert_eq!(osstr, OsString::from(s)); /// ``` pub fn to_os_string(&self) -> OsString { platform::os_from_wide(self.as_slice()) } /// Copies the wide string to a `String` if it contains valid UTF-16 data. /// /// # Failures /// /// Returns an error if the string contains any invalid UTF-16 data. 
/// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16CString::from_str(s).unwrap(); /// // Create a regular string from the wide string /// let s2 = wstr.to_string().unwrap(); /// /// assert_eq!(s2, s); /// ``` pub fn to_string(&self) -> Result { String::from_utf16(self.as_slice()) } /// Copies the wide string to a `String`. /// /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. /// /// # Examples /// /// ```rust /// use widestring::U16CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16CString::from_str(s).unwrap(); /// // Create a regular string from the wide string /// let s2 = wstr.to_string_lossy(); /// /// assert_eq!(s2, s); /// ``` pub fn to_string_lossy(&self) -> String { String::from_utf16_lossy(self.as_slice()) } } impl UCStr { /// Constructs a `U32Str` from a `char` nul-terminated string pointer. /// /// This will scan for nul values beginning with `p`. The first nul value will be used as the /// nul terminator for the string, similar to how libc string functions such as `strlen` work. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid or has a /// nul terminator, and the function could scan past the underlying buffer. /// /// `p` must be non-null. /// /// # Panics /// /// This function panics if `p` is null. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_char_ptr_str<'a>(p: *const char) -> &'a Self { UCStr::from_ptr_str(p as *const u32) } /// Constructs a `U32Str` from a `char` pointer and a length. 
/// /// The `len` argument is the number of `char` elements, **not** the number of bytes, and does /// **not** include the nul terminator of the string. Thus, a `len` of 0 is valid and means that /// `p` is a pointer directly to the nul terminator of the string. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// `p` must be non-null, even for zero `len`. /// /// The interior values of the pointer are not scanned for nul. Any interior nul values will /// result in an invalid `U32CStr`. /// /// # Panics /// /// This function panics if `p` is null or if a nul value is not found at offset `len` of `p`. /// Only pointers with a nul terminator are valid. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_char_ptr_with_nul<'a>(p: *const char, len: usize) -> &'a Self { UCStr::from_ptr_with_nul(p as *const u32, len) } /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator. /// /// The slice will be scanned for nul values. When a nul value is found, it is treated as the /// terminator for the string, and the `U32CStr` slice will be truncated to that nul. /// /// # Failure /// /// If there are no no nul values in `slice`, an error is returned. pub fn from_char_slice_with_nul(slice: &[char]) -> Result<&Self, MissingNulError> { UCStr::from_slice_with_nul(unsafe { mem::transmute(slice) }) } /// Constructs a `U32CStr` from a slice of `char` values that has a nul terminator. No /// checking for nul values is performed. 
/// /// # Safety /// /// This function is unsafe because it can lead to invalid `U32CStr` values when `slice` /// is missing a terminating nul value or there are non-terminating interior nul values /// in the slice. pub unsafe fn from_char_slice_with_nul_unchecked(slice: &[char]) -> &Self { UCStr::from_slice_with_nul_unchecked(mem::transmute(slice)) } /// Decodes a wide string to an owned `OsString`. /// /// This makes a string copy of the `U32CStr`. Since `U32CStr` makes no guarantees that it is /// valid UTF-32, there is no guarantee that the resulting `OsString` will be valid data. The /// `OsString` will **not** have a nul terminator. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// use std::ffi::OsString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32CString::from_str(s).unwrap(); /// // Create an OsString from the wide string /// let osstr = wstr.to_os_string(); /// /// assert_eq!(osstr, OsString::from(s)); /// ``` pub fn to_os_string(&self) -> OsString { self.to_ustring().to_os_string() } /// Copies the wide string to a `String` if it contains valid UTF-32 data. /// /// # Failures /// /// Returns an error if the string contains any invalid UTF-32 data. /// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32CString::from_str(s).unwrap(); /// // Create a regular string from the wide string /// let s2 = wstr.to_string().unwrap(); /// /// assert_eq!(s2, s); /// ``` pub fn to_string(&self) -> Result { self.to_ustring().to_string() } /// Copies the wide string to a `String`. /// /// Any non-Unicode sequences are replaced with U+FFFD REPLACEMENT CHARACTER. 
/// /// # Examples /// /// ```rust /// use widestring::U32CString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32CString::from_str(s).unwrap(); /// // Create a regular string from the wide string /// let s2 = wstr.to_string_lossy(); /// /// assert_eq!(s2, s); /// ``` pub fn to_string_lossy(&self) -> String { self.to_ustring().to_string_lossy() } } impl Into> for UCString { fn into(self) -> Vec { self.into_vec() } } impl<'a> From> for std::borrow::Cow<'a, UCStr> { fn from(s: UCString) -> std::borrow::Cow<'a, UCStr> { std::borrow::Cow::Owned(s) } } impl<'a> From> for std::borrow::Cow<'a, UCStr> { fn from(s: UCString) -> std::borrow::Cow<'a, UCStr> { std::borrow::Cow::Owned(s) } } impl From> for OsString { fn from(s: UCString) -> OsString { s.to_os_string() } } impl From> for OsString { fn from(s: UCString) -> OsString { s.to_os_string() } } impl From> for UString { fn from(s: UCString) -> Self { s.to_ustring() } } impl<'a, C: UChar, T: ?Sized + AsRef>> From<&'a T> for UCString { fn from(s: &'a T) -> Self { s.as_ref().to_ucstring() } } impl std::ops::Index for UCString { type Output = UCStr; #[inline] fn index(&self, _index: std::ops::RangeFull) -> &UCStr { UCStr::from_inner(&self.inner) } } impl std::ops::Deref for UCString { type Target = UCStr; #[inline] fn deref(&self) -> &UCStr { &self[..] 
} } impl<'a> Default for &'a UCStr { fn default() -> Self { const SLICE: &'static [u16] = &[UChar::NUL]; unsafe { UCStr::from_slice_with_nul_unchecked(SLICE) } } } impl<'a> Default for &'a UCStr { fn default() -> Self { const SLICE: &'static [u32] = &[UChar::NUL]; unsafe { UCStr::from_slice_with_nul_unchecked(SLICE) } } } impl Default for UCString { fn default() -> Self { let def: &UCStr = Default::default(); def.to_ucstring() } } impl Default for UCString { fn default() -> Self { let def: &UCStr = Default::default(); def.to_ucstring() } } // Turns this `U16CString` into an empty string to prevent // memory unsafe code from working by accident. Inline // to prevent LLVM from optimizing it away in debug builds. impl Drop for UCString { #[inline] fn drop(&mut self) { unsafe { *self.inner.get_unchecked_mut(0) = UChar::NUL; } } } impl std::borrow::Borrow> for UCString { fn borrow(&self) -> &UCStr { &self[..] } } impl ToOwned for UCStr { type Owned = UCString; fn to_owned(&self) -> UCString { self.to_ucstring() } } impl<'a> From<&'a UCStr> for std::borrow::Cow<'a, UCStr> { fn from(s: &'a UCStr) -> std::borrow::Cow<'a, UCStr> { std::borrow::Cow::Borrowed(s) } } impl<'a> From<&'a UCStr> for std::borrow::Cow<'a, UCStr> { fn from(s: &'a UCStr) -> std::borrow::Cow<'a, UCStr> { std::borrow::Cow::Borrowed(s) } } impl AsRef> for UCStr { fn as_ref(&self) -> &Self { self } } impl AsRef> for UCString { fn as_ref(&self) -> &UCStr { self } } impl AsRef<[C]> for UCStr { fn as_ref(&self) -> &[C] { self.as_slice() } } impl AsRef<[C]> for UCString { fn as_ref(&self) -> &[C] { self.as_slice() } } impl<'a, C: UChar> From<&'a UCStr> for Box> { fn from(s: &'a UCStr) -> Box> { let boxed: Box<[C]> = Box::from(s.as_slice_with_nul()); unsafe { Box::from_raw(Box::into_raw(boxed) as *mut UCStr) } } } impl From>> for UCString { #[inline] fn from(s: Box>) -> Self { s.into_ucstring() } } impl From> for Box> { #[inline] fn from(s: UCString) -> Box> { s.into_boxed_ucstr() } } impl Default for Box> { 
fn default() -> Box> { let boxed: Box<[C]> = Box::from([UChar::NUL]); unsafe { Box::from_raw(Box::into_raw(boxed) as *mut UCStr) } } } impl NulError { /// Returns the position of the nul value in the slice that was provided to `U16CString`. pub fn nul_position(&self) -> usize { self.0 } /// Consumes this error, returning the underlying vector of u16 values which generated the error /// in the first place. pub fn into_vec(self) -> Vec { self.1 } } impl Into> for NulError { fn into(self) -> Vec { self.into_vec() } } impl std::fmt::Display for NulError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "nul value found at position {}", self.0) } } impl std::error::Error for NulError { fn description(&self) -> &str { "nul value found" } } impl MissingNulError { /// Consumes this error, returning the underlying vector of `u16` values which generated the /// error in the first place. pub fn into_vec(self) -> Option> { self.0 } } impl std::fmt::Display for MissingNulError { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "missing terminating nul value") } } impl std::error::Error for MissingNulError { fn description(&self) -> &str { "missing terminating nul value" } } widestring-0.4.0/src/ustring.rs010066400017500001750000000726341333573070700147710ustar0000000000000000use super::platform; use super::UChar; use std; use std::char; use std::ffi::{OsStr, OsString}; use std::mem; /// An owned, mutable "wide" string for FFI that is **not** nul-aware. /// /// `UString` is not aware of nul values. Strings may or may not be nul-terminated, and may /// contain invalid and ill-formed UTF-16 or UTF-32 data. These strings are intended to be used /// with FFI functions that directly use string length, where the strings are known to have proper /// nul-termination already, or where strings are merely being passed through without modification. /// /// `UCString` should be used instead if nul-aware strings are required. 
///
/// `UString` can be converted to and from many other standard Rust string types, including
/// `OsString` and `String`, making proper Unicode FFI safe and easy.
///
/// Please prefer using the type aliases `U16String` or `U32String` or `WideString` to using this
/// type directly.
///
/// # Examples
///
/// The following example constructs a `U16String` and shows how to convert a `U16String` to a
/// regular Rust `String`.
///
/// ```rust
/// use widestring::U16String;
/// let s = "Test";
/// // Create a wide string from the rust string
/// let wstr = U16String::from_str(s);
/// // Convert back to a rust string
/// let rust_str = wstr.to_string_lossy();
/// assert_eq!(rust_str, "Test");
/// ```
///
/// The same example using `U32String` instead:
///
/// ```rust
/// use widestring::U32String;
/// let s = "Test";
/// // Create a wide string from the rust string
/// let wstr = U32String::from_str(s);
/// // Convert back to a rust string
/// let rust_str = wstr.to_string_lossy();
/// assert_eq!(rust_str, "Test");
/// ```
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UString<C: UChar> {
    inner: Vec<C>,
}

/// String slice reference for `UString`.
///
/// `UStr` is to `UString` as `str` is to `String`.
///
/// `UStr` is not aware of nul values. Strings may or may not be nul-terminated, and may
/// contain invalid and ill-formed UTF-16 or UTF-32 data. These strings are intended to be used
/// with FFI functions that directly use string length, where the strings are known to have proper
/// nul-termination already, or where strings are merely being passed through without modification.
///
/// `UCStr` should be used instead if nul-aware strings are required.
///
/// `UStr` can be converted to many other string types, including `OsString` and `String`, making
/// proper Unicode FFI safe and easy.
///
/// Please prefer using the type aliases `U16Str` or `U32Str` or `WideStr` to using this type
/// directly.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct UStr { inner: [C], } /// A possible error value when converting a String from a UTF-32 byte slice. /// /// This type is the error type for the `to_string` method on `U32Str`. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct FromUtf32Error(); impl UString { /// Constructs a new empty `UString`. pub fn new() -> Self { Self { inner: vec![] } } /// Constructs a `UString` from a vector of possibly invalid or ill-formed UTF-16 or UTF-32 /// data. /// /// No checks are made on the contents of the vector. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let v = vec![84u16, 104u16, 101u16]; // 'T' 'h' 'e' /// # let cloned = v.clone(); /// // Create a wide string from the vector /// let wstr = U16String::from_vec(v); /// # assert_eq!(wstr.into_vec(), cloned); /// ``` /// /// ```rust /// use widestring::U32String; /// let v = vec![84u32, 104u32, 101u32]; // 'T' 'h' 'e' /// # let cloned = v.clone(); /// // Create a wide string from the vector /// let wstr = U32String::from_vec(v); /// # assert_eq!(wstr.into_vec(), cloned); /// ``` pub fn from_vec(raw: impl Into>) -> Self { Self { inner: raw.into() } } /// Constructs a `UString` from a pointer and a length. /// /// The `len` argument is the number of elements, **not** the number of bytes. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_ptr(p: *const C, len: usize) -> Self { if len == 0 { return Self::new(); } assert!(!p.is_null()); let slice = std::slice::from_raw_parts(p, len); Self::from_vec(slice) } /// Creates a `UString` with the given capacity. /// /// The string will be able to hold exactly `capacity` partial code units without reallocating. /// If `capacity` is set to 0, the string will not initially allocate. 
pub fn with_capacity(capacity: usize) -> Self {
        Self {
            inner: Vec::with_capacity(capacity),
        }
    }

    /// Returns the capacity this `UString` can hold without reallocating.
    pub fn capacity(&self) -> usize {
        self.inner.capacity()
    }

    /// Truncate the `UString` to zero length.
    pub fn clear(&mut self) {
        self.inner.clear()
    }

    /// Reserves capacity for at least `additional` more elements to be inserted in the given
    /// `UString`.
    ///
    /// More space may be reserved to avoid frequent allocations.
    pub fn reserve(&mut self, additional: usize) {
        self.inner.reserve(additional)
    }

    /// Reserves the minimum capacity for exactly `additional` more elements to be inserted in the
    /// given `UString`. Does nothing if the capacity is already sufficient.
    ///
    /// Note that the allocator may give more space than is requested. Therefore capacity can not
    /// be relied upon to be precisely minimal. Prefer `reserve` if future insertions are expected.
    pub fn reserve_exact(&mut self, additional: usize) {
        self.inner.reserve_exact(additional)
    }

    /// Converts the wide string into a `Vec`, consuming the string in the process.
    pub fn into_vec(self) -> Vec<C> {
        self.inner
    }

    /// Converts to a `UStr` reference.
    pub fn as_ustr(&self) -> &UStr<C> {
        self
    }

    /// Extends the wide string with the given `&UStr`.
    ///
    /// No checks are performed on the strings. It is possible to end up with nul values inside
    /// the string, and it is up to the caller to determine if that is acceptable.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16String;
    /// let s = "MyString";
    /// let mut wstr = U16String::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
    /// wstr.push(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    ///
    /// ```rust
    /// use widestring::U32String;
    /// let s = "MyString";
    /// let mut wstr = U32String::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
/// wstr.push(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    pub fn push(&mut self, s: impl AsRef<UStr<C>>) {
        self.inner.extend_from_slice(&s.as_ref().inner)
    }

    /// Extends the wide string with the given slice.
    ///
    /// No checks are performed on the strings. It is possible to end up with nul values inside
    /// the string, and it is up to the caller to determine if that is acceptable.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16String;
    /// let s = "MyString";
    /// let mut wstr = U16String::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
    /// wstr.push_slice(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    ///
    /// ```rust
    /// use widestring::U32String;
    /// let s = "MyString";
    /// let mut wstr = U32String::from_str(s);
    /// let cloned = wstr.clone();
    /// // Push the clone to the end, repeating the string twice.
    /// wstr.push_slice(cloned);
    ///
    /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString");
    /// ```
    pub fn push_slice(&mut self, s: impl AsRef<[C]>) {
        // `as_ref()` already yields `&[C]`; no extra borrow is needed.
        self.inner.extend_from_slice(s.as_ref())
    }

    /// Shrinks the capacity of the `UString` to match its length.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U16String;
    ///
    /// let mut s = U16String::from_str("foo");
    ///
    /// s.reserve(100);
    /// assert!(s.capacity() >= 100);
    ///
    /// s.shrink_to_fit();
    /// assert_eq!(3, s.capacity());
    /// ```
    ///
    /// ```rust
    /// use widestring::U32String;
    ///
    /// let mut s = U32String::from_str("foo");
    ///
    /// s.reserve(100);
    /// assert!(s.capacity() >= 100);
    ///
    /// s.shrink_to_fit();
    /// assert_eq!(3, s.capacity());
    /// ```
    pub fn shrink_to_fit(&mut self) {
        self.inner.shrink_to_fit();
    }

    /// Converts this `UString` into a boxed `UStr`.
/// /// # Examples /// /// ``` /// use widestring::{U16String, U16Str}; /// /// let s = U16String::from_str("hello"); /// /// let b: Box = s.into_boxed_ustr(); /// ``` /// /// ``` /// use widestring::{U32String, U32Str}; /// /// let s = U32String::from_str("hello"); /// /// let b: Box = s.into_boxed_ustr(); /// ``` pub fn into_boxed_ustr(self) -> Box> { let rw = Box::into_raw(self.inner.into_boxed_slice()) as *mut UStr; unsafe { Box::from_raw(rw) } } } impl UString { /// Encodes a `U16String` copy from a `str`. /// /// This makes a wide string copy of the `str`. Since `str` will always be valid UTF-8, the /// resulting `U16String` will also be valid UTF-16. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16String::from_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), s); /// ``` pub fn from_str + ?Sized>(s: &S) -> Self { Self { inner: s.as_ref().encode_utf16().collect(), } } /// Encodes a `U16String` copy from an `OsStr`. /// /// This makes a wide string copy of the `OsStr`. Since `OsStr` makes no guarantees that it is /// valid data, there is no guarantee that the resulting `U16String` will be valid UTF-16. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16String::from_os_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), s); /// ``` pub fn from_os_str + ?Sized>(s: &S) -> Self { Self { inner: platform::os_to_wide(s.as_ref()), } } /// Extends the string with the given `&str`. /// /// No checks are performed on the strings. It is possible to end up nul values inside the /// string, and it is up to the caller to determine if that is acceptable. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// let mut wstr = U16String::from_str(s); /// // Push the original to the end, repeating the string twice. 
/// wstr.push_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString"); /// ``` pub fn push_str(&mut self, s: impl AsRef) { self.inner.extend(s.as_ref().encode_utf16()) } /// Extends the string with the given `&OsStr`. /// /// No checks are performed on the strings. It is possible to end up nul values inside the /// string, and it is up to the caller to determine if that is acceptable. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// let mut wstr = U16String::from_str(s); /// // Push the original to the end, repeating the string twice. /// wstr.push_os_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString"); /// ``` pub fn push_os_str(&mut self, s: impl AsRef) { self.inner.extend(platform::os_to_wide(s.as_ref())) } } impl UString { /// Constructs a `U32String` from a vector of UTF-32 data. /// /// No checks are made on the contents of the vector. /// /// # Examples /// /// ```rust /// use widestring::U32String; /// let v: Vec = "Test".chars().collect(); /// # let cloned: Vec = v.iter().map(|&c| c as u32).collect(); /// // Create a wide string from the vector /// let wstr = U32String::from_chars(v); /// # assert_eq!(wstr.into_vec(), cloned); /// ``` pub fn from_chars(raw: impl Into>) -> Self { UString { inner: unsafe { mem::transmute(raw.into()) }, } } /// Encodes a `U32String` copy from a `str`. /// /// This makes a wide string copy of the `str`. Since `str` will always be valid UTF-8, the /// resulting `U32String` will also be valid UTF-32. /// /// # Examples /// /// ```rust /// use widestring::U32String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32String::from_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), s); /// ``` pub fn from_str + ?Sized>(s: &S) -> Self { let v: Vec = s.as_ref().chars().collect(); UString::from_chars(v) } /// Encodes a `U32String` copy from an `OsStr`. /// /// This makes a wide string copy of the `OsStr`. 
Since `OsStr` makes no guarantees that it is /// valid data, there is no guarantee that the resulting `U32String` will be valid UTF-32. /// /// # Examples /// /// ```rust /// use widestring::U32String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32String::from_os_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), s); /// ``` pub fn from_os_str + ?Sized>(s: &S) -> Self { let v: Vec = s.as_ref().to_string_lossy().chars().collect(); UString::from_chars(v) } /// Constructs a `U32String` from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// Panics if `len` is greater than 0 but `p` is a null pointer. pub unsafe fn from_char_ptr(p: *const char, len: usize) -> Self { UString::from_ptr(p as *const u32, len) } /// Extends the string with the given `&str`. /// /// No checks are performed on the strings. It is possible to end up nul values inside the /// string, and it is up to the caller to determine if that is acceptable. /// /// # Examples /// /// ```rust /// use widestring::U32String; /// let s = "MyString"; /// let mut wstr = U32String::from_str(s); /// // Push the original to the end, repeating the string twice. /// wstr.push_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString"); /// ``` pub fn push_str(&mut self, s: impl AsRef) { self.inner.extend(s.as_ref().chars().map(|c| c as u32)) } /// Extends the string with the given `&OsStr`. /// /// No checks are performed on the strings. It is possible to end up nul values inside the /// string, and it is up to the caller to determine if that is acceptable. 
/// /// # Examples /// /// ```rust /// use widestring::U32String; /// let s = "MyString"; /// let mut wstr = U32String::from_str(s); /// // Push the original to the end, repeating the string twice. /// wstr.push_os_str(s); /// /// assert_eq!(wstr.to_string().unwrap(), "MyStringMyString"); /// ``` pub fn push_os_str(&mut self, s: impl AsRef) { self.inner .extend(s.as_ref().to_string_lossy().chars().map(|c| c as u32)) } } impl UStr { /// Coerces a value into a `UStr`. pub fn new + ?Sized>(s: &S) -> &Self { s.as_ref() } /// Constructs a `UStr` from a pointer and a length. /// /// The `len` argument is the number of elements, **not** the number of bytes. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// This function panics if `p` is null. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_ptr<'a>(p: *const C, len: usize) -> &'a Self { assert!(!p.is_null()); mem::transmute(std::slice::from_raw_parts(p, len)) } /// Constructs a `UStr` from a slice of code points. /// /// No checks are performed on the slice. pub fn from_slice(slice: &[C]) -> &Self { unsafe { mem::transmute(slice) } } /// Copies the wide string to a new owned `UString`. pub fn to_ustring(&self) -> UString { UString::from_vec(&self.inner) } /// Converts to a slice of the wide string. pub fn as_slice(&self) -> &[C] { &self.inner } /// Returns a raw pointer to the wide string. /// /// The pointer is valid only as long as the lifetime of this reference. pub fn as_ptr(&self) -> *const C { self.inner.as_ptr() } /// Returns the length of the wide string as number of elements (**not** number of bytes). 
pub fn len(&self) -> usize { self.inner.len() } /// Returns whether this wide string contains no data. pub fn is_empty(&self) -> bool { self.inner.is_empty() } /// Converts a `Box` into a `UString` without copying or allocating. pub fn into_ustring(self: Box) -> UString { let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut [C]) }; UString { inner: boxed.into_vec(), } } } impl UStr { /// Decodes a wide string to an owned `OsString`. /// /// This makes a string copy of the `U16Str`. Since `U16Str` makes no guarantees that it is /// valid UTF-16, there is no guarantee that the resulting `OsString` will be valid data. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// use std::ffi::OsString; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16String::from_str(s); /// // Create an OsString from the wide string /// let osstr = wstr.to_os_string(); /// /// assert_eq!(osstr, OsString::from(s)); /// ``` pub fn to_os_string(&self) -> OsString { platform::os_from_wide(&self.inner) } /// Copies the wide string to a `String` if it contains valid UTF-16 data. /// /// # Failures /// /// Returns an error if the string contains any invalid UTF-16 data. /// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16String::from_str(s); /// // Create a regular string from the wide string /// let s2 = wstr.to_string().unwrap(); /// /// assert_eq!(s2, s); /// ``` pub fn to_string(&self) -> Result { String::from_utf16(&self.inner) } /// Copies the wide string to a `String`. /// /// Any non-Unicode sequences are replaced with *U+FFFD REPLACEMENT CHARACTER*. 
/// /// # Examples /// /// ```rust /// use widestring::U16String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U16String::from_str(s); /// // Create a regular string from the wide string /// let lossy = wstr.to_string_lossy(); /// /// assert_eq!(lossy, s); /// ``` pub fn to_string_lossy(&self) -> String { String::from_utf16_lossy(&self.inner) } } impl UStr { /// Constructs a `U32Str` from a `char` pointer and a length. /// /// The `len` argument is the number of `char` elements, **not** the number of bytes. /// /// # Safety /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// elements. /// /// # Panics /// /// This function panics if `p` is null. /// /// # Caveat /// /// The lifetime for the returned string is inferred from its usage. To prevent accidental /// misuse, it's suggested to tie the lifetime to whichever source lifetime is safe in the /// context, such as by providing a helper function taking the lifetime of a host value for the /// string, or by explicit annotation. pub unsafe fn from_char_ptr<'a>(p: *const char, len: usize) -> &'a Self { UStr::from_ptr(p as *const u32, len) } /// Constructs a `U32Str` from a slice of `u32` code points. /// /// No checks are performed on the slice. pub fn from_char_slice(slice: &[char]) -> &Self { unsafe { mem::transmute(slice) } } /// Decodes a wide string to an owned `OsString`. /// /// This makes a string copy of the `U32Str`. Since `U32Str` makes no guarantees that it is /// valid UTF-32, there is no guarantee that the resulting `OsString` will be valid data. 
///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U32String;
    /// use std::ffi::OsString;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = U32String::from_str(s);
    /// // Create an OsString from the wide string
    /// let osstr = wstr.to_os_string();
    ///
    /// assert_eq!(osstr, OsString::from(s));
    /// ```
    pub fn to_os_string(&self) -> OsString {
        self.to_string_lossy().into()
    }

    /// Copies the wide string to a `String` if it contains valid UTF-32 data.
    ///
    /// # Errors
    ///
    /// Returns an error if the string contains any invalid UTF-32 data.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use widestring::U32String;
    /// let s = "MyString";
    /// // Create a wide string from the string
    /// let wstr = U32String::from_str(s);
    /// // Create a regular string from the wide string
    /// let s2 = wstr.to_string().unwrap();
    ///
    /// assert_eq!(s2, s);
    /// ```
    pub fn to_string(&self) -> Result<String, FromUtf32Error> {
        // Decode each value and collect straight into a `String`; collecting into `Result`
        // stops at the first value that is not a valid `char`. This replaces a hand-built
        // `Vec<u8>` whose length was set over uninitialised memory before being written.
        self.inner
            .iter()
            .map(|&c| char::from_u32(c).ok_or(FromUtf32Error()))
            .collect()
    }

    /// Copies the wide string to a `String`.
    ///
    /// Any non-Unicode sequences are replaced with *U+FFFD REPLACEMENT CHARACTER*.
/// /// # Examples /// /// ```rust /// use widestring::U32String; /// let s = "MyString"; /// // Create a wide string from the string /// let wstr = U32String::from_str(s); /// // Create a regular string from the wide string /// let lossy = wstr.to_string_lossy(); /// /// assert_eq!(lossy, s); /// ``` pub fn to_string_lossy(&self) -> String { let chars: Vec = self .inner .iter() .map(|&c| char::from_u32(c).unwrap_or(char::REPLACEMENT_CHARACTER)) .collect(); let size = chars.iter().map(|c| c.len_utf8()).sum(); let mut vec = Vec::with_capacity(size); unsafe { vec.set_len(size) }; let mut i = 0; for c in chars { c.encode_utf8(&mut vec[i..]); i += c.len_utf8(); } unsafe { String::from_utf8_unchecked(vec) } } } impl Into> for UString { fn into(self) -> Vec { self.into_vec() } } impl<'a> From> for std::borrow::Cow<'a, UStr> { fn from(s: UString) -> Self { std::borrow::Cow::Owned(s) } } impl<'a> From> for std::borrow::Cow<'a, UStr> { fn from(s: UString) -> Self { std::borrow::Cow::Owned(s) } } impl Into> for Vec { fn into(self) -> UString { UString::from_vec(self) } } impl Into> for Vec { fn into(self) -> UString { UString::from_vec(self) } } impl Into> for Vec { fn into(self) -> UString { UString::from_chars(self) } } impl From for UString { fn from(s: String) -> Self { Self::from_str(&s) } } impl From for UString { fn from(s: String) -> Self { Self::from_str(&s) } } impl From for UString { fn from(s: OsString) -> Self { Self::from_os_str(&s) } } impl From for UString { fn from(s: OsString) -> Self { Self::from_os_str(&s) } } impl From> for OsString { fn from(s: UString) -> Self { s.to_os_string() } } impl From> for OsString { fn from(s: UString) -> Self { s.to_os_string() } } impl<'a, C: UChar, T: ?Sized + AsRef>> From<&'a T> for UString { fn from(s: &'a T) -> Self { s.as_ref().to_ustring() } } impl std::ops::Index for UString { type Output = UStr; #[inline] fn index(&self, _index: std::ops::RangeFull) -> &UStr { UStr::from_slice(&self.inner) } } impl std::ops::Deref 
for UString { type Target = UStr; #[inline] fn deref(&self) -> &UStr { &self[..] } } impl PartialEq> for UString { #[inline] fn eq(&self, other: &UStr) -> bool { self.as_ustr() == other } } impl PartialOrd> for UString { #[inline] fn partial_cmp(&self, other: &UStr) -> Option { self.as_ustr().partial_cmp(other) } } impl<'a, C: UChar> PartialEq<&'a UStr> for UString { #[inline] fn eq(&self, other: &&'a UStr) -> bool { self.as_ustr() == *other } } impl<'a, C: UChar> PartialOrd<&'a UStr> for UString { #[inline] fn partial_cmp(&self, other: &&'a UStr) -> Option { self.as_ustr().partial_cmp(*other) } } impl<'a, C: UChar> PartialEq>> for UString { #[inline] fn eq(&self, other: &std::borrow::Cow<'a, UStr>) -> bool { self.as_ustr() == other.as_ref() } } impl<'a, C: UChar> PartialOrd>> for UString { #[inline] fn partial_cmp(&self, other: &std::borrow::Cow<'a, UStr>) -> Option { self.as_ustr().partial_cmp(other.as_ref()) } } impl std::borrow::Borrow> for UString { fn borrow(&self) -> &UStr { &self[..] 
} } impl ToOwned for UStr { type Owned = UString; fn to_owned(&self) -> UString { self.to_ustring() } } impl<'a> From<&'a UStr> for std::borrow::Cow<'a, UStr> { fn from(s: &'a UStr) -> Self { std::borrow::Cow::Borrowed(s) } } impl<'a> From<&'a UStr> for std::borrow::Cow<'a, UStr> { fn from(s: &'a UStr) -> Self { std::borrow::Cow::Borrowed(s) } } impl AsRef> for UStr { fn as_ref(&self) -> &Self { self } } impl AsRef> for UString { fn as_ref(&self) -> &UStr { self } } impl AsRef<[C]> for UStr { fn as_ref(&self) -> &[C] { self.as_slice() } } impl AsRef<[C]> for UString { fn as_ref(&self) -> &[C] { self.as_slice() } } impl<'a, C: UChar> From<&'a UStr> for Box> { fn from(s: &'a UStr) -> Self { let boxed: Box<[C]> = Box::from(&s.inner); let rw = Box::into_raw(boxed) as *mut UStr; unsafe { Box::from_raw(rw) } } } impl From>> for UString { fn from(boxed: Box>) -> Self { boxed.into_ustring() } } impl From> for Box> { fn from(s: UString) -> Self { s.into_boxed_ustr() } } impl Default for Box> { fn default() -> Self { let boxed: Box<[C]> = Box::from([]); let rw = Box::into_raw(boxed) as *mut UStr; unsafe { Box::from_raw(rw) } } } impl std::fmt::Display for FromUtf32Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "error converting from UTF-32 to UTF-8") } } impl std::error::Error for FromUtf32Error { fn description(&self) -> &str { "error converting from UTF-32 to UTF-8" } }