arcstr-1.2.0/.cargo_vcs_info.json0000644000000001360000000000100123320ustar { "git": { "sha1": "faa7692b0d6662bb177b3aefa80a6a13f897554d" }, "path_in_vcs": "" }arcstr-1.2.0/Cargo.toml0000644000000032660000000000100103370ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.57.0" name = "arcstr" version = "1.2.0" authors = ["Thom Chiovoloni "] build = false include = [ "src/**/*", "LICENSE-*", "README.md", ] autobins = false autoexamples = false autotests = false autobenches = false description = "A better reference-counted string type, with zero-cost (allocation-free) support for string literals, and reference counted substrings." 
homepage = "https://github.com/thomcc/arcstr" documentation = "https://docs.rs/arcstr" readme = "README.md" keywords = [ "arc", "refcount", "arc_str", "rc_str", "string", ] categories = [ "concurrency", "memory-management", "data-structures", "no-std", "rust-patterns", ] license = "Apache-2.0 OR MIT OR Zlib" repository = "https://github.com/thomcc/arcstr" [package.metadata.docs.rs] features = [ "std", "substr", ] [lib] name = "arcstr" path = "src/lib.rs" [dependencies.serde] version = "1" optional = true default-features = false [dev-dependencies.serde_test] version = "1" default-features = false [features] default = ["substr"] std = [] substr = [] substr-usize-indices = ["substr"] [target."cfg(loom)".dev-dependencies.loom] version = "0.7.1" arcstr-1.2.0/Cargo.toml.orig000064400000000000000000000020641046102023000140130ustar 00000000000000[package] name = "arcstr" version = "1.2.0" rust-version = "1.57.0" authors = ["Thom Chiovoloni "] edition = "2021" description = "A better reference-counted string type, with zero-cost (allocation-free) support for string literals, and reference counted substrings." 
license = "Apache-2.0 OR MIT OR Zlib" readme = "README.md" keywords = ["arc", "refcount", "arc_str", "rc_str", "string"] categories = [ "concurrency", "memory-management", "data-structures", "no-std", "rust-patterns", ] repository = "https://github.com/thomcc/arcstr" documentation = "https://docs.rs/arcstr" homepage = "https://github.com/thomcc/arcstr" include = ["src/**/*", "LICENSE-*", "README.md"] [features] std = [] default = ["substr"] substr = [] substr-usize-indices = ["substr"] [dependencies] serde = { version = "1", default-features = false, optional = true } [dev-dependencies] serde_test = { version = "1", default-features = false } [target.'cfg(loom)'.dev-dependencies] loom = "0.7.1" [package.metadata.docs.rs] features = ["std", "substr"] arcstr-1.2.0/LICENSE-APACHE000064400000000000000000000251321046102023000130510ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2016 The Miri Developers Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. arcstr-1.2.0/LICENSE-MIT000064400000000000000000000020421046102023000125540ustar 00000000000000Copyright (c) 2020 Thom Chiovoloni Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.arcstr-1.2.0/LICENSE-ZLIB000064400000000000000000000015301046102023000126640ustar 00000000000000Copyright (c) 2020 Thom Chiovoloni This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. arcstr-1.2.0/README.md000064400000000000000000000163351046102023000124110ustar 00000000000000# `arcstr`: Better reference-counted strings. [![Build Status](https://github.com/thomcc/arcstr/workflows/CI/badge.svg)](https://github.com/thomcc/arcstr/actions) [![codecov](https://codecov.io/gh/thomcc/arcstr/branch/main/graph/badge.svg)](https://codecov.io/gh/thomcc/arcstr) [![Docs](https://docs.rs/arcstr/badge.svg)](https://docs.rs/arcstr) [![Latest Version](https://img.shields.io/crates/v/arcstr.svg)](https://crates.io/crates/arcstr) ![Minimum Rust Version](https://img.shields.io/badge/MSRV%201.57-blue.svg) This crate defines `ArcStr`, a reference counted string type. It's essentially trying to be a better `Arc<str>` or `Arc<String>`, at least for most use cases. ArcStr intentionally gives up some of the features of `Arc` which are rarely-used for `Arc<str>` (`Weak`, `Arc::make_mut`, ...). And in exchange, it gets a number of features that are very useful, especially for strings. Notably robust support for cheap/zero-cost `ArcStr`s holding static data (for example, string literals). 
(Aside from this, it's also a single pointer, which can be good for performance and FFI) Additionally, if the `substr` feature is enabled (and it is by default) we provide a `Substr` type which is essentially a `(ArcStr, Range<usize>)` with better ergonomics and more functionality, which represents a shared slice of a "parent" `ArcStr` (Note that in reality, `u32` is used for the index type, but this is not exposed in the API, and can be transparently changed via a cargo feature). ## Feature overview A quick tour of the distinguishing features (note that there's a list of [benefits](https://docs.rs/arcstr/%2a/arcstr/struct.ArcStr.html#benefits-of-arcstr-over-arcstr) in the `ArcStr` documentation which covers some of the reasons you might want to use it over other alternatives). Note that it offers essentially the full set of string-like functionality you would probably expect from an immutable string type — these are just the unique selling points: ```rust use arcstr::ArcStr; // Works in const: const AMAZING: ArcStr = arcstr::literal!("amazing constant"); assert_eq!(AMAZING, "amazing constant"); // `arcstr::literal!` input can come from `include_str!` too: const MY_BEST_FILES: ArcStr = arcstr::literal!(include_str!("my-best-files.txt")); ``` Or, you can define the literals in normal expressions. Note that these literals are essentially ["Zero Cost"][zero-cost]. Specifically, below we not only don't allocate any heap memory to instantiate `wow` or any of the clones, we also don't have to perform any atomic reads or writes when cloning, or dropping them (or during any other operations on them). [zero-cost]: https://docs.rs/arcstr/%2a/arcstr/struct.ArcStr.html#what-does-zero-cost-literals-mean ```rust let wow: ArcStr = arcstr::literal!("Wow!"); assert_eq!("Wow!", wow); // This line is probably not something you want to do regularly, // but as mentioned, causes no extra allocations, nor performs any // atomic loads, stores, rmws, etc. 
let wowzers = wow.clone().clone().clone().clone(); // At some point in the future, we can get a `&'static str` out of one // of the literal `ArcStr`s too. let static_str: Option<&'static str> = ArcStr::as_static(&wowzers); assert_eq!(static_str, Some("Wow!")); // Note that this returns `None` for dynamically allocated `ArcStr`: let dynamic_arc = ArcStr::from(format!("cool {}", 123)); assert_eq!(ArcStr::as_static(&dynamic_arc), None); ``` Open TODO: Include `Substr` usage here, as it has some compelling use cases too! ## Usage It's a normal rust crate, drop it in your `Cargo.toml`'s dependencies section. In the somewhat unlikely case that you're here and don't know how: ```toml [dependencies] # ... arcstr = { version = "...", features = ["..."] } ``` The following cargo features are available. Only `substr` is on by default currently. - `std` (off by default): Turn on to use `std::process`'s aborting, instead of triggering an abort using the "double-panic trick". Essentially, there's one case we need to abort, and that's during a catastrophic error where you leak the same (dynamic) `ArcStr` 2^31 times on 32-bit systems, or 2^63 times on 64-bit systems. If this happens, we follow `libstd`'s lead and just abort because we're hosed anyway. If `std` is enabled, we use the real `std::process::abort`. If `std` is not enabled, we trigger an `abort` by triggering a panic while another panic is unwinding, which is either defined to cause an abort, or causes one in practice. In practice you will never hit this edge case, and it still works in no_std, so no_std is the default. If you have to turn this on, because you hit this ridiculous case and found our handling bad, let me know. Concretely, the difference here is that without this, this case becomes a call to `core::intrinsics::abort`, and not `std::process::abort`. 
It's a ridiculously unlikely edge case to hit, but if you are to hit it, `std::process::abort` results in a `SIGABRT` whereas `core::intrinsics::abort` results in a `SIGILL`, and the former has meaningfully better UX. That said, it's extraordinarily unlikely that you manage to leak `2^31` or `2^63` copies of the same `ArcStr`, so in our opinion it's not really worth depending on `std` by default. - `serde` (off by default): enable serde serialization of `ArcStr`. Note that this doesn't do any fancy deduping or whatever. - `substr` (**on by default**): implement the `Substr` type and related functions. - `substr-usize-indices` (off by default, implies `substr`): Use `usize` under the hood for the boundaries, instead of `u32`. Without this, if you use `Substr` and an index would overflow a `u32` we unceremoniously panic. ## Use of `unsafe` and testing strategy While this crate does contain a decent amount of unsafe code, we justify this in the following ways: 1. We have a very high test coverage ratio (essentially the only uncovered functions are the out-of-memory handler (which just calls `alloc::handle_alloc_error`), and an extremely pathological integer overflow where we just abort). 2. All tests pass under various sanitizers: `asan`, `msan`, `tsan`, and `miri`. 3. We have a few [`loom`](https://crates.io/crates/loom) models although I'd love to have more. 4. Our tests pass on a ton of different targets (thanks to [`cross`](https://github.com/rust-embedded/cross/) for making many of these possible — easy even): - Linux x86, x86_64, armv7 (arm32), aarch64 (arm64), riscv64, mips32, and mips64 (the mips32 and mips64 targets allow us to check both big-endian 32bit and 64bit. Although we don't have any endian-specific code at the moment). - Windows 32-bit and 64-bit, on both GNU and MSVC toolchains. - MacOS on x86_64. Additionally, we test on Rust stable, beta, nightly, and our MSRV (see badge above for MSRV). 
#### Supported platforms Note that the above is *not* a list of supported platforms. In general I expect `arcstr` to support all platform's Rust supports, except for ones with `target_pointer_width="16"`, which *should* work if you turn off the `substr` feature. That said, if you'd like me to add a platform to the CI coverage to ensure it doesn't break, just ask\* (although, if it's harder than adding a line for another `cross` target, I'll probably need you to justify why it's likely to not be covered by the existing platform tests). \* This is why there are riscv64. arcstr-1.2.0/src/arc_str.rs000064400000000000000000001551631046102023000137270ustar 00000000000000#![allow( // We follow libstd's lead and prefer to define both. clippy::partialeq_ne_impl, // This is a really annoying clippy lint, since it's required for so many cases... clippy::cast_ptr_alignment, // For macros clippy::redundant_slicing, )] use core::alloc::Layout; use core::mem::{align_of, size_of, MaybeUninit}; use core::ptr::NonNull; #[cfg(not(all(loom, test)))] pub(crate) use core::sync::atomic::{AtomicUsize, Ordering}; #[cfg(all(loom, test))] pub(crate) use loom::sync::atomic::{AtomicUsize, Ordering}; #[cfg(feature = "substr")] use crate::Substr; use alloc::borrow::Cow; use alloc::boxed::Box; use alloc::string::String; /// A better atomically-reference counted string type. /// /// ## Benefits of `ArcStr` over `Arc` /// /// - It's possible to create a const `ArcStr` from a literal via the /// [`arcstr::literal!`][crate::literal] macro. This is probably the killer /// feature, to be honest. /// /// These "static" `ArcStr`s are zero cost, take no heap allocation, and don't /// even need to perform atomic reads/writes when being cloned or dropped (nor /// at any other time). /// /// They even get stored in the read-only memory of your executable, which can /// be beneficial for performance and memory usage. 
(In theory your linker may /// even dedupe these for you, but usually not) /// /// - `ArcStr`s from `arcstr::literal!` can be turned into `&'static str` safely /// at any time using [`ArcStr::as_static`]. (This returns an Option, which is /// `None` if the `ArcStr` was not static) /// /// - This should be unsurprising given the literal functionality, but /// [`ArcStr::new`] is able to be a `const` function. /// /// - `ArcStr` is thin, i.e. only a single pointer. Great for cases where you /// want to keep the data structure lightweight or need to do some FFI stuff /// with it. /// /// - `ArcStr` is totally immutable. No need to lose sleep because you're afraid /// of code which thinks it has a right to mutate your `Arc`s just because it /// holds the only reference... /// /// - Reference counting operations are lower overhead because we don't /// support `Weak` references. This can be a drawback for some use cases, but /// improves performance for the common case of no-weak-refs. /// /// ## What does "zero-cost literals" mean? /// /// In a few places I call the literal arcstrs "zero-cost". This means there is /// no overhead for most accesses (aside from stuff like `as_static`, which /// obviously requires it), but it does impose an extra branch in both `clone` /// and `drop`. /// /// This branch in `clone`/`drop` is not on the result of an atomic load, and is /// just a normal memory read. This is actually what allows literal/static /// `ArcStr`s to avoid needing to perform any atomic operations in those /// functions, which seems likely to more than cover the cost. /// /// (Additionally, it's almost certain that in the future we'll be able to /// reduce the synchronization required for atomic instructions. This is due to /// our guarantee of immutability and lack of support for `Weak`.) /// /// # Usage /// /// ## As a `const` /// /// The big unique feature of `ArcStr` is the ability to create static/const /// `ArcStr`s. 
(See [the macro](crate::literal) docs or the [feature /// overview][feats] /// /// [feats]: index.html#feature-overview /// /// ``` /// # use arcstr::ArcStr; /// const WOW: ArcStr = arcstr::literal!("cool robot!"); /// assert_eq!(WOW, "cool robot!"); /// ``` /// /// ## As a `str` /// /// (This is not unique to `ArcStr`, but is a frequent source of confusion I've /// seen): `ArcStr` implements `Deref`, and so all functions and /// methods from `str` work on it, even though we don't expose them on `ArcStr` /// directly. /// /// ``` /// # use arcstr::ArcStr; /// let s = ArcStr::from("something"); /// // These go through `Deref`, so they work even though /// // there is no `ArcStr::eq_ignore_ascii_case` function /// assert!(s.eq_ignore_ascii_case("SOMETHING")); /// ``` /// /// Additionally, `&ArcStr` can be passed to any function which accepts `&str`. /// For example: /// /// ``` /// # use arcstr::ArcStr; /// fn accepts_str(s: &str) { /// # let _ = s; /// // s... /// } /// /// let test_str: ArcStr = "test".into(); /// // This works even though `&test_str` is normally an `&ArcStr` /// accepts_str(&test_str); /// /// // Of course, this works for functionality from the standard library as well. /// let test_but_loud = ArcStr::from("TEST"); /// assert!(test_str.eq_ignore_ascii_case(&test_but_loud)); /// ``` #[repr(transparent)] pub struct ArcStr(NonNull); unsafe impl Sync for ArcStr {} unsafe impl Send for ArcStr {} impl ArcStr { /// Construct a new empty string. /// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// let s = ArcStr::new(); /// assert_eq!(s, ""); /// ``` #[inline] pub const fn new() -> Self { EMPTY } /// Attempt to copy the provided string into a newly allocated `ArcStr`, but /// return `None` if we cannot allocate the required memory. 
/// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// /// # fn do_stuff_with(s: ArcStr) {} /// /// let some_big_str = "please pretend this is a very long string"; /// if let Some(s) = ArcStr::try_alloc(some_big_str) { /// do_stuff_with(s); /// } else { /// // Complain about allocation failure, somehow. /// } /// ``` #[inline] pub fn try_alloc(copy_from: &str) -> Option { if let Ok(inner) = ThinInner::try_allocate(copy_from, false) { Some(Self(inner)) } else { None } } /// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the /// provided callback to fully initialize the provided buffer with valid /// UTF-8 text. /// /// This function returns `None` if memory allocation fails, see /// [`ArcStr::init_with_unchecked`] for a version which calls /// [`handle_alloc_error`](alloc::alloc::handle_alloc_error). /// /// # Safety /// The provided `initializer` callback must fully initialize the provided /// buffer with valid UTF-8 text. /// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// # use core::mem::MaybeUninit; /// let arcstr = unsafe { /// ArcStr::try_init_with_unchecked(10, |s: &mut [MaybeUninit]| { /// s.fill(MaybeUninit::new(b'a')); /// }).unwrap() /// }; /// assert_eq!(arcstr, "aaaaaaaaaa") /// ``` #[inline] pub unsafe fn try_init_with_unchecked(n: usize, initializer: F) -> Option where F: FnOnce(&mut [MaybeUninit]), { if let Ok(inner) = ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) { Some(Self(inner)) } else { None } } /// Allocate memory for an [`ArcStr`] of length `n`, and use the provided /// callback to fully initialize the provided buffer with valid UTF-8 text. /// /// This function calls /// [`handle_alloc_error`](alloc::alloc::handle_alloc_error) if memory /// allocation fails, see [`ArcStr::try_init_with_unchecked`] for a version /// which returns `None` /// /// # Safety /// The provided `initializer` callback must fully initialize the provided /// buffer with valid UTF-8 text. 
/// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// # use core::mem::MaybeUninit; /// let arcstr = unsafe { /// ArcStr::init_with_unchecked(10, |s: &mut [MaybeUninit]| { /// s.fill(MaybeUninit::new(b'a')); /// }) /// }; /// assert_eq!(arcstr, "aaaaaaaaaa") /// ``` #[inline] pub unsafe fn init_with_unchecked(n: usize, initializer: F) -> Self where F: FnOnce(&mut [MaybeUninit]), { match ThinInner::try_allocate_with(n, false, AllocInit::Uninit, initializer) { Ok(inner) => Self(inner), Err(None) => panic!("capacity overflow"), Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout), } } /// Attempt to allocate memory for an [`ArcStr`] of length `n`, and use the /// provided callback to initialize the provided (initially-zeroed) buffer /// with valid UTF-8 text. /// /// Note: This function is provided with a zeroed buffer, and performs UTF-8 /// validation after calling the initializer. While both of these are fast /// operations, some high-performance use cases will be better off using /// [`ArcStr::try_init_with_unchecked`] as the building block. /// /// # Errors /// The provided `initializer` callback must initialize the provided buffer /// with valid UTF-8 text, or a UTF-8 error will be returned. 
/// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// /// let s = ArcStr::init_with(5, |slice| { /// slice /// .iter_mut() /// .zip(b'0'..b'5') /// .for_each(|(db, sb)| *db = sb); /// }).unwrap(); /// assert_eq!(s, "01234"); /// ``` #[inline] pub fn init_with(n: usize, initializer: F) -> Result where F: FnOnce(&mut [u8]), { let mut failed = None::; let wrapper = |zeroed_slice: &mut [MaybeUninit]| { debug_assert_eq!(n, zeroed_slice.len()); // Safety: we pass `AllocInit::Zero`, so this is actually initialized let slice = unsafe { core::slice::from_raw_parts_mut(zeroed_slice.as_mut_ptr().cast::(), n) }; initializer(slice); if let Err(e) = core::str::from_utf8(slice) { failed = Some(e); } }; match unsafe { ThinInner::try_allocate_with(n, false, AllocInit::Zero, wrapper) } { Ok(inner) => { // Ensure we clean up the allocation even on error. let this = Self(inner); if let Some(e) = failed { Err(e) } else { Ok(this) } } Err(None) => panic!("capacity overflow"), Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout), } } /// Extract a string slice containing our data. /// /// Note: This is an equivalent to our `Deref` implementation, but can be /// more readable than `&*s` in the cases where a manual invocation of /// `Deref` would be required. /// /// # Examples // TODO: find a better example where `&*` would have been required. /// ``` /// # use arcstr::ArcStr; /// let s = ArcStr::from("abc"); /// assert_eq!(s.as_str(), "abc"); /// ``` #[inline] pub fn as_str(&self) -> &str { self } /// Returns the length of this `ArcStr` in bytes. /// /// # Examples /// /// ``` /// # use arcstr::ArcStr; /// let a = ArcStr::from("foo"); /// assert_eq!(a.len(), 3); /// ``` #[inline] pub fn len(&self) -> usize { self.get_inner_len_flag().uint_part() } #[inline] fn get_inner_len_flag(&self) -> PackedFlagUint { unsafe { ThinInner::get_len_flag(self.0.as_ptr()) } } /// Returns true if this `ArcStr` is empty. 
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// assert!(!ArcStr::from("foo").is_empty());
/// assert!(ArcStr::new().is_empty());
/// ```
#[inline]
pub fn is_empty(&self) -> bool {
    self.len() == 0
}

/// Convert us to a `std::string::String`.
///
/// This is provided as an inherent method to avoid needing to route through
/// the `Display` machinery, but is equivalent to `ToString::to_string`.
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// let s = ArcStr::from("abc");
/// assert_eq!(s.to_string(), "abc");
/// ```
#[inline]
#[allow(clippy::inherent_to_string_shadow_display)]
pub fn to_string(&self) -> String {
    // Without `std` the prelude does not bring `ToOwned` into scope.
    #[cfg(not(feature = "std"))]
    use alloc::borrow::ToOwned;
    self.as_str().to_owned()
}

/// Extract a byte slice containing the string's data.
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// let foobar = ArcStr::from("foobar");
/// assert_eq!(foobar.as_bytes(), b"foobar");
/// ```
#[inline]
pub fn as_bytes(&self) -> &[u8] {
    let len = self.len();
    let p = self.0.as_ptr();
    unsafe {
        // The string bytes live `OFFSET_DATA` bytes past the start of the
        // header; the debug assertion cross-checks that against the
        // declared position of the `data` field.
        let data = p.cast::<u8>().add(OFFSET_DATA);
        debug_assert_eq!(core::ptr::addr_of!((*p).data).cast::<u8>(), data);
        core::slice::from_raw_parts(data, len)
    }
}

/// Return the raw pointer this `ArcStr` wraps, for advanced use cases.
///
/// Note that in addition to the `NonNull` constraint expressed in the type
/// signature, we also guarantee the pointer has an alignment of at least 8
/// bytes, even on platforms where a lower alignment would be acceptable.
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// let s = ArcStr::from("abcd");
/// let p = ArcStr::into_raw(s);
/// // Some time later...
/// let s = unsafe { ArcStr::from_raw(p) };
/// assert_eq!(s, "abcd");
/// ```
#[inline]
pub fn into_raw(this: Self) -> NonNull<()> {
    let p = this.0;
    // Skip `Drop`: ownership of the reference moves into the raw pointer.
    core::mem::forget(this);
    p.cast()
}

/// The opposite version of [`Self::into_raw`]. Still intended only for
/// advanced use cases.
///
/// # Safety
///
/// This function must be used on a valid pointer returned from
/// [`ArcStr::into_raw`]. Additionally, you must ensure that a given `ArcStr`
/// instance is only dropped once.
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// let s = ArcStr::from("abcd");
/// let p = ArcStr::into_raw(s);
/// // Some time later...
/// let s = unsafe { ArcStr::from_raw(p) };
/// assert_eq!(s, "abcd");
/// ```
#[inline]
pub unsafe fn from_raw(ptr: NonNull<()>) -> Self {
    Self(ptr.cast())
}

/// Returns true if the two `ArcStr`s point to the same allocation.
///
/// Note that functions like `PartialEq` check this already, so there's
/// no performance benefit to doing something like
/// `ArcStr::ptr_eq(&a1, &a2) || (a1 == a2)`.
///
/// Caveat: `const`s aren't guaranteed to only occur in an executable a
/// single time, and so this may be non-deterministic for `ArcStr` defined
/// in a `const` with [`arcstr::literal!`][crate::literal], unless one
/// was created by a `clone()` on the other.
///
/// # Examples
///
/// ```
/// use arcstr::ArcStr;
///
/// let foobar = ArcStr::from("foobar");
/// let same_foobar = foobar.clone();
/// let other_foobar = ArcStr::from("foobar");
/// assert!(ArcStr::ptr_eq(&foobar, &same_foobar));
/// assert!(!ArcStr::ptr_eq(&foobar, &other_foobar));
///
/// const YET_AGAIN_A_DIFFERENT_FOOBAR: ArcStr = arcstr::literal!("foobar");
/// let strange_new_foobar = YET_AGAIN_A_DIFFERENT_FOOBAR.clone();
/// let wild_blue_foobar = strange_new_foobar.clone();
/// assert!(ArcStr::ptr_eq(&strange_new_foobar, &wild_blue_foobar));
/// ```
#[inline]
pub fn ptr_eq(lhs: &Self, rhs: &Self) -> bool {
    core::ptr::eq(lhs.0.as_ptr(), rhs.0.as_ptr())
}

/// Returns the number of references that exist to this `ArcStr`. If this is
/// a static `ArcStr` (For example, one from
/// [`arcstr::literal!`][crate::literal]), returns `None`.
/// /// Despite the difference in return type, this is named to match the method /// from the stdlib's Arc: /// [`Arc::strong_count`][alloc::sync::Arc::strong_count]. /// /// If you aren't sure how to handle static `ArcStr` in the context of this /// return value, `ArcStr::strong_count(&s).unwrap_or(usize::MAX)` is /// frequently reasonable. /// /// # Safety /// /// This method by itself is safe, but using it correctly requires extra /// care. Another thread can change the strong count at any time, including /// potentially between calling this method and acting on the result. /// /// However, it may never change from `None` to `Some` or from `Some` to /// `None` for a given `ArcStr` — whether or not it is static is determined /// at construction, and never changes. /// /// # Examples /// /// ### Dynamic ArcStr /// ``` /// # use arcstr::ArcStr; /// let foobar = ArcStr::from("foobar"); /// assert_eq!(Some(1), ArcStr::strong_count(&foobar)); /// let also_foobar = ArcStr::clone(&foobar); /// assert_eq!(Some(2), ArcStr::strong_count(&foobar)); /// assert_eq!(Some(2), ArcStr::strong_count(&also_foobar)); /// ``` /// /// ### Static ArcStr /// ``` /// # use arcstr::ArcStr; /// let baz = arcstr::literal!("baz"); /// assert_eq!(None, ArcStr::strong_count(&baz)); /// // Similarly: /// assert_eq!(None, ArcStr::strong_count(&ArcStr::default())); /// ``` #[inline] pub fn strong_count(this: &Self) -> Option { let cf = Self::load_count_flag(this, Ordering::Acquire)?; if cf.flag_part() { None } else { Some(cf.uint_part()) } } /// Safety: Unsafe to use `this` is stored in static memory (check /// `Self::has_static_lenflag`) #[inline] unsafe fn load_count_flag_raw(this: &Self, ord_if_needed: Ordering) -> PackedFlagUint { PackedFlagUint::from_encoded((*this.0.as_ptr()).count_flag.load(ord_if_needed)) } #[inline] fn load_count_flag(this: &Self, ord_if_needed: Ordering) -> Option { if Self::has_static_lenflag(this) { None } else { let count_and_flag = 
PackedFlagUint::from_encoded(unsafe { (*this.0.as_ptr()).count_flag.load(ord_if_needed) }); Some(count_and_flag) } } /// Convert the `ArcStr` into a "static" `ArcStr`, even if it was originally /// created from runtime values. The `&'static str` is returned. /// /// This is useful if you want to use [`ArcStr::as_static`] or /// [`ArcStr::is_static`] on a value only known at runtime. /// /// If the `ArcStr` is already static, then this is a noop. /// /// # Caveats /// Calling this function on an ArcStr will cause us to never free it, thus /// leaking it's memory. Doing this excessively can lead to problems. /// /// # Examples /// ```no_run /// # // This isn't run because it needs a leakcheck suppression, /// # // which I can't seem to make work in CI (no symbols for /// # // doctests?). Instead, we test this in tests/arc_str.rs /// # use arcstr::ArcStr; /// let s = ArcStr::from("foobar"); /// assert!(!ArcStr::is_static(&s)); /// assert!(ArcStr::as_static(&s).is_none()); /// /// let leaked: &'static str = s.leak(); /// assert_eq!(leaked, s); /// assert!(ArcStr::is_static(&s)); /// assert_eq!(ArcStr::as_static(&s), Some("foobar")); /// ``` #[inline] pub fn leak(&self) -> &'static str { if Self::has_static_lenflag(self) { return unsafe { Self::to_static_unchecked(self) }; } let is_static_count = unsafe { // Not sure about ordering, maybe relaxed would be fine. 
Self::load_count_flag_raw(self, Ordering::Acquire) }; if is_static_count.flag_part() { return unsafe { Self::to_static_unchecked(self) }; } unsafe { Self::become_static(self, is_static_count.uint_part() == 1) }; debug_assert!(Self::is_static(self)); unsafe { Self::to_static_unchecked(self) } } unsafe fn become_static(this: &Self, is_unique: bool) { if is_unique { core::ptr::addr_of_mut!((*this.0.as_ptr()).count_flag).write(AtomicUsize::new( PackedFlagUint::new_raw(true, 1).encoded_value(), )); let lenp = core::ptr::addr_of_mut!((*this.0.as_ptr()).len_flag); debug_assert!(!lenp.read().flag_part()); lenp.write(lenp.read().with_flag(true)); } else { let flag_bit = PackedFlagUint::new_raw(true, 0).encoded_value(); let atomic_count_flag = &*core::ptr::addr_of!((*this.0.as_ptr()).count_flag); atomic_count_flag.fetch_or(flag_bit, Ordering::Release); } } #[inline] unsafe fn to_static_unchecked(this: &Self) -> &'static str { &*Self::str_ptr(this) } #[inline] fn bytes_ptr(this: &Self) -> *const [u8] { let len = this.get_inner_len_flag().uint_part(); unsafe { let p: *const ThinInner = this.0.as_ptr(); let data = p.cast::().add(OFFSET_DATA); debug_assert_eq!(core::ptr::addr_of!((*p).data).cast::(), data,); core::ptr::slice_from_raw_parts(data, len) } } #[inline] fn str_ptr(this: &Self) -> *const str { Self::bytes_ptr(this) as *const str } /// Returns true if `this` is a "static" ArcStr. For example, if it was /// created from a call to [`arcstr::literal!`][crate::literal]), /// returned by `ArcStr::new`, etc. /// /// Static `ArcStr`s can be converted to `&'static str` for free using /// [`ArcStr::as_static`], without leaking memory — they're static constants /// in the program (somewhere). 
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// const STATIC: ArcStr = arcstr::literal!("Electricity!");
/// assert!(ArcStr::is_static(&STATIC));
///
/// let still_static = arcstr::literal!("Shocking!");
/// assert!(ArcStr::is_static(&still_static));
/// assert!(
///     ArcStr::is_static(&still_static.clone()),
///     "Cloned statics are still static"
/// );
///
/// let nonstatic = ArcStr::from("Grounded...");
/// assert!(!ArcStr::is_static(&nonstatic));
/// ```
#[inline]
pub fn is_static(this: &Self) -> bool {
    // We align this to 16 bytes and keep the `is_static` flags in the same
    // place. In theory this means that if `cfg(target_feature = "avx")`
    // (where aligned 16byte loads are atomic), the compiler *could*
    // implement this function using the equivalent of:
    // ```
    // let vec = _mm_load_si128(self.0.as_ptr().cast());
    // let mask = _mm_movemask_pd(_mm_srli_epi64(vec, 63));
    // mask != 0
    // ```
    // and that's all; one load, no branching. (I don't think it *does*, but
    // I haven't checked so I'll be optimistic and keep the `#[repr(align)]`
    // -- hey, maybe the CPU can peephole-optimize it).
    //
    // That said, unless I did it in asm, *I* can't implement it that way,
    // since Rust's semantics don't allow me to make that optimization on
    // my own (that load isn't considered atomic, for example).
    //
    // The `||` short-circuit matters: the count word is only loaded when
    // the length flag says this was not static from creation.
    this.get_inner_len_flag().flag_part()
        || unsafe { Self::load_count_flag_raw(this, Ordering::Relaxed).flag_part() }
}

/// This is true for any `ArcStr` that has been static from the time when it
/// was created. It's cheaper than the full [`ArcStr::is_static`] check,
/// which may additionally need an atomic load of the count word.
#[inline]
fn has_static_lenflag(this: &Self) -> bool {
    this.get_inner_len_flag().flag_part()
}

/// Returns the contents as a `&'static str` if `this` is a
/// "static"/`"literal"` ArcStr, and `None` otherwise. For example, if it
/// was created from a call to [`literal!`][crate::literal], returned by
/// `ArcStr::new`, etc.
///
/// Static `ArcStr`s can be converted to `&'static str` for free using
/// [`ArcStr::as_static`], without leaking memory — they're static constants
/// in the program (somewhere).
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// const STATIC: ArcStr = arcstr::literal!("Electricity!");
/// assert_eq!(ArcStr::as_static(&STATIC), Some("Electricity!"));
///
/// // Note that they don't have to be consts, just made using `literal!`:
/// let still_static = arcstr::literal!("Shocking!");
/// assert_eq!(ArcStr::as_static(&still_static), Some("Shocking!"));
/// // Cloning a static still produces a static.
/// assert_eq!(ArcStr::as_static(&still_static.clone()), Some("Shocking!"));
///
/// // But it won't work for strings from other sources.
/// let nonstatic = ArcStr::from("Grounded...");
/// assert_eq!(ArcStr::as_static(&nonstatic), None);
/// ```
#[inline]
pub fn as_static(this: &Self) -> Option<&'static str> {
    if Self::is_static(this) {
        // We know static strings live forever, so they can have a static lifetime.
        Some(unsafe { &*(this.as_str() as *const str) })
    } else {
        None
    }
}

// Not public API. Exists so the `arcstr::literal` macro can call it.
#[inline]
#[doc(hidden)]
pub const unsafe fn _private_new_from_static_data<B>(
    ptr: &'static StaticArcStrInner<B>,
) -> Self {
    Self(NonNull::new_unchecked(ptr as *const _ as *mut ThinInner))
}

/// `feature = "substr"` Returns a substr of `self` over the given range.
///
/// # Examples
///
/// ```
/// use arcstr::{ArcStr, Substr};
///
/// let a = ArcStr::from("abcde");
/// let b: Substr = a.substr(2..);
///
/// assert_eq!(b, "cde");
/// ```
///
/// # Panics
/// If any of the following are untrue, we panic
/// - `range.start() <= range.end()`
/// - `range.end() <= self.len()`
/// - `self.is_char_boundary(start) && self.is_char_boundary(end)`
/// - These can be conveniently verified in advance using
///   `self.get(start..end).is_some()` if needed.
#[cfg(feature = "substr")] #[inline] pub fn substr(&self, range: impl core::ops::RangeBounds) -> Substr { Substr::from_parts(self, range) } /// `feature = "substr"` Returns a [`Substr`] of self over the given `&str`. /// /// It is not rare to end up with a `&str` which holds a view into a /// `ArcStr`'s backing data. A common case is when using functionality that /// takes and returns `&str` and are entirely unaware of `arcstr`, for /// example: `str::trim()`. /// /// This function allows you to reconstruct a [`Substr`] from a `&str` which /// is a view into this `ArcStr`'s backing string. /// /// # Examples /// /// ``` /// use arcstr::{ArcStr, Substr}; /// let text = ArcStr::from(" abc"); /// let trimmed = text.trim(); /// let substr: Substr = text.substr_from(trimmed); /// assert_eq!(substr, "abc"); /// // for illustration /// assert!(ArcStr::ptr_eq(substr.parent(), &text)); /// assert_eq!(substr.range(), 3..6); /// ``` /// /// # Panics /// /// Panics if `substr` isn't a view into our memory. /// /// Also panics if `substr` is a view into our memory but is >= `u32::MAX` /// bytes away from our start, if we're a 64-bit machine and /// `substr-usize-indices` is not enabled. #[cfg(feature = "substr")] pub fn substr_from(&self, substr: &str) -> Substr { if substr.is_empty() { return Substr::new(); } let self_start = self.as_ptr() as usize; let self_end = self_start + self.len(); let substr_start = substr.as_ptr() as usize; let substr_end = substr_start + substr.len(); if substr_start < self_start || substr_end > self_end { out_of_range(self, &substr); } let index = substr_start - self_start; let end = index + substr.len(); self.substr(index..end) } /// `feature = "substr"` If possible, returns a [`Substr`] of self over the /// given `&str`. /// /// This is a fallible version of [`ArcStr::substr_from`]. /// /// It is not rare to end up with a `&str` which holds a view into a /// `ArcStr`'s backing data. 
A common case is when using functionality that /// takes and returns `&str` and are entirely unaware of `arcstr`, for /// example: `str::trim()`. /// /// This function allows you to reconstruct a [`Substr`] from a `&str` which /// is a view into this `ArcStr`'s backing string. /// /// # Examples /// /// ``` /// use arcstr::{ArcStr, Substr}; /// let text = ArcStr::from(" abc"); /// let trimmed = text.trim(); /// let substr: Option = text.try_substr_from(trimmed); /// assert_eq!(substr.unwrap(), "abc"); /// // `&str`s not derived from `self` will return None. /// let not_substr = text.try_substr_from("abc"); /// assert!(not_substr.is_none()); /// ``` /// /// # Panics /// /// Panics if `substr` is a view into our memory but is >= `u32::MAX` bytes /// away from our start, if we're a 64-bit machine and /// `substr-usize-indices` is not enabled. #[cfg(feature = "substr")] pub fn try_substr_from(&self, substr: &str) -> Option { if substr.is_empty() { return Some(Substr::new()); } let self_start = self.as_ptr() as usize; let self_end = self_start + self.len(); let substr_start = substr.as_ptr() as usize; let substr_end = substr_start + substr.len(); if substr_start < self_start || substr_end > self_end { return None; } let index = substr_start - self_start; let end = index + substr.len(); debug_assert!(self.get(index..end).is_some()); Some(self.substr(index..end)) } /// `feature = "substr"` Compute a derived `&str` a function of `&str` => /// `&str`, and produce a Substr of the result if possible. /// /// The function may return either a derived string, or any empty string. /// /// This function is mainly a wrapper around [`ArcStr::try_substr_from`]. If /// you're coming to `arcstr` from the `shared_string` crate, this is the /// moral equivalent of the `slice_with` function. 
/// /// # Examples /// /// ``` /// use arcstr::{ArcStr, Substr}; /// let text = ArcStr::from(" abc"); /// let trimmed: Option = text.try_substr_using(str::trim); /// assert_eq!(trimmed.unwrap(), "abc"); /// let other = text.try_substr_using(|_s| "different string!"); /// assert_eq!(other, None); /// // As a special case, this is allowed. /// let empty = text.try_substr_using(|_s| ""); /// assert_eq!(empty.unwrap(), ""); /// ``` #[cfg(feature = "substr")] pub fn try_substr_using(&self, f: impl FnOnce(&str) -> &str) -> Option { self.try_substr_from(f(self.as_str())) } /// `feature = "substr"` Compute a derived `&str` a function of `&str` => /// `&str`, and produce a Substr of the result. /// /// The function may return either a derived string, or any empty string. /// Returning anything else will result in a panic. /// /// This function is mainly a wrapper around [`ArcStr::try_substr_from`]. If /// you're coming to `arcstr` from the `shared_string` crate, this is the /// likely closest to the `slice_with_unchecked` function, but this panics /// instead of UB on dodginess. /// /// # Examples /// /// ``` /// use arcstr::{ArcStr, Substr}; /// let text = ArcStr::from(" abc"); /// let trimmed: Substr = text.substr_using(str::trim); /// assert_eq!(trimmed, "abc"); /// // As a special case, this is allowed. /// let empty = text.substr_using(|_s| ""); /// assert_eq!(empty, ""); /// ``` #[cfg(feature = "substr")] pub fn substr_using(&self, f: impl FnOnce(&str) -> &str) -> Substr { self.substr_from(f(self.as_str())) } /// Creates an `ArcStr` by repeating the source string `n` times /// /// # Errors /// /// This function returns an error if the capacity overflows or allocation /// fails. 
/// /// # Examples /// /// ``` /// use arcstr::ArcStr; /// /// let source = "A"; /// let repeated = ArcStr::try_repeat(source, 10); /// assert_eq!(repeated.unwrap(), "AAAAAAAAAA"); /// ``` pub fn try_repeat(source: &str, n: usize) -> Option { // If the source string is empty or the user asked for zero repetitions, // return an empty string if source.is_empty() || n == 0 { return Some(Self::new()); } // Calculate the capacity for the allocated string let capacity = source.len().checked_mul(n)?; let inner = ThinInner::try_allocate_maybe_uninit(capacity, false, AllocInit::Uninit).ok()?; unsafe { let mut data_ptr = ThinInner::data_ptr(inner); let data_end = data_ptr.add(capacity); // Copy `source` into the allocated string `n` times while data_ptr < data_end { core::ptr::copy_nonoverlapping(source.as_ptr(), data_ptr, source.len()); data_ptr = data_ptr.add(source.len()); } } Some(Self(inner)) } /// Creates an `ArcStr` by repeating the source string `n` times /// /// # Panics /// /// This function panics if the capacity overflows, see /// [`try_repeat`](ArcStr::try_repeat) if this is undesirable. /// /// # Examples /// /// Basic usage: /// ``` /// use arcstr::ArcStr; /// /// let source = "A"; /// let repeated = ArcStr::repeat(source, 10); /// assert_eq!(repeated, "AAAAAAAAAA"); /// ``` /// /// A panic upon overflow: /// ```should_panic /// # use arcstr::ArcStr; /// /// // this will panic at runtime /// let huge = ArcStr::repeat("A", usize::MAX); /// ``` pub fn repeat(source: &str, n: usize) -> Self { Self::try_repeat(source, n).expect("capacity overflow") } } #[cold] #[inline(never)] #[cfg(feature = "substr")] fn out_of_range(arc: &ArcStr, substr: &&str) -> ! 
{ let arc_start = arc.as_ptr(); let arc_end = arc_start.wrapping_add(arc.len()); let substr_start = substr.as_ptr(); let substr_end = substr_start.wrapping_add(substr.len()); panic!( "ArcStr over ({:p}..{:p}) does not contain substr over ({:p}..{:p})", arc_start, arc_end, substr_start, substr_end, ); } impl Clone for ArcStr { #[inline] fn clone(&self) -> Self { if !Self::is_static(self) { // From libstd's impl: // // > Using a relaxed ordering is alright here, as knowledge of the // > original reference prevents other threads from erroneously deleting // > the object. // // See: https://doc.rust-lang.org/src/alloc/sync.rs.html#1073 let n: PackedFlagUint = PackedFlagUint::from_encoded(unsafe { let step = PackedFlagUint::FALSE_ONE.encoded_value(); (*self.0.as_ptr()) .count_flag .fetch_add(step, Ordering::Relaxed) }); // Protect against aggressive leaking of Arcs causing us to // overflow. Technically, we could probably transition it to static // here, but I haven't thought it through. if n.uint_part() > RC_MAX && !n.flag_part() { let val = PackedFlagUint::new_raw(true, 0).encoded_value(); unsafe { (*self.0.as_ptr()) .count_flag .fetch_or(val, Ordering::Release) }; // abort(); } } Self(self.0) } } const RC_MAX: usize = PackedFlagUint::UINT_PART_MAX / 2; impl Drop for ArcStr { #[inline] fn drop(&mut self) { if Self::is_static(self) { return; } unsafe { let this = self.0.as_ptr(); let enc = PackedFlagUint::from_encoded( (*this) .count_flag .fetch_sub(PackedFlagUint::FALSE_ONE.encoded_value(), Ordering::Release), ); // Note: `enc == PackedFlagUint::FALSE_ONE` if enc == PackedFlagUint::FALSE_ONE { let _ = (*this).count_flag.load(Ordering::Acquire); ThinInner::destroy_cold(this) } } } } // Caveat on the `static`/`strong` fields: "is_static" indicates if we're // located in static data (as with empty string). is_static being false meanse // we are a normal arc-ed string. 
// // While `ArcStr` claims to hold a pointer to a `ThinInner`, for the static case // we actually are using a pointer to a `StaticArcStrInner<[u8; N]>`. These have // almost identical layouts, except the static contains a explicit trailing // array, and does not have a `AtomicUsize` The issue is: We kind of want the // static ones to not have any interior mutability, so that `const`s can use // them, and so that they may be stored in read-only memory. // // We do this by keeping a flag in `len_flag` flag to indicate which case we're // in, and maintaining the invariant that if we're a `StaticArcStrInner` **we // may never access `.strong` in any way or produce a `&ThinInner` pointing to // our data**. // // This is more subtle than you might think, sinc AFAIK we're not legally // allowed to create an `&ThinInner` until we're 100% sure it's nonstatic, and // prior to determining it, we are forced to work from entirely behind a raw // pointer... // // That said, a bit of this hoop jumping might be not required in the future, // but for now what we're doing works and is apparently sound: // https://github.com/rust-lang/unsafe-code-guidelines/issues/246 #[repr(C, align(8))] struct ThinInner { // Both of these are `PackedFlagUint`s that store `is_static` as the flag. // // The reason it's not just stored in len is because an ArcStr may become // static after creation (via `ArcStr::leak`) and we don't need to do an // atomic load to access the length (and not only because it would mess with // optimization). // // The reason it's not just stored in the count is because it may be UB to // do atomic loads from read-only memory. This is also the reason it's not // stored in a separate atomic, and why doing an atomic load to access the // length wouldn't be acceptable even if compilers were really good. 
len_flag: PackedFlagUint, count_flag: AtomicUsize, data: [u8; 0], } const OFFSET_LENFLAGS: usize = 0; const OFFSET_COUNTFLAGS: usize = size_of::(); const OFFSET_DATA: usize = OFFSET_COUNTFLAGS + size_of::(); // Not public API, exists for macros. #[repr(C, align(8))] #[doc(hidden)] pub struct StaticArcStrInner { pub len_flag: usize, pub count_flag: usize, pub data: Buf, } impl StaticArcStrInner { #[doc(hidden)] pub const STATIC_COUNT_VALUE: usize = PackedFlagUint::new_raw(true, 1).encoded_value(); #[doc(hidden)] #[inline] pub const fn encode_len(v: usize) -> Option { match PackedFlagUint::new(true, v) { Some(v) => Some(v.encoded_value()), None => None, } } } const _: [(); size_of::>()] = [(); 2 * size_of::()]; const _: [(); align_of::>()] = [(); 8]; const _: [(); size_of::()]>>()] = [(); 4 * size_of::()]; const _: [(); align_of::()]>>()] = [(); 8]; const _: [(); size_of::()] = [(); 2 * size_of::()]; const _: [(); align_of::()] = [(); 8]; const _: [(); align_of::()] = [(); align_of::()]; const _: [(); align_of::()] = [(); size_of::()]; const _: [(); size_of::()] = [(); size_of::()]; const _: [(); align_of::()] = [(); align_of::()]; const _: [(); size_of::()] = [(); size_of::()]; #[derive(Clone, Copy, PartialEq, Eq)] #[repr(transparent)] struct PackedFlagUint(usize); impl PackedFlagUint { const UINT_PART_MAX: usize = (1 << (usize::BITS - 1)) - 1; /// Encodes `false` as the flag and `1` as the uint. Used for a few things, /// such as the amount we `fetch_add` by for refcounting, and so on. 
const FALSE_ONE: Self = Self::new_raw(false, 1); #[inline] const fn new(flag_part: bool, uint_part: usize) -> Option { if uint_part > Self::UINT_PART_MAX { None } else { Some(Self::new_raw(flag_part, uint_part)) } } #[inline(always)] const fn new_raw(flag_part: bool, uint_part: usize) -> Self { Self(flag_part as usize | (uint_part << 1)) } #[inline(always)] const fn uint_part(self) -> usize { self.0 >> 1 } #[inline(always)] const fn flag_part(self) -> bool { (self.0 & 1) != 0 } #[inline(always)] const fn from_encoded(v: usize) -> Self { Self(v) } #[inline(always)] const fn encoded_value(self) -> usize { self.0 } #[inline(always)] #[must_use] const fn with_flag(self, v: bool) -> Self { Self(v as usize | self.0) } } const EMPTY: ArcStr = literal!(""); impl ThinInner { #[inline] fn allocate(data: &str, initially_static: bool) -> NonNull { match Self::try_allocate(data, initially_static) { Ok(v) => v, Err(None) => alloc_overflow(), Err(Some(layout)) => alloc::alloc::handle_alloc_error(layout), } } #[inline] fn data_ptr(this: NonNull) -> *mut u8 { unsafe { this.as_ptr().cast::().add(OFFSET_DATA) } } /// Allocates a `ThinInner` where the data segment is uninitialized or /// zeroed. 
/// /// Returns `Err(Some(layout))` if we failed to allocate that layout, and /// `Err(None)` for integer overflow when computing layout fn try_allocate_maybe_uninit( capacity: usize, initially_static: bool, init_how: AllocInit, ) -> Result, Option> { const ALIGN: usize = align_of::(); debug_assert_ne!(capacity, 0); if capacity >= (isize::MAX as usize) - (OFFSET_DATA + ALIGN) { return Err(None); } debug_assert!(Layout::from_size_align(capacity + OFFSET_DATA, ALIGN).is_ok()); let layout = unsafe { Layout::from_size_align_unchecked(capacity + OFFSET_DATA, ALIGN) }; let ptr = match init_how { AllocInit::Uninit => unsafe { alloc::alloc::alloc(layout) as *mut ThinInner }, AllocInit::Zero => unsafe { alloc::alloc::alloc_zeroed(layout) as *mut ThinInner }, }; if ptr.is_null() { return Err(Some(layout)); } // we actually already checked this above... debug_assert!(PackedFlagUint::new(initially_static, capacity).is_some()); let len_flag = PackedFlagUint::new_raw(initially_static, capacity); debug_assert_eq!(len_flag.uint_part(), capacity); debug_assert_eq!(len_flag.flag_part(), initially_static); unsafe { core::ptr::addr_of_mut!((*ptr).len_flag).write(len_flag); let initial_count_flag = PackedFlagUint::new_raw(initially_static, 1); let count_flag: AtomicUsize = AtomicUsize::new(initial_count_flag.encoded_value()); core::ptr::addr_of_mut!((*ptr).count_flag).write(count_flag); debug_assert_eq!( (ptr as *const u8).wrapping_add(OFFSET_DATA), (*ptr).data.as_ptr(), ); Ok(NonNull::new_unchecked(ptr)) } } // returns `Err(Some(l))` if we failed to allocate that layout, and // `Err(None)` for integer overflow when computing layout. #[inline] fn try_allocate(data: &str, initially_static: bool) -> Result, Option> { // Safety: we initialize the whole buffer by copying `data` into it. 
unsafe { // Allocate a enough space to hold the given string Self::try_allocate_with( data.len(), initially_static, AllocInit::Uninit, // Copy the given string into the allocation |uninit_slice| { debug_assert_eq!(uninit_slice.len(), data.len()); core::ptr::copy_nonoverlapping( data.as_ptr(), uninit_slice.as_mut_ptr().cast::(), data.len(), ) }, ) } } /// Safety: caller must fully initialize the provided buffer with valid /// UTF-8 in the `initializer` function (well, you at least need to handle /// it before giving it back to the user). #[inline] unsafe fn try_allocate_with( len: usize, initially_static: bool, init_style: AllocInit, initializer: impl FnOnce(&mut [core::mem::MaybeUninit]), ) -> Result, Option> { // Allocate a enough space to hold the given string let this = Self::try_allocate_maybe_uninit(len, initially_static, init_style)?; initializer(core::slice::from_raw_parts_mut( Self::data_ptr(this).cast::>(), len, )); Ok(this) } #[inline] unsafe fn get_len_flag(p: *const ThinInner) -> PackedFlagUint { debug_assert_eq!(OFFSET_LENFLAGS, 0); *p.cast() } #[cold] unsafe fn destroy_cold(p: *mut ThinInner) { let lf = Self::get_len_flag(p); let (is_static, len) = (lf.flag_part(), lf.uint_part()); debug_assert!(!is_static); let layout = { let size = len + OFFSET_DATA; let align = align_of::(); Layout::from_size_align_unchecked(size, align) }; alloc::alloc::dealloc(p as *mut _, layout); } } #[derive(Clone, Copy, PartialEq)] enum AllocInit { Uninit, Zero, } #[inline(never)] #[cold] fn alloc_overflow() -> ! 
{ panic!("overflow during Layout computation") } impl From<&str> for ArcStr { #[inline] fn from(s: &str) -> Self { if s.is_empty() { Self::new() } else { Self(ThinInner::allocate(s, false)) } } } impl core::ops::Deref for ArcStr { type Target = str; #[inline] fn deref(&self) -> &str { unsafe { core::str::from_utf8_unchecked(self.as_bytes()) } } } impl Default for ArcStr { #[inline] fn default() -> Self { Self::new() } } impl From for ArcStr { #[inline] fn from(v: String) -> Self { v.as_str().into() } } impl From<&mut str> for ArcStr { #[inline] fn from(s: &mut str) -> Self { let s: &str = s; Self::from(s) } } impl From> for ArcStr { #[inline] fn from(s: Box) -> Self { Self::from(&s[..]) } } impl From for Box { #[inline] fn from(s: ArcStr) -> Self { s.as_str().into() } } impl From for alloc::rc::Rc { #[inline] fn from(s: ArcStr) -> Self { s.as_str().into() } } impl From for alloc::sync::Arc { #[inline] fn from(s: ArcStr) -> Self { s.as_str().into() } } impl From> for ArcStr { #[inline] fn from(s: alloc::rc::Rc) -> Self { Self::from(&*s) } } impl From> for ArcStr { #[inline] fn from(s: alloc::sync::Arc) -> Self { Self::from(&*s) } } impl<'a> From> for ArcStr { #[inline] fn from(s: Cow<'a, str>) -> Self { Self::from(&*s) } } impl<'a> From<&'a ArcStr> for Cow<'a, str> { #[inline] fn from(s: &'a ArcStr) -> Self { Cow::Borrowed(s) } } impl<'a> From for Cow<'a, str> { #[inline] fn from(s: ArcStr) -> Self { if let Some(st) = ArcStr::as_static(&s) { Cow::Borrowed(st) } else { Cow::Owned(s.to_string()) } } } impl From<&String> for ArcStr { #[inline] fn from(s: &String) -> Self { Self::from(s.as_str()) } } impl From<&ArcStr> for ArcStr { #[inline] fn from(s: &ArcStr) -> Self { s.clone() } } impl core::fmt::Debug for ArcStr { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { core::fmt::Debug::fmt(self.as_str(), f) } } impl core::fmt::Display for ArcStr { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 
core::fmt::Display::fmt(self.as_str(), f) } } impl PartialEq for ArcStr { #[inline] fn eq(&self, o: &Self) -> bool { ArcStr::ptr_eq(self, o) || PartialEq::eq(self.as_str(), o.as_str()) } #[inline] fn ne(&self, o: &Self) -> bool { !ArcStr::ptr_eq(self, o) && PartialEq::ne(self.as_str(), o.as_str()) } } impl Eq for ArcStr {} macro_rules! impl_peq { (@one $a:ty, $b:ty) => { #[allow(clippy::extra_unused_lifetimes)] impl<'a> PartialEq<$b> for $a { #[inline] fn eq(&self, s: &$b) -> bool { PartialEq::eq(&self[..], &s[..]) } #[inline] fn ne(&self, s: &$b) -> bool { PartialEq::ne(&self[..], &s[..]) } } }; ($(($a:ty, $b:ty),)+) => {$( impl_peq!(@one $a, $b); impl_peq!(@one $b, $a); )+}; } impl_peq! { (ArcStr, str), (ArcStr, &'a str), (ArcStr, String), (ArcStr, Cow<'a, str>), (ArcStr, Box), (ArcStr, alloc::sync::Arc), (ArcStr, alloc::rc::Rc), (ArcStr, alloc::sync::Arc), (ArcStr, alloc::rc::Rc), } impl PartialOrd for ArcStr { #[inline] fn partial_cmp(&self, s: &Self) -> Option { Some(self.as_str().cmp(s.as_str())) } } impl Ord for ArcStr { #[inline] fn cmp(&self, s: &Self) -> core::cmp::Ordering { self.as_str().cmp(s.as_str()) } } impl core::hash::Hash for ArcStr { #[inline] fn hash(&self, h: &mut H) { self.as_str().hash(h) } } macro_rules! impl_index { ($($IdxT:ty,)*) => {$( impl core::ops::Index<$IdxT> for ArcStr { type Output = str; #[inline] fn index(&self, i: $IdxT) -> &Self::Output { &self.as_str()[i] } } )*}; } impl_index! 
{ core::ops::RangeFull, core::ops::Range, core::ops::RangeFrom, core::ops::RangeTo, core::ops::RangeInclusive, core::ops::RangeToInclusive, } impl AsRef for ArcStr { #[inline] fn as_ref(&self) -> &str { self } } impl AsRef<[u8]> for ArcStr { #[inline] fn as_ref(&self) -> &[u8] { self.as_bytes() } } impl core::borrow::Borrow for ArcStr { #[inline] fn borrow(&self) -> &str { self } } impl core::str::FromStr for ArcStr { type Err = core::convert::Infallible; #[inline] fn from_str(s: &str) -> Result { Ok(Self::from(s)) } } #[cfg(test)] #[cfg(not(msrv))] // core::mem::offset_of! isn't stable in our MSRV mod test { use super::*; fn sasi_layout_check() { assert!(align_of::>() >= 8); assert_eq!( core::mem::offset_of!(StaticArcStrInner, count_flag), OFFSET_COUNTFLAGS ); assert_eq!( core::mem::offset_of!(StaticArcStrInner, len_flag), OFFSET_LENFLAGS ); assert_eq!( core::mem::offset_of!(StaticArcStrInner, data), OFFSET_DATA ); assert_eq!( core::mem::offset_of!(ThinInner, count_flag), core::mem::offset_of!(StaticArcStrInner::, count_flag), ); assert_eq!( core::mem::offset_of!(ThinInner, len_flag), core::mem::offset_of!(StaticArcStrInner::, len_flag), ); assert_eq!( core::mem::offset_of!(ThinInner, data), core::mem::offset_of!(StaticArcStrInner::, data), ); } #[test] fn verify_type_pun_offsets_sasi_big_bufs() { assert_eq!( core::mem::offset_of!(ThinInner, count_flag), OFFSET_COUNTFLAGS, ); assert_eq!(core::mem::offset_of!(ThinInner, len_flag), OFFSET_LENFLAGS); assert_eq!(core::mem::offset_of!(ThinInner, data), OFFSET_DATA); assert!(align_of::() >= 8); sasi_layout_check::<[u8; 0]>(); sasi_layout_check::<[u8; 1]>(); sasi_layout_check::<[u8; 2]>(); sasi_layout_check::<[u8; 3]>(); sasi_layout_check::<[u8; 4]>(); sasi_layout_check::<[u8; 5]>(); sasi_layout_check::<[u8; 15]>(); sasi_layout_check::<[u8; 16]>(); sasi_layout_check::<[u8; 64]>(); sasi_layout_check::<[u8; 128]>(); sasi_layout_check::<[u8; 1024]>(); sasi_layout_check::<[u8; 4095]>(); sasi_layout_check::<[u8; 4096]>(); } } 
#[cfg(all(test, loom))] mod loomtest { use super::ArcStr; use loom::sync::Arc; use loom::thread; #[test] fn cloning_threads() { loom::model(|| { let a = ArcStr::from("abcdefgh"); let addr = a.as_ptr() as usize; let a1 = Arc::new(a); let a2 = a1.clone(); let t1 = thread::spawn(move || { let b: ArcStr = (*a1).clone(); assert_eq!(b.as_ptr() as usize, addr); }); let t2 = thread::spawn(move || { let b: ArcStr = (*a2).clone(); assert_eq!(b.as_ptr() as usize, addr); }); t1.join().unwrap(); t2.join().unwrap(); }); } #[test] fn drop_timing() { loom::model(|| { let a1 = alloc::vec![ ArcStr::from("s1"), ArcStr::from("s2"), ArcStr::from("s3"), ArcStr::from("s4"), ]; let a2 = a1.clone(); let t1 = thread::spawn(move || { let mut a1 = a1; while let Some(s) = a1.pop() { assert!(s.starts_with("s")); } }); let t2 = thread::spawn(move || { let mut a2 = a2; while let Some(s) = a2.pop() { assert!(s.starts_with("s")); } }); t1.join().unwrap(); t2.join().unwrap(); }); } #[test] fn leak_drop() { loom::model(|| { let a1 = ArcStr::from("foo"); let a2 = a1.clone(); let t1 = thread::spawn(move || { drop(a1); }); let t2 = thread::spawn(move || a2.leak()); t1.join().unwrap(); let leaked: &'static str = t2.join().unwrap(); assert_eq!(leaked, "foo"); }); } } arcstr-1.2.0/src/impl_serde.rs000064400000000000000000000030231046102023000144000ustar 00000000000000use super::ArcStr; #[cfg(feature = "substr")] use super::Substr; use core::marker::PhantomData; use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; impl Serialize for ArcStr { fn serialize(&self, ser: S) -> Result { ser.serialize_str(self) } } impl<'de> Deserialize<'de> for ArcStr { fn deserialize>(d: D) -> Result { d.deserialize_str(StrVisitor::(PhantomData)) } } #[cfg(feature = "substr")] impl Serialize for crate::Substr { fn serialize(&self, ser: S) -> Result { ser.serialize_str(self) } } #[cfg(feature = "substr")] impl<'de> Deserialize<'de> for Substr { fn deserialize>(d: D) -> Result { 
d.deserialize_str(StrVisitor::(PhantomData)) } } struct StrVisitor(PhantomData StrTy>); impl<'de, StrTy> de::Visitor<'de> for StrVisitor where for<'a> &'a str: Into, { type Value = StrTy; fn expecting(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { formatter.write_str("a string") } fn visit_str(self, v: &str) -> Result { Ok(v.into()) } fn visit_bytes(self, v: &[u8]) -> Result { match core::str::from_utf8(v) { Ok(s) => Ok(s.into()), Err(_) => Err(de::Error::invalid_value(de::Unexpected::Bytes(v), &self)), } } } arcstr-1.2.0/src/lib.rs000064400000000000000000000077541046102023000130420ustar 00000000000000//! # Better reference counted strings //! //! This crate defines [`ArcStr`], a type similar to `Arc`, but with a //! number of new features and functionality. There's a list of //! [benefits][benefits] in the `ArcStr` documentation comment which covers some //! of the reasons you might want to use it over other alternatives. //! //! Additionally, if the `substr` feature is enabled (and it is by default), we //! provide a [`Substr`] type which is essentially a `(ArcStr, Range)` //! with better ergonomics and more functionality, which represents a shared //! slice of a "parent" `ArcStr` (note that in reality, `u32` is used for the //! index type, but this is not exposed in the API, and can be transparently //! changed via a cargo feature). //! //! [benefits]: struct.ArcStr.html#benefits-of-arcstr-over-arcstr //! //! ## Feature overview //! //! A quick tour of the distinguishing features: //! //! ``` //! use arcstr::ArcStr; //! //! // Works in const: //! const MY_ARCSTR: ArcStr = arcstr::literal!("amazing constant"); //! assert_eq!(MY_ARCSTR, "amazing constant"); //! //! // `arcstr::literal!` input can come from `include_str!` too: //! # // We have to fake it here, but this has test coverage and such. //! # const _: &str = stringify!{ //! const MY_ARCSTR: ArcStr = arcstr::literal!(include_str!("my-best-files.txt")); //! # }; //! ``` //! //! 
Or, you can define the literals in normal expressions. Note that these //! literals are essentially ["Zero Cost"][zero-cost]. Specifically, below we //! not only avoid allocating any heap memory to instantiate `wow` or any of //! the clones, we also don't have to perform any atomic reads or writes. //! //! [zero-cost]: struct.ArcStr.html#what-does-zero-cost-literals-mean //! //! ``` //! use arcstr::ArcStr; //! //! let wow: ArcStr = arcstr::literal!("Wow!"); //! assert_eq!("Wow!", wow); //! // This line is probably not something you want to do regularly, //! // but causes no extra allocations, nor performs any atomic reads //! // nor writes. //! let wowzers = wow.clone().clone().clone().clone(); //! //! // At some point in the future, we can get a `&'static str` out of one //! // of the literal `ArcStr`s too. Note that this returns `None` for //! // a dynamically allocated `ArcStr`: //! let static_str: Option<&'static str> = ArcStr::as_static(&wowzers); //! assert_eq!(static_str, Some("Wow!")); //! ``` //! //! Of course, this is in addition to the typical functionality you might find in a //! non-borrowed string type (with the caveat that there is explicitly no way to //! mutate `ArcStr`). //! //! It's an open TODO to update this "feature tour" to include `Substr`. #![cfg_attr(not(feature = "std"), no_std)] #![deny(missing_docs)] #![allow(unknown_lints)] // for `cfg(loom)` and such -- I don't want to add a build.rs for this. #![allow(unexpected_cfgs)] #[doc(hidden)] pub extern crate alloc; #[doc(hidden)] pub use core; #[macro_use] mod mac; mod arc_str; #[cfg(feature = "serde")] mod impl_serde; pub use arc_str::ArcStr; #[cfg(feature = "substr")] mod substr; #[cfg(feature = "substr")] pub use substr::Substr; // Not public API, exists for macros #[doc(hidden)] pub mod _private { // Not part of public API. Transmutes a `*const u8` to a `&[u8; N]`. // // As of writing this, it's unstable to directly deref a raw pointer in // const code. 
We can get around this by transmuting (using the // const-transmute union trick) to transmute from `*const u8` to `&[u8; N]`, // and the dereferencing that. // // ... I'm a little surprised that this is allowed, but in general I do // remember a motivation behind stabilizing transmute in `const fn` was that // the union trick existed as a workaround. // // Anyway, this trick is courtesy of rodrimati1992 (that means you have to // blame them if it blows up :p). #[repr(C)] pub union ConstPtrDeref { pub p: *const u8, pub a: &'static Arr, } pub use crate::arc_str::StaticArcStrInner; pub use core::primitive::{str, u8}; } arcstr-1.2.0/src/mac.rs000064400000000000000000000121401046102023000130150ustar 00000000000000/// Create a const [`ArcStr`](crate::ArcStr) from a string literal. The /// resulting `ArcStr` require no heap allocation, can be freely cloned and used /// interchangeably with `ArcStr`s from the heap, and are effectively "free". /// /// The main downside is that it's a macro. Eventually it may be doable as a /// `const fn`, which would be cleaner, but for now the drawbacks to this are /// not overwhelming, and the functionality it provides is very useful. /// /// # Usage /// /// ``` /// # use arcstr::ArcStr; /// // Works in const: /// const MY_ARCSTR: ArcStr = arcstr::literal!("testing testing"); /// assert_eq!(MY_ARCSTR, "testing testing"); /// /// // Or, just in normal expressions. /// assert_eq!("Wow!", arcstr::literal!("Wow!")); /// ``` /// /// Another motivating use case is bundled files: /// /// ```rust,ignore /// use arcstr::ArcStr; /// const VERY_IMPORTANT_FILE: ArcStr = /// arcstr::literal!(include_str!("./very-important.txt")); /// ``` #[macro_export] macro_rules! literal { ($text:expr $(,)?) => {{ // Note: extra scope to reduce the size of what's in `$text`'s scope // (note that consts in macros dont have hygene the way let does). 
const __TEXT: &$crate::_private::str = $text; { #[allow(clippy::declare_interior_mutable_const)] const SI: &$crate::_private::StaticArcStrInner<[$crate::_private::u8; __TEXT.len()]> = unsafe { &$crate::_private::StaticArcStrInner { len_flag: match $crate::_private::StaticArcStrInner::<[$crate::_private::u8; __TEXT.len()]>::encode_len(__TEXT.len()) { Some(len) => len, None => $crate::core::panic!("impossibly long length") }, count_flag: $crate::_private::StaticArcStrInner::<[$crate::_private::u8; __TEXT.len()]>::STATIC_COUNT_VALUE, // See comment for `_private::ConstPtrDeref` for what the hell's // going on here. data: *$crate::_private::ConstPtrDeref::<[$crate::_private::u8; __TEXT.len()]> { p: __TEXT.as_ptr(), } .a, // data: __TEXT.as_ptr().cast::<[$crate::_private::u8; __TEXT.len()]>().read(), } }; #[allow(clippy::declare_interior_mutable_const)] const S: $crate::ArcStr = unsafe { $crate::ArcStr::_private_new_from_static_data(SI) }; S } }}; } /// Conceptually equivalent to `ArcStr::from(format!("...", args...))`. /// /// In the future, this will be implemented in such a way to avoid an additional /// string copy which is required by the `from` operation. /// /// # Example /// /// ``` /// let arcstr = arcstr::format!("testing {}", 123); /// assert_eq!(arcstr, "testing 123"); /// ``` #[macro_export] macro_rules! format { ($($toks:tt)*) => { $crate::ArcStr::from($crate::alloc::fmt::format($crate::core::format_args!($($toks)*))) }; } /// `feature = "substr"`: Create a `const` [`Substr`][crate::Substr]. /// /// This is a wrapper that initializes a `Substr` over the entire contents of a /// `const` [`ArcStr`](crate::ArcStr) made using [arcstr::literal!](crate::literal). /// /// As with `arcstr::literal`, these require no heap allocation, can be freely /// cloned and used interchangeably with `ArcStr`s from the heap, and are /// effectively "free". /// /// The main use case here is in applications where `Substr` is a much more /// common string type than `ArcStr`. 
/// /// # Examples /// /// ``` /// use arcstr::{Substr, literal_substr}; /// // Works in const: /// const EXAMPLE_SUBSTR: Substr = literal_substr!("testing testing"); /// assert_eq!(EXAMPLE_SUBSTR, "testing testing"); /// /// // Or, just in normal expressions. /// assert_eq!("Wow!", literal_substr!("Wow!")); /// ``` #[macro_export] #[cfg(feature = "substr")] macro_rules! literal_substr { ($text:expr $(,)?) => {{ const __S: &$crate::_private::str = $text; { const PARENT: $crate::ArcStr = $crate::literal!(__S); const SUBSTR: $crate::Substr = unsafe { $crate::Substr::from_parts_unchecked(PARENT, 0..__S.len()) }; SUBSTR } }}; } #[cfg(test)] mod test { #[test] fn ensure_no_import() { let v = literal!("foo"); assert_eq!(v, "foo"); #[cfg(feature = "substr")] { let substr = literal_substr!("bar"); assert_eq!(substr, "bar"); } // Loom doesn't like it if you do things outside `loom::model`, AFAICT. // These calls produce error messages from inside `libstd` about // accessing thread_locals that haven't been initialized. #[cfg(not(loom))] { let test = crate::format!("foo"); assert_eq!(test, "foo"); let test2 = crate::format!("foo {}", 123); assert_eq!(test2, "foo 123"); #[cfg(not(msrv))] { let foo = "abc"; let test3 = crate::format!("foo {foo}"); assert_eq!(test3, "foo abc"); } } } } arcstr-1.2.0/src/substr.rs000064400000000000000000000621601046102023000136060ustar 00000000000000#![allow( // We follow libstd's lead and prefer to define both. clippy::partialeq_ne_impl, // This is a really annoying clippy lint, since it's required for so many cases... 
clippy::cast_ptr_alignment, // For macros clippy::redundant_slicing, )] #![cfg_attr(feature = "substr-usize-indices", allow(clippy::unnecessary_cast))] use crate::ArcStr; use core::ops::{Range, RangeBounds}; #[cfg(feature = "substr-usize-indices")] type Idx = usize; #[cfg(not(feature = "substr-usize-indices"))] type Idx = u32; #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))] compile_error!( "Non-32/64-bit pointers not supported right now due to insufficient \ testing on a platform like that. Please file a issue with the \ `arcstr` crate so we can talk about your use case if this is \ important to you." ); /// A low-cost string type representing a view into an [`ArcStr`]. /// /// Conceptually this is `(ArcStr, Range)` with ergonomic helpers. In /// implementation, the only difference between it and that is that the index /// type is `u32` unless the `substr-usize-indices` feature is enabled, which /// makes them use `usize`. /// /// # Examples /// /// ``` /// use arcstr::{ArcStr, Substr}; /// let parent = ArcStr::from("foo bar"); /// // The main way to create a Substr is with `ArcStr::substr`. /// let substr: Substr = parent.substr(3..); /// assert_eq!(substr, " bar"); /// // You can use `substr_using` to turn a function which is /// // `&str => &str` into a function over `Substr => Substr`. /// // See also `substr_from`, `try_substr_{from,using}`, and /// // the functions with the same name on `ArcStr`. /// let trimmed = substr.substr_using(str::trim); /// assert_eq!(trimmed, "bar"); /// ``` /// /// # Caveats /// /// The main caveat is the bit about index types. The index type is u32 by /// default. You can turn on `substr-usize-indices` if you desire though. The /// feature doesn't change the public API at all, just makes it able to handle /// enormous strings without panicking. This seems very niche to me, though. 
#[derive(Clone)] #[repr(C)] // We mentioned ArcStr being good at FFI at some point so why not pub struct Substr(ArcStr, Idx, Idx); #[inline] #[cfg(all(target_pointer_width = "64", not(feature = "substr-usize-indices")))] #[allow(clippy::let_unit_value)] const fn to_idx_const(i: usize) -> Idx { const DUMMY: [(); 1] = [()]; let _ = DUMMY[i >> 32]; i as Idx } #[inline] #[cfg(any(not(target_pointer_width = "64"), feature = "substr-usize-indices"))] const fn to_idx_const(i: usize) -> Idx { i as Idx } #[inline] #[cfg(all(target_pointer_width = "64", not(feature = "substr-usize-indices")))] fn to_idx(i: usize) -> Idx { if i > 0xffff_ffff { index_overflow(i); } i as Idx } #[inline] #[cfg(any(not(target_pointer_width = "64"), feature = "substr-usize-indices"))] fn to_idx(i: usize) -> Idx { i as Idx } #[cold] #[inline(never)] #[cfg(all(target_pointer_width = "64", not(feature = "substr-usize-indices")))] fn index_overflow(i: usize) -> ! { panic!("The index {} is too large for arcstr::Substr (enable the `substr-usize-indices` feature in `arcstr` if you need this)", i); } #[cold] #[inline(never)] fn bad_substr_idx(s: &ArcStr, i: usize, e: usize) -> ! { assert!(i <= e, "Bad substr range: start {} must be <= end {}", i, e); let max = if cfg!(all( target_pointer_width = "64", not(feature = "substr-usize-indices") )) { u32::MAX as usize } else { usize::MAX }; let len = s.len().min(max); assert!( e <= len, "Bad substr range: end {} must be <= string length/index max size {}", e, len ); assert!( s.is_char_boundary(i) && s.is_char_boundary(e), "Bad substr range: start and end must be on char boundaries" ); unreachable!( "[arcstr bug]: should have failed one of the above tests: \ please report me. debugging info: b={}, e={}, l={}, max={:#x}", i, e, s.len(), max ); } impl Substr { /// Construct an empty substr. 
///
/// # Examples
/// ```
/// # use arcstr::Substr;
/// let s = Substr::new();
/// assert_eq!(s, "");
/// ```
#[inline]
pub const fn new() -> Self {
    Substr(ArcStr::new(), 0, 0)
}

/// Construct a Substr over the entire ArcStr.
///
/// This is also provided as `Substr::from(some_arcstr)`, and can be
/// accomplished with `a.substr(..)`, `a.into_substr(..)`, ...
///
/// # Examples
/// ```
/// # use arcstr::{Substr, ArcStr};
/// let s = Substr::full(ArcStr::from("foo"));
/// assert_eq!(s, "foo");
/// assert_eq!(s.range(), 0..3);
/// ```
#[inline]
pub fn full(a: ArcStr) -> Self {
    let l = to_idx(a.len());
    Substr(a, 0, l)
}

/// Builds a `Substr` of `a` covering `range` (byte offsets into `a`).
///
/// Panics if the range is out of bounds or not on char boundaries (the
/// range is validated by slicing `a.as_str()`), or if an offset does not
/// fit in the configured index type (see `to_idx`).
#[inline]
pub(crate) fn from_parts(a: &ArcStr, range: impl RangeBounds<usize>) -> Self {
    use core::ops::Bound;
    // `saturating_add` keeps a bound of `usize::MAX` from wrapping around to
    // 0 in release builds; the saturated value is still rejected by the
    // slice check below.
    let begin = match range.start_bound() {
        Bound::Included(&n) => n,
        Bound::Excluded(&n) => n.saturating_add(1),
        Bound::Unbounded => 0,
    };
    let end = match range.end_bound() {
        Bound::Included(&n) => n.saturating_add(1),
        Bound::Excluded(&n) => n,
        Bound::Unbounded => a.len(),
    };
    // Slice purely for its bounds / char-boundary panic.
    let _ = &a.as_str()[begin..end];

    Self(ArcStr::clone(a), to_idx(begin), to_idx(end))
}

/// Extract a substr of this substr.
///
/// The range is interpreted relative to this substr, not to its parent.
/// The result shares this substr's parent `ArcStr`; the parent is cloned
/// even when the result is empty.
///
/// # Examples
/// ```
/// # use arcstr::Substr;
/// let s: Substr = arcstr::literal!("foobarbaz").substr(3..);
/// assert_eq!(s.as_str(), "barbaz");
///
/// let s2 = s.substr(1..5);
/// assert_eq!(s2, "arba");
/// ```
/// # Panics
/// If any of the following are untrue, we panic
/// - `range.start() <= range.end()`
/// - `range.end() <= self.len()`
/// - `self.is_char_boundary(start) && self.is_char_boundary(end)`
/// - These can be conveniently verified in advance using
///   `self.get(start..end).is_some()` if needed.
#[inline] pub fn substr(&self, range: impl RangeBounds) -> Self { use core::ops::Bound; let my_end = self.2 as usize; let begin = match range.start_bound() { Bound::Included(&n) => n, Bound::Excluded(&n) => n + 1, Bound::Unbounded => 0, }; let end = match range.end_bound() { Bound::Included(&n) => n + 1, Bound::Excluded(&n) => n, Bound::Unbounded => self.len(), }; let new_begin = self.1 as usize + begin; let new_end = self.1 as usize + end; // let _ = &self.0.as_str()[new_begin..new_end]; if begin > end || end > my_end || !self.0.is_char_boundary(new_begin) || !self.0.is_char_boundary(new_end) { bad_substr_idx(&self.0, new_begin, new_end); } debug_assert!(self.0.get(new_begin..new_end).is_some()); debug_assert!(new_begin <= (Idx::MAX as usize) && new_end <= (Idx::MAX as usize)); Self(ArcStr::clone(&self.0), new_begin as Idx, new_end as Idx) } /// Extract a string slice containing our data. /// /// Note: This is an equivalent to our `Deref` implementation, but can be /// more readable than `&*s` in the cases where a manual invocation of /// `Deref` would be required. /// /// # Examples /// ``` /// # use arcstr::Substr; /// let s: Substr = arcstr::literal!("foobar").substr(3..); /// assert_eq!(s.as_str(), "bar"); /// ``` #[inline] pub fn as_str(&self) -> &str { self } /// Returns the length of this `Substr` in bytes. /// /// # Examples /// /// ``` /// # use arcstr::{ArcStr, Substr}; /// let a: Substr = ArcStr::from("foo").substr(1..); /// assert_eq!(a.len(), 2); /// ``` #[inline] pub fn len(&self) -> usize { debug_assert!(self.2 >= self.1); (self.2 - self.1) as usize } /// Returns true if this `Substr` is empty. /// /// # Examples /// /// ``` /// # use arcstr::Substr; /// assert!(arcstr::literal!("abc").substr(3..).is_empty()); /// assert!(!arcstr::literal!("abc").substr(2..).is_empty()); /// assert!(Substr::new().is_empty()); /// ``` #[inline] pub fn is_empty(&self) -> bool { self.2 == self.1 } /// Convert us to a `std::string::String`. 
///
/// This is provided as an inherent method to avoid needing to route through
/// the `Display` machinery, but is equivalent to `ToString::to_string`.
///
/// # Examples
///
/// ```
/// # use arcstr::Substr;
/// let s: Substr = arcstr::literal!("12345").substr(1..4);
/// assert_eq!(s.to_string(), "234");
/// ```
#[inline]
#[allow(clippy::inherent_to_string_shadow_display)]
pub fn to_string(&self) -> alloc::string::String {
    // Without `std`, `ToOwned` is not in the prelude and has to be imported.
    #[cfg(not(feature = "std"))]
    use alloc::borrow::ToOwned;
    self.as_str().to_owned()
}

/// Unchecked function to construct a [`Substr`] from an [`ArcStr`] and a
/// byte range. Direct usage of this function is largely discouraged in
/// favor of [`ArcStr::substr`][crate::ArcStr::substr], or the
/// [`literal_substr!`](crate::literal_substr) macro, which currently is
/// implemented using a call to this function (however, can guarantee safe
/// usage).
///
/// This is unsafe because currently `ArcStr` cannot provide a `&str` in a
/// `const fn`. If that changes then we will likely deprecate this function,
/// and provide a `pub const fn from_parts` with equivalent functionality.
///
/// In the distant future, it would be nice if this accepted other kinds of
/// ranges too.
///
/// # Examples
///
/// ```
/// use arcstr::{ArcStr, Substr};
/// const FOOBAR: ArcStr = arcstr::literal!("foobar");
/// const OBA: Substr = unsafe { Substr::from_parts_unchecked(FOOBAR, 2..5) };
/// assert_eq!(OBA, "oba");
/// ```
// TODO: can I do a compile_fail test that only is a failure under a certain feature?
///
/// # Safety
/// You promise that `range` is in bounds for `s`, and that the start and
/// end are both on character boundaries. Note that we do check that the
/// `usize` indices fit into `u32` if that's our configured index type, so
/// `_unchecked` is not *entirely* a lie.
///
/// # Panics
/// If the `substr-usize-indices` feature is not enabled, and the target arch
/// is 64-bit, and the usizes do not fit in 32 bits, then we panic with a
/// (possibly strange-looking) index-out-of-bounds error in order to force
/// compilation failure.
#[inline]
pub const unsafe fn from_parts_unchecked(s: ArcStr, range: Range<usize>) -> Self {
    // Only the index width is checked here; bounds and char boundaries are
    // the caller's responsibility.
    Self(s, to_idx_const(range.start), to_idx_const(range.end))
}

/// Returns `true` if the two `Substr`s have identical parents, and are
/// covering the same range.
///
/// Note that the "identical"ness of parents is determined by
/// [`ArcStr::ptr_eq`], which can have surprising/nondeterministic results
/// when used on `const` `ArcStr`s. It is guaranteed that `Substr::clone()`s
/// will be `shallow_eq` each other, however.
///
/// This should generally only be used as an optimization, or a debugging
/// aid. Additionally, it is already used in the implementation of
/// `PartialEq`, so optimizing a comparison by performing it first is
/// generally unnecessary.
///
/// # Examples
/// ```
/// # use arcstr::{ArcStr, Substr};
/// let parent = ArcStr::from("foooo");
/// let sub1 = parent.substr(1..3);
/// let sub2 = parent.substr(1..3);
/// assert!(Substr::shallow_eq(&sub1, &sub2));
/// // Same parent *and* contents, but over a different range: not `shallow_eq`.
/// let not_same = parent.substr(3..);
/// assert!(!Substr::shallow_eq(&sub1, &not_same));
/// ```
#[inline]
pub fn shallow_eq(this: &Self, o: &Self) -> bool {
    ArcStr::ptr_eq(&this.0, &o.0) && (this.1 == o.1) && (this.2 == o.2)
}

/// Returns the ArcStr this is a substring of.
///
/// Note that the exact pointer value of this can be somewhat
/// nondeterministic when used with `const` `ArcStr`s. For example
///
/// ```rust,ignore
/// const FOO: ArcStr = arcstr::literal!("foo");
/// // This is non-deterministic, as all references to a given
/// // const are not required to point to the same value.
/// ArcStr::ptr_eq(FOO.substr(..).parent(), &FOO);
/// ```
///
/// # Examples
///
/// ```
/// # use arcstr::ArcStr;
/// let parent = ArcStr::from("abc def");
/// let child = parent.substr(2..5);
/// assert!(ArcStr::ptr_eq(&parent, child.parent()));
///
/// let child = parent.substr(..);
/// assert_eq!(child.range(), 0..7);
/// ```
#[inline]
pub fn parent(&self) -> &ArcStr {
    &self.0
}

/// Returns the range of bytes we occupy inside our parent.
///
/// This range is always guaranteed to:
///
/// - Have an end >= start.
/// - Have both start and end be less than or equal to `self.parent().len()`
/// - Have both start and end satisfy `self.parent().is_char_boundary(b)`
///
/// To put another way, it's always sound to do
/// `s.parent().get_unchecked(s.range())`.
///
/// ```
/// # use arcstr::ArcStr;
/// let parent = ArcStr::from("abc def");
/// let child = parent.substr(2..5);
/// assert_eq!(child.range(), 2..5);
///
/// let child = parent.substr(..);
/// assert_eq!(child.range(), 0..7);
/// ```
#[inline]
pub fn range(&self) -> Range<usize> {
    (self.1 as usize)..(self.2 as usize)
}

/// Returns a [`Substr`] of self over the given `&str`, or panics.
///
/// It is not rare to end up with a `&str` which holds a view into a
/// `Substr`'s backing data. A common case is when using functionality that
/// takes and returns `&str` and are entirely unaware of `arcstr`, for
/// example: `str::trim()`.
///
/// This function allows you to reconstruct a [`Substr`] from a `&str` which
/// is a view into this `Substr`'s backing string.
///
/// See [`Substr::try_substr_from`] for a version that returns an option
/// rather than panicking.
///
/// # Examples
///
/// ```
/// use arcstr::Substr;
/// let text = Substr::from(" abc");
/// let trimmed = text.trim();
/// let substr: Substr = text.substr_from(trimmed);
/// assert_eq!(substr, "abc");
/// ```
///
/// # Panics
///
/// Panics if `substr` isn't a view into our memory.
///
/// Also panics if `substr` is a view into our memory but is >= `u32::MAX`
/// bytes away from our start, if we're a 64-bit machine and
/// `substr-usize-indices` is not enabled.
pub fn substr_from(&self, substr: &str) -> Substr {
    // TODO: should outline `expect` call to avoid fmt bloat and let us
    // provide better error message like we do for ArcStr
    self.try_substr_from(substr)
        .expect("non-substring passed to Substr::substr_from")
}

/// If possible, returns a [`Substr`] of self over the given `&str`.
///
/// This is a fallible version of [`Substr::substr_from`].
///
/// It is not rare to end up with a `&str` which holds a view into an
/// `ArcStr`'s backing data. A common case is when using functionality that
/// takes and returns `&str` and are entirely unaware of `arcstr`, for
/// example: `str::trim()`.
///
/// This function allows you to reconstruct a [`Substr`] from a `&str` which
/// is a view into this [`Substr`]'s backing string. Note that we accept the
/// empty string as input, in which case we return the same value as
/// [`Substr::new`] (For clarity, this no longer holds a reference to
/// `self.parent()`).
///
/// # Examples
///
/// ```
/// use arcstr::Substr;
/// let text = Substr::from(" abc");
/// let trimmed = text.trim();
/// let substr: Option<Substr> = text.try_substr_from(trimmed);
/// assert_eq!(substr.unwrap(), "abc");
/// // `&str`s not derived from `self` will return None.
/// let not_substr = text.try_substr_from("abc");
/// assert!(not_substr.is_none());
/// ```
///
/// # Panics
///
/// Panics if `substr` is a view into our memory but is >= `u32::MAX` bytes
/// away from our start, on a 64-bit machine, when `substr-usize-indices` is
/// not enabled.
pub fn try_substr_from(&self, substr: &str) -> Option<Substr> {
    // Any empty string is accepted, wherever it points; the result is
    // detached from our parent.
    if substr.is_empty() {
        return Some(Substr::new());
    }

    // Address range covered by this substr inside the parent's buffer.
    let parent_ptr = self.0.as_ptr() as usize;
    let self_start = parent_ptr + (self.1 as usize);
    let self_end = parent_ptr + (self.2 as usize);

    // Address range covered by the candidate string.
    let substr_start = substr.as_ptr() as usize;
    let substr_end = substr_start + substr.len();
    if substr_start < self_start || substr_end > self_end {
        return None;
    }

    // Offsets of the candidate relative to our own start.
    let index = substr_start - self_start;
    let end = index + substr.len();
    Some(self.substr(index..end))
}

/// Compute a derived `&str` using a function of `&str` => `&str`, and
/// produce a Substr of the result if possible.
///
/// The function may return either a derived string, or any empty string.
///
/// This function is mainly a wrapper around [`Substr::try_substr_from`]. If
/// you're coming to `arcstr` from the `shared_string` crate, this is the
/// moral equivalent of the `slice_with` function.
///
/// # Examples
///
/// ```
/// use arcstr::Substr;
/// let text = Substr::from(" abc");
/// let trimmed: Option<Substr> = text.try_substr_using(str::trim);
/// assert_eq!(trimmed.unwrap(), "abc");
/// let other = text.try_substr_using(|_s| "different string!");
/// assert_eq!(other, None);
/// // As a special case, this is allowed.
/// let empty = text.try_substr_using(|_s| "");
/// assert_eq!(empty.unwrap(), "");
/// ```
pub fn try_substr_using(&self, f: impl FnOnce(&str) -> &str) -> Option<Self> {
    self.try_substr_from(f(self.as_str()))
}

/// Compute a derived `&str` a function of `&str` => `&str`, and produce a
/// Substr of the result.
///
/// The function may return either a derived string, or any empty string.
/// Returning anything else will result in a panic.
///
/// This function is mainly a wrapper around [`Substr::try_substr_from`]. If
/// you're coming to `arcstr` from the `shared_string` crate, this is the
/// likely closest to the `slice_with_unchecked` function, but this panics
/// instead of UB on dodginess.
/// /// # Examples /// /// ``` /// use arcstr::Substr; /// let text = Substr::from(" abc"); /// let trimmed: Substr = text.substr_using(str::trim); /// assert_eq!(trimmed, "abc"); /// // As a special case, this is allowed. /// let empty = text.substr_using(|_s| ""); /// assert_eq!(empty, ""); /// ``` pub fn substr_using(&self, f: impl FnOnce(&str) -> &str) -> Self { self.substr_from(f(self.as_str())) } } impl From for Substr { #[inline] fn from(a: ArcStr) -> Self { Self::full(a) } } impl From<&ArcStr> for Substr { #[inline] fn from(a: &ArcStr) -> Self { Self::full(a.clone()) } } impl core::ops::Deref for Substr { type Target = str; #[inline] fn deref(&self) -> &str { debug_assert!(self.0.get((self.1 as usize)..(self.2 as usize)).is_some()); unsafe { self.0.get_unchecked((self.1 as usize)..(self.2 as usize)) } } } impl PartialEq for Substr { #[inline] fn eq(&self, o: &Self) -> bool { Substr::shallow_eq(self, o) || PartialEq::eq(self.as_str(), o.as_str()) } #[inline] fn ne(&self, o: &Self) -> bool { !Substr::shallow_eq(self, o) && PartialEq::ne(self.as_str(), o.as_str()) } } impl PartialEq for Substr { #[inline] fn eq(&self, o: &ArcStr) -> bool { (ArcStr::ptr_eq(&self.0, o) && (self.1 == 0) && (self.2 as usize == o.len())) || PartialEq::eq(self.as_str(), o.as_str()) } #[inline] fn ne(&self, o: &ArcStr) -> bool { (!ArcStr::ptr_eq(&self.0, o) || (self.1 != 0) || (self.2 as usize != o.len())) && PartialEq::ne(self.as_str(), o.as_str()) } } impl PartialEq for ArcStr { #[inline] fn eq(&self, o: &Substr) -> bool { PartialEq::eq(o, self) } #[inline] fn ne(&self, o: &Substr) -> bool { PartialEq::ne(o, self) } } impl Eq for Substr {} impl PartialOrd for Substr { #[inline] fn partial_cmp(&self, s: &Self) -> Option { Some(self.as_str().cmp(s.as_str())) } } impl Ord for Substr { #[inline] fn cmp(&self, s: &Self) -> core::cmp::Ordering { self.as_str().cmp(s.as_str()) } } impl core::hash::Hash for Substr { #[inline] fn hash(&self, h: &mut H) { self.as_str().hash(h) } } impl 
core::fmt::Debug for Substr { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { core::fmt::Debug::fmt(self.as_str(), f) } } impl core::fmt::Display for Substr { #[inline] fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { core::fmt::Display::fmt(self.as_str(), f) } } impl Default for Substr { #[inline] fn default() -> Self { Self::new() } } macro_rules! impl_from_via_arcstr { ($($SrcTy:ty),+) => {$( impl From<$SrcTy> for Substr { #[inline] fn from(v: $SrcTy) -> Self { Self::full(ArcStr::from(v)) } } )+}; } impl_from_via_arcstr![ &str, &mut str, alloc::string::String, &alloc::string::String, alloc::boxed::Box, alloc::rc::Rc, alloc::sync::Arc, alloc::borrow::Cow<'_, str> ]; impl<'a> From<&'a Substr> for alloc::borrow::Cow<'a, str> { #[inline] fn from(s: &'a Substr) -> Self { alloc::borrow::Cow::Borrowed(s) } } impl<'a> From for alloc::borrow::Cow<'a, str> { #[inline] fn from(s: Substr) -> Self { if let Some(st) = ArcStr::as_static(&s.0) { debug_assert!(st.get(s.range()).is_some()); alloc::borrow::Cow::Borrowed(unsafe { st.get_unchecked(s.range()) }) } else { alloc::borrow::Cow::Owned(s.to_string()) } } } macro_rules! impl_peq { (@one $a:ty, $b:ty) => { #[allow(clippy::extra_unused_lifetimes)] impl<'a> PartialEq<$b> for $a { #[inline] fn eq(&self, s: &$b) -> bool { PartialEq::eq(&self[..], &s[..]) } #[inline] fn ne(&self, s: &$b) -> bool { PartialEq::ne(&self[..], &s[..]) } } }; ($(($a:ty, $b:ty),)+) => {$( impl_peq!(@one $a, $b); impl_peq!(@one $b, $a); )+}; } impl_peq! { (Substr, str), (Substr, &'a str), (Substr, alloc::string::String), (Substr, alloc::borrow::Cow<'a, str>), (Substr, alloc::boxed::Box), (Substr, alloc::sync::Arc), (Substr, alloc::rc::Rc), } macro_rules! impl_index { ($($IdxT:ty,)*) => {$( impl core::ops::Index<$IdxT> for Substr { type Output = str; #[inline] fn index(&self, i: $IdxT) -> &Self::Output { &self.as_str()[i] } } )*}; } impl_index! 
{ core::ops::RangeFull, core::ops::Range, core::ops::RangeFrom, core::ops::RangeTo, core::ops::RangeInclusive, core::ops::RangeToInclusive, } impl AsRef for Substr { #[inline] fn as_ref(&self) -> &str { self } } impl AsRef<[u8]> for Substr { #[inline] fn as_ref(&self) -> &[u8] { self.as_bytes() } } impl core::borrow::Borrow for Substr { #[inline] fn borrow(&self) -> &str { self } } impl core::str::FromStr for Substr { type Err = core::convert::Infallible; #[inline] fn from_str(s: &str) -> Result { Ok(Self::from(ArcStr::from(s))) } } #[cfg(test)] mod test { use super::*; #[test] #[should_panic] #[cfg(not(miri))] // XXX does miri still hate unwinding? #[cfg(all(target_pointer_width = "64", not(feature = "substr-usize-indices")))] fn test_from_parts_unchecked_err() { let s = crate::literal!("foo"); // Note: this is actually a violation of the safety requirement of // from_parts_unchecked (the indices are illegal), but I can't get an // ArcStr that's big enough, and I'm the author so I know it's fine // because we hit the panic case. let _u = unsafe { Substr::from_parts_unchecked(s, 0x1_0000_0000usize..0x1_0000_0001) }; } #[test] fn test_from_parts_unchecked_valid() { let s = crate::literal!("foobar"); let u = unsafe { Substr::from_parts_unchecked(s, 2..5) }; assert_eq!(&*u, "oba"); } }