fluent-uri-0.1.4/.cargo_vcs_info.json0000644000000001360000000000100131300ustar { "git": { "sha1": "2bbcc73a085c11a4a3fbaa7194dbd06575fe7cc7" }, "path_in_vcs": "" }fluent-uri-0.1.4/.github/workflows/ci.yml000064400000000000000000000016610072674642500164670ustar 00000000000000on: push: branches: [ main ] name: CI jobs: test-and-doc: name: Test & Doc runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true - name: Test with default features uses: actions-rs/cargo@v1 with: command: test - name: Test with all features uses: actions-rs/cargo@v1 with: command: test args: --all-features - name: Build docs uses: actions-rs/cargo@v1 with: command: doc args: --no-deps --features ipv_future,rfc6874bis,std - name: Deploy docs uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./target/doc publish_branch: doc fluent-uri-0.1.4/.gitignore000064400000000000000000000000340072674642500137350ustar 00000000000000/.vscode /target Cargo.lock fluent-uri-0.1.4/.gitmodules000064400000000000000000000002050072674642500141220ustar 00000000000000[submodule "fuzz/uriparser-sys/uriparser"] path = fuzz/uriparser-sys/uriparser url = https://github.com/yescallop/uriparser.git fluent-uri-0.1.4/Cargo.lock0000644000000005720000000000100111070ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "fluent-uri" version = "0.1.4" dependencies = [ "bitflags", ] fluent-uri-0.1.4/Cargo.toml0000644000000021540000000000100111300ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "fluent-uri" version = "0.1.4" authors = ["Scallop Ye "] description = "A generic URI parser that strictly adheres to IETF RFC 3986." documentation = "https://docs.rs/fluent-uri" readme = "README.md" keywords = [ "parser", "uri", "rfc3986", ] categories = [ "encoding", "parser-implementations", ] license = "MIT" repository = "https://github.com/yescallop/fluent-uri-rs" [package.metadata.docs.rs] features = [ "ipv_future", "rfc6874bis", "std", ] [dependencies.bitflags] version = "1.3.2" [features] default = ["std"] ipv_future = [] rfc6874bis = [] std = [] unstable = [] fluent-uri-0.1.4/Cargo.toml.orig000064400000000000000000000015320072674642500146400ustar 00000000000000[package] name = "fluent-uri" version = "0.1.4" authors = ["Scallop Ye "] edition = "2021" description = "A generic URI parser that strictly adheres to IETF RFC 3986." 
documentation = "https://docs.rs/fluent-uri" repository = "https://github.com/yescallop/fluent-uri-rs" license = "MIT" keywords = ["parser", "uri", "rfc3986"] categories = ["encoding", "parser-implementations"] [dependencies] bitflags = "1.3.2" [features] default = ["std"] ipv_future = [] rfc6874bis = [] unstable = [] std = [] [package.metadata.docs.rs] features = ["ipv_future", "rfc6874bis", "std"] # Commented out to reduce compile time. # [[bench]] # name = "bench" # harness = false # required-features = ["unstable"] # [dev-dependencies] # criterion = "0.3" # url = "2.2.2" # uriparse = "0.6.4" # iri-string = "0.6.0" fluent-uri-0.1.4/LICENSE000064400000000000000000000020760072674642500127620ustar 00000000000000MIT License Copyright (c) 2021 Scallop Ye Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.fluent-uri-0.1.4/README.md000064400000000000000000000056310072674642500132340ustar 00000000000000# fluent-uri A generic URI parser in Rust that strictly adheres to IETF [RFC 3986]. 
[![crates.io](https://img.shields.io/crates/v/fluent-uri.svg)](https://crates.io/crates/fluent-uri) [![CI](https://github.com/yescallop/fluent-uri-rs/actions/workflows/ci.yml/badge.svg)](https://github.com/yescallop/fluent-uri-rs/actions/workflows/ci.yml) [![license](https://img.shields.io/github/license/yescallop/fluent-uri-rs?color=blue)](/LICENSE) - **Fast:** Zero-copy parsing. Observed to be 2x ~ 25x faster than common URI parsers in Rust. - **Easy:** Carefully designed and documented APIs. Handy percent-encoding utilities. - **Strict:** Parses every possible URI defined in the RFC and denies anything else. [API Docs](https://docs.rs/fluent-uri) | [Discussions](https://github.com/yescallop/fluent-uri-rs/discussions) [RFC 3986]: https://datatracker.ietf.org/doc/html/rfc3986/ ## Features & Examples - `EStr` (Percent-encoded string slices): All components in a URI that may be percent-encoded are parsed as `EStr`s, which allows easy splitting and fast decoding: ```rust let query = "name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9!"; let map: HashMap<_, _> = EStr::new(query) .split('&') .filter_map(|pair| pair.split_once('=')) .map(|(k, v)| (k.decode(), v.decode())) .filter_map(|(k, v)| k.into_string().ok().zip(v.into_string().ok())) .collect(); assert_eq!(map["name"], "张三"); assert_eq!(map["speech"], "¡Olé!"); ``` - Three variants of `Uri` for different use cases: - `Uri<&str>`: borrowed; immutable. - `Uri<&mut [u8]>`: borrowed; in-place mutable. - `Uri`: owned; immutable. 
Decode and extract query parameters in-place from a URI reference: ```rust fn decode_and_extract_query( bytes: &mut [u8], ) -> Result<(Uri<&mut [u8]>, HashMap<&str, &str>), ParseError> { let mut uri = Uri::parse_mut(bytes)?; let map = if let Some(query) = uri.take_query() { query .split_view('&') .flat_map(|pair| pair.split_once_view('=')) .map(|(k, v)| (k.decode_in_place(), v.decode_in_place())) .flat_map(|(k, v)| k.into_str().ok().zip(v.into_str().ok())) .collect() } else { HashMap::new() }; Ok((uri, map)) } let mut bytes = *b"?lang=Rust&mascot=Ferris%20the%20crab"; let (uri, query) = decode_and_extract_query(&mut bytes)?; assert_eq!(query["lang"], "Rust"); assert_eq!(query["mascot"], "Ferris the crab"); // The query is taken from the `Uri`. assert!(uri.query().is_none()); // In-place decoding is like this if you're interested: assert_eq!(&bytes, b"?lang=Rust&mascot=Ferris the crabcrab"); ``` ## Roadmap - [ ] URI building. - [ ] Reference resolution. - [ ] Normalization and comparison. - [ ] Host: IDNA encoding and DNS syntax checking. 
fluent-uri-0.1.4/benches/bench.rs000064400000000000000000000043400072674642500150050ustar 00000000000000use criterion::{black_box, criterion_group, criterion_main, Criterion}; use fluent_uri::{ enc::{table::*, *}, *, }; use iri_string::types::UriReferenceStr; use uriparse::URIReference; use url::Url; criterion_group!( benches, bench_enc, bench_dec, bench_dec_unchecked, bench_dec_in_place, bench_validate, bench_parse, bench_parse_url, bench_parse_uriparse, bench_parse_iri_string, ); criterion_main!(benches); const ENC_CASE: &str = "te😃a 测1`~!@试#$%st^&+="; fn bench_enc(c: &mut Criterion) { c.bench_function("enc", |b| { b.iter(|| encode(black_box(ENC_CASE), QUERY_FRAGMENT)) }); } const DEC_CASE: &str = "te%F0%9F%98%83a%20%E6%B5%8B1%60~!@%E8%AF%95%23$%25st%5E&+="; fn bench_dec(c: &mut Criterion) { c.bench_function("dec", |b| b.iter(|| decode(black_box(DEC_CASE)))); } fn bench_dec_unchecked(c: &mut Criterion) { c.bench_function("dec_unchecked", |b| { b.iter(|| unsafe { decode_unchecked(black_box(DEC_CASE.as_bytes())); }) }); } fn bench_dec_in_place(c: &mut Criterion) { let mut vec = DEC_CASE.as_bytes().to_vec(); c.bench_function("dec_in_place", |b| { b.iter(|| unsafe { decode_in_place_unchecked(&mut vec); vec.copy_from_slice(DEC_CASE.as_bytes()); }) }); } fn bench_validate(c: &mut Criterion) { c.bench_function("validate", |b| { b.iter(|| validate(black_box(DEC_CASE), QUERY_FRAGMENT)) }); } const PARSE_CASE: &str = "https://user@example.com/search?q=%E6%B5%8B%E8%AF%95#fragment"; fn bench_parse(c: &mut Criterion) { c.bench_function("parse", |b| b.iter(|| Uri::parse(black_box(PARSE_CASE)))); } fn bench_parse_url(c: &mut Criterion) { c.bench_function("parse_url", |b| { b.iter(|| Url::parse(black_box(PARSE_CASE))) }); } fn bench_parse_uriparse(c: &mut Criterion) { c.bench_function("parse_uriparse", |b| { b.iter(|| URIReference::try_from(black_box(PARSE_CASE))) }); } fn bench_parse_iri_string(c: &mut Criterion) { c.bench_function("parse_iri_string", |b| { b.iter(|| 
<&UriReferenceStr>::try_from(black_box(PARSE_CASE))) }); } fluent-uri-0.1.4/examples/parser.rs000064400000000000000000000010060072674642500154250ustar 00000000000000use std::io; use fluent_uri::Uri; fn main() { let mut buf = String::new(); loop { buf.clear(); io::stdin() .read_line(&mut buf) .expect("failed to read line"); if buf.ends_with('\n') { buf.pop(); if buf.ends_with('\r') { buf.pop(); } } match Uri::parse(&buf) { Ok(u) => println!("{u:#?}"), Err(e) => println!("Error: {e}"), }; } } fluent-uri-0.1.4/src/enc/encoder.rs000064400000000000000000000013360072674642500152740ustar 00000000000000//! Percent-encoders for URI components. use crate::enc::table::{self, Table}; /// A trait used by [`EString`] to specify the table used for encoding. /// /// [`EString`]: super::EString pub trait Encoder: Send + Sync + 'static { /// The table used for encoding. const TABLE: &'static Table; } /// An encoder for the path component. #[derive(Clone, Copy, Debug)] pub struct PathEncoder(()); /// An encoder for the query or the fragment component. #[derive(Clone, Copy, Debug)] pub struct QueryFragmentEncoder(()); impl Encoder for PathEncoder { const TABLE: &'static Table = table::PATH; } impl Encoder for QueryFragmentEncoder { const TABLE: &'static Table = table::QUERY_FRAGMENT; } fluent-uri-0.1.4/src/enc/estring.rs000064400000000000000000000233300072674642500153260ustar 00000000000000use alloc::string::String; use core::{borrow::Borrow, fmt, hash, marker::PhantomData, ops::Deref}; #[cfg(feature = "unstable")] use alloc::vec::Vec; use super::{ encoder::Encoder, imp::{encode_to, HEX_TABLE}, EStr, }; /// A percent-encoded, growable string. /// /// # Panics /// /// This struct triggers a compile-time panic if the table specified /// by `E` does not allow percent-encoding. /// /// # Examples /// /// Encode key-value pairs to a query string. 
/// /// ``` /// use fluent_uri::enc::{ /// encoder::{Encoder, QueryFragmentEncoder}, /// table::{self, Table}, /// EString, /// }; /// /// struct DataEncoder; /// /// impl Encoder for DataEncoder { /// const TABLE: &'static Table = &table::QUERY_FRAGMENT.sub(&Table::gen(b"&=+")); /// } /// /// let pairs = [("name", "张三"), ("speech", "¡Olé!")]; /// let mut buf = EString::::new(); /// for (k, v) in pairs { /// if !buf.is_empty() { /// buf.push_byte(b'&'); /// } /// buf.push_with::(k); /// buf.push_byte(b'='); /// buf.push_with::(v); /// } /// /// assert_eq!(buf, "name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9!"); /// ``` pub struct EString { string: String, _marker: PhantomData<&'static E>, } impl EString { const ASSERT: () = assert!( E::TABLE.allows_enc(), "table does not allow percent-encoding" ); /// Creates a new empty `EString`. #[inline] pub fn new() -> Self { EString { string: String::new(), _marker: PhantomData, } } /// Creates a new empty `EString` with a particular capacity. #[inline] pub fn with_capacity(capacity: usize) -> Self { EString { string: String::with_capacity(capacity), _marker: PhantomData, } } #[cfg(feature = "unstable")] #[inline] unsafe fn from_string_unchecked(string: String) -> Self { EString { string, _marker: PhantomData, } } /// Consumes this `EString` and yields the underlying `String` storage. #[inline] pub fn into_string(self) -> String { self.string } /// Coerces to an `EStr`. #[inline] pub fn as_estr(&self) -> &EStr { // SAFETY: `EString` guarantees that it is properly encoded. unsafe { EStr::new_unchecked(self.string.as_bytes()) } } /// Encodes a byte sequence and appends the result onto the end of this `EString`. #[inline] pub fn push + ?Sized>(&mut self, s: &S) { // SAFETY: The encoded bytes are valid UTF-8. let buf = unsafe { self.string.as_mut_vec() }; encode_to(s.as_ref(), E::TABLE, buf); } /// Encodes a byte sequence with a sub-encoder and appends the result onto the end of this `EString`. 
/// /// A sub-encoder `SubE` of `E` is an encoder such that `SubE::TABLE` is a [subset] of `E::TABLE`. /// /// [subset]: super::table::Table::is_subset /// /// # Panics /// /// This method triggers a compile-time panic if `SubE` is not a sub-encoder of `E`, or /// if the table specified by `SubE` does not allow percent-encoding. #[inline] #[allow(unused_variables)] pub fn push_with(&mut self, s: &(impl AsRef<[u8]> + ?Sized)) { struct Assert { _marker: PhantomData<(SubE, E)>, } impl Assert { const IS_SUB_ENCODER: () = assert!( SubE::TABLE.is_subset(E::TABLE), "pushing with non-sub-encoder" ); } let _ = (Assert::::IS_SUB_ENCODER, EString::::ASSERT); // SAFETY: The encoded bytes are valid UTF-8. let buf = unsafe { self.string.as_mut_vec() }; encode_to(s.as_ref(), SubE::TABLE, buf); } /// Encodes a byte and appends the result onto the end of this `EString`. #[inline] pub fn push_byte(&mut self, x: u8) { // SAFETY: The encoded bytes are valid UTF-8. let vec = unsafe { self.string.as_mut_vec() }; if E::TABLE.allows(x) { vec.push(x); } else { vec.extend_from_slice(&[ b'%', HEX_TABLE[x as usize * 2], HEX_TABLE[x as usize * 2 + 1], ]); } } /// Invokes [`capacity`] on the underlying `String`. /// /// [`capacity`]: String::capacity #[inline] pub fn capacity(&self) -> usize { self.string.capacity() } /// Invokes [`reserve`] on the underlying `String`. /// /// [`reserve`]: String::reserve #[inline] pub fn reserve(&mut self, additional: usize) { self.string.reserve(additional); } /// Invokes [`reserve_exact`] on the underlying `String`. /// /// [`reserve_exact`]: String::reserve_exact #[inline] pub fn reserve_exact(&mut self, additional: usize) { self.string.reserve_exact(additional); } /// Invokes [`shrink_to_fit`] on the underlying `String`. /// /// [`shrink_to_fit`]: String::shrink_to_fit #[inline] pub fn shrink_to_fit(&mut self) { self.string.shrink_to_fit() } /// Invokes [`shrink_to`] on the underlying `String`. 
/// /// [`shrink_to`]: String::shrink_to #[inline] pub fn shrink_to(&mut self, min_capacity: usize) { self.string.shrink_to(min_capacity) } /// Invokes [`len`] on the underlying `String`. /// /// [`len`]: String::len #[inline] pub fn len(&self) -> usize { self.string.len() } /// Invokes [`is_empty`] on the underlying `String`. /// /// [`is_empty`]: String::is_empty #[inline] pub fn is_empty(&self) -> bool { self.string.is_empty() } /// Invokes [`clear`] on the underlying `String`. /// /// [`clear`]: String::clear #[inline] pub fn clear(&mut self) { self.string.clear() } } #[cfg(feature = "unstable")] use crate::enc::{validate, EncodingError, Result}; #[cfg(feature = "unstable")] impl TryFrom for EString { type Error = EncodingError; #[inline] fn try_from(string: String) -> Result { validate(&string, E::TABLE)?; // SAFETY: The validation is done. Ok(unsafe { EString::from_string_unchecked(string) }) } } #[cfg(feature = "unstable")] impl TryFrom> for EString { type Error = EncodingError; #[inline] fn try_from(bytes: Vec) -> Result { validate(&bytes, E::TABLE)?; // SAFETY: The validation is done. 
unsafe { let string = String::from_utf8_unchecked(bytes); Ok(EString::from_string_unchecked(string)) } } } impl Deref for EString { type Target = EStr; #[inline] fn deref(&self) -> &EStr { self.as_estr() } } impl AsRef for EString { #[inline] fn as_ref(&self) -> &EStr { self.as_estr() } } impl AsRef for EString { #[inline] fn as_ref(&self) -> &str { self.as_str() } } impl Borrow for EString { #[inline] fn borrow(&self) -> &str { self.as_str() } } impl PartialEq> for EString { #[inline] fn eq(&self, other: &EString) -> bool { self.as_str() == other.as_str() } } impl PartialEq<&EStr> for EString { #[inline] fn eq(&self, other: &&EStr) -> bool { self.as_str() == other.as_str() } } impl PartialEq for EString { #[inline] fn eq(&self, other: &EStr) -> bool { self.as_str() == other.as_str() } } impl PartialEq> for EStr { #[inline] fn eq(&self, other: &EString) -> bool { self.as_str() == other.as_str() } } impl PartialEq<&str> for EString { #[inline] fn eq(&self, other: &&str) -> bool { self.as_str() == *other } } impl PartialEq for EString { #[inline] fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl PartialEq> for str { #[inline] fn eq(&self, other: &EString) -> bool { self == other.as_str() } } impl Eq for EString {} impl Clone for EString { #[inline] fn clone(&self) -> Self { EString { string: self.string.clone(), _marker: PhantomData, } } #[inline] fn clone_from(&mut self, source: &Self) { self.string.clone_from(&source.string) } } impl fmt::Debug for EString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("EString") .field("encoder", &core::any::type_name::()) .field("contents", &self.string) .finish() } } impl fmt::Display for EString { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.string, f) } } impl Default for EString { #[inline] fn default() -> Self { EString::new() } } impl hash::Hash for EString { #[inline] fn hash(&self, state: &mut H) { self.string.hash(state) } } 
fluent-uri-0.1.4/src/enc/imp.rs000064400000000000000000000372620072674642500144510ustar 00000000000000use super::table; #[cfg(feature = "unstable")] use super::table::Table; use alloc::vec::Vec; use core::{fmt, ptr}; #[cfg(feature = "unstable")] use alloc::{borrow::Cow, str, string::String}; /// Returns immediately with an encoding error. macro_rules! err { ($index:expr, $kind:ident) => { return Err(crate::enc::imp::EncodingError { index: $index, kind: crate::enc::imp::EncodingErrorKind::$kind, }) }; } pub(crate) use err; /// Detailed cause of an [`EncodingError`]. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum EncodingErrorKind { /// Invalid percent-encoded octet that is either non-hexadecimal or incomplete. /// /// The error index points to the percent character "%" of the octet. InvalidOctet, /// Unexpected character that is not allowed by the URI syntax. /// /// The error index points to the character. #[cfg(feature = "unstable")] UnexpectedChar, } /// An error occurred when decoding or validating strings. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct EncodingError { pub(crate) index: usize, pub(crate) kind: EncodingErrorKind, } impl EncodingError { /// Returns the index where the error occurred in the input string. #[cfg(feature = "unstable")] #[inline] pub fn index(&self) -> usize { self.index } /// Returns the detailed cause of the error. 
#[cfg(feature = "unstable")] #[inline] pub fn kind(&self) -> EncodingErrorKind { self.kind } } #[cfg(feature = "std")] impl std::error::Error for EncodingError {} impl fmt::Display for EncodingError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self.kind { EncodingErrorKind::InvalidOctet => "invalid percent-encoded octet at index ", #[cfg(feature = "unstable")] EncodingErrorKind::UnexpectedChar => "unexpected character at index ", }; write!(f, "{}{}", msg, self.index) } } pub(crate) type Result = core::result::Result; #[cfg(feature = "unstable")] const fn gen_hex_table() -> [u8; 512] { const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF"; let mut i = 0; let mut out = [0; 512]; while i < 256 { out[i * 2] = HEX_DIGITS[i >> 4]; out[i * 2 + 1] = HEX_DIGITS[i & 15]; i += 1; } out } #[cfg(feature = "unstable")] pub(crate) const HEX_TABLE: &[u8; 512] = &gen_hex_table(); const fn gen_octet_table(hi: bool) -> [u8; 256] { let mut out = [0xFF; 256]; let shift = (hi as u8) * 4; let mut i = 0; while i < 10 { out[(i + b'0') as usize] = i << shift; i += 1; } while i < 16 { out[(i - 10 + b'A') as usize] = i << shift; out[(i - 10 + b'a') as usize] = i << shift; i += 1; } out } const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true); pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false); /// Decodes a percent-encoded octet assuming validity. fn decode_octet_unchecked(hi: u8, lo: u8) -> u8 { OCTET_TABLE_HI[hi as usize] | OCTET_TABLE_LO[lo as usize] } /// Decodes a percent-encoded octet. fn decode_octet(mut hi: u8, mut lo: u8) -> Option { hi = OCTET_TABLE_HI[hi as usize]; lo = OCTET_TABLE_LO[lo as usize]; if hi & 1 == 0 && lo & 0x80 == 0 { Some(hi | lo) } else { None } } fn calc_capacity(s: &[u8], triple: bool) -> usize { #[cold] fn capacity_overflow() -> ! 
{ panic!("capacity overflow") } if triple { if s.len() > isize::MAX as usize / 3 { capacity_overflow(); } s.len() * 3 } else { s.len() } } /// Copies the first `i` bytes from `s` into a new buffer. /// /// Set `triple` to `true` if triple capacity is needed. /// /// # Safety /// /// `i` must not exceed `s.len()`. unsafe fn copy_new(s: &[u8], i: usize, triple: bool) -> Vec { let cap = calc_capacity(s, triple); let mut buf = Vec::with_capacity(cap); unsafe { // SAFETY: Since `i <= s.len() <= buf.capacity()`, `s` is valid // for reads of `i` bytes, and `buf` is valid for writes of `i` bytes. // Newly allocated `buf` cannot overlap with `s`. ptr::copy_nonoverlapping(s.as_ptr(), buf.as_mut_ptr(), i); // The first `i` bytes are now initialized so it's safe to set the length. buf.set_len(i); } buf } /// Copies the first `i` bytes from `s` into a buffer. /// /// Set `triple` to `true` if triple capacity is needed. /// /// # Safety /// /// `i` must not exceed `s.len()`. #[cfg(feature = "unstable")] unsafe fn copy(s: &[u8], buf: &mut Vec, i: usize, triple: bool) { let cap = calc_capacity(s, triple); buf.reserve(cap); unsafe { let dst = buf.as_mut_ptr().add(buf.len()); // SAFETY: Since `i <= s.len() <= buf.capacity() - buf.len()`, `s` is valid // for reads of `i` bytes, and `dst` is valid for writes of `i` bytes. // Mutable reference `buf` cannot overlap with immutable `s`. ptr::copy_nonoverlapping(s.as_ptr(), dst, i); // The appended `i` bytes are now initialized so it's safe to set the length. buf.set_len(buf.len() + i); } } /// Pushes a raw byte without checking bounds. /// /// # Safety /// /// `v.len() + 1` must not exceed `v.capacity()`. unsafe fn push(v: &mut Vec, x: u8) { let len = v.len(); debug_assert!(len < v.capacity()); // SAFETY: The caller must ensure that the capacity is enough. unsafe { *v.as_mut_ptr().add(len) = x; v.set_len(len + 1); } } /// Pushes a percent-encoded byte without checking bounds. 
/// /// # Safety /// /// `v.len() + 3` must not exceed `v.capacity()`. #[cfg(feature = "unstable")] unsafe fn push_pct_encoded(v: &mut Vec, x: u8) { let len = v.len(); debug_assert!(len + 2 < v.capacity()); // SAFETY: The caller must ensure that the capacity is enough. unsafe { let ptr = v.as_mut_ptr().add(len); *ptr = b'%'; *ptr.add(1) = HEX_TABLE[x as usize * 2]; *ptr.add(2) = HEX_TABLE[x as usize * 2 + 1]; v.set_len(len + 3); } } #[cfg(feature = "unstable")] pub(super) fn encode<'a>(s: &'a [u8], table: &Table) -> Cow<'a, str> { // Skip the allowed bytes. let i = match s.iter().position(|&x| !table.allows(x)) { Some(i) => i, // SAFETY: All bytes are checked to be less than 128 (ASCII). None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(s) }), }; unsafe { // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. let mut buf = copy_new(s, i, true); _encode(s, i, table, &mut buf); // SAFETY: The bytes should all be ASCII and thus valid UTF-8. Cow::Owned(String::from_utf8_unchecked(buf)) } } #[cfg(feature = "unstable")] pub(super) fn encode_to<'a>(s: &[u8], table: &Table, buf: &'a mut Vec) { // Skip the allowed bytes. let i = match s.iter().position(|&x| !table.allows(x)) { Some(i) => i, None => return buf.extend_from_slice(s), }; unsafe { // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. copy(s, buf, i, true); _encode(s, i, table, buf); } } #[cfg(feature = "unstable")] unsafe fn _encode(s: &[u8], mut i: usize, table: &Table, buf: &mut Vec) { while i < s.len() { let x = s[i]; // SAFETY: The maximum output length is triple the input length. unsafe { if table.allows(x) { push(buf, x); } else { push_pct_encoded(buf, x); } } i += 1; } } #[cfg(feature = "unstable")] pub(super) fn decode(s: &[u8]) -> Result> { // Skip bytes that are not '%'. let i = match s.iter().position(|&x| x == b'%') { Some(i) => i, None => return Ok(Cow::Borrowed(s)), }; // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. 
let mut buf = unsafe { copy_new(s, i, false) }; unsafe { _decode(s, i, &mut buf, true)? } Ok(Cow::Owned(buf)) } /// Decodes a percent-encoded string assuming validity. /// /// # Safety /// /// This function does not check that the string is properly encoded. /// Any invalid encoded octet in the string will result in undefined behavior. pub unsafe fn decode_unchecked(s: &[u8]) -> Option> { // Skip bytes that are not '%'. let i = match s.iter().position(|&x| x == b'%') { Some(i) => i, None => return None, }; // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. let mut buf = unsafe { copy_new(s, i, false) }; // SAFETY: The caller must ensure that the string is properly encoded. unsafe { _decode(s, i, &mut buf, false).unwrap() } Some(buf) } #[cfg(feature = "unstable")] pub(super) fn decode_with<'a>(s: &[u8], buf: &'a mut Vec) -> Result> { // Skip bytes that are not '%'. let i = match s.iter().position(|&x| x == b'%') { Some(i) => i, None => return Ok(None), }; let start = buf.len(); unsafe { // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. copy(s, buf, i, false); _decode(s, i, buf, true)?; // SAFETY: The length is non-decreasing. Ok(Some(buf.get_unchecked(start..))) } } /// Decodes a percent-encoded string with a buffer assuming validity. /// /// If the string needs no decoding, this function returns `None` /// and no bytes will be appended to the buffer. /// /// # Safety /// /// This function does not check that the string is properly encoded. /// Any invalid encoded octet in the string will result in undefined behavior. #[cfg(feature = "unstable")] pub unsafe fn decode_with_unchecked<'a>(s: &[u8], buf: &'a mut Vec) -> Option<&'a [u8]> { // Skip bytes that are not '%'. let i = match s.iter().position(|&x| x == b'%') { Some(i) => i, None => return None, }; let start = buf.len(); unsafe { // SAFETY: `i` cannot exceed `s.len()` since `i < s.len()`. copy(s, buf, i, false); // SAFETY: The caller must ensure that the string is properly encoded. 
_decode(s, i, buf, false).unwrap(); // SAFETY: The length is non-decreasing. Some(buf.get_unchecked(start..)) } } unsafe fn _decode(s: &[u8], mut i: usize, buf: &mut Vec, checked: bool) -> Result<()> { while i < s.len() { let x = s[i]; if x == b'%' { let octet = if checked { if i + 2 >= s.len() { err!(i, InvalidOctet); } // SAFETY: We have checked that `i + 2 < s.len()`. // Overflow should be impossible because we cannot have that large a slice. let (hi, lo) = unsafe { (*s.get_unchecked(i + 1), *s.get_unchecked(i + 2)) }; match decode_octet(hi, lo) { Some(o) => o, None => err!(i, InvalidOctet), } } else { // SAFETY: The caller must ensure that the string is properly encoded. let (hi, lo) = unsafe { (*s.get_unchecked(i + 1), *s.get_unchecked(i + 2)) }; decode_octet_unchecked(hi, lo) }; // SAFETY: The output will never be longer than the input. unsafe { push(buf, octet) } i += 3; } else { // SAFETY: The output will never be longer than the input. unsafe { push(buf, x) } i += 1; } } Ok(()) } /// Decodes a percent-encoded string in-place assuming validity. /// /// Returns the length of decoded bytes to the left. /// /// # Safety /// /// This function does not check that the string is properly encoded. /// Any invalid encoded octet in the string will result in undefined behavior. pub unsafe fn decode_in_place_unchecked(s: &mut [u8]) -> usize { // Skip bytes that are not '%'. let mut i = match s.iter().position(|&x| x == b'%') { Some(i) => i, None => return s.len(), }; let mut dst = i; while i < s.len() { let x = s[i]; let octet = if x == b'%' { // SAFETY: The caller must ensure that the string is properly encoded. let (hi, lo) = unsafe { (*s.get_unchecked(i + 1), *s.get_unchecked(i + 2)) }; i += 3; decode_octet_unchecked(hi, lo) } else { i += 1; x }; // SAFETY: `dst <= i < len` holds. 
unsafe { *s.get_unchecked_mut(dst) = octet } dst += 1; } dst } #[cfg(feature = "unstable")] pub(super) fn validate_enc(s: &[u8], table: &Table) -> Result<()> { let mut i = 0; while i < s.len() { let x = s[i]; if x == b'%' { if i + 2 >= s.len() { err!(i, InvalidOctet); } // SAFETY: We have checked that `i + 2 < s.len()`. // Overflow should be impossible because we cannot have that large a slice. let (hi, lo) = unsafe { (*s.get_unchecked(i + 1), *s.get_unchecked(i + 2)) }; if table::HEXDIG.get(hi) & table::HEXDIG.get(lo) == 0 { err!(i, InvalidOctet); } i += 3; } else { if !table.allows(x) { err!(i, UnexpectedChar); } i += 1; } } Ok(()) } pub(super) const fn validate_estr(s: &[u8]) -> bool { let mut i = 0; while i < s.len() { let x = s[i]; if x == b'%' { if i + 2 >= s.len() { return false; } let (hi, lo) = (s[i + 1], s[i + 2]); if table::HEXDIG.get(hi) & table::HEXDIG.get(lo) == 0 { return false; } i += 3; } else { i += 1; } } true } #[cfg(all(feature = "unstable", test))] mod tests { use super::*; const RAW: &[u8] = "te😃a 测1`~!@试#$%st^&+=".as_bytes(); const ENCODED: &[u8] = b"te%F0%9F%98%83a%20%E6%B5%8B1%60~!@%E8%AF%95%23$%25st%5E&+="; #[test] fn enc_dec_validate() { let s = encode(RAW, table::QUERY_FRAGMENT); assert_eq!(ENCODED, s.as_bytes()); let mut buf = Vec::new(); encode_to(RAW, table::QUERY_FRAGMENT, &mut buf); assert_eq!(ENCODED, buf); assert!(validate_enc(s.as_bytes(), table::QUERY_FRAGMENT).is_ok()); assert_eq!(Ok(RAW), decode(ENCODED).as_deref()); let mut buf = Vec::new(); assert_eq!(Ok(Some(RAW)), decode_with(ENCODED, &mut buf)); assert_eq!(buf, RAW); assert_eq!(Some(RAW), unsafe { decode_unchecked(ENCODED).as_deref() }); let mut buf = Vec::new(); assert_eq!(Some(RAW), unsafe { decode_with_unchecked(ENCODED, &mut buf) }); assert_eq!(buf, RAW); assert_eq!(Ok(b"\x2d\xe6\xb5" as _), decode(b"%2D%E6%B5").as_deref()); let s = b"%2d%"; assert_eq!(3, decode(s).unwrap_err().index()); let s = b"%2d%fg"; assert_eq!(3, decode(s).unwrap_err().index()); // We used to 
use slot 0 to indicate that percent-encoded octets are allowed, // which was totally wrong since it just allows zero bytes. Glad we fixed it. assert!(validate_enc(b"\0", table::QUERY_FRAGMENT).is_err()); } } fluent-uri-0.1.4/src/enc/mod.rs000064400000000000000000000616570072674642500144500ustar 00000000000000#[cfg(not(feature = "unstable"))] pub(crate) mod table; #[cfg(feature = "unstable")] pub mod table; #[cfg(feature = "unstable")] use table::Table; pub(crate) mod imp; #[cfg(feature = "unstable")] pub use imp::{ decode_in_place_unchecked, decode_unchecked, decode_with_unchecked, EncodingError, EncodingErrorKind, }; #[cfg(feature = "unstable")] use imp::{err, Result}; #[cfg(feature = "unstable")] pub mod encoder; #[cfg(feature = "unstable")] mod estring; #[cfg(feature = "unstable")] pub use estring::*; /// Percent-encodes a byte sequence. /// /// # Panics /// /// Panics if the table does not allow percent-encoding. #[cfg(feature = "unstable")] #[inline] pub fn encode<'a, S: AsRef<[u8]> + ?Sized>(s: &'a S, table: &Table) -> Cow<'a, str> { assert!(table.allows_enc(), "table does not allow percent-encoding"); imp::encode(s.as_ref(), table) } /// Percent-encodes a byte sequence to a buffer. /// /// The buffer may either be a [`String`] or a [`Vec`]. /// /// # Panics /// /// Panics if the table does not allow percent-encoding. #[cfg(feature = "unstable")] #[inline] pub fn encode_to<'a, S: AsRef<[u8]> + ?Sized, B: internal::AsMutVec>( s: &S, table: &Table, buf: &'a mut B, ) { assert!(table.allows_enc(), "table does not allow percent-encoding"); // SAFETY: The encoded bytes are valid UTF-8. let buf = unsafe { buf.as_mut_vec() }; imp::encode_to(s.as_ref(), table, buf) } /// Decodes a percent-encoded string. #[cfg(feature = "unstable")] #[inline] pub fn decode + ?Sized>(s: &S) -> Result> { imp::decode(s.as_ref()) } /// Decodes a percent-encoded string with a buffer. 
///
/// If the string needs no decoding, this function returns `Ok(None)`
/// and no bytes will be appended to the buffer.
#[cfg(feature = "unstable")]
#[inline]
pub fn decode_with<'a, S: AsRef<[u8]> + ?Sized>(
    s: &S,
    buf: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
    imp::decode_with(s.as_ref(), buf)
}

/// Checks if all characters in a string are allowed by the given table.
///
/// If the table allows percent-encoded octets, the check is delegated to
/// `imp::validate_enc`. Otherwise every byte is looked up in the table and
/// the index of the first byte that is not allowed is reported through
/// `err!(i, UnexpectedChar)`.
#[cfg(feature = "unstable")]
#[inline]
pub fn validate<S: AsRef<[u8]> + ?Sized>(s: &S, table: &Table) -> Result<()> {
    let s = s.as_ref();
    if table.allows_enc() {
        imp::validate_enc(s, table)
    } else {
        // No `%XX` sequences are possible here, so a plain per-byte scan suffices.
        match s.iter().position(|&x| !table.allows(x)) {
            Some(i) => err!(i, UnexpectedChar),
            None => Ok(()),
        }
    }
}

use alloc::{
    borrow::{self, Cow},
    string::{FromUtf8Error, String},
    vec::Vec,
};
use core::{
    fmt, hash,
    iter::FusedIterator,
    mem,
    str::{self, Utf8Error},
};

use crate::view::View;

/// Percent-encoded string slices.
///
/// # Examples
///
/// Parse key-value pairs from a query string into a hash map:
///
/// ```
/// use std::collections::HashMap;
/// use fluent_uri::enc::EStr;
///
/// let query = "name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9!";
/// let map: HashMap<_, _> = EStr::new(query)
///     .split('&')
///     .filter_map(|pair| pair.split_once('='))
///     .map(|(k, v)| (k.decode(), v.decode()))
///     .filter_map(|(k, v)| k.into_string().ok().zip(v.into_string().ok()))
///     .collect();
/// assert_eq!(map["name"], "张三");
/// assert_eq!(map["speech"], "¡Olé!");
/// ```
#[repr(transparent)]
pub struct EStr {
    // The raw bytes of the string. `EStr::as_str` reinterprets them as `str`
    // without checking, so they must always be valid UTF-8.
    inner: [u8],
}

impl AsRef<str> for EStr {
    /// Views the `EStr` as a string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}

impl AsRef<[u8]> for EStr {
    /// Views the `EStr` as its underlying (still percent-encoded) bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        &self.inner
    }
}

/// Implements equality comparisons on `EStr`s.
///
/// `EStr`s are compared by their byte values. Percent-encoding
/// normalization is **not** performed prior to comparison.
impl PartialEq for EStr {
    #[inline]
    fn eq(&self, other: &EStr) -> bool {
        self.inner == other.inner
    }
}

/// Compares an `EStr` with a string slice by their byte values.
impl PartialEq<str> for EStr {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}

/// Compares a string slice with an `EStr` by their byte values.
impl PartialEq<EStr> for str {
    #[inline]
    fn eq(&self, other: &EStr) -> bool {
        self == other.as_str()
    }
}

impl Eq for EStr {}

/// Formats the `EStr` exactly as the underlying string slice would be formatted.
impl fmt::Debug for EStr {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(self.as_str(), f)
    }
}

/// Writes the `EStr` as is, i.e., still percent-encoded.
impl fmt::Display for EStr {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.as_str(), f)
    }
}

/// Hashes an `EStr` the same way the equivalent `str` is hashed.
impl hash::Hash for EStr {
    #[inline]
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        // `&EStr` implements `Borrow<str>`, and `Borrow` requires the borrowed
        // form to hash identically to the owner. A `[u8]` does not hash like a
        // `str` in the standard library (slices feed their length to the hasher,
        // `str` appends a terminator byte), so hashing `self.inner` directly
        // would make `str` lookups in a `HashMap<&EStr, _>` miss.
        hash::Hash::hash(self.as_str(), state)
    }
}

/// Allows collections keyed by `&EStr` to be queried with a `str`.
impl borrow::Borrow<str> for &EStr {
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}

impl Default for &EStr {
    /// Creates an empty `EStr`.
    #[inline]
    fn default() -> &'static EStr {
        EStr::EMPTY
    }
}

impl EStr {
    // The empty string is trivially a valid percent-encoded string.
    const EMPTY: &'static EStr = EStr::new("");

    /// Converts a string slice to an `EStr`.
    ///
    /// # Panics
    ///
    /// Panics if the string is not properly encoded.
    pub const fn new(s: &str) -> &EStr {
        if imp::validate_estr(s.as_bytes()) {
            // SAFETY: The validation is done.
            unsafe { EStr::new_unchecked(s.as_bytes()) }
        } else {
            panic!("invalid percent-encoded string");
        }
    }

    /// Converts a byte slice into an `EStr` assuming validity.
    ///
    /// # Safety
    ///
    /// The bytes must be valid percent-encoded UTF-8.
    #[inline]
    pub(crate) const unsafe fn new_unchecked(s: &[u8]) -> &EStr {
        // SAFETY: The caller must ensure that the bytes are valid percent-encoded UTF-8.
        // The cast itself is fine because `EStr` is `#[repr(transparent)]` over `[u8]`.
        unsafe { &*(s as *const [u8] as *const EStr) }
    }

    /// Yields the underlying string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        // SAFETY: The validation is done.
        unsafe { str::from_utf8_unchecked(&self.inner) }
    }

    /// Decodes the `EStr`.
///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::enc::EStr;
    ///
    /// let dec = EStr::new("%C2%BF").decode();
    /// assert_eq!(dec.as_bytes(), &[0xc2, 0xbf]);
    /// assert_eq!(dec.into_string()?, "¿");
    /// # Ok::<_, std::string::FromUtf8Error>(())
    /// ```
    #[inline]
    pub fn decode(&self) -> Decode<'_> {
        // SAFETY: `EStr::new_unchecked` ensures that the string is properly encoded.
        let decoded = unsafe { imp::decode_unchecked(&self.inner) };
        match decoded {
            // Something was decoded into a freshly produced buffer.
            Some(s) => Decode::Dst(s),
            // Nothing to decode: hand back the source string itself.
            None => Decode::Src(self.as_str()),
        }
    }

    /// Decodes the `EStr` with a buffer.
    ///
    /// If the string needs no decoding, no bytes will be appended to the buffer.
    ///
    /// Note that the buffer is not cleared prior to decoding.
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::enc::EStr;
    ///
    /// let mut buf = Vec::new();
    /// let dec = EStr::new("233").decode_with(&mut buf);
    /// assert_eq!(dec.to_str()?, "233");
    /// assert!(!dec.decoded_any());
    /// assert!(buf.is_empty());
    ///
    /// let dec = EStr::new("2%333").decode_with(&mut buf);
    /// assert_eq!(dec.to_str()?, "233");
    /// assert!(dec.decoded_any());
    /// assert_eq!(buf, b"233");
    /// # Ok::<_, core::str::Utf8Error>(())
    /// ```
    #[cfg(feature = "unstable")]
    #[inline]
    pub fn decode_with<'dst>(&self, buf: &'dst mut Vec<u8>) -> DecodeRef<'_, 'dst> {
        // SAFETY: `EStr::new_unchecked` ensures that the string is properly encoded.
        let decoded = unsafe { decode_with_unchecked(&self.inner, buf) };
        match decoded {
            // Something was decoded: the result borrows from `buf`.
            Some(s) => DecodeRef::Dst(s),
            // Nothing was decoded: the result borrows from `self`.
            None => DecodeRef::Src(self),
        }
    }

    /// Returns an iterator over subslices of the `EStr` separated by the given delimiter.
    ///
    /// # Panics
    ///
    /// Panics if the delimiter is not a [reserved] character.
/// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.2 /// /// # Examples /// /// ``` /// use fluent_uri::enc::EStr; /// /// assert!(EStr::new("a,b,c").split(',').eq(["a", "b", "c"])); /// assert!(EStr::new(",").split(',').eq(["", ""])); /// ``` #[inline] pub fn split(&self, delim: char) -> Split<'_> { assert!( delim.is_ascii() && table::RESERVED.allows(delim as u8), "splitting with non-reserved character" ); Split { s: &self.inner, delim: delim as u8, finished: false, } } /// Splits the `EStr` on the first occurrence of the given delimiter and /// returns prefix before delimiter and suffix after delimiter. /// /// Returns `None` if the delimiter is not found. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. /// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.2 /// /// # Examples /// /// ``` /// use fluent_uri::enc::EStr; /// /// let (k, v) = EStr::new("key=value").split_once('=').unwrap(); /// assert_eq!(k, "key"); /// assert_eq!(v, "value"); /// /// assert!(EStr::new("abc").split_once(';').is_none()); /// ``` #[inline] pub fn split_once(&self, delim: char) -> Option<(&EStr, &EStr)> { assert!( delim.is_ascii() && table::RESERVED.allows(delim as u8), "splitting with non-reserved character" ); let bytes = &self.inner; let i = bytes.iter().position(|&x| x == delim as u8)?; let (head, tail) = (&bytes[..i], &bytes[i + 1..]); // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. unsafe { Some((EStr::new_unchecked(head), EStr::new_unchecked(tail))) } } } /// An [`EStr`] view into a mutable byte slice that allows in-place percent-decoding. impl<'a> View<'a, EStr> { /// Decodes the `View` in-place. #[inline] pub fn decode_in_place(self) -> DecodeInPlace<'a> { let bytes = self.into_bytes(); // SAFETY: `Self::new` ensures that the bytes are properly encoded. 
let len = unsafe { imp::decode_in_place_unchecked(bytes) }; if len == bytes.len() { // SAFETY: Nothing is decoded so the bytes are valid UTF-8. DecodeInPlace::Src(unsafe { View::new(bytes) }) } else { // SAFETY: The length must be less. DecodeInPlace::Dst(unsafe { bytes.get_unchecked_mut(..len) }) } } /// Returns an iterator over subslices of the `View` separated by the given delimiter. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. /// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.2 #[inline] pub fn split_view(self, delim: char) -> SplitView<'a> { assert!( delim.is_ascii() && table::RESERVED.allows(delim as u8), "splitting with non-reserved character" ); SplitView { s: self.into_bytes(), delim: delim as u8, finished: false, } } /// Splits the `View` on the first occurrence of the given delimiter and /// returns prefix before delimiter and suffix after delimiter. /// /// Returns `Err(self)` if the delimiter is not found. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. /// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986/#section-2.2 #[inline] pub fn split_once_view(self, delim: char) -> Result<(Self, Self), Self> { assert!( delim.is_ascii() && table::RESERVED.allows(delim as u8), "splitting with non-reserved character" ); let i = match self.as_str().bytes().position(|x| x == delim as u8) { Some(i) => i, None => return Err(self), }; let (head, tail) = self.into_bytes().split_at_mut(i); // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. unsafe { Ok((View::new(head), View::new(&mut tail[1..]))) } } } /// A wrapper of percent-decoded bytes. /// /// This enum is created by the [`decode`] method on [`EStr`]. /// /// [`decode`]: EStr::decode #[derive(Clone, Debug)] pub enum Decode<'a> { /// No percent-encoded octets are decoded. Src(&'a str), /// One or more percent-encoded octets are decoded. 
Dst(Vec), } impl<'a> Decode<'a> { /// Returns a reference to the decoded bytes. #[inline] pub fn as_bytes(&self) -> &[u8] { match self { Self::Src(s) => s.as_bytes(), Self::Dst(vec) => vec, } } /// Consumes this `Decode` and yields the underlying decoded bytes. #[inline] pub fn into_bytes(self) -> Cow<'a, [u8]> { match self { Self::Src(s) => Cow::Borrowed(s.as_bytes()), Self::Dst(vec) => Cow::Owned(vec), } } /// Returns `true` if anything is decoded. #[cfg(feature = "unstable")] #[inline] pub fn decoded_any(&self) -> bool { matches!(self, Self::Dst(_)) } /// Converts the decoded bytes to a string. /// /// An error is returned if the decoded bytes are not valid UTF-8. #[inline] pub fn into_string(self) -> Result, FromUtf8Error> { // FIXME: A (maybe) more efficient approach: only validating encoded sequences. match self { Self::Src(s) => Ok(Cow::Borrowed(s)), Self::Dst(vec) => String::from_utf8(vec).map(Cow::Owned), } } /// Converts the decoded bytes to a string lossily. pub fn into_string_lossy(self) -> Cow<'a, str> { match self { Self::Src(s) => Cow::Borrowed(s), Self::Dst(vec) => Cow::Owned(match String::from_utf8_lossy(&vec) { // SAFETY: If a borrowed string slice is returned, the bytes must be valid UTF-8. Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(vec) }, Cow::Owned(string) => string, }), } } } /// A wrapper of borrowed percent-decoded bytes. /// /// This enum is created by the [`decode_with`] method on [`EStr`]. /// /// [`decode_with`]: EStr::decode_with #[cfg(feature = "unstable")] #[derive(Clone, Copy, Debug)] pub enum DecodeRef<'src, 'dst> { /// Nothing decoded, i.e., borrowed from the source. Src(&'src EStr), /// Something decoded, i.e., borrowed from the buffer. Dst(&'dst [u8]), } #[cfg(feature = "unstable")] impl<'src, 'dst> DecodeRef<'src, 'dst> { /// Returns a reference to the decoded bytes. 
#[inline] pub fn as_bytes<'a>(&self) -> &'a [u8] where 'src: 'a, 'dst: 'a, { match *self { Self::Src(s) => s.as_str().as_bytes(), Self::Dst(s) => s, } } /// Returns `true` if anything is decoded. #[inline] pub fn decoded_any(&self) -> bool { matches!(self, Self::Dst(_)) } /// Converts the decoded bytes to a string slice. /// /// An error is returned if the decoded bytes are not valid UTF-8. #[inline] pub fn to_str<'a>(&self) -> Result<&'a str, Utf8Error> where 'src: 'a, 'dst: 'a, { match *self { Self::Src(s) => Ok(s.as_str()), Self::Dst(s) => str::from_utf8(s), } } /// Converts the decoded bytes to a string lossily. #[inline] pub fn to_string_lossy<'a>(&self) -> Cow<'a, str> where 'src: 'a, 'dst: 'a, { match *self { Self::Src(s) => Cow::Borrowed(s.as_str()), Self::Dst(s) => String::from_utf8_lossy(s), } } } /// A wrapper of in-place percent-decoded bytes. /// /// This enum is created by the [`decode_in_place`] method on [`View`]. /// /// [`decode_in_place`]: View::::decode_in_place #[derive(Debug)] pub enum DecodeInPlace<'a> { /// No percent-encoded octets are decoded. Src(View<'a, str>), /// One or more percent-encoded octets are decoded. Dst(&'a mut [u8]), } impl<'a> DecodeInPlace<'a> { /// Returns a reference to the decoded bytes. #[inline] pub fn as_bytes(&self) -> &[u8] { match self { Self::Src(s) => s.as_bytes(), Self::Dst(s) => s, } } /// Consumes this `DecodeInPlace` and yields the underlying decoded bytes. #[inline] pub fn into_bytes(self) -> &'a mut [u8] { match self { Self::Src(s) => s.into_bytes(), Self::Dst(s) => s, } } /// Returns `true` if anything is decoded. #[cfg(feature = "unstable")] #[inline] pub fn decoded_any(&self) -> bool { matches!(self, Self::Dst(_)) } /// Converts the decoded bytes to a [`View`]. /// /// An error along with the decoded bytes is returned if the bytes are not valid UTF-8. 
#[inline] pub fn into_str_view(self) -> Result, (&'a mut [u8], Utf8Error)> { match self { Self::Src(s) => Ok(s), Self::Dst(s) => match str::from_utf8(s) { // SAFETY: The validation is done. Ok(_) => Ok(unsafe { View::new(s) }), Err(e) => Err((s, e)), }, } } /// Converts the decoded bytes to a string slice. /// /// An error along with the decoded bytes is returned if the bytes are not valid UTF-8. #[inline] pub fn into_str(self) -> Result<&'a str, (&'a mut [u8], Utf8Error)> { self.into_str_view().map(View::into_ref) } /// Converts the decoded bytes to a string lossily. #[inline] pub fn into_string_lossy(self) -> Cow<'a, str> { match self { Self::Src(s) => Cow::Borrowed(s.into_ref()), Self::Dst(s) => String::from_utf8_lossy(s), } } } /// An iterator over subslices of an [`EStr`] separated by a delimiter. /// /// This struct is created by the [`split`] method on [`EStr`]. /// /// [`split`]: EStr::split #[derive(Clone, Debug)] #[must_use = "iterators are lazy and do nothing unless consumed"] pub struct Split<'a> { s: &'a [u8], delim: u8, pub(crate) finished: bool, } impl<'a> Iterator for Split<'a> { type Item = &'a EStr; #[inline] fn next(&mut self) -> Option<&'a EStr> { if self.finished { return None; } let head; match self.s.iter().position(|&x| x == self.delim) { Some(i) => { head = &self.s[..i]; self.s = &self.s[i + 1..]; } None => { self.finished = true; head = self.s; } } // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. 
Some(unsafe { EStr::new_unchecked(head) }) } #[inline] fn size_hint(&self) -> (usize, Option) { if self.finished { (0, Some(0)) } else { (1, Some(self.s.len() + 1)) } } } impl<'a> DoubleEndedIterator for Split<'a> { #[inline] fn next_back(&mut self) -> Option<&'a EStr> { if self.finished { return None; } let tail; match self.s.iter().rposition(|&x| x == self.delim) { Some(i) => { tail = &self.s[i + 1..]; self.s = &self.s[..i]; } None => { self.finished = true; tail = self.s; } } // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. Some(unsafe { EStr::new_unchecked(tail) }) } } impl FusedIterator for Split<'_> {} /// An iterator over subslices of a [`View`] separated by a delimiter. /// /// This struct is created by the [`split_view`] method on [`View`]. /// /// [`split_view`]: View::::split_view #[derive(Debug)] #[must_use = "iterators are lazy and do nothing unless consumed"] pub struct SplitView<'a> { s: &'a mut [u8], delim: u8, pub(crate) finished: bool, } impl<'a> Iterator for SplitView<'a> { type Item = View<'a, EStr>; #[inline] fn next(&mut self) -> Option> { if self.finished { return None; } let head = match self.s.iter().position(|&x| x == self.delim) { Some(i) => { let tmp = mem::take(&mut self.s); let (head, tail) = tmp.split_at_mut(i); self.s = &mut tail[1..]; head } None => { self.finished = true; mem::take(&mut self.s) } }; // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. Some(unsafe { View::new(head) }) } #[inline] fn size_hint(&self) -> (usize, Option) { if self.finished { (0, Some(0)) } else { (1, Some(self.s.len() + 1)) } } } impl<'a> DoubleEndedIterator for SplitView<'a> { #[inline] fn next_back(&mut self) -> Option> { if self.finished { return None; } let tail = match self.s.iter().rposition(|&x| x == self.delim) { Some(i) => { let tmp = mem::take(&mut self.s); let (head, tail) = tmp.split_at_mut(i); self.s = head; &mut tail[1..] 
} None => { self.finished = true; mem::take(&mut self.s) } }; // SAFETY: Splitting at a reserved character leaves valid percent-encoded UTF-8. Some(unsafe { View::new(tail) }) } } impl FusedIterator for SplitView<'_> {} /// An error occurred when attempting to write to a buffer that is too small. /// /// This error is created by the [`to_mut_in`] method on [`Uri`]. // FIXME: Add `EStr::decode_with` when it is stabilized. /// /// [`to_mut_in`]: crate::Uri::to_mut_in /// [`Uri`]: crate::Uri #[cfg(feature = "unstable")] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct BufferTooSmallError(()); #[cfg(all(feature = "unstable", feature = "std"))] impl std::error::Error for BufferTooSmallError {} #[cfg(feature = "unstable")] impl fmt::Display for BufferTooSmallError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "buffer too small") } } #[cfg(feature = "unstable")] pub(crate) mod internal { use crate::enc::BufferTooSmallError; use alloc::{collections::TryReserveError, string::String, vec::Vec}; use core::mem::MaybeUninit; pub trait AsMutVec { unsafe fn as_mut_vec(&mut self) -> &mut Vec; } impl AsMutVec for Vec { #[inline] unsafe fn as_mut_vec(&mut self) -> &mut Vec { self } } impl AsMutVec for String { #[inline] unsafe fn as_mut_vec(&mut self) -> &mut Vec { // SAFETY: The caller must not mess up the string. unsafe { self.as_mut_vec() } } } pub trait Buf { type PrepareError; fn prepare(&mut self, len: usize) -> Result<*mut u8, Self::PrepareError>; unsafe fn finish(&mut self, len: usize); } impl Buf for Vec { type PrepareError = TryReserveError; #[inline] fn prepare(&mut self, len: usize) -> Result<*mut u8, TryReserveError> { self.try_reserve(len)?; Ok(self.as_mut_ptr_range().end) } #[inline] unsafe fn finish(&mut self, len: usize) { // SAFETY: The caller must ensure that the additional `len` bytes are initialized. 
unsafe { self.set_len(self.len() + len) } } } impl Buf for [u8] { type PrepareError = BufferTooSmallError; #[inline] fn prepare(&mut self, len: usize) -> Result<*mut u8, BufferTooSmallError> { if self.len() < len { Err(BufferTooSmallError(())) } else { Ok(self.as_mut_ptr()) } } #[inline] unsafe fn finish(&mut self, _len: usize) {} } impl Buf for [MaybeUninit] { type PrepareError = BufferTooSmallError; #[inline] fn prepare(&mut self, len: usize) -> Result<*mut u8, BufferTooSmallError> { if self.len() < len { Err(BufferTooSmallError(())) } else { Ok(self.as_mut_ptr().cast()) } } #[inline] unsafe fn finish(&mut self, _len: usize) {} } } fluent-uri-0.1.4/src/enc/table.rs000064400000000000000000000124670072674642500147530ustar 00000000000000//! Byte pattern tables from RFC 3986. //! //! The predefined table constants in this module are documented with //! the ABNF notation of [RFC 2234]. //! //! [RFC 2234]: https://datatracker.ietf.org/doc/html/rfc2234/ /// A table determining the byte patterns allowed in a string. /// /// It is guaranteed that the unencoded bytes allowed are ASCII and that /// an unencoded `%` is not allowed. #[derive(Clone, Copy, Debug)] pub struct Table { arr: [u8; 256], allows_enc: bool, } impl Table { /// Generates a table that only allows the given unencoded bytes. /// /// # Panics /// /// Panics if any of the bytes is not ASCII or is `%`. pub const fn gen(mut bytes: &[u8]) -> Table { let mut arr = [0; 256]; while let [cur, rem @ ..] = bytes { assert!(cur.is_ascii() && *cur != b'%', "non-ASCII or %"); arr[*cur as usize] = 1; bytes = rem; } Table { arr, allows_enc: false, } } /// Marks this table as allowing percent-encoded octets. pub const fn enc(mut self) -> Table { self.allows_enc = true; self } /// Combines two tables into one. /// /// Returns a new table that allows all the byte patterns allowed /// either by `self` or by `other`. 
pub const fn or(mut self, other: &Table) -> Table { let mut i = 0; while i < 128 { self.arr[i] |= other.arr[i]; i += 1; } self.allows_enc |= other.allows_enc; self } /// Subtracts from this table. /// /// Returns a new table that allows all the byte patterns allowed /// by `self` but not allowed by `other`. #[cfg(feature = "unstable")] pub const fn sub(mut self, other: &Table) -> Table { let mut i = 0; while i < 128 { if other.arr[i] != 0 { self.arr[i] = 0; } i += 1; } if other.allows_enc { self.allows_enc = false; } self } /// Returns `true` if the table is a subset of another, i.e., `other` /// allows at least all the byte patterns allowed by `self`. #[cfg(feature = "unstable")] pub const fn is_subset(&self, other: &Table) -> bool { let mut i = 0; while i < 128 { if self.arr[i] != 0 && other.arr[i] == 0 { return false; } i += 1; } !self.allows_enc || other.allows_enc } /// Shifts the table values left. pub(crate) const fn shl(mut self, n: u8) -> Table { let mut i = 0; while i < 128 { self.arr[i] <<= n; i += 1; } self } /// Returns the specified table value. #[inline] pub(crate) const fn get(&self, x: u8) -> u8 { self.arr[x as usize] } /// Returns `true` if an unencoded byte is allowed by the table. #[inline] pub const fn allows(&self, x: u8) -> bool { self.get(x) != 0 } /// Returns `true` if percent-encoded octets are allowed by the table. #[inline] pub const fn allows_enc(&self) -> bool { self.allows_enc } } const fn gen(bytes: &[u8]) -> Table { Table::gen(bytes) } /// ALPHA = A-Z / a-z pub const ALPHA: &Table = &gen(b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); /// DIGIT = 0-9 pub const DIGIT: &Table = &gen(b"0123456789"); /// HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" /// / "a" / "b" / "c" / "d" / "e" / "f" pub const HEXDIG: &Table = &DIGIT.or(&gen(b"ABCDEFabcdef")); /// reserved = gen-delims / sub-delims pub const RESERVED: &Table = &GEN_DELIMS.or(SUB_DELIMS); /// gen-delims = ":" / "/" / "?" 
/ "#" / "[" / "]" / "@" pub const GEN_DELIMS: &Table = &gen(b":/?#[]@"); /// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" /// / "*" / "+" / "," / ";" / "=" pub const SUB_DELIMS: &Table = &gen(b"!$&'()*+,;="); /// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" pub const UNRESERVED: &Table = &ALPHA.or(DIGIT).or(&gen(b"-._~")); /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" pub const PCHAR: &Table = &UNRESERVED.or(SUB_DELIMS).or(&gen(b":@")).enc(); /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) pub const SEGMENT_NC: &Table = &UNRESERVED.or(SUB_DELIMS).or(&gen(b"@")).enc(); /// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) pub const SCHEME: &Table = &ALPHA.or(DIGIT).or(&gen(b"+-.")); /// userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) pub const USERINFO: &Table = &UNRESERVED.or(SUB_DELIMS).or(&gen(b":")).enc(); /// IPvFuture = "v" 1\*HEXDIG "." 1\*( unreserved / sub-delims / ":" ) #[cfg(feature = "ipv_future")] pub const IPV_FUTURE: &Table = &UNRESERVED.or(SUB_DELIMS).or(&gen(b":")); /// reg-name = *( unreserved / pct-encoded / sub-delims ) pub const REG_NAME: &Table = &UNRESERVED.or(SUB_DELIMS).enc(); /// path = *( pchar / "/" ) pub const PATH: &Table = &PCHAR.or(&gen(b"/")); /// query = fragment = *( pchar / "/" / "?" 
) pub const QUERY_FRAGMENT: &Table = &PCHAR.or(&gen(b"/?")); /// RFC 6874bis: ZoneID = 1*( unreserved ) #[cfg(feature = "rfc6874bis")] pub const ZONE_ID: &Table = UNRESERVED; fluent-uri-0.1.4/src/fmt.rs000064400000000000000000000117610072674642500137010ustar 00000000000000use super::*; use alloc::string::String; use core::fmt; impl fmt::Display for ParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self.kind { ParseErrorKind::InvalidOctet => "invalid percent-encoded octet at index ", ParseErrorKind::UnexpectedChar => "unexpected character at index ", ParseErrorKind::InvalidIpLiteral => "invalid IP literal at index ", }; write!(f, "{}{}", msg, self.index) } } impl fmt::Debug for Uri<&str> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Uri") .field("scheme", &self.scheme()) .field("authority", &self.authority()) .field("path", &self.path()) .field("query", &self.query()) .field("fragment", &self.fragment()) .finish() } } impl fmt::Display for Uri<&str> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Uri<&mut [u8]> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Uri").finish_non_exhaustive() } } impl fmt::Debug for Uri { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(self.borrow(), f) } } impl fmt::Display for Uri { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Display for Scheme { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Scheme { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(self.as_str(), f) } } impl fmt::Debug for Authority<&str> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Authority") .field("userinfo", &self.userinfo()) 
.field("host", &self.host()) .field("port", &self.port()) .finish() } } impl fmt::Display for Authority<&str> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Authority<&mut [u8]> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Authority").finish_non_exhaustive() } } impl fmt::Debug for Authority { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Authority") .field("userinfo", &self.userinfo()) .field("host", &self.host()) .field("port", &self.port()) .finish() } } impl fmt::Display for Authority { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Host<&str> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Host") .field("text", &self.as_str()) .field("data", &self.data()) .finish() } } impl fmt::Display for Host<&str> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Host { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Host") .field("text", &self.as_str()) .field("data", &self.data()) .finish() } } impl fmt::Display for Host { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Host<&mut [u8]> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Host") .field("text", &self.as_str()) .field("data", &self.data()) .finish() } } impl fmt::Display for Host<&mut [u8]> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Display for Path { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } impl fmt::Debug for Path { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 
fmt::Debug::fmt(self.as_str(), f) } } impl<'a, T: ?Sized + fmt::Display + Lens> fmt::Display for View<'a, T> { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_ref(), f) } } impl<'a, T: ?Sized + fmt::Debug + Lens> fmt::Debug for View<'a, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("View").field(&&**self).finish() } } fluent-uri-0.1.4/src/internal.rs000064400000000000000000000125200072674642500147210ustar 00000000000000#![allow(missing_debug_implementations)] use alloc::{string::String, vec::Vec}; use core::{ cell::Cell, mem::MaybeUninit, num::NonZeroU32, ops::{Deref, DerefMut}, }; #[cfg(feature = "std")] use std::net::{Ipv4Addr, Ipv6Addr}; use super::*; use bitflags::bitflags; pub trait Pointer { fn get(&self) -> *mut u8; fn len(&self) -> u32; /// Creates a `Self` from the given (ptr, len, cap) triple. /// /// # Safety /// /// - The pointer must not be null. /// - The length and capacity must be correct. unsafe fn new(ptr: *mut u8, len: u32, cap: u32) -> Self; const DANGLING: Self; } #[derive(Clone, Copy)] #[repr(C)] pub struct Uncapped { ptr: NonNull, len: u32, _pad: MaybeUninit, } impl Pointer for Uncapped { #[inline] fn get(&self) -> *mut u8 { self.ptr.as_ptr() } #[inline] fn len(&self) -> u32 { self.len } #[inline] unsafe fn new(ptr: *mut u8, len: u32, _cap: u32) -> Self { Self { // SAFETY: The caller must ensure that the pointer is not null. ptr: unsafe { NonNull::new_unchecked(ptr) }, len, _pad: MaybeUninit::uninit(), } } const DANGLING: Self = Self { ptr: NonNull::dangling(), len: 0, _pad: MaybeUninit::uninit(), }; } #[repr(C)] pub struct Capped { ptr: NonNull, len: u32, cap: u32, } impl Capped { #[inline] pub fn into_string(self) -> String { let me = ManuallyDrop::new(self); // SAFETY: `Capped` is created from a `String`. 
unsafe { String::from_raw_parts(me.ptr.as_ptr(), me.len as _, me.cap as _) } } } impl Pointer for Capped { #[inline] fn get(&self) -> *mut u8 { self.ptr.as_ptr() } #[inline] fn len(&self) -> u32 { self.len } #[inline] unsafe fn new(ptr: *mut u8, len: u32, cap: u32) -> Self { Self { // SAFETY: The caller must ensure that the pointer is not null. ptr: unsafe { NonNull::new_unchecked(ptr) }, len, cap, } } const DANGLING: Self = Self { ptr: NonNull::dangling(), len: 0, cap: 0, }; } impl Drop for Capped { #[inline] fn drop(&mut self) { // SAFETY: `Capped` is created from a `String`. let _ = unsafe { String::from_raw_parts(self.ptr.as_ptr(), 0, self.cap as _) }; } } pub trait Storage { type Ptr: Pointer; fn is_mut() -> bool; } impl Storage for &str { type Ptr = Uncapped; #[inline] fn is_mut() -> bool { false } } impl Storage for &mut [u8] { type Ptr = Uncapped; #[inline] fn is_mut() -> bool { true } } impl Storage for String { type Ptr = Capped; #[inline] fn is_mut() -> bool { false } } pub trait Io<'i, 'o>: Storage {} impl<'i, 'a> Io<'i, 'a> for &'a str {} impl<'a> Io<'a, 'a> for &mut [u8] {} impl<'a> Io<'a, 'a> for String {} pub trait IntoOwnedUri { fn as_raw_parts(&self) -> (*mut u8, usize, usize); } impl IntoOwnedUri for String { #[inline] fn as_raw_parts(&self) -> (*mut u8, usize, usize) { (self.as_ptr() as _, self.len(), self.capacity()) } } impl IntoOwnedUri for Vec { #[inline] fn as_raw_parts(&self) -> (*mut u8, usize, usize) { (self.as_ptr() as _, self.len(), self.capacity()) } } #[derive(Clone)] pub struct Data { pub tag: Tag, // The index of the trailing colon. pub scheme_end: Option, pub auth: Option, pub path_bounds: (u32, u32), // One byte past the last byte of query. pub query_end: Option, // One byte past the preceding '#'. 
pub fragment_start: Option, } impl Data { pub const INIT: Data = Data { tag: Tag::empty(), scheme_end: None, auth: None, path_bounds: (0, 0), query_end: None, fragment_start: None, }; } #[doc(hidden)] impl Deref for Uri { type Target = Data; #[inline] fn deref(&self) -> &Data { &self.data } } #[doc(hidden)] impl DerefMut for Uri { #[inline] fn deref_mut(&mut self) -> &mut Data { &mut self.data } } bitflags! { pub struct Tag: u32 { const HOST_REG_NAME = 0b00000001; const HOST_IPV4 = 0b00000010; const HOST_IPV6 = 0b00000100; const AUTH_TAKEN = 0b00001000; const HOST_TAKEN = 0b00010000; const PORT_TAKEN = 0b00100000; const PATH_TAKEN = 0b01000000; } } #[derive(Clone)] pub struct AuthData { pub start: Cell, pub host_bounds: (u32, u32), pub host_data: RawHostData, } #[derive(Clone, Copy)] pub union RawHostData { #[cfg(feature = "std")] pub ipv4_addr: Ipv4Addr, pub ipv6: Ipv6Data, #[cfg(feature = "ipv_future")] pub ipv_future_dot_i: u32, pub none: (), } #[derive(Clone, Copy)] pub struct Ipv6Data { #[cfg(feature = "std")] pub addr: Ipv6Addr, #[cfg(feature = "rfc6874bis")] pub zone_id_start: Option, } fluent-uri-0.1.4/src/lib.rs000064400000000000000000001033520072674642500136570ustar 00000000000000#![warn(missing_debug_implementations, missing_docs, rust_2018_idioms)] #![deny(unsafe_op_in_unsafe_fn)] #![cfg_attr(not(feature = "std"), no_std)] //! A generic URI parser that strictly adheres to IETF [RFC 3986]. //! //! [RFC 3986]: https://datatracker.ietf.org/doc/html/rfc3986/ //! //! See the documentation of [`Uri`] for more details. //! //! # Feature flags //! //! All features except `std` are disabled by default. Note that the last two features //! each alter the enum [`HostData`] in a backward incompatible way that could make it //! impossible for two crates that depend on different features of `fluent-uri` to //! be used together. //! //! - `std`: Enables `std` support. This includes [`Error`] implementations //! and `Ip{v4, v6}Addr` support in [`HostData`]. //! //! 
//! - `ipv_future`: Enables the parsing of [IPvFuture] literal addresses,
//!     which fails with [`InvalidIpLiteral`] when disabled.
//!
//!     Only enable this feature when you have a compelling reason to do so, such as
//!     that you have to deal with an existing system where the IPvFuture format is
//!     in use.
//!
//! - `rfc6874bis`: Enables the parsing of IPv6 zone identifiers,
//!     such as in `https://[fe80::abcd%en1]`.
//!
//!     This feature is based on the homonymous [draft] and is thus subject to change.
//!
//! [`Error`]: std::error::Error
//! [IPvFuture]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
//! [`InvalidIpLiteral`]: ParseErrorKind::InvalidIpLiteral
//! [draft]: https://datatracker.ietf.org/doc/html/draft-ietf-6man-rfc6874bis-05

extern crate alloc;

/// Utilities for percent-encoding.
pub mod enc;

mod fmt;

mod view;
pub use view::*;

mod parser;

use crate::enc::{EStr, Split};
use alloc::{string::String, vec::Vec};
use core::{iter::Iterator, marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, slice, str};

#[cfg(feature = "std")]
use std::net::{Ipv4Addr, Ipv6Addr};

mod internal;
use internal::*;

/// Detailed cause of a [`ParseError`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// Invalid percent-encoded octet that is either non-hexadecimal or incomplete.
    ///
    /// The error index points to the percent character "%" of the octet.
    InvalidOctet,
    /// Unexpected character that is not allowed by the URI syntax.
    ///
    /// The error index points to the character.
    UnexpectedChar,
    /// Invalid IP literal address.
    ///
    /// The error index points to the preceding left square bracket "[".
    InvalidIpLiteral,
}

/// An error that occurred when parsing a URI reference.
///
/// It carries the index at which parsing failed and the [`ParseErrorKind`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError {
    index: u32,
    kind: ParseErrorKind,
}

impl ParseError {
    /// Returns the index where the error occurred in the input string.
#[inline]
pub fn index(&self) -> usize {
    self.index as usize
}

/// Returns the detailed cause of the error.
#[inline]
pub fn kind(&self) -> ParseErrorKind {
    self.kind
}
}

#[cfg(feature = "std")]
impl std::error::Error for ParseError {}

// Crate-local result alias.
// NOTE(review): the generic parameters of this alias appear to have been
// stripped from this copy of the source; restore them from upstream.
type Result = core::result::Result;

// Cold panic path used by the parse functions when the input is too long.
#[cold]
fn len_overflow() -> ! {
    panic!("input length exceeds i32::MAX");
}

/// A [URI reference] defined in RFC 3986.
///
/// [URI reference]: https://datatracker.ietf.org/doc/html/rfc3986/#section-4.1
///
/// # Variants
///
/// There are three variants of `Uri` in total:
///
/// - `Uri<&str>`: borrowed; immutable.
/// - `Uri<&mut [u8]>`: borrowed; in-place mutable.
/// - `Uri<String>`: owned; immutable.
///
/// Lifetimes are correctly handled in a way that `Uri<&'a str>` and `Uri<&'a mut [u8]>`
/// both output references with lifetime `'a` where appropriate. This allows you to drop
/// a temporary `Uri` while keeping the output references:
///
/// ```
/// use fluent_uri::Uri;
///
/// let mut bytes = *b"foo:bar";
///
/// let uri = Uri::parse(&bytes)?;
/// let path = uri.path();
/// drop(uri);
/// assert_eq!(path.as_str(), "bar");
///
/// let mut uri = Uri::parse_mut(&mut bytes)?;
/// let path = uri.take_path();
/// drop(uri);
/// assert_eq!(path.as_str(), "bar");
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
///
/// # Examples
///
/// Create and convert between `Uri<&str>` and `Uri<String>`:
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri_str = "http://example.com/";
///
/// // Create a `Uri<&str>` from a string slice.
/// let uri_a: Uri<&str> = Uri::parse(uri_str)?;
///
/// // Create a `Uri<String>` from an owned string.
/// let uri_b: Uri<String> = Uri::parse_from(uri_str.to_owned()).map_err(|e| e.1)?;
///
/// // Convert a `Uri<&str>` to a `Uri<String>`.
/// let uri_c: Uri<String> = uri_a.to_owned();
///
/// // Borrow a `Uri<String>` as a `Uri<&str>`.
/// let uri_d: &Uri<&str> = uri_b.borrow();
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
///
/// Decode and extract query parameters in-place from a URI reference:
///
/// ```
/// use fluent_uri::{ParseError, Uri};
/// use std::collections::HashMap;
///
/// fn decode_and_extract_query(
///     bytes: &mut [u8],
/// ) -> Result<(Uri<&mut [u8]>, HashMap<&str, &str>), ParseError> {
///     let mut uri = Uri::parse_mut(bytes)?;
///     let map = if let Some(query) = uri.take_query() {
///         query
///             .split_view('&')
///             .flat_map(|pair| pair.split_once_view('='))
///             .map(|(k, v)| (k.decode_in_place(), v.decode_in_place()))
///             .flat_map(|(k, v)| k.into_str().ok().zip(v.into_str().ok()))
///             .collect()
///     } else {
///         HashMap::new()
///     };
///     Ok((uri, map))
/// }
///
/// let mut bytes = *b"?lang=Rust&mascot=Ferris%20the%20crab";
/// let (uri, query) = decode_and_extract_query(&mut bytes)?;
///
/// assert_eq!(query["lang"], "Rust");
/// assert_eq!(query["mascot"], "Ferris the crab");
///
/// // The query is taken from the `Uri`.
/// assert!(uri.query().is_none());
/// // In-place decoding is like this if you're interested:
/// assert_eq!(&bytes, b"?lang=Rust&mascot=Ferris the crabcrab");
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
// TODO: Create a mutable copy of an immutable `Uri` in a buffer:
// NOTE(review): the generic parameter list of `Uri` (and the arguments of
// `PhantomData`) appear to have been stripped from this copy of the source.
#[repr(C)]
pub struct Uri {
    // Pointer to the underlying bytes; its type is chosen by the storage `T`.
    ptr: T::Ptr,
    // Component indexes produced by the parser.
    data: Data,
    _marker: PhantomData,
}

impl<'a> Uri<&'a str> {
    /// Parses a URI reference from a byte sequence into a `Uri<&str>`.
    ///
    /// This function validates the input strictly except that UTF-8 validation is not
    /// performed on a percent-encoded registered name (see [Section 3.2.2, RFC 3986][1]).
    /// Care should be taken when dealing with such cases.
    ///
    /// [1]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
    ///
    /// # Panics
    ///
    /// Panics if the input length is greater than [`i32::MAX`].
pub fn parse + ?Sized>(s: &S) -> Result> {
    let bytes = s.as_ref();
    // Indexes are stored as `u32`; inputs longer than `i32::MAX` are rejected up front.
    if bytes.len() > i32::MAX as usize {
        len_overflow();
    }
    // SAFETY: We're using the correct pointer, length, capacity, and generics.
    unsafe { parser::parse(bytes.as_ptr() as *mut _, bytes.len() as u32, 0) }
}

/// Duplicates this `Uri<&str>`.
///
/// Only the component data is cloned; the result points at the same bytes.
#[inline]
pub fn dup(&self) -> Uri<&'a str> {
    Uri {
        data: self.data.clone(),
        ..*self
    }
}

/// Creates a new `Uri<String>` by cloning the contents of this `Uri<&str>`.
#[inline]
pub fn to_owned(&self) -> Uri {
    let len = self.len();
    // We're allocating manually because there is no guarantee that
    // `String::to_owned` gives the exact capacity of `self.len`.
    // NOTE(review): `Vec::with_capacity` is documented to allocate *at least*
    // the requested capacity, while `len` is recorded as the capacity below.
    // Confirm this is sound or record `vec.capacity()` instead.
    let mut vec = ManuallyDrop::new(Vec::with_capacity(len as usize));
    let ptr = vec.as_mut_ptr();

    // SAFETY: The capacity of `vec` is exactly `self.len`.
    // Newly allocated `Vec` won't overlap with existing data.
    unsafe {
        self.ptr.get().copy_to_nonoverlapping(ptr, len as usize);
    }

    Uri {
        // SAFETY: The pointer is not null and the length and capacity are correct.
        ptr: unsafe { Capped::new(ptr, len, len) },
        data: self.data.clone(),
        _marker: PhantomData,
    }
}
}

impl<'i, 'o, T: Io<'i, 'o> + AsRef> Uri {
    #[inline]
    /// Returns the URI reference as a string slice.
    pub fn as_str(&'i self) -> &'o str {
        // SAFETY: The indexes are within bounds and the validation is done.
        unsafe { self.slice(0, self.len()) }
    }

    /// Creates a mutable copy of this `Uri` in the given buffer.
    ///
    /// The type of a buffer may be:
    ///
    /// - [`Vec`]: bytes appended to the end; returns a [`TryReserveError`]
    ///     when the allocation fails.
    ///
    /// - [`[u8]`](prim@slice) or [`[MaybeUninit]`](prim@slice): bytes
    ///     written from the start; returns a [`BufferTooSmallError`] when
    ///     the buffer is too small.
///
/// [`TryReserveError`]: std::collections::TryReserveError
/// [`BufferTooSmallError`]: crate::enc::BufferTooSmallError
#[cfg(feature = "unstable")]
#[inline]
pub fn to_mut_in<'b, B: crate::enc::internal::Buf + ?Sized>(
    &self,
    buf: &'b mut B,
) -> Result, B::PrepareError> {
    let len = self.len();
    // Ask the buffer for `len` bytes; propagates the buffer's own error type.
    let ptr = buf.prepare(len as usize)?;

    // SAFETY: We have reserved enough space in the buffer, and
    // mutable reference `buf` ensures exclusive access.
    unsafe {
        self.ptr.get().copy_to_nonoverlapping(ptr, len as usize);
        buf.finish(len as usize);
    }

    Ok(Uri {
        // SAFETY: The pointer is not null and the length and capacity are correct.
        ptr: unsafe { Uncapped::new(ptr, len, 0) },
        data: self.data.clone(),
        _marker: PhantomData,
    })
}
}

// Cold panic path used when a component that was already taken is requested.
#[cold]
fn component_taken() -> ! {
    panic!("component already taken");
}

impl<'i, 'o, T: Io<'i, 'o>> Uri {
    // Length of the underlying bytes.
    #[inline]
    fn len(&self) -> u32 {
        self.ptr.len()
    }

    // Returns the bytes in `start..end` as a string slice, without checks in
    // release builds. The caller must pass indexes with `start <= end <= len`.
    #[inline]
    unsafe fn slice(&'i self, start: u32, end: u32) -> &'o str {
        debug_assert!(start <= end && end <= self.len());
        // SAFETY: The caller must ensure that the indexes are within bounds.
        let bytes = unsafe {
            slice::from_raw_parts(self.ptr.get().add(start as usize), (end - start) as usize)
        };
        // SAFETY: The parser guarantees that the bytes are valid UTF-8.
        unsafe { str::from_utf8_unchecked(bytes) }
    }

    // Same as `slice`, but wraps the result in an `EStr`. The caller must also
    // ensure that the subslice is properly percent-encoded.
    #[inline]
    unsafe fn eslice(&'i self, start: u32, end: u32) -> &'o EStr {
        // SAFETY: The caller must ensure that the indexes are within bounds.
        let s = unsafe { self.slice(start, end) };
        // SAFETY: The caller must ensure that the subslice is properly encoded.
        unsafe { EStr::new_unchecked(s.as_bytes()) }
    }

    /// Returns the [scheme] component.
    ///
    /// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.1
    #[inline]
    pub fn scheme(&'i self) -> Option<&'o Scheme> {
        // SAFETY: The indexes are within bounds and the validation is done.
        self.scheme_end
            .map(|i| Scheme::new(unsafe { self.slice(0, i.get()) }))
    }

    /// Returns the [authority] component.
///
/// Returns `None` if there is no authority or, for mutable storage, if the
/// authority has already been taken.
///
/// [authority]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2
#[inline]
pub fn authority(&self) -> Option<&Authority> {
    // "Taken" flags are only consulted for mutable storage.
    if T::is_mut() && self.tag.contains(Tag::AUTH_TAKEN) {
        return None;
    }

    if self.auth.is_some() {
        // SAFETY: The authority is present and not modified.
        Some(unsafe { Authority::new(self) })
    } else {
        None
    }
}

/// Returns the [path] component.
///
/// [path]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3
///
/// # Panics
///
/// Panics if the path component is already taken.
#[inline]
pub fn path(&'i self) -> &'o Path {
    if T::is_mut() && self.tag.contains(Tag::PATH_TAKEN) {
        component_taken();
    }
    // SAFETY: The indexes are within bounds and the validation is done.
    Path::new(unsafe { self.eslice(self.path_bounds.0, self.path_bounds.1) })
}

/// Returns the [query] component.
///
/// [query]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.4
#[inline]
pub fn query(&'i self) -> Option<&'o EStr> {
    // The query starts one byte past the end of the path (after the "?").
    // SAFETY: The indexes are within bounds and the validation is done.
    self.query_end
        .map(|i| unsafe { self.eslice(self.path_bounds.1 + 1, i.get()) })
}

/// Returns the [fragment] component.
///
/// [fragment]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.5
#[inline]
pub fn fragment(&'i self) -> Option<&'o EStr> {
    // The fragment runs from `fragment_start` to the end of the input.
    // SAFETY: The indexes are within bounds and the validation is done.
    self.fragment_start
        .map(|i| unsafe { self.eslice(i.get(), self.len()) })
}

/// Returns `true` if the URI reference is [relative], i.e., without a scheme.
///
/// Note that this method is not the opposite of [`is_absolute`].
///
/// [relative]: https://datatracker.ietf.org/doc/html/rfc3986/#section-4.2
/// [`is_absolute`]: Self::is_absolute
///
/// # Examples
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri = Uri::parse("/path/to/file")?;
/// assert!(uri.is_relative());
/// let uri = Uri::parse("http://example.com/")?;
/// assert!(!uri.is_relative());
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[inline]
pub fn is_relative(&self) -> bool {
    self.scheme_end.is_none()
}

/// Returns `true` if the URI reference is [absolute], i.e., with a scheme and without a fragment.
///
/// Note that this method is not the opposite of [`is_relative`].
///
/// [absolute]: https://datatracker.ietf.org/doc/html/rfc3986/#section-4.3
/// [`is_relative`]: Self::is_relative
///
/// # Examples
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri = Uri::parse("http://example.com/")?;
/// assert!(uri.is_absolute());
/// let uri = Uri::parse("http://example.com/#title1")?;
/// assert!(!uri.is_absolute());
/// let uri = Uri::parse("/path/to/file")?;
/// assert!(!uri.is_absolute());
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[inline]
pub fn is_absolute(&self) -> bool {
    self.scheme_end.is_some() && self.fragment_start.is_none()
}
}

// NOTE(review): generic argument lists (`<...>`) appear to have been stripped
// from several signatures in this impl (e.g. `parse_mut`, `view`, and the
// `Option` return types of the `take_*` methods); restore them from upstream.
impl<'a> Uri<&'a mut [u8]> {
    /// Parses a URI reference from a mutable byte sequence into a `Uri<&mut [u8]>`.
    ///
    /// See the [`parse`] function for more details.
    ///
    /// [`parse`]: Uri::parse
    ///
    /// # Panics
    ///
    /// Panics if the input length is greater than [`i32::MAX`].
    #[inline]
    pub fn parse_mut + ?Sized>(s: &mut S) -> Result> {
        let bytes = s.as_mut();
        if bytes.len() > i32::MAX as usize {
            len_overflow();
        }
        // SAFETY: We're using the correct pointer, length, capacity, and generics.
        unsafe { parser::parse(bytes.as_mut_ptr(), bytes.len() as u32, 0) }
    }

    // Creates a mutable view over the bytes in `start..end`. The caller must
    // pass in-bounds indexes over bytes that are properly encoded for `T`.
    #[inline]
    unsafe fn view(&mut self, start: u32, end: u32) -> View<'a, T>
    where
        T: ?Sized + Lens,
    {
        debug_assert!(start <= end && end <= self.len());
        // SAFETY: The caller must ensure that the indexes are within bounds.
        let bytes = unsafe {
            slice::from_raw_parts_mut(self.ptr.get().add(start as usize), (end - start) as usize)
        };
        // SAFETY: The caller must ensure that the bytes are properly encoded.
        unsafe { View::new(bytes) }
    }

    /// Takes a view of the scheme component, leaving a `None` in its place.
    #[inline]
    pub fn take_scheme(&mut self) -> Option> {
        // SAFETY: The indexes are within bounds and the validation is done.
        self.scheme_end
            .take()
            .map(|i| unsafe { self.view(0, i.get()) })
    }

    /// Takes a view of the authority component, leaving a `None` in its place.
    #[inline]
    pub fn take_authority(&mut self) -> Option>> {
        if self.tag.contains(Tag::AUTH_TAKEN) {
            return None;
        }
        // The flag is set even when there is no authority to take.
        self.tag |= Tag::AUTH_TAKEN;

        if self.auth.is_some() {
            // SAFETY: The authority is present and not modified.
            Some(unsafe { View::new(self) })
        } else {
            None
        }
    }

    /// Takes a view of the path component.
    ///
    /// # Panics
    ///
    /// Panics if the path component is already taken.
    #[inline]
    pub fn take_path(&mut self) -> View<'a, Path> {
        if self.tag.contains(Tag::PATH_TAKEN) {
            component_taken();
        }
        self.tag |= Tag::PATH_TAKEN;

        // SAFETY: The indexes are within bounds and the validation is done.
        unsafe { self.view(self.path_bounds.0, self.path_bounds.1) }
    }

    /// Takes a view of the query component, leaving a `None` in its place.
    #[inline]
    pub fn take_query(&mut self) -> Option> {
        // SAFETY: The indexes are within bounds and the validation is done.
        self.query_end
            .take()
            .map(|i| unsafe { self.view(self.path_bounds.1 + 1, i.get()) })
    }

    /// Takes a view of the fragment component, leaving a `None` in its place.
    #[inline]
    pub fn take_fragment(&mut self) -> Option> {
        // SAFETY: The indexes are within bounds and the validation is done.
        self.fragment_start
            .take()
            .map(|i| unsafe { self.view(i.get(), self.len()) })
    }
}

impl Uri {
    /// Parses a URI reference from a [`String`] or [`Vec`] into a `Uri`.
    ///
    /// See the [`parse`] function for more details.
///
/// On failure, the input is handed back together with the error.
///
/// [`parse`]: Uri::parse
///
/// # Panics
///
/// Panics if the input capacity is greater than [`i32::MAX`].
#[inline]
pub fn parse_from(t: T) -> Result, (T, ParseError)> {
    #[cold]
    fn cap_overflow() -> ! {
        panic!("input capacity exceeds i32::MAX");
    }

    // The buffer must not be dropped here: on success its allocation is
    // owned by the returned `Uri`.
    let buf = ManuallyDrop::new(t);
    let (ptr, len, cap) = buf.as_raw_parts();
    if cap > i32::MAX as usize {
        cap_overflow();
    }

    // SAFETY: We're using the correct pointer, length, capacity, and generics.
    match unsafe { parser::parse(ptr, len as u32, cap as u32) } {
        Ok(out) => Ok(out),
        // Give the untouched input back to the caller.
        Err(e) => Err((ManuallyDrop::into_inner(buf), e)),
    }
}

/// Consumes this `Uri` and yields the underlying [`String`] storage.
///
/// # Examples
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri = Uri::parse("https://www.rust-lang.org/")?.to_owned();
/// let string = uri.into_string();
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[inline]
pub fn into_string(self) -> String {
    self.ptr.into_string()
}

/// Borrows this `Uri` as a reference to `Uri<&str>`.
#[inline]
// We can't impl `Borrow` due to the limitation of lifetimes.
#[allow(clippy::should_implement_trait)]
pub fn borrow(&self) -> &Uri<&str> {
    // SAFETY: `Uri` has a fixed layout, `Uri<&str>` with a capacity is
    // always fine and the lifetimes are correct.
    unsafe { &*(self as *const Uri as *const Uri<&str>) }
}
}

// Cloning goes through `borrow` and `to_owned`, i.e. the bytes are copied into
// a fresh allocation.
impl Clone for Uri {
    #[inline]
    fn clone(&self) -> Self {
        self.borrow().to_owned()
    }
}

impl Default for Uri {
    /// Creates an empty `Uri`.
    #[inline]
    fn default() -> Self {
        Uri {
            ptr: T::Ptr::DANGLING,
            data: Data::INIT,
            _marker: PhantomData,
        }
    }
}

// SAFETY: `&str`, `&mut [u8]` and `String` are all Send and Sync.
// NOTE(review): `Data` holds `AuthData::start`, which is a `Cell`. Confirm that
// it is never written through a shared reference, otherwise `Sync` is unsound.
unsafe impl Send for Uri {}
unsafe impl Sync for Uri {}

/// The [scheme] component of URI reference.
///
/// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.1
#[repr(transparent)]
pub struct Scheme(str);

// The bit that distinguishes ASCII uppercase letters from lowercase ones.
const ASCII_CASE_MASK: u8 = 0b010_0000;

impl Scheme {
    // Reinterprets a string slice as a `Scheme`.
    #[inline]
    fn new(scheme: &str) -> &Scheme {
        // SAFETY: Transparency holds.
        unsafe { &*(scheme as *const str as *const Scheme) }
    }

    /// Returns the scheme as a string slice.
    ///
    /// Note that the scheme is case-insensitive. You should typically use
    /// [`eq_lowercase`] for testing if the scheme is a desired one.
    ///
    /// [`eq_lowercase`]: Self::eq_lowercase
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// let uri = Uri::parse("HTTP://example.com/")?;
    /// let scheme = uri.scheme().unwrap();
    /// assert_eq!(scheme.as_str(), "HTTP");
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn as_str(&self) -> &str {
        &self.0
    }

    /// Returns the scheme as a string in lower case.
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// let uri = Uri::parse("HTTP://example.com/")?;
    /// let scheme = uri.scheme().unwrap();
    /// assert_eq!(scheme.to_lowercase(), "http");
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn to_lowercase(&self) -> String {
        // Setting the case bit lowercases letters; see the note in
        // `eq_lowercase` on why this is valid for every scheme character.
        let bytes = self.0.bytes().map(|x| x | ASCII_CASE_MASK).collect();
        // SAFETY: Setting the sixth bit keeps UTF-8.
        unsafe { String::from_utf8_unchecked(bytes) }
    }

    /// Checks if the scheme equals case-insensitively with a lowercase string.
    ///
    /// This method is slightly faster than [`str::eq_ignore_ascii_case`] but will
    /// always return `false` if there is any uppercase letter in the given string.
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// let uri = Uri::parse("HTTP://example.com/")?;
    /// let scheme = uri.scheme().unwrap();
    /// assert!(scheme.eq_lowercase("http"));
    /// // Always return `false` if there's any uppercase letter in the given string.
    /// assert!(!scheme.eq_lowercase("hTTp"));
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn eq_lowercase(&self, other: &str) -> bool {
        let (a, b) = (self.0.as_bytes(), other.as_bytes());
        // NOTE: Using iterators results in poor codegen here.
        if a.len() != b.len() {
            false
        } else {
            for i in 0..a.len() {
                // The only characters allowed in a scheme are alphabets, digits, "+", "-" and ".",
                // the ASCII codes of which allow us to simply set the sixth bit and compare.
                if a[i] | ASCII_CASE_MASK != b[i] {
                    return false;
                }
            }
            true
        }
    }
}

/// The [authority] component of URI reference.
///
/// [authority]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2
#[repr(transparent)]
pub struct Authority {
    uri: Uri,
}

impl<'i, 'o, T: Io<'i, 'o>> Authority {
    // Reinterprets a `Uri` reference as an `Authority` reference.
    #[inline]
    unsafe fn new(uri: &Uri) -> &Authority {
        // SAFETY: Transparency holds.
        // The caller must ensure that the authority is present and not modified.
        unsafe { &*(uri as *const Uri as *const Authority) }
    }

    // The authority data of the wrapped `Uri`.
    #[inline]
    fn data(&self) -> &AuthData {
        // SAFETY: When authority is present, `auth` must be `Some`.
        unsafe { self.uri.auth.as_ref().unwrap_unchecked() }
    }

    // Start index of the authority.
    #[inline]
    fn start(&self) -> u32 {
        self.data().start.get().get()
    }

    // End index of the authority: the start of the path, or the end of the
    // host when the port has been taken from mutable storage.
    #[inline]
    fn end(&self) -> u32 {
        if T::is_mut() && self.uri.tag.contains(Tag::PORT_TAKEN) {
            self.host_bounds().1
        } else {
            self.uri.path_bounds.0
        }
    }

    #[inline]
    fn host_bounds(&self) -> (u32, u32) {
        self.data().host_bounds
    }

    /// Returns the authority as a string slice.
    ///
    /// # Panics
    ///
    /// Panics if the host subcomponent is already taken.
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// let uri = Uri::parse("ftp://user@[fe80::abcd]:6780/")?;
    /// let authority = uri.authority().unwrap();
    /// assert_eq!(authority.as_str(), "user@[fe80::abcd]:6780");
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn as_str(&'i self) -> &'o str {
        if T::is_mut() && self.uri.tag.contains(Tag::HOST_TAKEN) {
            component_taken();
        }
        // SAFETY: The indexes are within bounds and the validation is done.
        unsafe { self.uri.slice(self.start(), self.end()) }
    }

    /// Returns the [userinfo] subcomponent.
///
/// [userinfo]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.1
///
/// # Examples
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri = Uri::parse("ftp://user@192.168.1.24/")?;
/// let authority = uri.authority().unwrap();
/// assert_eq!(authority.userinfo().unwrap(), "user");
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[inline]
pub fn userinfo(&'i self) -> Option<&'o EStr> {
    let (start, host_start) = (self.start(), self.host_bounds().0);
    // The userinfo is present iff the host does not start where the authority
    // starts; the byte right before the host is the "@" delimiter.
    // SAFETY: The indexes are within bounds and the validation is done.
    (start != host_start).then(|| unsafe { self.uri.eslice(start, host_start - 1) })
}

/// Returns the [host] subcomponent.
///
/// [host]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
///
/// # Panics
///
/// Panics if the host subcomponent is already taken.
pub fn host(&self) -> &Host {
    if T::is_mut() && self.uri.tag.contains(Tag::HOST_TAKEN) {
        component_taken();
    }
    // SAFETY: The host is not modified.
    unsafe { Host::new(self) }
}

/// Returns the [port] subcomponent.
///
/// [port]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.3
///
/// Note that in the generic URI syntax, the port may be empty, with leading zeros, or very large.
/// It is up to you to decide whether to deny such a port, fallback to the scheme's default if it
/// is empty, ignore the leading zeros, or use a different addressing mechanism that allows a large port.
///
/// # Examples
///
/// ```
/// use fluent_uri::Uri;
///
/// let uri = Uri::parse("ssh://device.local:4673/")?;
/// let authority = uri.authority().unwrap();
/// assert_eq!(authority.port(), Some("4673"));
///
/// let uri = Uri::parse("ssh://device.local:/")?;
/// let authority = uri.authority().unwrap();
/// assert_eq!(authority.port(), Some(""));
///
/// let uri = Uri::parse("ssh://device.local/")?;
/// let authority = uri.authority().unwrap();
/// assert_eq!(authority.port(), None);
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[inline]
pub fn port(&'i self) -> Option<&'o str> {
    if T::is_mut() && self.uri.tag.contains(Tag::PORT_TAKEN) {
        return None;
    }
    let (host_end, end) = (self.host_bounds().1, self.uri.path_bounds.0);
    // The port is present iff something lies between the host and the path;
    // the byte right after the host is the ":" delimiter.
    // SAFETY: The indexes are within bounds and the validation is done.
    (host_end != end).then(|| unsafe { self.uri.slice(host_end + 1, end) })
}
}

/// The [host] subcomponent of authority.
///
/// [host]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
#[repr(transparent)]
pub struct Host {
    auth: Authority,
}

impl<'i, 'o, T: Io<'i, 'o>> Host {
    // Reinterprets an `Authority` reference as a `Host` reference.
    #[inline]
    unsafe fn new(auth: &Authority) -> &Host {
        // SAFETY: Transparency holds.
        // The caller must ensure that the host is not modified.
        unsafe { &*(auth as *const Authority as *const Host) }
    }

    // Start and end index of the host.
    #[inline]
    fn bounds(&self) -> (u32, u32) {
        self.auth.host_bounds()
    }

    // The untagged host data; check the `HOST_*` tag before reading a field.
    #[inline]
    fn raw_data(&self) -> &RawHostData {
        &self.auth.data().host_data
    }

    /// Returns the host as a string slice.
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// let uri = Uri::parse("ftp://user@[::1]/")?;
    /// let authority = uri.authority().unwrap();
    /// assert_eq!(authority.host().as_str(), "[::1]");
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn as_str(&'i self) -> &'o str {
        // SAFETY: The indexes are within bounds and the validation is done.
        unsafe { self.auth.uri.slice(self.bounds().0, self.bounds().1) }
    }

    /// Returns the structured host data.
#[inline]
pub fn data(&'i self) -> HostData<'o> {
    // Underscore-prefixed because no field is read when neither `std`,
    // `ipv_future` nor `rfc6874bis` is enabled.
    let _data = self.raw_data();
    let tag = self.auth.uri.tag;
    // SAFETY: We only access the union after checking the tag.
    unsafe {
        if tag.contains(Tag::HOST_REG_NAME) {
            // SAFETY: The validation is done.
            return HostData::RegName(EStr::new_unchecked(self.as_str().as_bytes()));
        } else if tag.contains(Tag::HOST_IPV4) {
            return HostData::Ipv4(
                #[cfg(feature = "std")]
                _data.ipv4_addr,
            );
        }

        // With `ipv_future` enabled, an IP literal that is not tagged as IPv6
        // is an IPvFuture address of the form "[v" ver "." addr "]".
        #[cfg(feature = "ipv_future")]
        if !tag.contains(Tag::HOST_IPV6) {
            let dot_i = _data.ipv_future_dot_i;
            let bounds = self.bounds();
            // SAFETY: The indexes are within bounds and the validation is done.
            return HostData::IpvFuture {
                ver: self.auth.uri.slice(bounds.0 + 2, dot_i),
                addr: self.auth.uri.slice(dot_i + 1, bounds.1 - 1),
            };
        }

        HostData::Ipv6 {
            #[cfg(feature = "std")]
            addr: _data.ipv6.addr,
            // The zone identifier runs up to the closing "]".
            // SAFETY: The indexes are within bounds and the validation is done.
            #[cfg(feature = "rfc6874bis")]
            zone_id: _data
                .ipv6
                .zone_id_start
                .map(|start| self.auth.uri.slice(start.get(), self.bounds().1 - 1)),
        }
    }
}
}

/// Structured host data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HostData<'a> {
    /// An IPv4 address.
    #[cfg_attr(not(feature = "std"), non_exhaustive)]
    Ipv4(#[cfg(feature = "std")] Ipv4Addr),
    /// An IPv6 address.
    #[cfg_attr(not(feature = "std"), non_exhaustive)]
    Ipv6 {
        /// The address.
        #[cfg(feature = "std")]
        addr: Ipv6Addr,
        /// An optional zone identifier.
        ///
        /// This is supported on **crate feature `rfc6874bis`** only.
        #[cfg(feature = "rfc6874bis")]
        zone_id: Option<&'a str>,
    },
    /// An IP address of future version.
    ///
    /// This is supported on **crate feature `ipv_future`** only.
    #[cfg(feature = "ipv_future")]
    IpvFuture {
        /// The version.
        ver: &'a str,
        /// The address.
        addr: &'a str,
    },
    /// A registered name.
    RegName(&'a EStr),
}

/// The [path] component of URI reference.
///
/// [path]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3
#[repr(transparent)]
pub struct Path {
    inner: EStr,
}

impl Path {
    // Reinterprets an `EStr` reference as a `Path` reference.
    #[inline]
    fn new(path: &EStr) -> &Path {
        // SAFETY: Transparency holds.
        unsafe { &*(path as *const EStr as *const Path) }
    }

    /// Yields the underlying [`EStr`].
    #[inline]
    pub fn as_estr(&self) -> &EStr {
        &self.inner
    }

    /// Returns the path as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        self.inner.as_str()
    }

    /// Returns `true` if the path is absolute, i.e., beginning with "/".
    #[inline]
    pub fn is_absolute(&self) -> bool {
        self.as_str().starts_with('/')
    }

    /// Returns `true` if the path is rootless, i.e., not beginning with "/".
    #[inline]
    pub fn is_rootless(&self) -> bool {
        !self.is_absolute()
    }

    /// Returns an iterator over the path [segments].
    ///
    /// [segments]: https://datatracker.ietf.org/doc/html/rfc3986/#section-3.3
    ///
    /// # Examples
    ///
    /// ```
    /// use fluent_uri::Uri;
    ///
    /// // An empty path has no segments.
    /// let uri = Uri::parse("")?;
    /// assert_eq!(uri.path().segments().next(), None);
    ///
    /// let uri = Uri::parse("a/b/c")?;
    /// assert!(uri.path().segments().eq(["a", "b", "c"]));
    ///
    /// // The empty string before a preceding "/" is not a segment.
    /// // However, segments can be empty in the other cases.
    /// let uri = Uri::parse("/path/to//dir/")?;
    /// assert!(uri.path().segments().eq(["path", "to", "", "dir", ""]));
    /// # Ok::<_, fluent_uri::ParseError>(())
    /// ```
    #[inline]
    pub fn segments(&self) -> Split<'_> {
        let mut path = self.inner.as_str();
        if self.is_absolute() {
            // SAFETY: Skipping "/" is fine.
            path = unsafe { path.get_unchecked(1..) };
        }
        // SAFETY: The validation is done.
        let path = unsafe { EStr::new_unchecked(path.as_bytes()) };

        let mut split = path.split('/');
        // An empty path must yield no segments at all, so the iterator is
        // marked as finished from the start.
        split.finished = self.as_str().is_empty();
        split
    }
}
fluent-uri-0.1.4/src/parser.rs000064400000000000000000000464160072674642500144140ustar 00000000000000
use crate::{
    enc::{imp::OCTET_TABLE_LO, table::*},
    internal::Pointer,
    AuthData, Data, RawHostData as HostData, Result, Tag, Uri,
};
use core::{cell::Cell, marker::PhantomData, num::NonZeroU32, str};

use super::{internal::Storage, Ipv6Data};

// NOTE(review): generic argument lists (`<...>`) appear to have been stripped
// from this copy of the source (e.g. the return type of `parse`, the bare
// `::new(ptr, len, cap)` call, and the `Option`/`Result` return types below).
// Restore them from upstream before compiling.

// Parses the `len` bytes at `ptr` as a URI reference and wraps the result in a
// `Uri`. The caller must pass a non-null pointer and the correct length and
// capacity for the chosen storage.
pub(crate) unsafe fn parse(ptr: *mut u8, len: u32, cap: u32) -> Result> {
    let mut parser = Parser {
        ptr,
        len,
        out: Data::INIT,
        pos: 0,
        mark: 0,
    };
    parser.parse_from_scheme()?;
    Ok(Uri {
        // SAFETY: The caller must ensure that the pointer is not null
        // and that the length and capacity are correct.
        ptr: unsafe { ::new(ptr, len, cap) },
        data: parser.out,
        _marker: PhantomData,
    })
}

/// Returns immediately with an error.
macro_rules! err {
    ($index:expr, $kind:ident) => {
        return Err(crate::ParseError {
            index: $index,
            kind: crate::ParseErrorKind::$kind,
        })
    };
}

/// URI parser.
///
/// The invariants hold that `mark <= pos <= len`,
/// where `pos` is non-decreasing and `bytes[..pos]` is valid UTF-8.
struct Parser {
    // Start of the input bytes.
    ptr: *const u8,
    // Length of the input (temporarily shortened while parsing a host).
    len: u32,
    // Component indexes collected so far.
    out: Data,
    // Current read position.
    pos: u32,
    // A position remembered by `mark()`.
    mark: u32,
}

enum PathKind {
    General,
    AbEmpty,
    ContinuedNoScheme,
}

enum Seg {
    // *1":" 1*4HEXDIG
    Normal(u16, bool),
    // "::"
    Ellipsis,
    // *1":" 1*4HEXDIG "."
    MaybeV4(bool),
    // ":"
    SingleColon,
}

impl Parser {
    // Returns `true` if there are unread bytes.
    fn has_remaining(&self) -> bool {
        self.pos < self.len
    }

    // Reads the byte at index `i` without a bounds check in release builds.
    unsafe fn get_unchecked(&self, i: u32) -> u8 {
        debug_assert!(i < self.len, "index out of bounds");
        // SAFETY: The caller must ensure that the index is within bounds.
        unsafe { *self.ptr.add(i as usize) }
    }

    // Reads the byte at index `i`, panicking if it is out of bounds.
    fn get(&self, i: u32) -> u8 {
        assert!(i < self.len, "index out of bounds");
        // SAFETY: We have checked that `i < len`.
        unsafe { self.get_unchecked(i) }
    }

    // Returns the byte `i` positions past `pos`, or `None` past the end.
    fn peek(&self, i: u32) -> Option {
        (self.pos + i < self.len).then(|| self.get(self.pos + i))
    }

    // Any call to this method must keep the invariants.
    fn skip(&mut self, n: u32) {
        // INVARIANT: `pos` is non-decreasing.
        self.pos += n;
        debug_assert!(self.pos <= self.len);
    }

    // Remembers the current position.
    fn mark(&mut self) {
        // INVARIANT: It holds that `mark <= pos`.
        self.mark = self.pos;
    }

    // Number of bytes read since the last `mark()`.
    fn marked_len(&self) -> u32 {
        self.pos - self.mark
    }

    // Advances `pos` over the longest run of bytes allowed by `table`.
    // Tables that allow percent-encoding are delegated to `scan_enc`.
    fn scan(&mut self, table: &Table) -> Result<()> {
        if table.allows_enc() {
            self.scan_enc(table, |_| ())
        } else {
            let mut i = self.pos;
            while i < self.len {
                if !table.allows(self.get(i)) {
                    break;
                }
                // INVARIANT: Since `i < len`, it holds that `i + 1 <= len`.
                i += 1;
            }
            // INVARIANT: `i` is non-decreasing and all bytes scanned are ASCII.
            self.pos = i;
            Ok(())
        }
    }

    // Like `scan`, but also accepts percent-encoded octets and calls `f` with
    // the table value of every plain (non-"%") byte accepted.
    // Fails with `InvalidOctet` at the index of a "%" that is not followed by
    // two hexadecimal digits.
    fn scan_enc(&mut self, table: &Table, mut f: impl FnMut(u8)) -> Result<()> {
        let mut i = self.pos;

        while i < self.len {
            let x = self.get(i);
            if x == b'%' {
                // An octet needs two more bytes after the "%".
                if i + 2 >= self.len {
                    err!(i, InvalidOctet);
                }
                // SAFETY: We have checked that `i + 2 < len`.
                // Overflow is impossible since `len` is no larger than `i32::MAX`.
                let (hi, lo) = unsafe { (self.get_unchecked(i + 1), self.get_unchecked(i + 2)) };

                if HEXDIG.get(hi) & HEXDIG.get(lo) == 0 {
                    err!(i, InvalidOctet);
                }
                // INVARIANT: Since `i + 2 < len`, it holds that `i + 3 <= len`.
                i += 3;
            } else {
                let v = table.get(x);
                // A zero table value means the byte is not allowed: stop here.
                if v == 0 {
                    break;
                }
                f(v);
                // INVARIANT: Since `i < len`, it holds that `i + 1 <= len`.
                i += 1;
            }
        }

        // INVARIANT: `i` is non-decreasing and all bytes scanned are ASCII.
        self.pos = i;
        Ok(())
    }

    // Returns `true` if any byte is read.
    fn read(&mut self, table: &Table) -> Result {
        let start = self.pos;
        self.scan(table)?;
        Ok(self.pos != start)
    }

    // Consumes `s` if the remaining input starts with it; returns whether it did.
    fn read_str(&mut self, s: &str) -> bool {
        assert!(s.len() <= i32::MAX as usize);
        let len = s.len() as u32;

        // SAFETY: We have checked that `pos + s.len() <= len`.
        // Overflow is impossible since both `len` and `s.len()` are no larger than `i32::MAX`.
        let res = self.pos + len <= self.len
            && (0..len)
                .all(|i| unsafe { self.get_unchecked(self.pos + i) } == s.as_bytes()[i as usize]);
        if res {
            // INVARIANT: The remaining bytes start with `s` so it's fine to skip `s.len()`.
            self.skip(len);
        }
        res
    }

    // Entry point: decides whether the input starts with a scheme, an
    // authority ("//") or a path, and continues parsing from there.
    fn parse_from_scheme(&mut self) -> Result<()> {
        // Mark initially set to 0.
        self.scan(SCHEME)?;

        if self.peek(0) == Some(b':') {
            // Scheme starts with a letter.
            if self.pos != 0 && self.get(0).is_ascii_alphabetic() {
                self.out.scheme_end = NonZeroU32::new(self.pos);
            } else {
                err!(0, UnexpectedChar);
            }

            // INVARIANT: Skipping ":" is fine.
            self.skip(1);
            return if self.read_str("//") {
                self.parse_from_authority()
            } else {
                self.parse_from_path(PathKind::General)
            };
        } else if self.marked_len() == 0 {
            // Nothing scanned.
            if self.read_str("//") {
                return self.parse_from_authority();
            }
        }
        // Scheme chars are valid for path.
        self.parse_from_path(PathKind::ContinuedNoScheme)
    }

    // Parses the authority (after "//") and then continues with the path.
    fn parse_from_authority(&mut self) -> Result<()> {
        let host;
        let start = self.pos;

        // This table contains userinfo, reg-name, ":", and port.
        // NOTE(review): presumably `shl(1)` moves the userinfo flag off the
        // lowest bit so that only ":" contributes to `v & 1` below; confirm
        // against the `Table` implementation.
        const TABLE: &Table = &USERINFO.shl(1).or(&Table::gen(b":"));

        // The number of colons scanned.
        let mut colon_cnt = 0;

        self.mark();
        self.scan_enc(TABLE, |v| {
            colon_cnt += (v & 1) as u32;
        })?;

        if self.peek(0) == Some(b'@') {
            // Userinfo present.
            // INVARIANT: Skipping "@" is fine.
            self.skip(1);

            self.mark();
            let data = self.read_host()?;
            host = (self.mark, self.pos, data);
            self.read_port();
        } else if self.marked_len() == 0 {
            // Nothing scanned. We're now at the start of an IP literal or the path.
            if let Some(data) = self.read_ip_literal()? {
                host = (self.mark, self.pos, data);
                self.read_port();
            } else {
                // Empty authority.
                self.out.tag = Tag::HOST_REG_NAME;
                host = (self.pos, self.pos, HostData { none: () });
            }
        } else {
            // The whole authority scanned. Try to parse the host and port.
            let host_end = match colon_cnt {
                // All host.
                0 => self.pos,
                // Host and port.
                1 => {
                    // Walk backwards over the port digits down to the colon.
                    let mut i = self.pos - 1;
                    loop {
                        // SAFETY: There must be a colon in the way.
                        let x = unsafe { self.get_unchecked(i) };
                        if !x.is_ascii_digit() {
                            if x == b':' {
                                break;
                            } else {
                                err!(i, UnexpectedChar);
                            }
                        }
                        i -= 1;
                    }
                    i
                }
                // Multiple colons.
                _ => {
                    // Report the first colon as the unexpected character.
                    let mut i = self.mark;
                    loop {
                        // SAFETY: There must be a colon in the way.
                        let x = unsafe { self.get_unchecked(i) };
                        if x == b':' {
                            err!(i, UnexpectedChar)
                        }
                        i += 1;
                    }
                }
            };

            // Save the state.
            let state = (self.len, self.pos);

            // The entire host is already scanned so the index is within bounds.
            self.len = host_end;
            // INVARIANT: It holds that `mark <= pos <= buf.len()`.
            // Here `pos` may decrease but will be restored later.
            self.pos = self.mark;

            // The host is an IPv4 address only if the scan consumed all of it;
            // otherwise it is a registered name.
            let v4 = self.scan_v4();
            let (tag, data) = match v4 {
                Some(_addr) if !self.has_remaining() => (
                    Tag::HOST_IPV4,
                    HostData {
                        #[cfg(feature = "std")]
                        ipv4_addr: _addr.into(),
                        #[cfg(not(feature = "std"))]
                        none: (),
                    },
                ),
                _ => (Tag::HOST_REG_NAME, HostData { none: () }),
            };

            self.out.tag = tag;
            host = (self.mark, host_end, data);

            // Restore the state.
            // INVARIANT: Restoring the state would not affect the invariants.
            (self.len, self.pos) = state;
        }

        self.out.auth = Some(AuthData {
            // SAFETY: Authority won't start at index 0.
            start: Cell::new(unsafe { NonZeroU32::new_unchecked(start) }),
            host_bounds: (host.0, host.1),
            host_data: host.2,
        });
        self.parse_from_path(PathKind::AbEmpty)
    }

    // The marked length must be zero when this method is called.
    fn read_host(&mut self) -> Result {
        match self.read_ip_literal()? {
            Some(host) => Ok(host),
            None => self.read_v4_or_reg_name(),
        }
    }

    // The marked length must be zero when this method is called.
fn read_ip_literal(&mut self) -> Result> { if !self.read_str("[") { return Ok(None); } let host = if let Some(_addr) = self.scan_v6() { self.out.tag = Tag::HOST_IPV6; HostData { ipv6: Ipv6Data { #[cfg(feature = "std")] addr: _addr.into(), #[cfg(feature = "rfc6874bis")] zone_id_start: self.read_zone_id()?, }, } } else { #[cfg(feature = "ipv_future")] if self.marked_len() == 1 { self.read_ipv_future()? } else { err!(self.mark, InvalidIpLiteral); } #[cfg(not(feature = "ipv_future"))] err!(self.mark, InvalidIpLiteral); }; if !self.read_str("]") { err!(self.mark, InvalidIpLiteral); } Ok(Some(host)) } fn scan_v6(&mut self) -> Option<[u16; 8]> { let mut segs = [0; 8]; let mut ellipsis_i = 8; let mut i = 0; while i < 8 { match self.scan_v6_segment() { Some(Seg::Normal(seg, colon)) => { if colon == (i == 0 || i == ellipsis_i) { // Preceding colon, triple colons, or no colon. return None; } segs[i] = seg; i += 1; } Some(Seg::Ellipsis) => { if ellipsis_i != 8 { // Multiple ellipses. return None; } ellipsis_i = i; } Some(Seg::MaybeV4(colon)) => { if i > 6 || colon == (i == ellipsis_i) { // Not enough space, triple colons, or no colon. return None; } let octets = self.scan_v4()?.to_be_bytes(); segs[i] = u16::from_be_bytes([octets[0], octets[1]]); segs[i + 1] = u16::from_be_bytes([octets[2], octets[3]]); i += 2; break; } Some(Seg::SingleColon) => return None, None => break, } } if ellipsis_i == 8 { // No ellipsis. if i != 8 { // Too short. return None; } } else if i == 8 { // Eliding nothing. return None; } else { // Shift the segments after the ellipsis to the right. 
for j in (ellipsis_i..i).rev() { segs[8 - (i - j)] = segs[j]; segs[j] = 0; } } Some(segs) } fn scan_v6_segment(&mut self) -> Option { let colon = self.read_str(":"); if !self.has_remaining() { return if colon { Some(Seg::SingleColon) } else { None }; } let first = self.peek(0).unwrap(); let mut x = match OCTET_TABLE_LO[first as usize] { v if v < 128 => v as u16, _ => { return if colon { if first == b':' { // INVARIANT: Skipping ":" is fine. self.skip(1); Some(Seg::Ellipsis) } else { Some(Seg::SingleColon) } } else { None }; } }; let mut i = 1; while i < 4 { if let Some(b) = self.peek(i) { match OCTET_TABLE_LO[b as usize] { v if v < 128 => { x = (x << 4) | v as u16; i += 1; continue; } _ if b == b'.' => return Some(Seg::MaybeV4(colon)), _ => break, } } else { // INVARIANT: Skipping `i` hexadecimal digits is fine. self.skip(i); return None; } } // INVARIANT: Skipping `i` hexadecimal digits is fine. self.skip(i); Some(Seg::Normal(x, colon)) } #[cfg(feature = "rfc6874bis")] fn read_zone_id(&mut self) -> Result> { if self.read_str("%") { let start = self.pos; if !self.read(ZONE_ID)? { err!(self.mark, InvalidIpLiteral); } Ok(NonZeroU32::new(start)) } else { Ok(None) } } // The marked length must be zero when this method is called. fn read_v4_or_reg_name(&mut self) -> Result { let v4 = self.scan_v4(); let v4_end = self.pos; self.scan(REG_NAME)?; let (tag, data) = match v4 { Some(_addr) if self.pos == v4_end => ( Tag::HOST_IPV4, HostData { #[cfg(feature = "std")] ipv4_addr: _addr.into(), #[cfg(not(feature = "std"))] none: (), }, ), _ => (Tag::HOST_REG_NAME, HostData { none: () }), }; self.out.tag = tag; Ok(data) } fn scan_v4(&mut self) -> Option { let mut addr = self.scan_v4_octet()? << 24; for i in (0..3).rev() { if !self.read_str(".") { return None; } addr |= self.scan_v4_octet()? << (i * 8); } Some(addr) } fn scan_v4_octet(&mut self) -> Option { let mut res = self.peek_digit(0)?; if res == 0 { // INVARIANT: Skipping "0" is fine. 
self.skip(1); return Some(0); } for i in 1..3 { match self.peek_digit(i) { Some(x) => res = res * 10 + x, None => { // INVARIANT: Skipping `i` digits is fine. self.skip(i); return Some(res); } } } // INVARIANT: Skipping 3 digits is fine. self.skip(3); if res <= u8::MAX as u32 { Some(res) } else { None } } fn peek_digit(&self, i: u32) -> Option { self.peek(i).and_then(|x| (x as char).to_digit(10)) } fn read_port(&mut self) { self.read_str(":").then(|| { let mut i = 0; while self.peek_digit(i).is_some() { i += 1; } // INVARIANT: Skipping `i` digits is fine. self.skip(i); }); } #[cfg(feature = "ipv_future")] fn read_ipv_future(&mut self) -> Result { if matches!(self.peek(0), Some(b'v' | b'V')) { // INVARIANT: Skipping "v" or "V" is fine. self.skip(1); let ver_read = self.read(HEXDIG)?; let dot_i = self.pos; if ver_read && self.read_str(".") && self.read(IPV_FUTURE)? { // Tag is empty for IPvFuture. return Ok(HostData { ipv_future_dot_i: dot_i, }); } } err!(self.mark, InvalidIpLiteral); } fn parse_from_path(&mut self, kind: PathKind) -> Result<()> { self.out.path_bounds = match kind { PathKind::General => { let start = self.pos; self.read(PATH)?; (start, self.pos) } PathKind::AbEmpty => { let start = self.pos; // Either empty or starting with "/". if self.read(PATH)? && self.get(start) != b'/' { err!(start, UnexpectedChar); } (start, self.pos) } PathKind::ContinuedNoScheme => { self.scan(SEGMENT_NC)?; if self.peek(0) == Some(b':') { // In a relative reference, the first path // segment cannot contain a colon character. 
err!(self.pos, UnexpectedChar); } self.scan(PATH)?; (self.mark, self.pos) } }; if self.read_str("?") { self.read(QUERY_FRAGMENT)?; self.out.query_end = NonZeroU32::new(self.pos); } if self.read_str("#") { self.out.fragment_start = NonZeroU32::new(self.pos); self.read(QUERY_FRAGMENT)?; } if self.has_remaining() { err!(self.pos, UnexpectedChar); } Ok(()) } } fluent-uri-0.1.4/src/view.rs000064400000000000000000000202250072674642500140600ustar 00000000000000use core::{num::NonZeroU32, ops::Deref}; use super::*; use crate::enc::SplitView; mod internal { use super::*; pub trait Lens { type Target: ?Sized; /// Views the target of a `View` as `&Self`. fn view(target: &Self::Target) -> &Self; } impl Lens for EStr { type Target = [u8]; #[inline] fn view(bytes: &[u8]) -> &Self { // SAFETY: `Self::new` ensures that the bytes are properly encoded. unsafe { EStr::new_unchecked(bytes) } } } impl Lens for str { type Target = [u8]; #[inline] fn view(bytes: &[u8]) -> &Self { // SAFETY: `Self::new` ensures that the bytes are valid UTF-8. unsafe { str::from_utf8_unchecked(bytes) } } } impl Lens for Scheme { type Target = [u8]; #[inline] fn view(bytes: &[u8]) -> &Self { Scheme::new(str::view(bytes)) } } impl<'a> Lens for Authority<&'a mut [u8]> { type Target = Uri<&'a mut [u8]>; #[inline] fn view(uri: &Self::Target) -> &Self { // SAFETY: `Self::new` ensures that the authority is present and not modified. unsafe { Authority::new(uri) } } } impl<'a> Lens for Host<&'a mut [u8]> { type Target = Uri<&'a mut [u8]>; #[inline] fn view(uri: &Self::Target) -> &Self { // SAFETY: `Self::new` ensures that the host is not modified. unsafe { Host::new(Authority::view(uri)) } } } impl Lens for Path { type Target = [u8]; #[inline] fn view(bytes: &[u8]) -> &Self { Path::new(EStr::view(bytes)) } } } pub(crate) use self::internal::Lens; /// A smart pointer that allows viewing a mutable byte slice as `&T`. 
/// /// This struct was introduced considering the fact that a bare `&mut EStr` wouldn't /// do for in-place decoding because such decoding breaks the invariants of [`EStr`]. /// /// Six types of *lenses* may be used as `T`: [`EStr`], [`prim@str`], [`Scheme`], /// [`Authority`], [`Host`], and [`Path`]. pub struct View<'a, T: ?Sized + Lens>(&'a mut T::Target, PhantomData<&'a T>); impl<'a, T: ?Sized + Lens> Deref for View<'a, T> { type Target = T; #[inline] fn deref(&self) -> &T { T::view(self.0) } } impl<'a, T: ?Sized + Lens> AsRef for View<'a, T> { #[inline] fn as_ref(&self) -> &T { self } } impl<'a, T: ?Sized + Lens> View<'a, T> { /// Creates a `View` from its target assuming validity. /// /// # Safety /// /// The target must be valid as `T`. #[inline] pub(crate) unsafe fn new(target: &'a mut T::Target) -> Self { View(target, PhantomData) } } /// These methods are only available for lenses [`EStr`], [`prim@str`], [`Scheme`], and [`Path`]. impl<'a, T: ?Sized + Lens> View<'a, T> { /// Consumes this `View` and yields the underlying `&T`. #[inline] pub fn into_ref(self) -> &'a T { T::view(self.0) } /// Consumes this `View` and yields the underlying mutable byte slice. #[inline] pub fn into_bytes(self) -> &'a mut [u8] { self.0 } } /// A [`Scheme`] view into a mutable byte slice that allows lowercasing in-place. impl<'a> View<'a, Scheme> { /// Converts the scheme to lower case in-place. /// /// This method is slightly faster than [`slice::make_ascii_lowercase`]. /// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let mut bytes = *b"HTTP://example.com/"; /// let mut uri = Uri::parse_mut(&mut bytes)?; /// /// let mut scheme = uri.take_scheme().unwrap(); /// scheme.make_lowercase(); /// assert_eq!(scheme.as_str(), "http"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[inline] pub fn make_lowercase(&mut self) { // SAFETY: Setting the sixth bit keeps UTF-8. 
for byte in self.0.iter_mut() { *byte |= ASCII_CASE_MASK; } } } /// An [`Authority`] view into a mutable byte slice. impl<'i, 'a> View<'i, Authority<&'a mut [u8]>> { /// Consumes this `View` and yields the underlying `View`. /// /// The userinfo or port subcomponent is truncated if it is already taken. /// /// # Panics /// /// Panics if the host subcomponent is already taken. #[inline] pub fn into_str_view(self) -> View<'a, str> { if self.0.tag.contains(Tag::HOST_TAKEN) { component_taken(); } // SAFETY: The indexes are within bounds and the validation is done. unsafe { self.0.view(self.start(), self.end()) } } /// Takes a view of the userinfo subcomponent, leaving a `None` in its place. #[inline] pub fn take_userinfo(&mut self) -> Option> { let (start, host_start) = (self.start(), self.host_bounds().0); (start != host_start).then(|| unsafe { // SAFETY: Host won't start at index 0. self.data().start.set(NonZeroU32::new_unchecked(host_start)); // SAFETY: The indexes are within bounds and the validation is done. self.0.view(start, host_start - 1) }) } /// Takes a view of the host subcomponent. /// /// # Panics /// /// Panics if the host subcomponent is already taken. // NOTE: The lifetime on `View` can't be `'i` because if it was, // `view.0` would alias with `self.0`. #[inline] pub fn take_host(&mut self) -> View<'_, Host<&'a mut [u8]>> { if self.0.tag.contains(Tag::HOST_TAKEN) { component_taken(); } self.0.tag |= Tag::HOST_TAKEN; // SAFETY: The host is not modified at this time. unsafe { View::new(self.0) } } /// Takes a view of the port subcomponent, leaving a `None` in its place. #[inline] pub fn take_port(&mut self) -> Option> { if self.0.tag.contains(Tag::PORT_TAKEN) { return None; } self.0.tag |= Tag::PORT_TAKEN; let (host_end, end) = (self.host_bounds().1, self.0.path_bounds.0); // SAFETY: The indexes are within bounds and the validation is done. 
(host_end != end).then(|| unsafe { self.0.view(host_end + 1, end) }) } } /// A [`Host`] view into a mutable byte slice. impl<'i, 'a> View<'i, Host<&'a mut [u8]>> { /// Consumes this `View` and yields the underlying `View`. #[inline] pub fn into_str_view(self) -> View<'a, str> { // SAFETY: The indexes are within bounds and the validation is done. unsafe { self.0.view(self.bounds().0, self.bounds().1) } } /// Consumes this `View` and yields the underlying `View`, /// assuming that the host is a registered name. /// /// # Panics /// /// Panics if the host is not a registered name. #[inline] pub fn unwrap_reg_name(self) -> View<'a, EStr> { assert!(self.0.tag.contains(Tag::HOST_REG_NAME)); // SAFETY: The indexes are within bounds and the validation is done. unsafe { self.0.view(self.bounds().0, self.bounds().1) } } } /// A [`Path`] view into a mutable byte slice. impl<'a> View<'a, Path> { /// Consumes this `View` and yields the underlying `View`. #[inline] pub fn into_estr_view(self) -> View<'a, EStr> { View(self.0, PhantomData) } /// Returns an iterator over the views of path segments. #[inline] pub fn segments_view(self) -> SplitView<'a> { let absolute = self.is_absolute(); let mut path = self.into_bytes(); let empty = path.is_empty(); if absolute { // SAFETY: Skipping "/" is fine. path = unsafe { path.get_unchecked_mut(1..) }; } // SAFETY: The validation is done. 
let path = unsafe { View::::new(path) }; let mut split = path.split_view('/'); split.finished = empty; split } } fluent-uri-0.1.4/tests/parse.rs000064400000000000000000000415560072674642500146050ustar 00000000000000use std::net::{Ipv4Addr, Ipv6Addr}; use fluent_uri::{enc::EStr, ParseErrorKind::*, *}; #[test] fn parse_absolute() { let u = Uri::parse("file:///etc/hosts").unwrap(); assert_eq!(u.as_str(), "file:///etc/hosts"); assert_eq!(u.scheme().unwrap().as_str(), "file"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), ""); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), ""); assert_eq!(a.host().data(), HostData::RegName(EStr::new(""))); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), "/etc/hosts"); assert!(u.path().segments().eq(["etc", "hosts"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "ftp"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "ftp.is.co.za"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "ftp.is.co.za"); assert_eq!( a.host().data(), HostData::RegName(EStr::new("ftp.is.co.za")) ); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), "/rfc/rfc1808.txt"); assert!(u.path().segments().eq(["rfc", "rfc1808.txt"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("http://www.ietf.org/rfc/rfc2396.txt").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "www.ietf.org"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "www.ietf.org"); assert_eq!( a.host().data(), HostData::RegName(EStr::new("www.ietf.org")) ); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), "/rfc/rfc2396.txt"); assert!(u.path().segments().eq(["rfc", "rfc2396.txt"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = 
Uri::parse("ldap://[2001:db8::7]/c=GB?objectClass?one").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "ldap"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "[2001:db8::7]"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "[2001:db8::7]"); assert_eq!( a.host().data(), HostData::Ipv6 { addr: Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 0x7), #[cfg(feature = "rfc6874bis")] zone_id: None } ); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), "/c=GB"); assert!(u.path().segments().eq(["c=GB"])); assert_eq!(u.query(), Some(EStr::new("objectClass?one"))); assert_eq!(u.fragment(), None); let u = Uri::parse("mailto:John.Doe@example.com").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "mailto"); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "John.Doe@example.com"); assert!(u.path().segments().eq(["John.Doe@example.com"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("news:comp.infosystems.www.servers.unix").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "news"); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "comp.infosystems.www.servers.unix"); assert!(u .path() .segments() .eq(["comp.infosystems.www.servers.unix"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("tel:+1-816-555-1212").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "tel"); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "+1-816-555-1212"); assert!(u.path().segments().eq(["+1-816-555-1212"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("telnet://192.0.2.16:80/").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "telnet"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "192.0.2.16:80"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "192.0.2.16"); assert_eq!( a.host().data(), HostData::Ipv4(Ipv4Addr::new(192, 0, 2, 16)) ); assert_eq!(a.port(), Some("80")); 
assert_eq!(u.path().as_str(), "/"); assert!(u.path().segments().eq([""])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "urn"); assert!(u.authority().is_none()); assert_eq!( u.path().as_str(), "oasis:names:specification:docbook:dtd:xml:4.1.2" ); assert!(u .path() .segments() .eq(["oasis:names:specification:docbook:dtd:xml:4.1.2"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("foo://example.com:8042/over/there?name=ferret#nose").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "foo"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "example.com:8042"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "example.com"); assert_eq!(a.host().data(), HostData::RegName(EStr::new("example.com"))); assert_eq!(a.port(), Some("8042")); assert_eq!(u.path().as_str(), "/over/there"); assert!(u.path().segments().eq(["over", "there"])); assert_eq!(u.query(), Some(EStr::new("name=ferret"))); assert_eq!(u.fragment(), Some(EStr::new("nose"))); let u = Uri::parse("ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "ftp"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "cnn.example.com&story=breaking_news@10.0.0.1"); assert_eq!( a.userinfo(), Some(EStr::new("cnn.example.com&story=breaking_news")) ); assert_eq!(a.host().as_str(), "10.0.0.1"); assert_eq!(a.host().data(), HostData::Ipv4(Ipv4Addr::new(10, 0, 0, 1))); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), "/top_story.htm"); assert!(u.path().segments().eq(["top_story.htm"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); #[cfg(feature = "ipv_future")] { let u = Uri::parse("http://[vFe.foo.bar]").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "[vFe.foo.bar]"); 
assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "[vFe.foo.bar]"); assert_eq!( a.host().data(), HostData::IpvFuture { ver: "Fe", addr: "foo.bar", } ); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); } #[cfg(feature = "rfc6874bis")] { let u = Uri::parse("http://[fe80::520f:f5ff:fe51:cf0%17]").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "[fe80::520f:f5ff:fe51:cf0%17]"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "[fe80::520f:f5ff:fe51:cf0%17]"); assert_eq!( a.host().data(), HostData::Ipv6 { addr: Ipv6Addr::new(0xfe80, 0, 0, 0, 0x520f, 0xf5ff, 0xfe51, 0xcf0), zone_id: Some("17"), } ); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); } let u = Uri::parse("http://127.0.0.1:/").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "127.0.0.1"); assert_eq!(a.host().data(), HostData::Ipv4(Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some("")); // TODO: `u16` port parsing. 
assert_eq!(u.path().as_str(), "/"); assert!(u.path().segments().eq([""])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("http://127.0.0.1:8080/").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:8080"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "127.0.0.1"); assert_eq!(a.host().data(), HostData::Ipv4(Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some("8080")); assert_eq!(u.path().as_str(), "/"); assert!(u.path().segments().eq([""])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("http://127.0.0.1:80808/").unwrap(); assert_eq!(u.scheme().unwrap().as_str(), "http"); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:80808"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "127.0.0.1"); assert_eq!(a.host().data(), HostData::Ipv4(Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some("80808")); // TODO: `u16` port parsing. 
assert_eq!(u.path().as_str(), "/"); assert!(u.path().segments().eq([""])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); } #[test] fn parse_relative() { let u = Uri::parse("").unwrap(); assert!(u.scheme().is_none()); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("foo.txt").unwrap(); assert!(u.scheme().is_none()); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "foo.txt"); assert!(u.path().segments().eq(["foo.txt"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse(".").unwrap(); assert!(u.scheme().is_none()); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "."); assert!(u.path().segments().eq(["."])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("./this:that").unwrap(); assert!(u.scheme().is_none()); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), "./this:that"); assert!(u.path().segments().eq([".", "this:that"])); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("//example.com").unwrap(); assert!(u.scheme().is_none()); let a = u.authority().unwrap(); assert_eq!(a.as_str(), "example.com"); assert_eq!(a.userinfo(), None); assert_eq!(a.host().as_str(), "example.com"); assert_eq!(a.host().data(), HostData::RegName(EStr::new("example.com"))); assert_eq!(a.port(), None); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), None); assert_eq!(u.fragment(), None); let u = Uri::parse("?query").unwrap(); assert!(u.scheme().is_none()); assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), Some(EStr::new("query"))); assert_eq!(u.fragment(), None); let u = Uri::parse("#fragment").unwrap(); assert!(u.scheme().is_none()); 
assert!(u.authority().is_none()); assert_eq!(u.path().as_str(), ""); assert!(u.path().segments().eq(None::<&str>)); assert_eq!(u.query(), None); assert_eq!(u.fragment(), Some(EStr::new("fragment"))); } #[test] fn parse_error() { // Empty scheme let e = Uri::parse(":hello").unwrap_err(); assert_eq!(e.index(), 0); assert_eq!(e.kind(), UnexpectedChar); // Scheme starts with non-letter let e = Uri::parse("3ttp://a.com").unwrap_err(); assert_eq!(e.index(), 0); assert_eq!(e.kind(), UnexpectedChar); // After rewriting the parser, the following two cases are interpreted as // containing colon in the first path segment of a relative reference. // Unexpected char in scheme let e = Uri::parse("exam=ple:foo").unwrap_err(); assert_eq!(e.index(), 8); assert_eq!(e.kind(), UnexpectedChar); let e = Uri::parse("(:").unwrap_err(); assert_eq!(e.index(), 1); assert_eq!(e.kind(), UnexpectedChar); // Percent-encoded scheme let e = Uri::parse("a%20:foo").unwrap_err(); assert_eq!(e.index(), 4); assert_eq!(e.kind(), UnexpectedChar); // Unexpected char in path let e = Uri::parse("foo\\bar").unwrap_err(); assert_eq!(e.index(), 3); assert_eq!(e.kind(), UnexpectedChar); // Non-hexadecimal percent-encoded octet let e = Uri::parse("foo%xxd").unwrap_err(); assert_eq!(e.index(), 3); assert_eq!(e.kind(), InvalidOctet); // Incomplete percent-encoded octet let e = Uri::parse("text%a").unwrap_err(); assert_eq!(e.index(), 4); assert_eq!(e.kind(), InvalidOctet); // A single percent let e = Uri::parse("%").unwrap_err(); assert_eq!(e.index(), 0); assert_eq!(e.kind(), InvalidOctet); // Non-decimal port // In this case the port is validated in reverse. 
let e = Uri::parse("http://example.com:80ab").unwrap_err(); assert_eq!(e.index(), 22); assert_eq!(e.kind(), UnexpectedChar); let e = Uri::parse("http://user@example.com:80ab").unwrap_err(); assert_eq!(e.index(), 26); assert_eq!(e.kind(), UnexpectedChar); // Multiple colons in authority let e = Uri::parse("http://user:pass:example.com/").unwrap_err(); assert_eq!(e.index(), 11); assert_eq!(e.kind(), UnexpectedChar); // Unclosed bracket let e = Uri::parse("https://[::1/").unwrap_err(); assert_eq!(e.index(), 8); assert_eq!(e.kind(), InvalidIpLiteral); // Not port after IP literal let e = Uri::parse("https://[::1]wrong").unwrap_err(); assert_eq!(e.index(), 13); assert_eq!(e.kind(), UnexpectedChar); // IP literal too short let e = Uri::parse("http://[:]").unwrap_err(); assert_eq!(e.index(), 7); assert_eq!(e.kind(), InvalidIpLiteral); let e = Uri::parse("http://[]").unwrap_err(); assert_eq!(e.index(), 7); assert_eq!(e.kind(), InvalidIpLiteral); // Non-hexadecimal version in IPvFuture let e = Uri::parse("http://[vG.addr]").unwrap_err(); assert_eq!(e.index(), 7); assert_eq!(e.kind(), InvalidIpLiteral); // Empty version in IPvFuture let e = Uri::parse("http://[v.addr]").unwrap_err(); assert_eq!(e.index(), 7); assert_eq!(e.kind(), InvalidIpLiteral); // Empty address in IPvFuture let e = Uri::parse("ftp://[vF.]").unwrap_err(); assert_eq!(e.index(), 6); assert_eq!(e.kind(), InvalidIpLiteral); // Percent-encoded address in IPvFuture let e = Uri::parse("ftp://[vF.%20]").unwrap_err(); assert_eq!(e.index(), 6); assert_eq!(e.kind(), InvalidIpLiteral); // Empty Zone ID #[cfg(feature = "rfc6874bis")] { let e = Uri::parse("ftp://[fe80::abcd%]").unwrap_err(); assert_eq!(e.index(), 6); assert_eq!(e.kind(), InvalidIpLiteral); } // Zone ID when the feature isn't enabled. 
#[cfg(not(feature = "rfc6874bis"))] { let e = Uri::parse("ftp://[fe80::abcd%eth0]").unwrap_err(); assert_eq!(e.index(), 6); assert_eq!(e.kind(), InvalidIpLiteral); } // Invalid IPv6 address let e = Uri::parse("example://[44:55::66::77]").unwrap_err(); assert_eq!(e.index(), 10); assert_eq!(e.kind(), InvalidIpLiteral); // IPvFuture when the feature isn't enabled. #[cfg(not(feature = "ipv_future"))] { let e = Uri::parse("http://[vFe.foo.bar]").unwrap_err(); assert_eq!(e.index(), 7); assert_eq!(e.kind(), InvalidIpLiteral); } } #[test] fn strict_ip_addr() { let u = Uri::parse("//127.0.0.001").unwrap(); let a = u.authority().unwrap(); assert!(matches!(a.host().data(), HostData::RegName(_))); let u = Uri::parse("//127.1").unwrap(); let a = u.authority().unwrap(); assert!(matches!(a.host().data(), HostData::RegName(_))); let u = Uri::parse("//127.00.00.1").unwrap(); let a = u.authority().unwrap(); assert!(matches!(a.host().data(), HostData::RegName(_))); assert!(Uri::parse("//[::1.1.1.1]").is_ok()); assert!(Uri::parse("//[::ffff:1.1.1.1]").is_ok()); assert!(Uri::parse("//[0000:0000:0000:0000:0000:0000:255.255.255.255]").is_ok()); assert_eq!( Uri::parse("//[::01.1.1.1]").unwrap_err().kind(), InvalidIpLiteral ); assert_eq!( Uri::parse("//[::00.1.1.1]").unwrap_err().kind(), InvalidIpLiteral ); } fluent-uri-0.1.4/tests/parse_ip.rs000064400000000000000000000110100072674642500152530ustar 00000000000000use std::net::{Ipv4Addr, Ipv6Addr}; use fluent_uri::{HostData, Uri}; fn parse_v4(s: &str) -> Option { let s = format!("//{s}"); match Uri::parse(&s).ok()?.authority()?.host().data() { HostData::Ipv4(addr) => Some(addr), _ => None, } } fn parse_v6(s: &str) -> Option { let s = format!("//[{s}]"); match Uri::parse(&s).ok()?.authority()?.host().data() { HostData::Ipv6 { addr, .. 
} => Some(addr), _ => None, } } #[test] fn test_parse_v4() { assert_eq!(Some(Ipv4Addr::new(127, 0, 0, 1)), parse_v4("127.0.0.1")); assert_eq!( Some(Ipv4Addr::new(255, 255, 255, 255)), parse_v4("255.255.255.255") ); assert_eq!(Some(Ipv4Addr::new(0, 0, 0, 0)), parse_v4("0.0.0.0")); // out of range assert!(parse_v4("256.0.0.1").is_none()); // too short assert!(parse_v4("255.0.0").is_none()); // too long assert!(parse_v4("255.0.0.1.2").is_none()); // no number between dots assert!(parse_v4("255.0..1").is_none()); // octal assert!(parse_v4("255.0.0.01").is_none()); // octal zero assert!(parse_v4("255.0.0.00").is_none()); assert!(parse_v4("255.0.00.0").is_none()); // preceding dot assert!(parse_v4(".0.0.0.0").is_none()); // trailing dot assert!(parse_v4("0.0.0.0.").is_none()); } #[test] fn test_parse_v6() { assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), parse_v6("0:0:0:0:0:0:0:0") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 5, 6, 7, 8)), parse_v6("1:02:003:0004:0005:006:07:8") ); assert_eq!(Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), parse_v6("::1")); assert_eq!(Some(Ipv6Addr::new(1, 0, 0, 0, 0, 0, 0, 0)), parse_v6("1::")); assert_eq!(Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), parse_v6("::")); assert_eq!( Some(Ipv6Addr::new(0x2a02, 0x6b8, 0, 0, 0, 0, 0x11, 0x11)), parse_v6("2a02:6b8::11:11") ); assert_eq!( Some(Ipv6Addr::new(0, 2, 3, 4, 5, 6, 7, 8)), parse_v6("::2:3:4:5:6:7:8") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 0, 6, 7, 8)), parse_v6("1:2:3:4::6:7:8") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 5, 6, 7, 0)), parse_v6("1:2:3:4:5:6:7::") ); // only a colon assert!(parse_v6(":").is_none()); // too long group assert!(parse_v6("::00000").is_none()); // too short assert!(parse_v6("1:2:3:4:5:6:7").is_none()); // too long assert!(parse_v6("1:2:3:4:5:6:7:8:9").is_none()); // triple colon assert!(parse_v6("1:2:::6:7:8").is_none()); assert!(parse_v6("1:2:::").is_none()); assert!(parse_v6(":::6:7:8").is_none()); assert!(parse_v6(":::").is_none()); // two 
double colons assert!(parse_v6("1:2::6::8").is_none()); assert!(parse_v6("::6::8").is_none()); assert!(parse_v6("1:2::6::").is_none()); assert!(parse_v6("::2:6::").is_none()); // `::` indicating zero groups of zeros assert!(parse_v6("::1:2:3:4:5:6:7:8").is_none()); assert!(parse_v6("1:2:3:4::5:6:7:8").is_none()); assert!(parse_v6("1:2:3:4:5:6:7:8::").is_none()); // preceding colon assert!(parse_v6(":1:2:3:4:5:6:7:8").is_none()); assert!(parse_v6(":1::1").is_none()); assert!(parse_v6(":1").is_none()); // trailing colon assert!(parse_v6("1:2:3:4:5:6:7:8:").is_none()); assert!(parse_v6("1::1:").is_none()); assert!(parse_v6("1:").is_none()); } #[test] fn test_parse_v4_in_v6() { assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 49152, 545)), parse_v6("::192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0xFFFF, 49152, 545)), parse_v6("::FFFF:192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new(0x64, 0xff9b, 0, 0, 0, 0, 49152, 545)), parse_v6("64:ff9b::192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new( 0x2001, 0xdb8, 0x122, 0xc000, 0x2, 0x2100, 49152, 545 )), parse_v6("2001:db8:122:c000:2:2100:192.0.2.33") ); // colon after v4 assert!(parse_v6("::127.0.0.1:").is_none()); // not enough groups assert!(parse_v6("1:2:3:4:5:127.0.0.1").is_none()); // too many groups assert!(parse_v6("1:2:3:4:5:6:7:127.0.0.1").is_none()); // triple colons before v4 assert!(parse_v6(":::4.4.4.4").is_none()); // no colon before v4 assert!(parse_v6("::ffff4.4.4.4").is_none()); }