fluent-uri-0.4.1/.cargo_vcs_info.json0000644000000001360000000000100131300ustar { "git": { "sha1": "d9a6a20614f34b00476837eb8904fb01ca3e54df" }, "path_in_vcs": "" }fluent-uri-0.4.1/.github/workflows/ci.yml000064400000000000000000000014451046102023000164370ustar 00000000000000on: push: branches: ["main"] name: CI jobs: test: name: Test runs-on: ubuntu-latest env: RUSTFLAGS: -D warnings steps: - uses: actions/checkout@v4 - name: Install Rust nightly uses: dtolnay/rust-toolchain@nightly - name: Test with default features run: cargo test - name: Test with all features run: cargo test --all-features - name: Test with no features run: cargo test --tests --no-default-features - name: Test with feature alloc run: cargo test --tests --no-default-features -F alloc - name: Test with feature impl-error run: cargo test --tests --no-default-features -F impl-error - name: Test with feature net run: cargo test --tests --no-default-features -F net fluent-uri-0.4.1/.gitignore000064400000000000000000000000331046102023000137040ustar 00000000000000/.vscode target Cargo.lock fluent-uri-0.4.1/.gitmodules000064400000000000000000000002021046102023000140670ustar 00000000000000[submodule "fuzz/uriparser-sys/uriparser"] path = fuzz/uriparser-sys/uriparser url = https://github.com/uriparser/uriparser.git fluent-uri-0.4.1/Cargo.lock0000644000000047210000000000100111070ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
version = 3 [[package]] name = "borrow-or-share" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c" [[package]] name = "fluent-uri" version = "0.4.1" dependencies = [ "borrow-or-share", "ref-cast", "serde", ] [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "ref-cast" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", ] [[package]] name = "serde_core" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "syn" version = "2.0.108" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" fluent-uri-0.4.1/Cargo.toml0000644000000037610000000000100111350ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.68" name = "fluent-uri" version = "0.4.1" authors = ["Scallop Ye "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A generic URI/IRI handling library compliant with RFC 3986/3987." 
documentation = "https://docs.rs/fluent-uri" readme = "README.md" keywords = [ "builder", "parser", "uri", "iri", ] categories = [ "encoding", "parser-implementations", ] license = "MIT" repository = "https://github.com/yescallop/fluent-uri-rs" [package.metadata.docs.rs] all-features = true targets = ["x86_64-unknown-linux-gnu"] rustdoc-args = [ "--cfg", "docsrs", ] [features] alloc = [ "borrow-or-share/alloc", "serde?/alloc", ] default = ["std"] impl-error = [] net = [] std = [ "alloc", "impl-error", ] [lib] name = "fluent_uri" path = "src/lib.rs" [[test]] name = "convert" path = "tests/convert.rs" [[test]] name = "normalize" path = "tests/normalize.rs" [[test]] name = "parse" path = "tests/parse.rs" [[test]] name = "parse_ip" path = "tests/parse_ip.rs" [[test]] name = "resolve" path = "tests/resolve.rs" [[test]] name = "to_socket_addrs" path = "tests/to_socket_addrs.rs" [dependencies.borrow-or-share] version = "0.2.4" default-features = false [dependencies.ref-cast] version = "1.0" [dependencies.serde] version = "1.0" optional = true default-features = false [lints.rust.unexpected_cfgs] level = "warn" priority = 0 check-cfg = ["cfg(fluent_uri_unstable, fuzzing)"] fluent-uri-0.4.1/Cargo.toml.orig000064400000000000000000000017211046102023000146100ustar 00000000000000[package] name = "fluent-uri" version = "0.4.1" authors = ["Scallop Ye "] edition = "2021" rust-version = "1.68" description = "A generic URI/IRI handling library compliant with RFC 3986/3987." 
documentation = "https://docs.rs/fluent-uri" repository = "https://github.com/yescallop/fluent-uri-rs" license = "MIT" keywords = ["builder", "parser", "uri", "iri"] categories = ["encoding", "parser-implementations"] [features] default = ["std"] std = ["alloc", "impl-error"] alloc = ["borrow-or-share/alloc", "serde?/alloc"] impl-error = [] net = [] [dependencies] borrow-or-share = { version = "0.2.4", default-features = false } ref-cast = "1.0" [dependencies.serde] version = "1.0" default-features = false optional = true [package.metadata.docs.rs] all-features = true targets = ["x86_64-unknown-linux-gnu"] rustdoc-args = ["--cfg", "docsrs"] [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(fluent_uri_unstable, fuzzing)', ] } fluent-uri-0.4.1/LICENSE000064400000000000000000000020521046102023000127240ustar 00000000000000MIT License Copyright (c) 2024 Scallop Ye Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.fluent-uri-0.4.1/README.md000064400000000000000000000127121046102023000132020ustar 00000000000000# fluent-uri A generic URI/IRI handling library compliant with [RFC 3986] and [RFC 3987]. It is: - **Fast:** Zero-copy parsing. Benchmarked to be highly performant.[^bench-res] - **Easy:** Carefully designed and documented APIs. Handy percent-encoding utilities. - **Correct:** Forbids unsafe code. Extensively fuzz-tested against other implementations. [![crates.io](https://img.shields.io/crates/v/fluent-uri.svg)](https://crates.io/crates/fluent-uri) [![build](https://img.shields.io/github/actions/workflow/status/yescallop/fluent-uri-rs/ci.yml )](https://github.com/yescallop/fluent-uri-rs/actions/workflows/ci.yml) [![license](https://img.shields.io/crates/l/fluent-uri.svg)](/LICENSE) [Documentation](https://docs.rs/fluent-uri) | [Discussions](https://github.com/yescallop/fluent-uri-rs/discussions) [RFC 3986]: https://datatracker.ietf.org/doc/html/rfc3986 [RFC 3987]: https://datatracker.ietf.org/doc/html/rfc3987 [^bench-res]: In [a benchmark](https://github.com/yescallop/fluent-uri-rs/blob/main/bench/benches/bench.rs) on an Intel Core i5-11300H processor, `fluent-uri` parsed a 61-byte IRI in ~85ns compared to ~125ns for `iref`, `iri-string`, and `oxiri`. ## Terminology A *[URI reference]* is either a *[URI]* or a *[relative reference]*. If it starts with a *[scheme]* (like `http`, `ftp`, `mailto`, etc.) followed by a colon (`:`), it is a URI. For example, `http://example.com/` and `mailto:user@example.com` are URIs. Otherwise, it is a relative reference. For example, `//example.org/`, `/index.html`, `../`, `foo`, `?bar`, and `#baz` are relative references. 
An *[IRI]* (reference) is an internationalized version of URI (reference) which may contain non-ASCII characters. [URI reference]: https://datatracker.ietf.org/doc/html/rfc3986#section-4.1 [URI]: https://datatracker.ietf.org/doc/html/rfc3986#section-3 [IRI]: https://datatracker.ietf.org/doc/html/rfc3987#section-2 [relative reference]: https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 ## Examples - Parse and extract components from a URI: ```rust const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo"); let s = "foo://user@example.com:8042/over/there?name=ferret#nose"; let uri = Uri::parse(s)?; assert_eq!(uri.scheme(), SCHEME_FOO); let auth = uri.authority().unwrap(); assert_eq!(auth.as_str(), "user@example.com:8042"); assert_eq!(auth.userinfo().unwrap(), "user"); assert_eq!(auth.host(), "example.com"); assert!(matches!(auth.host_parsed(), Host::RegName(name) if name == "example.com")); assert_eq!(auth.port().unwrap(), "8042"); assert_eq!(auth.port_to_u16(), Ok(Some(8042))); assert_eq!(uri.path(), "/over/there"); assert_eq!(uri.query().unwrap(), "name=ferret"); assert_eq!(uri.fragment().unwrap(), "nose"); ``` - Build a URI using the builder pattern: ```rust const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo"); let uri = Uri::builder() .scheme(SCHEME_FOO) .authority_with(|b| { b.userinfo(EStr::new_or_panic("user")) .host(EStr::new_or_panic("example.com")) .port(8042) }) .path(EStr::new_or_panic("/over/there")) .query(EStr::new_or_panic("name=ferret")) .fragment(EStr::new_or_panic("nose")) .build() .unwrap(); assert_eq!( uri.as_str(), "foo://user@example.com:8042/over/there?name=ferret#nose" ); ``` - Resolve a URI reference against a base URI: ```rust let base = Uri::parse("http://example.com/foo/bar")?; let uri_ref = UriRef::parse("baz")?; assert_eq!(uri_ref.resolve_against(&base).unwrap(), "http://example.com/foo/baz"); let uri_ref = UriRef::parse("../baz")?; 
assert_eq!(uri_ref.resolve_against(&base).unwrap(), "http://example.com/baz"); let uri_ref = UriRef::parse("?baz")?; assert_eq!(uri_ref.resolve_against(&base).unwrap(), "http://example.com/foo/bar?baz"); ``` - Normalize a URI: ```rust let uri = Uri::parse("eXAMPLE://a/./b/../b/%63/%7bfoo%7d")?; assert_eq!(uri.normalize(), "example://a/b/c/%7Bfoo%7D"); ``` - `EStr` (Percent-encoded string slices): All components in a URI that may be percent-encoded are parsed as `EStr`s, which allows easy splitting and decoding: ```rust let s = "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"; let query = UriRef::parse(s).unwrap().query().unwrap(); let map: HashMap<_, _> = query .split('&') .map(|s| s.split_once('=').unwrap_or((s, EStr::EMPTY))) .map(|(k, v)| (k.decode().to_string_lossy(), v.decode().to_string_lossy())) .collect(); assert_eq!(map["name"], "张三"); assert_eq!(map["speech"], "¡Olé!"); ``` - `EString` (A percent-encoded, growable string): You can encode key-value pairs to a query string and use it to build a URI reference: ```rust let pairs = [("name", "张三"), ("speech", "¡Olé!")]; let mut buf = EString::::new(); for (k, v) in pairs { if !buf.is_empty() { buf.push_byte(b'&'); } buf.encode_str::(k); buf.push_byte(b'='); buf.encode_str::(v); } assert_eq!(buf, "name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"); let uri_ref = UriRef::builder() .path(EStr::EMPTY) .query(&buf) .build() .unwrap(); assert_eq!(uri_ref.as_str(), "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"); ``` fluent-uri-0.4.1/src/build/imp.rs000064400000000000000000000105741046102023000147500ustar 00000000000000use super::BuildError; use crate::{ component::IAuthority, imp::{AuthMeta, HostMeta, Meta}, parse, pct_enc::{ encoder::{IRegName, Port, RegName}, EStr, }, }; use alloc::string::String; use core::{fmt::Write, num::NonZeroUsize}; #[cfg(feature = "net")] use crate::net::{IpAddr, Ipv4Addr, Ipv6Addr}; pub struct BuilderInner { pub buf: String, pub meta: Meta, } impl BuilderInner { pub fn 
push_scheme(&mut self, v: &str) { self.buf.push_str(v); self.meta.scheme_end = NonZeroUsize::new(self.buf.len()); self.buf.push(':'); } pub fn start_authority(&mut self) { self.buf.push_str("//"); } pub fn push_authority(&mut self, v: IAuthority<'_>) { self.buf.push_str("//"); let start = self.buf.len(); self.buf.push_str(v.as_str()); let mut meta = v.meta(); meta.host_bounds.0 += start; meta.host_bounds.1 += start; self.meta.auth_meta = Some(meta); } pub fn push_userinfo(&mut self, v: &str) { self.buf.push_str(v); self.buf.push('@'); } pub fn push_host(&mut self, meta: HostMeta, f: impl FnOnce(&mut String)) { let start = self.buf.len(); f(&mut self.buf); self.meta.auth_meta = Some(AuthMeta { host_bounds: (start, self.buf.len()), host_meta: meta, }); } pub fn push_path(&mut self, v: &str) { self.meta.path_bounds.0 = self.buf.len(); self.buf.push_str(v); self.meta.path_bounds.1 = self.buf.len(); } pub fn push_query(&mut self, v: &str) { self.buf.push('?'); self.buf.push_str(v); self.meta.query_end = NonZeroUsize::new(self.buf.len()); } pub fn push_fragment(&mut self, v: &str) { self.buf.push('#'); self.buf.push_str(v); } pub fn validate(&self) -> Result<(), BuildError> { fn first_segment_contains_colon(path: &str) -> bool { path.split_once('/').map_or(path, |x| x.0).contains(':') } let (start, end) = self.meta.path_bounds; let path = &self.buf[start..end]; if self.meta.auth_meta.is_some() { if !path.is_empty() && !path.starts_with('/') { return Err(BuildError::NonemptyRootlessPath); } } else { if path.starts_with("//") { return Err(BuildError::PathStartsWithDoubleSlash); } if self.meta.scheme_end.is_none() && first_segment_contains_colon(path) { return Err(BuildError::FirstPathSegmentContainsColon); } } Ok(()) } } pub trait AsHost<'a> { fn push_to(self, b: &mut BuilderInner); } #[cfg(feature = "net")] impl<'a> AsHost<'a> for Ipv4Addr { fn push_to(self, b: &mut BuilderInner) { b.push_host(HostMeta::Ipv4(self), |buf| { write!(buf, "{self}").unwrap(); }); } } 
/// A value that can be appended to a buffer as the port subcomponent
/// of an authority.
pub trait AsPort {
    /// Appends `':'` followed by the textual form of this port to `buf`.
    fn push_to(self, buf: &mut String);
}

impl AsPort for u16 {
    /// Appends `':'` and the decimal representation of the port number.
    fn push_to(self, buf: &mut String) {
        buf.push(':');
        // Formatting into a `String` cannot fail, so the unwrap never fires.
        write!(buf, "{}", self).unwrap();
    }
}
PathStartsWithDoubleSlash, /// Neither scheme nor authority is present, but the first path segment contains `':'`. FirstPathSegmentContainsColon, } impl fmt::Display for BuildError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self { Self::NonemptyRootlessPath => { "when authority is present, path should either be empty or start with '/'" } Self::PathStartsWithDoubleSlash => { "when authority is not present, path should not start with \"//\"" } Self::FirstPathSegmentContainsColon => { "when neither scheme nor authority is present, first path segment should not contain ':'" } }; f.write_str(msg) } } #[cfg(feature = "impl-error")] impl crate::Error for BuildError {} /// A builder for URI/IRI (reference). /// /// This struct is created by the `builder` associated /// functions on [`Uri`], [`UriRef`], [`Iri`], and [`IriRef`]. /// /// [`Uri`]: crate::Uri /// [`UriRef`]: crate::UriRef /// [`Iri`]: crate::Iri /// [`IriRef`]: crate::IriRef /// /// # Examples /// /// Basic usage: /// /// ``` /// use fluent_uri::{component::Scheme, pct_enc::EStr, Uri}; /// /// const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo"); /// /// let uri = Uri::builder() /// .scheme(SCHEME_FOO) /// .authority_with(|b| { /// b.userinfo(EStr::new_or_panic("user")) /// .host(EStr::new_or_panic("example.com")) /// .port(8042) /// }) /// .path(EStr::new_or_panic("/over/there")) /// .query(EStr::new_or_panic("name=ferret")) /// .fragment(EStr::new_or_panic("nose")) /// .build() /// .unwrap(); /// /// assert_eq!( /// uri.as_str(), /// "foo://user@example.com:8042/over/there?name=ferret#nose" /// ); /// ``` /// /// Note that [`EStr::new_or_panic`] *panics* on invalid input and /// should normally be used with constant strings. /// If you want to build a percent-encoded string from scratch, /// use [`EString`] instead. 
/// /// [`EString`]: crate::pct_enc::EString /// /// # Constraints /// /// Typestates are used to avoid misconfigurations, /// which puts the following constraints: /// /// - Components must be set from start to end, no repetition allowed. /// - Setting [`scheme`] is mandatory when building a URI/IRI. /// - Setting [`path`] is mandatory. /// - Methods [`userinfo`], [`host`], and [`port`] are only available /// within a call to [`authority_with`]. /// - Setting [`host`] is mandatory within a call to [`authority_with`]. /// /// You may otherwise skip setting optional components /// (scheme, authority, userinfo, port, query, and fragment) /// with [`advance`] or set them optionally with [`optional`]. /// /// The builder typestates are currently private. Please open an issue /// if it is a problem not being able to name the type of a builder. /// /// [`advance`]: Self::advance /// [`optional`]: Self::optional /// [`scheme`]: Self::scheme /// [`authority_with`]: Self::authority_with /// [`userinfo`]: Self::userinfo /// [`host`]: Self::host /// [`port`]: Self::port /// [`path`]: Self::path /// [`build`]: Self::build #[must_use] pub struct Builder { inner: BuilderInner, _marker: PhantomData<(R, S)>, } impl Builder { pub(crate) fn new() -> Self { Self { inner: BuilderInner { buf: String::new(), meta: Meta::default(), }, _marker: PhantomData, } } } impl Builder { fn cast(self) -> Builder where S: To, { Builder { inner: self.inner, _marker: PhantomData, } } /// Advances the builder state, skipping optional components in between. /// /// Variable rebinding may be necessary as this changes the type of the builder. 
/// /// ``` /// use fluent_uri::{component::Scheme, pct_enc::EStr, UriRef}; /// /// fn build(relative: bool) -> UriRef { /// let b = UriRef::builder(); /// let b = if relative { /// b.advance() /// } else { /// b.scheme(Scheme::new_or_panic("http")) /// .authority_with(|b| b.host(EStr::new_or_panic("example.com"))) /// }; /// b.path(EStr::new_or_panic("/foo")).build().unwrap() /// } /// /// assert_eq!(build(false).as_str(), "http://example.com/foo"); /// assert_eq!(build(true).as_str(), "/foo"); /// ``` pub fn advance(self) -> Builder where S: AdvanceTo, { self.cast() } /// Optionally calls a builder method with a value. /// /// ``` /// use fluent_uri::{build::Builder, pct_enc::EStr, UriRef}; /// /// let uri_ref = UriRef::builder() /// .path(EStr::new_or_panic("foo")) /// .optional(Builder::query, Some(EStr::new_or_panic("bar"))) /// .optional(Builder::fragment, None) /// .build() /// .unwrap(); /// /// assert_eq!(uri_ref.as_str(), "foo?bar"); /// ``` pub fn optional(self, f: F, opt: Option) -> Builder where F: FnOnce(Self, V) -> Builder, S: AdvanceTo, { match opt { Some(value) => f(self, value), None => self.advance(), } } } impl> Builder { /// Sets the [scheme] component. /// /// Note that the scheme component is *case-insensitive* and its canonical form is /// *lowercase*. For consistency, you should only produce lowercase scheme names. /// /// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 pub fn scheme(mut self, scheme: &Scheme) -> Builder { self.inner.push_scheme(scheme.as_str()); self.cast() } } impl> Builder { /// Builds the [authority] component with the given function. /// /// [authority]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 pub fn authority_with(mut self, f: F) -> Builder where F: FnOnce(Builder) -> Builder, T: To, { self.inner.start_authority(); f(self.cast()).cast() } /// Sets the [authority] component. /// /// This method takes an [`Authority`] (for URI) or [`IAuthority`] (for IRI) as argument. 
/// Sets the [host] subcomponent of authority.
///
/// This method takes either an [`Ipv4Addr`], [`Ipv6Addr`], [`IpAddr`],
/// <code>&amp;[EStr]&lt;[RegName]&gt;</code> (for URI)
/// or <code>&amp;[EStr]&lt;[IRegName]&gt;</code> (for IRI) as argument.
/// Crate feature `net` is required for this method to take an IP address as argument.
///
/// If the contents of an input `EStr` slice match the
/// `IPv4address` ABNF rule defined in [Section 3.2.2 of RFC 3986][host],
/// the resulting URI/IRI (reference) will output a [`Host::Ipv4`] variant instead.
///
/// Note that ASCII characters within a host are *case-insensitive*.
/// For consistency, you should only produce [normalized] hosts.
///
/// [host]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
/// [`Ipv4Addr`]: core::net::Ipv4Addr
/// [`Ipv6Addr`]: core::net::Ipv6Addr
/// [`IpAddr`]: core::net::IpAddr
/// [RegName]: crate::pct_enc::encoder::RegName
/// [IRegName]: crate::pct_enc::encoder::IRegName
/// [`Host::Ipv4`]: crate::component::Host::Ipv4
/// [normalized]: crate::Uri::normalize
///
/// # Examples
///
/// ```
/// use fluent_uri::{component::Host, pct_enc::EStr, UriRef};
///
/// let uri_ref = UriRef::builder()
///     .authority_with(|b| b.host(EStr::new_or_panic("127.0.0.1")))
///     .path(EStr::EMPTY)
///     .build()
///     .unwrap();
///
/// assert!(matches!(uri_ref.authority().unwrap().host_parsed(), Host::Ipv4 { .. }));
/// ```
pub fn host<'a>(
    mut self,
    host: impl AsHost<'a> + WithEncoder,
) -> Builder {
    // The `AsHost` impl appends the host text to the buffer and records the
    // host bounds and host metadata on the inner builder.
    host.push_to(&mut self.inner);
    self.cast()
}
/// Sets the [port] subcomponent of authority, omitting it when it equals the default value.
///
/// When `port == default`, nothing is written to the buffer and the builder
/// only advances past the port. Otherwise this behaves exactly like
/// [`port`](Self::port) called with `port`.
///
/// [port]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
#[cfg(fluent_uri_unstable)]
pub fn port_with_default(self, port: u16, default: u16) -> Builder<R, PortEnd> {
    // The comparison was previously inverted (`!=`), which dropped every
    // non-default port and wrote only the default one.
    if port == default {
        self.cast()
    } else {
        self.port(port)
    }
}
/// Implements a generic marker trait for many state pairs at once.
///
/// The input is the trait name, the keyword `for`, and a list of
/// `Source => Target1, Target2, ...` entries. Each entry expands to one
/// empty `impl Trait<TargetN> for Source {}` per listed target.
macro_rules! impl_many {
    ($marker:ident for $($from:ty => $($to:ty),+)*) => {
        $(
            $(
                impl $marker<$to> for $from {}
            )+
        )*
    };
}
/// The single bit (`0x20`) in which an ASCII uppercase letter differs from
/// its lowercase counterpart.
///
/// OR-ing this mask into a byte maps `A`-`Z` to `a`-`z`. The other characters
/// allowed in a scheme (digits, `'+'`, `'-'` and `'.'`) already have this bit
/// set and are left unchanged, so scheme names can be compared, hashed and
/// looked up case-insensitively by setting this bit on every byte.
const ASCII_CASE_MASK: u8 = 0b0010_0000;
For a non-panicking variant, /// use [`new`](Self::new). /// /// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 #[inline] #[must_use] pub const fn new_or_panic(s: &str) -> &Self { match Self::new(s) { Some(scheme) => scheme, None => panic!("invalid scheme"), } } /// Converts a string slice to `&Scheme`, returning `None` if the conversion fails. #[inline] #[must_use] pub const fn new(s: &str) -> Option<&Self> { if matches!(s.as_bytes(), [first, rem @ ..] if first.is_ascii_alphabetic() && table::SCHEME.validate(rem)) { Some(Self::new_validated(s)) } else { None } } /// Returns the scheme component as a string slice. /// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("http://example.com/")?; /// assert_eq!(uri.scheme().as_str(), "http"); /// let uri = Uri::parse("HTTP://EXAMPLE.COM/")?; /// assert_eq!(uri.scheme().as_str(), "HTTP"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[inline] #[must_use] pub fn as_str(&self) -> &str { &self.inner } } macro_rules! default_port { ($($name:literal, $bname:literal => $port:literal, rfc($rfc:literal))*) => { impl Scheme { /// Returns the optional default port of the scheme if it is /// registered [at IANA][iana] with a permanent status. 
/// /// [iana]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml /// /// The following table lists all schemes concerned, their default ports, and references: /// /// | Scheme | Port | Reference | /// | - | - | - | $(#[doc = concat!("| ", $name, " | ", $port, " | [RFC ", $rfc, "](https://datatracker.ietf.org/doc/html/rfc", $rfc, ") |")])* #[must_use] pub fn default_port(&self) -> Option { const MAX_LEN: usize = { let mut res = 0; $( if $name.len() > res { res = $name.len(); } )* res }; let len = self.inner.len(); if len > MAX_LEN { return None; } let mut buf = [0; MAX_LEN]; for (i, b) in self.inner.bytes().enumerate() { buf[i] = b | ASCII_CASE_MASK; } match &buf[..len] { $($bname => Some($port),)* _ => None, } } } }; } default_port! { "aaa", b"aaa" => 3868, rfc(6733) "aaas", b"aaas" => 5658, rfc(6733) "acap", b"acap" => 674, rfc(2244) "cap", b"cap" => 1026, rfc(4324) "coap", b"coap" => 5683, rfc(7252) "coap+tcp", b"coap+tcp" => 5683, rfc(8323) "coap+ws", b"coap+ws" => 80, rfc(8323) "coaps", b"coaps" => 5684, rfc(7252) "coaps+tcp", b"coaps+tcp" => 5684, rfc(8323) "coaps+ws", b"coaps+ws" => 443, rfc(8323) "dict", b"dict" => 2628, rfc(2229) "dns", b"dns" => 53, rfc(4501) "ftp", b"ftp" => 21, rfc(1738) "go", b"go" => 1096, rfc(3368) "gopher", b"gopher" => 70, rfc(4266) "http", b"http" => 80, rfc(9110) "https", b"https" => 443, rfc(9110) "icap", b"icap" => 1344, rfc(3507) "imap", b"imap" => 143, rfc(5092) "ipp", b"ipp" => 631, rfc(3510) "ipps", b"ipps" => 631, rfc(7472) "ldap", b"ldap" => 389, rfc(4516) "mtqp", b"mtqp" => 1038, rfc(3887) "mupdate", b"mupdate" => 3905, rfc(3656) "nfs", b"nfs" => 2049, rfc(2224) "nntp", b"nntp" => 119, rfc(5538) "pop", b"pop" => 110, rfc(2384) "rtsp", b"rtsp" => 554, rfc(7826) "rtsps", b"rtsps" => 322, rfc(7826) "rtspu", b"rtspu" => 554, rfc(2326) "snmp", b"snmp" => 161, rfc(4088) "stun", b"stun" => 3478, rfc(7064) "stuns", b"stuns" => 5349, rfc(7064) "telnet", b"telnet" => 23, rfc(4248) "tip", b"tip" => 3372, rfc(2371) 
"tn3270", b"tn3270" => 23, rfc(6270) "turn", b"turn" => 3478, rfc(7065) "turns", b"turns" => 5349, rfc(7065) "vemmi", b"vemmi" => 575, rfc(2122) "vnc", b"vnc" => 5900, rfc(7869) "ws", b"ws" => 80, rfc(6455) "wss", b"wss" => 443, rfc(6455) "z39.50r", b"z39.50r" => 210, rfc(2056) "z39.50s", b"z39.50s" => 210, rfc(2056) } impl PartialEq for Scheme { #[inline] fn eq(&self, other: &Self) -> bool { let (a, b) = (self.inner.as_bytes(), other.inner.as_bytes()); // The only characters allowed in a scheme are alphabets, digits, '+', '-' and '.'. // Their ASCII codes allow us to simply set the sixth bits and compare. a.len() == b.len() && iter::zip(a, b).all(|(x, y)| x | ASCII_CASE_MASK == y | ASCII_CASE_MASK) } } impl Eq for Scheme {} impl hash::Hash for Scheme { fn hash(&self, state: &mut H) { let mut buf = [0; 8]; for chunk in self.inner.as_bytes().chunks(8) { let len = chunk.len(); for i in 0..len { buf[i] = chunk[i] | ASCII_CASE_MASK; } state.write(&buf[..len]); } } } #[derive(Clone, Copy)] struct AuthorityInner<'a> { val: &'a str, meta: AuthMeta, } impl<'a> AuthorityInner<'a> { fn userinfo(&self) -> Option<&'a EStr> { let host_start = self.meta.host_bounds.0; (host_start != 0).then(|| EStr::new_validated(&self.val[..host_start - 1])) } fn host(&self) -> &'a str { let (start, end) = self.meta.host_bounds; &self.val[start..end] } fn port(&self) -> Option<&'a EStr> { let host_end = self.meta.host_bounds.1; (host_end != self.val.len()).then(|| EStr::new_validated(&self.val[host_end + 1..])) } fn port_to_u16(&self) -> Result, ParseIntError> { self.port() .filter(|s| !s.is_empty()) .map(|s| s.as_str().parse()) .transpose() } #[cfg(all(feature = "net", feature = "std"))] fn socket_addrs(&self, default_port: u16) -> io::Result> { use std::vec; let port = self .port_to_u16() .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "invalid port value"))? 
.unwrap_or(default_port); match self.meta.host_meta { HostMeta::Ipv4(addr) => Ok(vec![(addr, port).into()].into_iter()), HostMeta::Ipv6(addr) => Ok(vec![(addr, port).into()].into_iter()), HostMeta::IpvFuture => Err(io::Error::new( io::ErrorKind::InvalidInput, "address mechanism not supported", )), HostMeta::RegName => { let name = EStr::::new_validated(self.host()); let name = name.decode().to_string().map_err(|_| { io::Error::new( io::ErrorKind::InvalidInput, "registered name does not decode to valid UTF-8", ) })?; (&name[..], port).to_socket_addrs() } } } } /// An [authority] component. /// /// [authority]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 #[derive(Clone, Copy)] pub struct Authority<'a, UserinfoE = Userinfo, RegNameE = RegName> { inner: AuthorityInner<'a>, _marker: PhantomData<(UserinfoE, RegNameE)>, } impl<'a, T, U> Authority<'a, T, U> { pub(crate) fn cast(self) -> Authority<'a, V, W> { Authority { inner: self.inner, _marker: PhantomData, } } } impl<'a, UserinfoE: Encoder, RegNameE: Encoder> Authority<'a, UserinfoE, RegNameE> { pub(crate) const fn new(val: &'a str, meta: AuthMeta) -> Self { Self { inner: AuthorityInner { val, meta }, _marker: PhantomData, } } /// An empty authority component. pub const EMPTY: Authority<'static, UserinfoE, RegNameE> = Authority::new("", AuthMeta::EMPTY); #[cfg(feature = "alloc")] pub(crate) fn meta(&self) -> AuthMeta { self.inner.meta } /// Returns the authority component as a string slice. /// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("http://user@example.com:8080/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.as_str(), "user@example.com:8080"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[inline] #[must_use] pub fn as_str(&self) -> &'a str { self.inner.val } /// Returns the optional [userinfo] subcomponent. 
/// /// [userinfo]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1 /// /// # Examples /// /// ``` /// use fluent_uri::{pct_enc::EStr, Uri}; /// /// let uri = Uri::parse("http://user@example.com/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.userinfo(), Some(EStr::new_or_panic("user"))); /// /// let uri = Uri::parse("http://example.com/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.userinfo(), None); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn userinfo(&self) -> Option<&'a EStr> { self.inner.userinfo().map(EStr::cast) } /// Returns the [host] subcomponent as a string slice. /// /// The host subcomponent is always present, although it may be empty. /// /// The square brackets enclosing an IPv6 or IPvFuture address are included. /// /// Note that ASCII characters within a host are *case-insensitive*. /// /// [host]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 /// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("http://user@example.com:8080/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.host(), "example.com"); /// /// let uri = Uri::parse("file:///path/to/file")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.host(), ""); /// /// let uri = Uri::parse("http://[::1]")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.host(), "[::1]"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn host(&self) -> &'a str { self.inner.host() } /// Returns the parsed [host] subcomponent. /// /// Note that ASCII characters within a host are *case-insensitive*. 
/// /// [host]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 /// /// # Examples /// /// ``` /// use fluent_uri::{component::Host, pct_enc::EStr, Uri}; #[cfg_attr(feature = "net", doc = "use std::net::{Ipv4Addr, Ipv6Addr};")] /// /// let uri = Uri::parse("foo://127.0.0.1")?; /// let auth = uri.authority().unwrap(); #[cfg_attr( feature = "net", doc = "assert!(matches!(auth.host_parsed(), Host::Ipv4(Ipv4Addr::LOCALHOST)));" )] #[cfg_attr( not(feature = "net"), doc = "assert!(matches!(auth.host_parsed(), Host::Ipv4 { .. }));" )] /// /// let uri = Uri::parse("foo://[::1]")?; /// let auth = uri.authority().unwrap(); #[cfg_attr( feature = "net", doc = "assert!(matches!(auth.host_parsed(), Host::Ipv6(Ipv6Addr::LOCALHOST)));" )] #[cfg_attr( not(feature = "net"), doc = "assert!(matches!(auth.host_parsed(), Host::Ipv6 { .. }));" )] /// /// let uri = Uri::parse("foo://[v1.addr]")?; /// let auth = uri.authority().unwrap(); /// // The API design for IPvFuture addresses is to be determined. /// assert!(matches!(auth.host_parsed(), Host::IpvFuture { .. })); /// /// let uri = Uri::parse("foo://localhost")?; /// let auth = uri.authority().unwrap(); /// assert!(matches!(auth.host_parsed(), Host::RegName(name) if name == "localhost")); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn host_parsed(&self) -> Host<'a, RegNameE> { match self.inner.meta.host_meta { #[cfg(feature = "net")] HostMeta::Ipv4(addr) => Host::Ipv4(addr), #[cfg(feature = "net")] HostMeta::Ipv6(addr) => Host::Ipv6(addr), #[cfg(not(feature = "net"))] HostMeta::Ipv4() => Host::Ipv4(), #[cfg(not(feature = "net"))] HostMeta::Ipv6() => Host::Ipv6(), HostMeta::IpvFuture => Host::IpvFuture, HostMeta::RegName => Host::RegName(EStr::new_validated(self.host())), } } /// Returns the optional [port] subcomponent. /// /// A scheme may define a [default port] to use when the port is /// not present or is empty. 
/// /// Note that the port may be empty, with leading zeros, or larger than [`u16::MAX`]. /// It is up to you to decide whether to deny such ports, fallback to the scheme's /// default if it is empty, ignore the leading zeros, or use a special addressing /// mechanism that allows ports larger than [`u16::MAX`]. /// /// [port]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3 /// [default port]: Scheme::default_port /// /// # Examples /// /// ``` /// use fluent_uri::{pct_enc::EStr, Uri}; /// /// let uri = Uri::parse("foo://localhost:4673/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port(), Some(EStr::new_or_panic("4673"))); /// /// let uri = Uri::parse("foo://localhost:/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port(), Some(EStr::EMPTY)); /// /// let uri = Uri::parse("foo://localhost/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port(), None); /// /// let uri = Uri::parse("foo://localhost:123456/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port(), Some(EStr::new_or_panic("123456"))); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn port(&self) -> Option<&'a EStr> { self.inner.port() } /// Converts the [port] subcomponent to `u16`, if present and nonempty. /// /// Returns `Ok(None)` if the port is not present or is empty. Leading zeros are ignored. /// /// [port]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3 /// /// # Errors /// /// Returns `Err` if the port cannot be parsed into `u16`. 
/// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("foo://localhost:4673/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port_to_u16(), Ok(Some(4673))); /// /// let uri = Uri::parse("foo://localhost/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port_to_u16(), Ok(None)); /// /// let uri = Uri::parse("foo://localhost:/")?; /// let auth = uri.authority().unwrap(); /// assert_eq!(auth.port_to_u16(), Ok(None)); /// /// let uri = Uri::parse("foo://localhost:123456/")?; /// let auth = uri.authority().unwrap(); /// assert!(auth.port_to_u16().is_err()); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` pub fn port_to_u16(&self) -> Result, ParseIntError> { self.inner.port_to_u16() } /// Converts the host and the port subcomponent to an iterator of resolved [`SocketAddr`]s. /// /// The default port is used if the port component is not present or is empty. /// A registered name is first [decoded] and then resolved with [`ToSocketAddrs`]. /// Punycode encoding is **not** performed prior to resolution. /// /// [decoded]: EStr::decode /// /// # Errors /// /// Returns `Err` if any of the following is true. /// /// - The port cannot be parsed into `u16`. /// - The host is an IPvFuture address. /// - A registered name does not decode to valid UTF-8 or fails to resolve. #[cfg(all(feature = "net", feature = "std"))] pub fn socket_addrs(&self, default_port: u16) -> io::Result> { self.inner.socket_addrs(default_port) } /// Checks whether a userinfo subcomponent is present. 
/// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("http://user@example.com/")?; /// assert!(uri.authority().unwrap().has_userinfo()); /// /// let uri = Uri::parse("http://example.com/")?; /// assert!(!uri.authority().unwrap().has_userinfo()); /// # Ok::<_, fluent_uri::ParseError>(()) #[inline] #[must_use] pub fn has_userinfo(&self) -> bool { self.inner.meta.host_bounds.0 != 0 } /// Checks whether a port subcomponent is present. /// /// # Examples /// /// ``` /// use fluent_uri::Uri; /// /// let uri = Uri::parse("foo://localhost:4673/")?; /// assert!(uri.authority().unwrap().has_port()); /// /// // The port subcomponent can be empty. /// let uri = Uri::parse("foo://localhost:/")?; /// assert!(uri.authority().unwrap().has_port()); /// /// let uri = Uri::parse("foo://localhost/")?; /// let auth = uri.authority().unwrap(); /// assert!(!uri.authority().unwrap().has_port()); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[inline] #[must_use] pub fn has_port(&self) -> bool { self.inner.meta.host_bounds.1 != self.inner.val.len() } } /// A parsed [host] component. /// /// [host]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2 #[derive(Clone, Copy)] #[cfg_attr(fuzzing, derive(PartialEq, Eq))] pub enum Host<'a, RegNameE: Encoder = RegName> { /// An IPv4 address. #[cfg_attr(not(feature = "net"), non_exhaustive)] Ipv4( /// The address. #[cfg(feature = "net")] Ipv4Addr, ), /// An IPv6 address. #[cfg_attr(not(feature = "net"), non_exhaustive)] Ipv6( /// The address. #[cfg(feature = "net")] Ipv6Addr, ), /// An IP address of future version. /// /// This variant is marked as non-exhaustive because the API design /// for IPvFuture addresses is to be determined. #[non_exhaustive] IpvFuture, /// A registered name. /// /// Note that ASCII characters within a registered name are *case-insensitive*. 
RegName(&'a EStr), } fluent-uri-0.4.1/src/convert.rs000064400000000000000000000147671046102023000145540ustar 00000000000000use crate::{imp::RiMaybeRef, Iri, IriRef, Uri, UriRef}; use borrow_or_share::Bos; use core::str; #[cfg(feature = "alloc")] use crate::{ imp::{HostMeta, Meta, RmrRef}, pct_enc, }; #[cfg(feature = "alloc")] use alloc::string::String; #[cfg(feature = "alloc")] use core::num::NonZeroUsize; macro_rules! impl_from { ($($x:ident => $($y:ident),+)*) => { $($( impl> From<$x> for $y { #[doc = concat!("Consumes the `", stringify!($x), "` and creates a new [`", stringify!($y), "`] with the same contents.")] fn from(value: $x) -> Self { RiMaybeRef::new(value.val, value.meta) } } )+)* }; } impl_from! { Uri => UriRef, Iri, IriRef UriRef => IriRef Iri => IriRef } /// An error occurred when downcasting a URI/IRI (reference). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ConvertError { /// The input is not ASCII. NotAscii { /// The index of the first non-ASCII character. index: usize, }, /// The input has no scheme. NoScheme, } #[cfg(feature = "impl-error")] impl crate::Error for ConvertError {} macro_rules! impl_try_from { ($(#[$doc:meta] $x:ident if $($cond:ident)&&+ => $y:ident)*) => { $( impl<'a> TryFrom<$x<&'a str>> for $y<&'a str> { type Error = ConvertError; #[$doc] fn try_from(value: $x<&'a str>) -> Result { let r = value.make_ref(); $(r.$cond()?;)+ Ok(RiMaybeRef::new(value.val, value.meta)) } } #[cfg(feature = "alloc")] impl TryFrom<$x> for $y { type Error = (ConvertError, $x); #[$doc] fn try_from(value: $x) -> Result { let r = value.make_ref(); $( if let Err(e) = r.$cond() { return Err((e, value)); } )+ Ok(RiMaybeRef::new(value.val, value.meta)) } } )* }; } impl_try_from! { /// Converts the URI reference to a URI if it has a scheme. UriRef if ensure_has_scheme => Uri /// Converts the IRI to a URI if it is ASCII. Iri if ensure_ascii => Uri /// Converts the IRI reference to a URI if it has a scheme and is ASCII. 
IriRef if ensure_has_scheme && ensure_ascii => Uri /// Converts the IRI reference to a URI reference if it is ASCII. IriRef if ensure_ascii => UriRef /// Converts the IRI reference to an IRI if it has a scheme. IriRef if ensure_has_scheme => Iri } #[cfg(feature = "alloc")] impl> Iri { /// Converts the IRI to a URI by percent-encoding non-ASCII characters. /// /// Punycode encoding is **not** performed during conversion. /// /// # Examples /// /// ``` /// use fluent_uri::Iri; /// /// let iri = Iri::parse("http://www.example.org/résumé.html").unwrap(); /// assert_eq!(iri.to_uri(), "http://www.example.org/r%C3%A9sum%C3%A9.html"); /// /// let iri = Iri::parse("http://résumé.example.org").unwrap(); /// assert_eq!(iri.to_uri(), "http://r%C3%A9sum%C3%A9.example.org"); /// ``` pub fn to_uri(&self) -> Uri { RiMaybeRef::from_pair(encode_non_ascii(self.make_ref())) } } #[cfg(feature = "alloc")] impl> IriRef { /// Converts the IRI reference to a URI reference by percent-encoding non-ASCII characters. /// /// Punycode encoding is **not** performed during conversion. 
/// /// # Examples /// /// ``` /// use fluent_uri::IriRef; /// /// let iri_ref = IriRef::parse("résumé.html").unwrap(); /// assert_eq!(iri_ref.to_uri_ref(), "r%C3%A9sum%C3%A9.html"); /// /// let iri_ref = IriRef::parse("//résumé.example.org").unwrap(); /// assert_eq!(iri_ref.to_uri_ref(), "//r%C3%A9sum%C3%A9.example.org"); /// ``` pub fn to_uri_ref(&self) -> UriRef { RiMaybeRef::from_pair(encode_non_ascii(self.make_ref())) } } #[cfg(feature = "alloc")] fn encode_non_ascii(r: RmrRef<'_, '_>) -> (String, Meta) { let len = r .as_str() .chars() .map(|c| if c.is_ascii() { 1 } else { c.len_utf8() * 3 }) .sum(); let mut buf = String::with_capacity(len); let mut meta = Meta::default(); if let Some(scheme) = r.scheme_opt() { buf.push_str(scheme.as_str()); meta.scheme_end = NonZeroUsize::new(buf.len()); buf.push(':'); } if let Some(auth) = r.authority() { buf.push_str("//"); if let Some(userinfo) = auth.userinfo() { encode_non_ascii_str(&mut buf, userinfo.as_str()); buf.push('@'); } let mut auth_meta = auth.meta(); auth_meta.host_bounds.0 = buf.len(); match auth_meta.host_meta { HostMeta::RegName => encode_non_ascii_str(&mut buf, auth.host()), _ => buf.push_str(auth.host()), } auth_meta.host_bounds.1 = buf.len(); meta.auth_meta = Some(auth_meta); if let Some(port) = auth.port() { buf.push(':'); buf.push_str(port.as_str()); } } meta.path_bounds.0 = buf.len(); encode_non_ascii_str(&mut buf, r.path().as_str()); meta.path_bounds.1 = buf.len(); if let Some(query) = r.query() { buf.push('?'); encode_non_ascii_str(&mut buf, query.as_str()); meta.query_end = NonZeroUsize::new(buf.len()); } if let Some(fragment) = r.fragment() { buf.push('#'); encode_non_ascii_str(&mut buf, fragment.as_str()); } debug_assert_eq!(buf.len(), len); (buf, meta) } #[cfg(feature = "alloc")] fn encode_non_ascii_str(buf: &mut String, s: &str) { if s.is_ascii() { buf.push_str(s); } else { let mut iter = s.char_indices(); while let Some((start, ch)) = iter.next() { if ch.is_ascii() { buf.push(ch); } else { // 
`CharIndices::offset` sadly requires an MSRV of 1.82, // so we do pointer math to get the offset for now. let end = iter.as_str().as_ptr() as usize - s.as_ptr() as usize; for &x in &s.as_bytes()[start..end] { buf.push_str(pct_enc::encode_byte(x)); } } } } } fluent-uri-0.4.1/src/fmt.rs000064400000000000000000000053511046102023000136470ustar 00000000000000use crate::{ component::{Authority, Host, Scheme}, parse::{ParseError, ParseErrorKind}, pct_enc::{EStr, Encoder}, ConvertError, }; use core::fmt::{Debug, Display, Formatter, Result}; impl Debug for EStr { fn fmt(&self, f: &mut Formatter<'_>) -> Result { Debug::fmt(self.as_str(), f) } } impl Display for EStr { fn fmt(&self, f: &mut Formatter<'_>) -> Result { Display::fmt(self.as_str(), f) } } impl Display for ParseError { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let msg = match self.kind { ParseErrorKind::InvalidPctEncodedOctet => "invalid percent-encoded octet at index ", ParseErrorKind::UnexpectedChar => "unexpected character at index ", ParseErrorKind::InvalidIpv6Addr => "invalid IPv6 address at index ", }; write!(f, "{}{}", msg, self.index) } } impl Display for ConvertError { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { Self::NotAscii { index } => write!(f, "non-ASCII character at index {index}"), Self::NoScheme => f.write_str("scheme not present"), } } } impl Debug for Scheme { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> Result { Debug::fmt(self.as_str(), f) } } impl Display for Scheme { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> Result { Display::fmt(self.as_str(), f) } } impl Debug for Host<'_, RegNameE> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { #[cfg(feature = "net")] Host::Ipv4(addr) => f.debug_tuple("Ipv4").field(addr).finish(), #[cfg(feature = "net")] Host::Ipv6(addr) => f.debug_tuple("Ipv6").field(addr).finish(), #[cfg(not(feature = "net"))] Host::Ipv4() => f.debug_struct("Ipv4").finish_non_exhaustive(), #[cfg(not(feature = "net"))] Host::Ipv6() 
=> f.debug_struct("Ipv6").finish_non_exhaustive(), Host::IpvFuture => f.debug_struct("IpvFuture").finish_non_exhaustive(), Host::RegName(name) => f.debug_tuple("RegName").field(name).finish(), } } } impl Debug for Authority<'_, UserinfoE, RegNameE> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { f.debug_struct("Authority") .field("userinfo", &self.userinfo()) .field("host", &self.host()) .field("host_parsed", &self.host_parsed()) .field("port", &self.port()) .finish() } } impl Display for Authority<'_> { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> Result { Display::fmt(self.as_str(), f) } } fluent-uri-0.4.1/src/imp.rs000064400000000000000000001275031046102023000136520ustar 00000000000000#![allow(missing_debug_implementations)] use crate::{ component::{Authority, IAuthority, Scheme}, convert::ConvertError, parse::{self, ParseError}, pct_enc::{encoder::*, EStr, Encoder}, }; use borrow_or_share::{BorrowOrShare, Bos}; use core::{borrow::Borrow, cmp::Ordering, fmt, hash, num::NonZeroUsize, str}; #[cfg(feature = "alloc")] use crate::{ build::{ state::{NonRefStart, Start}, Builder, }, normalize::Normalizer, resolve::{self, ResolveError}, }; #[cfg(feature = "alloc")] use alloc::{borrow::ToOwned, string::String}; #[cfg(feature = "alloc")] use core::str::FromStr; #[cfg(feature = "net")] use crate::net::{Ipv4Addr, Ipv6Addr}; #[cfg(feature = "serde")] use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; pub trait Value: Default {} impl Value for &str {} #[cfg(feature = "alloc")] impl Value for String {} pub struct Constraints { pub ascii_only: bool, pub scheme_required: bool, } pub trait RiMaybeRef: Sized { type Val; type WithVal: RiMaybeRef; type UserinfoE: Encoder; type RegNameE: Encoder; type PathE: Encoder; type QueryE: Encoder; type FragmentE: Encoder; const CONSTRAINTS: Constraints; fn new(val: Self::Val, meta: Meta) -> Self; fn from_pair((val, meta): (Self::Val, Meta)) -> Self { Self::new(val, meta) } fn make_ref<'i, 'o>(&'i self) -> RmrRef<'o, 
'i> where Self::Val: BorrowOrShare<'i, 'o, str>; } #[cfg(feature = "alloc")] pub trait Ri: RiMaybeRef { type Ref: RiMaybeRef; } pub trait Parse { type Val; type Err; fn parse>(self) -> Result; } impl<'a> Parse for &'a str { type Val = &'a str; type Err = ParseError; fn parse>(self) -> Result { parse::parse(self.as_bytes(), R::CONSTRAINTS).map(|meta| R::new(self, meta)) } } #[cfg(feature = "alloc")] impl Parse for String { type Val = Self; type Err = (ParseError, Self); fn parse>(self) -> Result { match parse::parse(self.as_bytes(), R::CONSTRAINTS) { Ok(meta) => Ok(R::new(self, meta)), Err(e) => Err((e, self)), } } } #[derive(Clone, Copy, Default)] pub struct Meta { // The index of the trailing colon. pub scheme_end: Option, pub auth_meta: Option, pub path_bounds: (usize, usize), // One byte past the last byte of query. pub query_end: Option, } impl Meta { #[inline] pub fn query_or_path_end(&self) -> usize { self.query_end.map_or(self.path_bounds.1, |i| i.get()) } } #[derive(Clone, Copy, Default)] pub struct AuthMeta { pub host_bounds: (usize, usize), pub host_meta: HostMeta, } impl AuthMeta { pub const EMPTY: Self = Self { host_bounds: (0, 0), host_meta: HostMeta::RegName, }; } #[derive(Clone, Copy, Default)] pub enum HostMeta { Ipv4(#[cfg(feature = "net")] Ipv4Addr), Ipv6(#[cfg(feature = "net")] Ipv6Addr), IpvFuture, #[default] RegName, } pub trait PathEncoder: Encoder {} impl PathEncoder for Path {} impl PathEncoder for IPath {} macro_rules! cond { (if true { $($then:tt)* } else { $($else:tt)* }) => { $($then)* }; (if false { $($then:tt)* } else { $($else:tt)* }) => { $($else)* }; } macro_rules! 
ri_maybe_ref { ( Type = $Ty:ident, type_name = $ty:literal, variable_name = $var:literal, name = $name:literal, indefinite_article = $art:literal, description = $desc:literal, ascii_only = $ascii_only:literal, scheme_required = $scheme_required:tt, rfc = $rfc:literal, abnf_rule = ($abnf:literal, $abnf_link:literal), $( NonRefType = $NonRefTy:ident, non_ref_name = $nr_name:literal, non_ref_link = $nr_link:literal, abnf_rule_absolute = ($abnf_abs:literal, $abnf_abs_link:literal), )? $( RefType = $RefTy:ident, ref_name = $ref_name:literal, )? AuthorityType = $Authority:ident, UserinfoEncoderType = $UserinfoE:ident, RegNameEncoderType = $RegNameE:ident, PathEncoderType = $PathE:ident, QueryEncoderType = $QueryE:ident, FragmentEncoderType = $FragmentE:ident, ) => { #[doc = $desc] /// /// See the [crate-level documentation](crate#terminology) for an explanation of the above term(s). /// /// # Variants /// #[doc = concat!("Two variants of `", $ty, "` are available: ")] #[doc = concat!("`", $ty, "<&str>` (borrowed) and `", $ty, "` (owned).")] /// #[doc = concat!("`", $ty, "<&'a str>`")] /// outputs references with lifetime `'a` where possible /// (thanks to [`borrow-or-share`](borrow_or_share)): /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("// Keep a reference to the path after dropping the `", $ty, "`.")] #[doc = concat!("let path = ", $ty, "::parse(\"foo:bar\")?.path();")] /// assert_eq!(path, "bar"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` /// /// # Comparison /// #[doc = concat!("`", $ty, "`s")] /// are compared [lexicographically](Ord#lexicographical-comparison) /// by their byte values. Normalization is **not** performed prior to comparison. 
/// /// # Examples /// /// Parse and extract components from #[doc = concat!($art, " ", $name, ":")] /// /// ``` /// use fluent_uri::{ /// component::{Host, Scheme}, /// pct_enc::EStr, #[doc = concat!(" ", $ty, ",")] /// }; /// /// const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo"); /// /// let s = "foo://user@example.com:8042/over/there?name=ferret#nose"; #[doc = concat!("let ", $var, " = ", $ty, "::parse(s)?;")] /// #[doc = concat!("assert_eq!(", $var, ".scheme()", cond!(if $scheme_required { "" } else { ".unwrap()" }), ", SCHEME_FOO);")] /// #[doc = concat!("let auth = ", $var, ".authority().unwrap();")] /// assert_eq!(auth.as_str(), "user@example.com:8042"); /// assert_eq!(auth.userinfo().unwrap(), "user"); /// assert_eq!(auth.host(), "example.com"); /// assert!(matches!(auth.host_parsed(), Host::RegName(name) if name == "example.com")); /// assert_eq!(auth.port().unwrap(), "8042"); /// assert_eq!(auth.port_to_u16(), Ok(Some(8042))); /// #[doc = concat!("assert_eq!(", $var, ".path(), \"/over/there\");")] #[doc = concat!("assert_eq!(", $var, ".query().unwrap(), \"name=ferret\");")] #[doc = concat!("assert_eq!(", $var, ".fragment().unwrap(), \"nose\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` /// /// Parse into and convert between #[doc = concat!("`", $ty, "<&str>` and `", $ty, "`:")] /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// /// let s = "http://example.com/"; /// #[doc = concat!("// Parse into a `", $ty, "<&str>` from a string slice.")] #[doc = concat!("let ", $var, ": ", $ty, "<&str> = ", $ty, "::parse(s)?;")] /// #[doc = concat!("// Parse into a `", $ty, "` from an owned string.")] #[doc = concat!("let ", $var, "_owned: ", $ty, " = ", $ty, "::parse(s.to_owned()).map_err(|e| e.0)?;")] /// #[doc = concat!("// Convert a `", $ty, "<&str>` to `", $ty, "`.")] #[doc = concat!("let ", $var, "_owned: ", $ty, " = ", $var, ".to_owned();")] /// #[doc = concat!("// Borrow a `", $ty, "` as `", $ty, "<&str>`.")] #[doc = concat!("let ", 
$var, ": ", $ty, "<&str> = ", $var, "_owned.borrow();")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[derive(Clone, Copy)] pub struct $Ty { /// Value of the URI/IRI (reference). pub(crate) val: T, /// Metadata of the URI/IRI (reference). /// Should be identical to parser output with `val` as input. pub(crate) meta: Meta, } impl RiMaybeRef for $Ty { type Val = T; type WithVal = $Ty; type UserinfoE = $UserinfoE; type RegNameE = $RegNameE; type PathE = $PathE; type QueryE = $QueryE; type FragmentE = $FragmentE; const CONSTRAINTS: Constraints = Constraints { ascii_only: $ascii_only, scheme_required: $scheme_required, }; fn new(val: T, meta: Meta) -> Self { Self { val, meta } } fn make_ref<'i, 'o>(&'i self) -> RmrRef<'o, 'i> where Self::Val: BorrowOrShare<'i, 'o, str>, { RmrRef::new(self.as_str(), &self.meta) } } $( #[cfg(feature = "alloc")] impl> Ri for $Ty { type Ref = $RefTy; } )? impl $Ty { #[doc = concat!("Parses ", $art, " ", $name, " from a string into ", $art, " `", $ty, "`.")] /// /// The return type is /// #[doc = concat!("- `Result<", $ty, "<&str>, ParseError>` for `I = &str`;")] #[doc = concat!("- `Result<", $ty, ", (ParseError, String)>` for `I = String`.")] /// /// # Errors /// /// Returns `Err` if the string does not match the #[doc = concat!("[`", $abnf, "`][abnf] ABNF rule from RFC ", $rfc, ".")] /// #[doc = concat!("[abnf]: ", $abnf_link)] pub fn parse(input: I) -> Result where I: Parse, { input.parse() } } #[cfg(feature = "alloc")] impl $Ty { #[doc = concat!("Creates a new builder for ", $name, ".")] #[inline] pub fn builder() -> Builder { Builder::new() } #[doc = concat!("Borrows this `", $ty, "` as `", $ty, "<&str>`.")] #[allow(clippy::should_implement_trait)] #[inline] #[must_use] pub fn borrow(&self) -> $Ty<&str> { $Ty { val: &self.val, meta: self.meta, } } #[doc = concat!("Consumes this `", $ty, "` and yields the underlying [`String`].")] #[inline] #[must_use] pub fn into_string(self) -> String { self.val } } #[cfg(feature = "alloc")] impl 
$Ty<&str> { #[doc = concat!("Creates a new `", $ty, "` by cloning the contents of this `", $ty, "<&str>`.")] #[inline] #[must_use] pub fn to_owned(&self) -> $Ty { $Ty { val: self.val.to_owned(), meta: self.meta, } } } impl<'i, 'o, T: BorrowOrShare<'i, 'o, str>> $Ty { #[doc = concat!("Returns the ", $name, " as a string slice.")] #[must_use] pub fn as_str(&'i self) -> &'o str { self.val.borrow_or_share() } cond!(if $scheme_required { /// Returns the [scheme] component. /// /// Note that the scheme component is *case-insensitive*. /// See the documentation of [`Scheme`] for more details on comparison. /// /// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{component::Scheme, ", $ty, "};")] /// /// const SCHEME_HTTP: &Scheme = Scheme::new_or_panic("http"); /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/\")?;")] #[doc = concat!("assert_eq!(", $var, ".scheme(), SCHEME_HTTP);")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn scheme(&'i self) -> &'o Scheme { self.make_ref().scheme() } } else { /// Returns the optional [scheme] component. /// /// Note that the scheme component is *case-insensitive*. /// See the documentation of [`Scheme`] for more details on comparison. 
/// /// [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{component::Scheme, ", $ty, "};")] /// /// const SCHEME_HTTP: &Scheme = Scheme::new_or_panic("http"); /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/\")?;")] #[doc = concat!("assert_eq!(", $var, ".scheme(), Some(SCHEME_HTTP));")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"/path/to/file\")?;")] #[doc = concat!("assert_eq!(", $var, ".scheme(), None);")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn scheme(&'i self) -> Option<&'o Scheme> { self.make_ref().scheme_opt() } }); /// Returns the optional [authority] component. /// /// [authority]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/\")?;")] #[doc = concat!("assert!(", $var, ".authority().is_some());")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"mailto:user@example.com\")?;")] #[doc = concat!("assert!(", $var, ".authority().is_none());")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn authority(&'i self) -> Option<$Authority<'o>> { self.make_ref().authority().map(Authority::cast) } /// Returns the [path] component. /// /// The path component is always present, although it may be empty. /// /// The returned `EStr` slice has [extension methods] for the path component. 
/// /// [path]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 /// [extension methods]: EStr#impl-EStr-1 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/\")?;")] #[doc = concat!("assert_eq!(", $var, ".path(), \"/\");")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"mailto:user@example.com\")?;")] #[doc = concat!("assert_eq!(", $var, ".path(), \"user@example.com\");")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com\")?;")] #[doc = concat!("assert_eq!(", $var, ".path(), \"\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn path(&'i self) -> &'o EStr<$PathE> { self.make_ref().path().cast() } /// Returns the optional [query] component. /// /// [query]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.4 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{pct_enc::EStr, ", $ty, "};")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/?lang=en\")?;")] #[doc = concat!("assert_eq!(", $var, ".query(), Some(EStr::new_or_panic(\"lang=en\")));")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"ftp://192.0.2.1/\")?;")] #[doc = concat!("assert_eq!(", $var, ".query(), None);")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn query(&'i self) -> Option<&'o EStr<$QueryE>> { self.make_ref().query().map(EStr::cast) } /// Returns the optional [fragment] component. 
/// /// [fragment]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.5 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{pct_enc::EStr, ", $ty, "};")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/#usage\")?;")] #[doc = concat!("assert_eq!(", $var, ".fragment(), Some(EStr::new_or_panic(\"usage\")));")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"ftp://192.0.2.1/\")?;")] #[doc = concat!("assert_eq!(", $var, ".fragment(), None);")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn fragment(&'i self) -> Option<&'o EStr<$FragmentE>> { self.make_ref().fragment().map(EStr::cast) } } impl> $Ty { $( #[doc = concat!("Resolves the ", $name, " against the given base ", $nr_name)] #[doc = concat!("and returns the target ", $nr_name, ".")] /// #[doc = concat!("The base ", $nr_name)] /// **must** have no fragment, i.e., match the #[doc = concat!("[`", $abnf_abs, "`][abnf] ABNF rule from RFC ", $rfc, ".")] /// #[doc = concat!("To prepare a base ", $nr_name, ",")] /// you can use [`strip_fragment`], [`with_fragment`] or [`set_fragment`] to remove the fragment #[doc = concat!("from any ", $nr_name, ".")] /// Note that a base without fragment does **not** guarantee a successful resolution /// (see the **must** below). /// /// This method applies the reference resolution algorithm defined in /// [Section 5 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986#section-5), /// except for the following deviations: /// /// - If `base` has a [rootless] path and no authority, then /// `self` **must** either have a scheme, be empty, or start with `'#'`. /// - When the target has no authority and its path would start /// with `"//"`, the string `"/."` is prepended to the path. This closes a /// loophole in the original algorithm that resolving `".//@@"` against /// `"foo:/"` yields `"foo://@@"` which is not a valid URI/IRI. /// - Percent-encoded dot segments (e.g. `"%2E"` and `".%2e"`) are also removed. 
/// This closes a loophole in the original algorithm that resolving `".."` /// against `"foo:/bar/baz/.%2E/"` yields `"foo:/bar/baz/"`, while first normalizing /// the base and then resolving `".."` against it yields `"foo:/"`. /// - A slash (`'/'`) is appended to the base when it ends with a double-dot /// segment. This closes a loophole in the original algorithm that resolving /// `"."` against `"foo:/bar/.."` yields `"foo:/bar/"`, while first normalizing /// the base and then resolving `"."` against it yields `"foo:/"`. /// - When `base` has an [absolute] path and `self` has an empty path and /// no scheme nor authority, dot segments are removed from the base path before /// using it as the target path. This closes a loophole in the original algorithm /// that resolving `""` against `"foo:/."` yields `"foo:/."` in which /// dot segments are not removed. /// /// No normalization except the removal of dot segments will be performed. /// Use [`normalize`] if necessary. /// #[doc = concat!("[abnf]: ", $abnf_abs_link)] #[doc = concat!("[`strip_fragment`]: ", stringify!($NonRefTy), "::strip_fragment")] #[doc = concat!("[`with_fragment`]: ", stringify!($NonRefTy), "::with_fragment")] #[doc = concat!("[`set_fragment`]: ", stringify!($NonRefTy), "::set_fragment")] /// [rootless]: EStr::::is_rootless /// [absolute]: EStr::::is_absolute /// [`normalize`]: Self::normalize /// /// This method has the property that /// `self.resolve_against(base).map(|r| r.normalize()).ok()` equals /// `self.normalize().resolve_against(&base.normalize()).ok()`. /// /// If you need to resolve multiple references against a common base or configure the behavior /// of resolution, consider using [`Resolver`](crate::resolve::Resolver) instead. /// /// # Errors /// /// Returns `Err` if any of the above two **must**s is violated. 
/// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{", stringify!($NonRefTy), ", ", $ty, "};")] /// #[doc = concat!("let base = ", stringify!($NonRefTy), "::parse(\"http://example.com/foo/bar\")?;")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"baz\")?;")] #[doc = concat!("assert_eq!(", $var, ".resolve_against(&base).unwrap(), \"http://example.com/foo/baz\");")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"../baz\")?;")] #[doc = concat!("assert_eq!(", $var, ".resolve_against(&base).unwrap(), \"http://example.com/baz\");")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"?baz\")?;")] #[doc = concat!("assert_eq!(", $var, ".resolve_against(&base).unwrap(), \"http://example.com/foo/bar?baz\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[cfg(feature = "alloc")] pub fn resolve_against>( &self, base: &$NonRefTy, ) -> Result<$NonRefTy, ResolveError> { resolve::resolve(base.make_ref(), self.make_ref(), true).map(RiMaybeRef::from_pair) } )? #[doc = concat!("Normalizes the ", $name, ".")] /// /// This method applies syntax-based normalization described in /// [Section 6.2.2 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2) /// and [Section 5.3.2 of RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2), /// along with IPv6 address and default port normalization. /// This is effectively equivalent to taking the following steps in order: /// /// - Decode any percent-encoded octets that correspond to an allowed character which is not reserved. /// - Uppercase the hexadecimal digits within all percent-encoded octets. /// - Lowercase all ASCII characters within the scheme and the host except the percent-encoded octets. /// - Turn any IPv6 literal address into its canonical form as per /// [RFC 5952](https://datatracker.ietf.org/doc/html/rfc5952). /// - If the port is empty or equals the [scheme's default], remove it along with the `':'` delimiter. 
/// - If `self` has a scheme and an [absolute] path, apply the /// [`remove_dot_segments`] algorithm to the path, taking account of /// percent-encoded dot segments as described at [`UriRef::resolve_against`]. /// - If `self` has no authority and its path would start with /// `"//"`, prepend `"/."` to the path. /// /// This method is idempotent: `self.normalize()` equals `self.normalize().normalize()`. /// /// If you need to configure the behavior of normalization, consider using [`Normalizer`] instead. /// /// [`UriRef::resolve_against`]: crate::UriRef::resolve_against /// [scheme's default]: Scheme::default_port /// [absolute]: EStr::::is_absolute /// [`remove_dot_segments`]: https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"eXAMPLE://a/./b/../b/%63/%7bfoo%7d\")?;")] #[doc = concat!("assert_eq!(", $var, ".normalize(), \"example://a/b/c/%7Bfoo%7D\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[cfg(feature = "alloc")] #[must_use] pub fn normalize(&self) -> $Ty { Normalizer::new().normalize(self).unwrap() } cond!(if $scheme_required {} else { /// Checks whether a scheme component is present. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("assert!(", $ty, "::parse(\"http://example.com/\")?.has_scheme());")] #[doc = concat!("assert!(!", $ty, "::parse(\"/path/to/file\")?.has_scheme());")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn has_scheme(&self) -> bool { self.make_ref().has_scheme() } }); /// Checks whether an authority component is present. 
/// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("assert!(", $ty, "::parse(\"http://example.com/\")?.has_authority());")] #[doc = concat!("assert!(!", $ty, "::parse(\"mailto:user@example.com\")?.has_authority());")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn has_authority(&self) -> bool { self.make_ref().has_authority() } /// Checks whether a query component is present. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("assert!(", $ty, "::parse(\"http://example.com/?lang=en\")?.has_query());")] #[doc = concat!("assert!(!", $ty, "::parse(\"ftp://192.0.2.1/\")?.has_query());")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn has_query(&self) -> bool { self.make_ref().has_query() } /// Checks whether a fragment component is present. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("assert!(", $ty, "::parse(\"http://example.com/#usage\")?.has_fragment());")] #[doc = concat!("assert!(!", $ty, "::parse(\"ftp://192.0.2.1/\")?.has_fragment());")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn has_fragment(&self) -> bool { self.make_ref().has_fragment() } #[doc = concat!("Returns a slice of this ", $name)] /// with the fragment component removed. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::", $ty, ";")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/#fragment\")?;")] #[doc = concat!("assert_eq!(", $var, ".strip_fragment(), \"http://example.com/\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[must_use] pub fn strip_fragment(&self) -> $Ty<&str> { // Altering only the fragment does not change the metadata. RiMaybeRef::new(self.make_ref().strip_fragment(), self.meta) } #[doc = concat!("Creates a new ", $name)] /// by replacing the fragment component of `self` with the given one. 
/// /// The fragment component is removed when `opt.is_none()`. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{pct_enc::EStr, ", $ty, "};")] /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/\")?;")] /// assert_eq!( #[doc = concat!(" ", $var, ".with_fragment(Some(EStr::new_or_panic(\"fragment\"))),")] /// "http://example.com/#fragment" /// ); /// #[doc = concat!("let ", $var, " = ", $ty, "::parse(\"http://example.com/#fragment\")?;")] #[doc = concat!("assert_eq!(", $var, ".with_fragment(None), \"http://example.com/\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[cfg(feature = "alloc")] #[must_use] pub fn with_fragment(&self, opt: Option<&EStr<$FragmentE>>) -> $Ty { // Altering only the fragment does not change the metadata. RiMaybeRef::new(self.make_ref().with_fragment(opt.map(EStr::as_str)), self.meta) } } #[cfg(feature = "alloc")] impl $Ty { /// Replaces the fragment component of `self` with the given one. /// /// The fragment component is removed when `opt.is_none()`. /// /// # Examples /// /// ``` #[doc = concat!("use fluent_uri::{pct_enc::EStr, ", $ty, "};")] /// #[doc = concat!("let mut ", $var, " = ", $ty, "::parse(\"http://example.com/\")?.to_owned();")] /// #[doc = concat!($var, ".set_fragment(Some(EStr::new_or_panic(\"fragment\")));")] #[doc = concat!("assert_eq!(", $var, ", \"http://example.com/#fragment\");")] /// #[doc = concat!($var, ".set_fragment(None);")] #[doc = concat!("assert_eq!(", $var, ", \"http://example.com/\");")] /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` pub fn set_fragment(&mut self, opt: Option<&EStr<$FragmentE>>) { // Altering only the fragment does not change the metadata. 
RmrRef::set_fragment(&mut self.val, &self.meta, opt.map(EStr::as_str)) } } impl Default for $Ty { #[doc = concat!("Creates an empty ", $name, ".")] fn default() -> Self { Self { val: T::default(), meta: Meta::default(), } } } impl, U: Bos> PartialEq<$Ty> for $Ty { fn eq(&self, other: &$Ty) -> bool { self.as_str() == other.as_str() } } impl> PartialEq for $Ty { fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl> PartialEq<$Ty> for str { fn eq(&self, other: &$Ty) -> bool { self == other.as_str() } } impl> PartialEq<&str> for $Ty { fn eq(&self, other: &&str) -> bool { self.as_str() == *other } } impl> PartialEq<$Ty> for &str { fn eq(&self, other: &$Ty) -> bool { *self == other.as_str() } } impl> Eq for $Ty {} impl> hash::Hash for $Ty { fn hash(&self, state: &mut H) { self.as_str().hash(state); } } impl> PartialOrd for $Ty { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl> Ord for $Ty { fn cmp(&self, other: &Self) -> Ordering { self.as_str().cmp(other.as_str()) } } impl> AsRef for $Ty { fn as_ref(&self) -> &str { self.as_str() } } impl> Borrow for $Ty { fn borrow(&self) -> &str { self.as_str() } } impl<'a> TryFrom<&'a str> for $Ty<&'a str> { type Error = ParseError; /// Equivalent to [`parse`](Self::parse). #[inline] fn try_from(value: &'a str) -> Result { $Ty::parse(value) } } #[cfg(feature = "alloc")] impl TryFrom for $Ty { type Error = (ParseError, String); /// Equivalent to [`parse`](Self::parse). 
#[inline] fn try_from(value: String) -> Result { $Ty::parse(value) } } impl<'a> From<$Ty<&'a str>> for &'a str { #[doc = concat!("Equivalent to [`as_str`](", $ty, "::as_str).")] #[inline] fn from(value: $Ty<&'a str>) -> &'a str { value.val } } #[cfg(feature = "alloc")] impl<'a> From<$Ty> for String { #[doc = concat!("Equivalent to [`into_string`](", $ty, "::into_string).")] #[inline] fn from(value: $Ty) -> String { value.val } } #[cfg(feature = "alloc")] impl From<$Ty<&str>> for $Ty { /// Equivalent to [`to_owned`](Self::to_owned). #[inline] fn from(value: $Ty<&str>) -> Self { value.to_owned() } } #[cfg(feature = "alloc")] impl FromStr for $Ty { type Err = ParseError; #[doc = concat!("Equivalent to `", $ty, "::parse(s).map(|r| r.to_owned())`.")] #[inline] fn from_str(s: &str) -> Result { $Ty::parse(s).map(|r| r.to_owned()) } } impl> fmt::Debug for $Ty { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct($ty) .field("scheme", &self.scheme()) .field("authority", &self.authority()) .field("path", &self.path()) .field("query", &self.query()) .field("fragment", &self.fragment()) .finish() } } impl> fmt::Display for $Ty { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self.as_str(), f) } } #[cfg(feature = "serde")] impl> Serialize for $Ty { fn serialize(&self, serializer: S) -> Result where S: Serializer, { serializer.serialize_str(self.as_str()) } } #[cfg(feature = "serde")] impl<'de> Deserialize<'de> for $Ty<&'de str> { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = <&str>::deserialize(deserializer)?; $Ty::parse(s).map_err(|e| { de::Error::custom(format_args!( "failed to parse {s:?} as {}: {e}", $name )) }) } } #[cfg(feature = "serde")] impl<'de> Deserialize<'de> for $Ty { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = String::deserialize(deserializer)?; $Ty::parse(s).map_err(|(s, e)| { de::Error::custom(format_args!( "failed to parse {s:?} as {}: 
{e}", $name )) }) } } }; } /// References to the value and the metadata of an `RiMaybeRef`. #[derive(Clone, Copy)] pub struct RmrRef<'v, 'm> { val: &'v str, meta: &'m Meta, } impl<'v, 'm> RmrRef<'v, 'm> { pub fn new(val: &'v str, meta: &'m Meta) -> Self { Self { val, meta } } pub fn as_str(self) -> &'v str { self.val } fn slice(self, start: usize, end: usize) -> &'v str { &self.val[start..end] } fn eslice(self, start: usize, end: usize) -> &'v EStr { EStr::new_validated(self.slice(start, end)) } pub fn scheme_opt(self) -> Option<&'v Scheme> { let end = self.meta.scheme_end?.get(); Some(Scheme::new_validated(self.slice(0, end))) } pub fn scheme(self) -> &'v Scheme { let end = self.meta.scheme_end.map_or(0, |i| i.get()); Scheme::new_validated(self.slice(0, end)) } pub fn authority(self) -> Option> { let mut meta = self.meta.auth_meta?; let start = match self.meta.scheme_end { Some(i) => i.get() + 3, None => 2, }; let end = self.meta.path_bounds.0; meta.host_bounds.0 -= start; meta.host_bounds.1 -= start; Some(IAuthority::new(self.slice(start, end), meta)) } pub fn path(self) -> &'v EStr { self.eslice(self.meta.path_bounds.0, self.meta.path_bounds.1) } pub fn query(self) -> Option<&'v EStr> { let end = self.meta.query_end?.get(); Some(self.eslice(self.meta.path_bounds.1 + 1, end)) } fn fragment_start(self) -> Option { Some(self.meta.query_or_path_end()) .filter(|&i| i != self.val.len()) .map(|i| i + 1) } pub fn fragment(self) -> Option<&'v EStr> { self.fragment_start() .map(|i| self.eslice(i, self.val.len())) } #[cfg(feature = "alloc")] pub fn set_fragment(buf: &mut String, meta: &Meta, opt: Option<&str>) { buf.truncate(meta.query_or_path_end()); if let Some(s) = opt { buf.reserve_exact(s.len() + 1); buf.push('#'); buf.push_str(s); } } pub fn strip_fragment(self) -> &'v str { &self.val[..self.meta.query_or_path_end()] } #[cfg(feature = "alloc")] pub fn with_fragment(self, opt: Option<&str>) -> String { let stripped = self.strip_fragment(); if let Some(s) = opt { 
[stripped, "#", s].concat() } else { stripped.to_owned() } } #[inline] pub fn has_scheme(self) -> bool { self.meta.scheme_end.is_some() } #[inline] pub fn has_authority(self) -> bool { self.meta.auth_meta.is_some() } #[inline] pub fn has_query(self) -> bool { self.meta.query_end.is_some() } #[inline] pub fn has_fragment(self) -> bool { self.meta.query_or_path_end() != self.val.len() } pub fn ensure_has_scheme(self) -> Result<(), ConvertError> { if self.has_scheme() { Ok(()) } else { Err(ConvertError::NoScheme) } } pub fn ensure_ascii(self) -> Result<(), ConvertError> { match self.as_str().bytes().position(|x| !x.is_ascii()) { Some(index) => Err(ConvertError::NotAscii { index }), None => Ok(()), } } } ri_maybe_ref! { Type = Uri, type_name = "Uri", variable_name = "uri", name = "URI", indefinite_article = "a", description = "A URI.", ascii_only = true, scheme_required = true, rfc = 3986, abnf_rule = ("URI", "https://datatracker.ietf.org/doc/html/rfc3986#section-3"), RefType = UriRef, ref_name = "URI reference", AuthorityType = Authority, UserinfoEncoderType = Userinfo, RegNameEncoderType = RegName, PathEncoderType = Path, QueryEncoderType = Query, FragmentEncoderType = Fragment, } ri_maybe_ref! { Type = UriRef, type_name = "UriRef", variable_name = "uri_ref", name = "URI reference", indefinite_article = "a", description = "A URI reference, i.e., either a URI or a relative reference.", ascii_only = true, scheme_required = false, rfc = 3986, abnf_rule = ("URI-reference", "https://datatracker.ietf.org/doc/html/rfc3986#section-4.1"), NonRefType = Uri, non_ref_name = "URI", non_ref_link = "https://datatracker.ietf.org/doc/html/rfc3986#section-3", abnf_rule_absolute = ("absolute-URI", "https://datatracker.ietf.org/doc/html/rfc3986#section-4.3"), AuthorityType = Authority, UserinfoEncoderType = Userinfo, RegNameEncoderType = RegName, PathEncoderType = Path, QueryEncoderType = Query, FragmentEncoderType = Fragment, } ri_maybe_ref! 
{ Type = Iri, type_name = "Iri", variable_name = "iri", name = "IRI", indefinite_article = "an", description = "An IRI.", ascii_only = false, scheme_required = true, rfc = 3987, abnf_rule = ("IRI", "https://datatracker.ietf.org/doc/html/rfc3987#section-2.2"), RefType = IriRef, ref_name = "IRI reference", AuthorityType = IAuthority, UserinfoEncoderType = IUserinfo, RegNameEncoderType = IRegName, PathEncoderType = IPath, QueryEncoderType = IQuery, FragmentEncoderType = IFragment, } ri_maybe_ref! { Type = IriRef, type_name = "IriRef", variable_name = "iri_ref", name = "IRI reference", indefinite_article = "an", description = "An IRI reference, i.e., either a IRI or a relative reference.", ascii_only = false, scheme_required = false, rfc = 3987, abnf_rule = ("IRI-reference", "https://datatracker.ietf.org/doc/html/rfc3987#section-2.2"), NonRefType = Iri, non_ref_name = "IRI", non_ref_link = "https://datatracker.ietf.org/doc/html/rfc3987#section-2.2", abnf_rule_absolute = ("absolute-IRI", "https://datatracker.ietf.org/doc/html/rfc3987#section-2.2"), AuthorityType = IAuthority, UserinfoEncoderType = IUserinfo, RegNameEncoderType = IRegName, PathEncoderType = IPath, QueryEncoderType = IQuery, FragmentEncoderType = IFragment, } fluent-uri-0.4.1/src/lib.rs000064400000000000000000000101351046102023000136230ustar 00000000000000#![warn( future_incompatible, missing_debug_implementations, missing_docs, nonstandard_style, rust_2018_idioms, clippy::checked_conversions, clippy::if_not_else, clippy::ignored_unit_patterns, clippy::map_unwrap_or, clippy::missing_errors_doc, clippy::must_use_candidate, // clippy::redundant_closure_for_method_calls, clippy::redundant_else, clippy::semicolon_if_nothing_returned, // clippy::single_match_else, clippy::use_self, )] #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] #![no_std] //! A generic URI/IRI handling library compliant with [RFC 3986] and [RFC 3987]. //! //! [RFC 3986]: https://datatracker.ietf.org/doc/html/rfc3986 //! 
[RFC 3987]: https://datatracker.ietf.org/doc/html/rfc3987 //! //! **Examples:** [Parsing](Uri#examples). [Building](build::Builder#examples). //! [Reference resolution](UriRef::resolve_against). [Normalization](Uri::normalize). //! [Percent-decoding](crate::pct_enc::EStr#examples). //! [Percent-encoding](crate::pct_enc::EString#examples). //! //! # Terminology //! //! A *[URI reference]* is either a *[URI]* or a *[relative reference]*. If it starts with a *[scheme]* //! (like `http`, `ftp`, `mailto`, etc.) followed by a colon (`:`), it is a URI. For example, //! `http://example.com/` and `mailto:user@example.com` are URIs. Otherwise, it is //! a relative reference. For example, `//example.org/`, `/index.html`, `../`, `foo`, //! `?bar`, and `#baz` are relative references. //! //! An *[IRI]* (reference) is an internationalized version of URI (reference) //! which may contain non-ASCII characters. //! //! [URI]: https://datatracker.ietf.org/doc/html/rfc3986#section-3 //! [URI reference]: https://datatracker.ietf.org/doc/html/rfc3986#section-4.1 //! [IRI]: https://datatracker.ietf.org/doc/html/rfc3987#section-2 //! [relative reference]: https://datatracker.ietf.org/doc/html/rfc3986#section-4.2 //! [scheme]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.1 //! //! # Guidance for crate users //! //! Advice for designers of new URI schemes can be found in [RFC 7595]. //! Guidance on the specification of URI substructure in standards //! can be found in [RFC 8820]. The crate author recommends [RFC 9413] //! for further reading as the long-term interoperability //! of URI schemes may be of concern. //! //! [RFC 7595]: https://datatracker.ietf.org/doc/html/rfc7595 //! [RFC 8820]: https://datatracker.ietf.org/doc/html/rfc8820 //! [RFC 9413]: https://datatracker.ietf.org/doc/html/rfc9413 //! //! # Crate features //! //! - `std` (default): Implies `alloc` and `impl-error`. Required for [`Authority::socket_addrs`]. //! //! 
- `alloc`: Required for memory-allocating types and functions. //! //! - `impl-error`: Required for [`Error`] implementations. Disabling `std` //! while enabling `impl-error` requires a minimum Rust version of 1.81. //! //! - `net`: Required for IP address fields in [`Host`], for [`Builder::host`] to //! take an IP address as argument, and for [`Authority::socket_addrs`]. //! Disabling `std` while enabling `net` requires a minimum Rust version of 1.77. //! //! - `serde`: Required for [`Serialize`] and [`Deserialize`] implementations. //! //! [`Host`]: component::Host //! [`Builder::host`]: build::Builder::host //! [`Authority::socket_addrs`]: component::Authority::socket_addrs //! [`Error`]: core::error::Error //! [`Serialize`]: serde::Serialize //! [`Deserialize`]: serde::Deserialize #[cfg(feature = "alloc")] pub mod build; pub mod component; mod convert; mod fmt; mod imp; #[cfg(feature = "alloc")] pub mod normalize; mod parse; pub mod pct_enc; #[cfg(feature = "alloc")] pub mod resolve; mod utf8; pub use convert::ConvertError; pub use imp::{Iri, IriRef, Uri, UriRef}; pub use parse::{ParseError, ParseErrorKind}; #[cfg(feature = "std")] extern crate std; #[cfg(feature = "alloc")] extern crate alloc; #[cfg(all(feature = "net", not(feature = "std")))] use core::net; #[cfg(all(feature = "net", feature = "std"))] use std::net; #[cfg(all(feature = "impl-error", not(feature = "std")))] use core::error::Error; #[cfg(all(feature = "impl-error", feature = "std"))] use std::error::Error; fluent-uri-0.4.1/src/normalize.rs000064400000000000000000000271131046102023000150610ustar 00000000000000//! Module for normalization. 
use crate::{ component::Scheme, imp::{HostMeta, Meta, RiMaybeRef, RmrRef}, parse, pct_enc::{ self, encoder::{Data, IData}, Decode, DecodedChunk, DecodedUtf8Chunk, Encode, EncodedChunk, Encoder, Table, }, resolve, }; use alloc::string::String; use borrow_or_share::Bos; use core::{ fmt::{self, Write}, num::NonZeroUsize, }; /// An error occurred when normalizing a URI/IRI (reference). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum NormalizeError { /// An underflow occurred in path normalization. /// /// Used only when [`Normalizer::allow_path_underflow`] is set to `false`. PathUnderflow, } impl fmt::Display for NormalizeError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self { Self::PathUnderflow => "underflow occurred in path resolution", }; f.write_str(msg) } } #[cfg(feature = "impl-error")] impl crate::Error for NormalizeError {} /// A configurable URI/IRI (reference) normalizer. #[derive(Clone, Copy)] #[allow(missing_debug_implementations)] #[must_use] pub struct Normalizer { allow_path_underflow: bool, default_port_f: fn(&Scheme) -> Option, } impl Normalizer { /// Creates a new `Normalizer` with default configuration. pub fn new() -> Self { Self { allow_path_underflow: true, default_port_f: Scheme::default_port, } } /// Sets whether to allow underflow in path normalization. /// /// This defaults to `true`. A value of `false` is a deviation from the /// normalization methods described in /// [Section 6 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986/#section-6). 
/// /// # Examples /// /// ``` /// use fluent_uri::{normalize::{Normalizer, NormalizeError}, Uri}; /// /// let normalizer = Normalizer::new().allow_path_underflow(false); /// let uri = Uri::parse("http://example.com/..")?; /// /// assert_eq!(normalizer.normalize(&uri).unwrap_err(), NormalizeError::PathUnderflow); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` pub fn allow_path_underflow(mut self, value: bool) -> Self { self.allow_path_underflow = value; self } /// Sets the function with which to get the default port of a scheme. /// /// This defaults to [`Scheme::default_port`]. /// /// # Examples /// /// ``` /// use fluent_uri::{component::Scheme, normalize::Normalizer, Uri}; /// /// const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo"); /// /// let normalizer = Normalizer::new().default_port_with(|scheme| { /// if scheme == SCHEME_FOO { /// Some(4673) /// } else { /// scheme.default_port() /// } /// }); /// let uri = Uri::parse("foo://localhost:4673")?; /// /// assert_eq!(normalizer.normalize(&uri).unwrap(), "foo://localhost"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` pub fn default_port_with(mut self, f: fn(&Scheme) -> Option) -> Self { self.default_port_f = f; self } /// Normalizes the given URI/IRI (reference). /// /// See [`Uri::normalize`][crate::Uri::normalize] for the exact behavior of this method. /// /// # Errors /// /// Returns `Err` if an underflow occurred in path normalization /// when [`allow_path_underflow`] is set to `false`. 
/// /// [`allow_path_underflow`]: Self::allow_path_underflow pub fn normalize(&self, r: &R) -> Result, NormalizeError> where R::Val: Bos, { normalize( r.make_ref(), R::CONSTRAINTS.ascii_only, self.allow_path_underflow, self.default_port_f, ) .map(RiMaybeRef::from_pair) } } impl Default for Normalizer { fn default() -> Self { Self::new() } } pub(crate) fn normalize( r: RmrRef<'_, '_>, ascii_only: bool, allow_path_underflow: bool, default_port_f: fn(&Scheme) -> Option, ) -> Result<(String, Meta), NormalizeError> { // For "a://[::ffff:5:9]/" the capacity is not enough, // but it's fine since this rarely happens. let mut buf = String::with_capacity(r.as_str().len()); let path = r.path().as_str(); let mut path_buf = String::with_capacity(path.len()); let data_table = if ascii_only { Data::TABLE } else { IData::TABLE }; if r.has_scheme() && path.starts_with('/') { normalize_estr(&mut buf, path, false, data_table); let underflow_occurred = resolve::remove_dot_segments(&mut path_buf, 0, &[&buf]); if underflow_occurred && !allow_path_underflow { return Err(NormalizeError::PathUnderflow); } buf.clear(); } else { // Don't remove dot segments from relative reference or rootless path. normalize_estr(&mut path_buf, path, false, data_table); } let mut meta = Meta::default(); if let Some(scheme) = r.scheme_opt() { buf.push_str(scheme.as_str()); buf.make_ascii_lowercase(); meta.scheme_end = NonZeroUsize::new(buf.len()); buf.push(':'); } if let Some(auth) = r.authority() { buf.push_str("//"); if let Some(userinfo) = auth.userinfo() { normalize_estr(&mut buf, userinfo.as_str(), false, data_table); buf.push('@'); } let mut auth_meta = auth.meta(); auth_meta.host_bounds.0 = buf.len(); match auth_meta.host_meta { // An IPv4 address is always canonical. HostMeta::Ipv4(..) 
=> buf.push_str(auth.host()), #[cfg(feature = "net")] HostMeta::Ipv6(addr) => write!(buf, "[{addr}]").unwrap(), #[cfg(not(feature = "net"))] HostMeta::Ipv6() => { buf.push('['); write_v6(&mut buf, parse::parse_v6(&auth.host().as_bytes()[1..])); buf.push(']'); } HostMeta::IpvFuture => { let start = buf.len(); buf.push_str(auth.host()); buf[start..].make_ascii_lowercase(); } HostMeta::RegName => { let start = buf.len(); let host = auth.host(); normalize_estr(&mut buf, host, true, data_table); if buf.len() < start + host.len() { // Only reparse when the length is less than before. auth_meta.host_meta = parse::parse_v4_or_reg_name(&buf.as_bytes()[start..]); } } } auth_meta.host_bounds.1 = buf.len(); meta.auth_meta = Some(auth_meta); if let Some(port) = auth.port() { if !port.is_empty() { let mut eq_default = false; if let Some(scheme) = r.scheme_opt() { if let Some(default) = default_port_f(scheme) { eq_default = port.as_str().parse().ok() == Some(default); } } if !eq_default { buf.push(':'); buf.push_str(port.as_str()); } } } } meta.path_bounds.0 = buf.len(); // Make sure that the output is a valid URI/IRI reference. 
if r.has_scheme() && !r.has_authority() && path_buf.starts_with("//") { buf.push_str("/."); } buf.push_str(&path_buf); meta.path_bounds.1 = buf.len(); if let Some(query) = r.query() { buf.push('?'); const IQUERY_DATA: &Table = &IData::TABLE.or_iprivate(); let query_data_table = if ascii_only { Data::TABLE } else { IQUERY_DATA }; normalize_estr(&mut buf, query.as_str(), false, query_data_table); meta.query_end = NonZeroUsize::new(buf.len()); } if let Some(fragment) = r.fragment() { buf.push('#'); normalize_estr(&mut buf, fragment.as_str(), false, data_table); } Ok((buf, meta)) } fn normalize_estr(buf: &mut String, s: &str, to_ascii_lowercase: bool, table: &Table) { if table.allows_non_ascii() { Decode::new(s).decode_utf8(|chunk| match chunk { DecodedUtf8Chunk::Unencoded(s) => { let i = buf.len(); buf.push_str(s); if to_ascii_lowercase { buf[i..].make_ascii_lowercase(); } } DecodedUtf8Chunk::Decoded { valid, invalid } => { for chunk in Encode::new(table, valid) { match chunk { EncodedChunk::Unencoded(s) => { let i = buf.len(); buf.push_str(s); if to_ascii_lowercase { buf[i..].make_ascii_lowercase(); } } EncodedChunk::PctEncoded(s) => buf.push_str(s), } } for &x in invalid { buf.push_str(pct_enc::encode_byte(x)); } } }); } else { for chunk in Decode::new(s) { match chunk { DecodedChunk::Unencoded(s) => { let i = buf.len(); buf.push_str(s); if to_ascii_lowercase { buf[i..].make_ascii_lowercase(); } } DecodedChunk::PctDecoded(mut x) => { if table.allows_ascii(x) { if to_ascii_lowercase { x.make_ascii_lowercase(); } buf.push(x as char); } else { buf.push_str(pct_enc::encode_byte(x)); } } } } } } // Taken from `impl Display for Ipv6Addr`. 
#[cfg(not(feature = "net"))] fn write_v6(buf: &mut String, segments: [u16; 8]) { if let [0, 0, 0, 0, 0, 0xffff, ab, cd] = segments { let [a, b] = ab.to_be_bytes(); let [c, d] = cd.to_be_bytes(); write!(buf, "::ffff:{a}.{b}.{c}.{d}").unwrap(); } else { #[derive(Copy, Clone, Default)] struct Span { start: usize, len: usize, } // Find the inner 0 span let zeroes = { let mut longest = Span::default(); let mut current = Span::default(); for (i, &segment) in segments.iter().enumerate() { if segment == 0 { if current.len == 0 { current.start = i; } current.len += 1; if current.len > longest.len { longest = current; } } else { current = Span::default(); } } longest }; /// Write a colon-separated part of the address #[inline] fn write_subslice(buf: &mut String, chunk: &[u16]) { if let Some((first, tail)) = chunk.split_first() { write!(buf, "{first:x}").unwrap(); for segment in tail { write!(buf, ":{segment:x}").unwrap(); } } } if zeroes.len > 1 { write_subslice(buf, &segments[..zeroes.start]); buf.push_str("::"); write_subslice(buf, &segments[zeroes.start + zeroes.len..]); } else { write_subslice(buf, &segments); } } } fluent-uri-0.4.1/src/parse.rs000064400000000000000000000410471046102023000141750ustar 00000000000000use crate::{ imp::{AuthMeta, Constraints, HostMeta, Meta}, pct_enc::{table::*, Table, OCTET_TABLE_LO}, utf8, }; use core::{ num::NonZeroUsize, ops::{Deref, DerefMut}, str, }; /// Detailed cause of a [`ParseError`]. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ParseErrorKind { /// Invalid percent-encoded octet that is either non-hexadecimal or incomplete. /// /// The error index points to the percent character "%" of the octet. InvalidPctEncodedOctet, /// Unexpected character that is not allowed by the URI/IRI syntax. /// /// The error index points to the first byte of the character. UnexpectedChar, /// Invalid IPv6 address. /// /// The error index points to the first byte of the address. 
InvalidIpv6Addr, } /// An error occurred when parsing a URI/IRI (reference). #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ParseError { pub(crate) index: usize, pub(crate) kind: ParseErrorKind, } impl ParseError { /// Returns the index at which the error occurred. #[must_use] pub fn index(&self) -> usize { self.index } /// Returns the detailed cause of the error. #[must_use] pub fn kind(&self) -> ParseErrorKind { self.kind } } #[cfg(feature = "impl-error")] impl crate::Error for ParseError {} type Result = core::result::Result; /// Returns immediately with an error. macro_rules! err { ($index:expr, $kind:ident) => { return Err(crate::parse::ParseError { index: $index, kind: crate::parse::ParseErrorKind::$kind, }) }; } pub(crate) fn parse(bytes: &[u8], constraints: Constraints) -> Result { let mut parser = Parser { constraints, reader: Reader::new(bytes), out: Meta::default(), }; parser.parse_from_scheme()?; Ok(parser.out) } /// URI/IRI parser. /// /// # Invariants /// /// `pos <= len`, `pos` is non-decreasing and on the boundary of a UTF-8 code point. /// /// # Preconditions and guarantees /// /// Before parsing, ensure that `pos == 0`, `out` is default initialized /// and `bytes` is valid UTF-8. /// /// Start and finish parsing by calling `parse_from_scheme`. /// The following are guaranteed when parsing succeeds: /// /// - All output indexes are within bounds, correctly ordered /// and on the boundary of a UTF-8 code point. /// - All URI/IRI components defined by output indexes are validated. 
struct Parser<'a> { constraints: Constraints, reader: Reader<'a>, out: Meta, } struct Reader<'a> { bytes: &'a [u8], pos: usize, } impl<'a> Deref for Parser<'a> { type Target = Reader<'a>; fn deref(&self) -> &Self::Target { &self.reader } } impl DerefMut for Parser<'_> { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.reader } } enum PathKind { General, AbEmpty, ContinuedNoScheme, } enum Seg { // *1":" 1*4HEXDIG Normal(u16, bool), // "::" Ellipsis, // *1":" 1*4HEXDIG "." MaybeV4(bool), // ":" SingleColon, } impl<'a> Reader<'a> { fn new(bytes: &'a [u8]) -> Self { Reader { bytes, pos: 0 } } fn len(&self) -> usize { self.bytes.len() } fn has_remaining(&self) -> bool { self.pos < self.len() } fn peek(&self, i: usize) -> Option { self.bytes.get(self.pos + i).copied() } // Any call to this method must keep the invariants. fn skip(&mut self, n: usize) { // INVARIANT: `pos` is non-decreasing. self.pos += n; debug_assert!(self.pos <= self.len()); } // Returns `true` iff any byte is read. fn read(&mut self, table: &Table) -> Result { let start = self.pos; self._read(table, |_, _| {})?; Ok(self.pos > start) } fn _read(&mut self, table: &Table, mut f: impl FnMut(usize, u32)) -> Result<()> { let mut i = self.pos; let allow_pct_encoded = table.allows_pct_encoded(); let allow_non_ascii = table.allows_non_ascii(); while i < self.len() { let x = self.bytes[i]; if allow_pct_encoded && x == b'%' { let [hi, lo, ..] = self.bytes[i + 1..] else { err!(i, InvalidPctEncodedOctet); }; if !(HEXDIG.allows_ascii(hi) & HEXDIG.allows_ascii(lo)) { err!(i, InvalidPctEncodedOctet); } i += 3; } else if allow_non_ascii { let (x, len) = utf8::next_code_point(self.bytes, i); if !table.allows_code_point(x) { break; } f(i, x); i += len; } else { if !table.allows_ascii(x) { break; } f(i, x as u32); i += 1; } } // INVARIANT: `i` is non-decreasing. 
self.pos = i; Ok(()) } fn read_str(&mut self, s: &str) -> bool { if self.bytes[self.pos..].starts_with(s.as_bytes()) { // INVARIANT: The remaining bytes start with `s` so it's fine to skip `s.len()`. self.skip(s.len()); true } else { false } } fn read_v6(&mut self) -> Option<[u16; 8]> { let mut segs = [0; 8]; let mut ellipsis_i = 8; let mut i = 0; while i < 8 { match self.read_v6_segment() { Some(Seg::Normal(seg, colon)) => { if colon == (i == 0 || i == ellipsis_i) { // Leading colon, triple colons, or no colon. return None; } segs[i] = seg; i += 1; } Some(Seg::Ellipsis) => { if ellipsis_i != 8 { // Multiple ellipses. return None; } ellipsis_i = i; } Some(Seg::MaybeV4(colon)) => { if i > 6 || colon == (i == ellipsis_i) { // Not enough space, triple colons, or no colon. return None; } let octets = self.read_v4()?.to_be_bytes(); segs[i] = u16::from_be_bytes([octets[0], octets[1]]); segs[i + 1] = u16::from_be_bytes([octets[2], octets[3]]); i += 2; break; } Some(Seg::SingleColon) => return None, None => break, } } if ellipsis_i == 8 { // No ellipsis. if i != 8 { // Too short. return None; } } else if i == 8 { // Eliding nothing. return None; } else { // Shift the segments after the ellipsis to the right. for j in (ellipsis_i..i).rev() { segs[8 - (i - j)] = segs[j]; segs[j] = 0; } } Some(segs) } fn read_v6_segment(&mut self) -> Option { let colon = self.read_str(":"); if !self.has_remaining() { return colon.then_some(Seg::SingleColon); } let first = self.peek(0).unwrap(); let mut x = match OCTET_TABLE_LO[first as usize] { v if v < 128 => v as u16, _ => { return colon.then(|| { if first == b':' { // INVARIANT: Skipping ":" is fine. self.skip(1); Seg::Ellipsis } else { Seg::SingleColon } }); } }; let mut i = 1; while i < 4 { let Some(b) = self.peek(i) else { // INVARIANT: Skipping `i` hexadecimal digits is fine. self.skip(i); return None; }; match OCTET_TABLE_LO[b as usize] { v if v < 128 => { x = (x << 4) | v as u16; i += 1; continue; } _ if b == b'.' 
=> return Some(Seg::MaybeV4(colon)), _ => break, } } // INVARIANT: Skipping `i` hexadecimal digits is fine. self.skip(i); Some(Seg::Normal(x, colon)) } fn read_v4(&mut self) -> Option { let mut addr = self.read_v4_octet()? << 24; for i in (0..3).rev() { if !self.read_str(".") { return None; } addr |= self.read_v4_octet()? << (i * 8); } Some(addr) } fn read_v4_octet(&mut self) -> Option { let mut res = self.peek_digit(0)?; if res == 0 { // INVARIANT: Skipping "0" is fine. self.skip(1); return Some(0); } for i in 1..3 { let Some(x) = self.peek_digit(i) else { // INVARIANT: Skipping `i` digits is fine. self.skip(i); return Some(res); }; res = res * 10 + x; } // INVARIANT: Skipping 3 digits is fine. self.skip(3); u8::try_from(res).is_ok().then_some(res) } fn peek_digit(&self, i: usize) -> Option { self.peek(i).and_then(|x| (x as char).to_digit(10)) } fn read_port(&mut self) { if self.read_str(":") { let mut i = 0; while self.peek_digit(i).is_some() { i += 1; } // INVARIANT: Skipping `i` digits is fine. self.skip(i); } } fn read_ip_literal(&mut self) -> Result> { if !self.read_str("[") { return Ok(None); } let start = self.pos; let meta = if let Some(_addr) = self.read_v6() { HostMeta::Ipv6( #[cfg(feature = "net")] _addr.into(), ) } else if self.pos == start { self.read_ipv_future()?; HostMeta::IpvFuture } else { err!(start, InvalidIpv6Addr); }; if !self.read_str("]") { err!(self.pos, UnexpectedChar); } Ok(Some(meta)) } fn read_ipv_future(&mut self) -> Result<()> { if let Some(b'v' | b'V') = self.peek(0) { // INVARIANT: Skipping "v" or "V" is fine. self.skip(1); if self.read(HEXDIG)? && self.read_str(".") && self.read(IPV_FUTURE)? 
{ return Ok(()); } } err!(self.pos, UnexpectedChar); } } pub(crate) fn parse_v4_or_reg_name(bytes: &[u8]) -> HostMeta { let mut reader = Reader::new(bytes); match reader.read_v4() { Some(_addr) if !reader.has_remaining() => HostMeta::Ipv4( #[cfg(feature = "net")] _addr.into(), ), _ => HostMeta::RegName, } } #[cfg(all(feature = "alloc", not(feature = "net")))] pub(crate) fn parse_v6(bytes: &[u8]) -> [u16; 8] { Reader::new(bytes).read_v6().unwrap() } impl Parser<'_> { fn select(&self, for_uri: T, for_iri: T) -> T { if self.constraints.ascii_only { for_uri } else { for_iri } } fn read_v4_or_reg_name(&mut self) -> Result { let reg_name_table = self.select(REG_NAME, IREG_NAME); Ok(match (self.read_v4(), self.read(reg_name_table)?) { (Some(_addr), false) => HostMeta::Ipv4( #[cfg(feature = "net")] _addr.into(), ), _ => HostMeta::RegName, }) } fn read_host(&mut self) -> Result { match self.read_ip_literal()? { Some(host) => Ok(host), None => self.read_v4_or_reg_name(), } } fn parse_from_scheme(&mut self) -> Result<()> { self.read(SCHEME)?; if self.peek(0) == Some(b':') { // Scheme starts with a letter. if self.pos > 0 && self.bytes[0].is_ascii_alphabetic() { self.out.scheme_end = NonZeroUsize::new(self.pos); } else { err!(0, UnexpectedChar); } // INVARIANT: Skipping ":" is fine. self.skip(1); return if self.read_str("//") { self.parse_from_authority() } else { self.parse_from_path(PathKind::General) }; } else if self.constraints.scheme_required { err!(self.pos, UnexpectedChar); } else if self.pos == 0 { // Nothing read. if self.read_str("//") { return self.parse_from_authority(); } } // Scheme chars are valid for path. self.parse_from_path(PathKind::ContinuedNoScheme) } fn parse_from_authority(&mut self) -> Result<()> { let host; let mut colon_cnt = 0; let mut colon_i = 0; let auth_start = self.pos; let userinfo_table = self.select(USERINFO, IUSERINFO); // `userinfo_table` contains userinfo, registered name, ':', and port. 
self._read(userinfo_table, |i, x| { if x == ':' as u32 { colon_cnt += 1; colon_i = i; } })?; if self.peek(0) == Some(b'@') { // Userinfo present. // INVARIANT: Skipping "@" is fine. self.skip(1); let host_start = self.pos; let meta = self.read_host()?; host = (host_start, self.pos, meta); self.read_port(); } else if self.pos == auth_start { // Nothing read. We're now at the start of an IP literal or the path. if let Some(meta) = self.read_ip_literal()? { host = (auth_start, self.pos, meta); self.read_port(); } else { // Empty authority. host = (self.pos, self.pos, HostMeta::RegName); } } else { // The whole authority read. Try to parse the host and port. let host_end = match colon_cnt { // All host. 0 => self.pos, // Host and port. 1 => { for i in colon_i + 1..self.pos { if !self.bytes[i].is_ascii_digit() { err!(i, UnexpectedChar); } } colon_i } // Multiple colons. _ => err!(colon_i, UnexpectedChar), }; let meta = parse_v4_or_reg_name(&self.bytes[auth_start..host_end]); host = (auth_start, host_end, meta); } self.out.auth_meta = Some(AuthMeta { host_bounds: (host.0, host.1), host_meta: host.2, }); self.parse_from_path(PathKind::AbEmpty) } fn parse_from_path(&mut self, kind: PathKind) -> Result<()> { let path_table = self.select(PATH, IPATH); self.out.path_bounds = match kind { PathKind::General => { let start = self.pos; self.read(path_table)?; (start, self.pos) } PathKind::AbEmpty => { let start = self.pos; // Either empty or starting with '/'. if self.read(path_table)? && self.bytes[start] != b'/' { err!(start, UnexpectedChar); } (start, self.pos) } PathKind::ContinuedNoScheme => { let segment_table = self.select(SEGMENT_NZ_NC, ISEGMENT_NZ_NC); self.read(segment_table)?; if self.peek(0) == Some(b':') { // In a relative reference, the first path // segment cannot contain a colon character. 
err!(self.pos, UnexpectedChar); } self.read(path_table)?; (0, self.pos) } }; if self.read_str("?") { let query_table = self.select(QUERY, IQUERY); self.read(query_table)?; self.out.query_end = NonZeroUsize::new(self.pos); } if self.read_str("#") { let fragment_table = self.select(FRAGMENT, IFRAGMENT); self.read(fragment_table)?; } if self.has_remaining() { err!(self.pos, UnexpectedChar); } Ok(()) } } fluent-uri-0.4.1/src/pct_enc/encoder.rs000064400000000000000000000052671046102023000161210ustar 00000000000000#![allow(missing_debug_implementations)] //! Percent-encoders for URI/IRI components. use super::{table::*, Encoder, Table}; /// An encoder for URI userinfo. #[derive(Clone, Copy)] pub struct Userinfo(()); impl Encoder for Userinfo { const TABLE: &'static Table = USERINFO; } /// An encoder for IRI userinfo. #[derive(Clone, Copy)] pub struct IUserinfo(()); impl Encoder for IUserinfo { const TABLE: &'static Table = IUSERINFO; } /// An encoder for URI registered name. #[derive(Clone, Copy)] #[cfg_attr(fuzzing, derive(PartialEq, Eq))] pub struct RegName(()); impl Encoder for RegName { const TABLE: &'static Table = REG_NAME; } /// An encoder for IRI registered name. #[derive(Clone, Copy)] pub struct IRegName(()); impl Encoder for IRegName { const TABLE: &'static Table = IREG_NAME; } /// An encoder for URI/IRI port. #[derive(Clone, Copy)] pub struct Port(()); impl Encoder for Port { const TABLE: &'static Table = DIGIT; } /// An encoder for URI path. /// /// `EStr` has [extension methods] for the path component. /// /// [extension methods]: super::EStr#impl-EStr-1 #[derive(Clone, Copy)] pub struct Path(()); impl Encoder for Path { const TABLE: &'static Table = PATH; } /// An encoder for IRI path. /// /// `EStr` has [extension methods] for the path component. /// /// [extension methods]: super::EStr#impl-EStr-1 #[derive(Clone, Copy)] pub struct IPath(()); impl Encoder for IPath { const TABLE: &'static Table = IPATH; } /// An encoder for URI query. 
#[derive(Clone, Copy)] pub struct Query(()); impl Encoder for Query { const TABLE: &'static Table = QUERY; } /// An encoder for IRI query. #[derive(Clone, Copy)] pub struct IQuery(()); impl Encoder for IQuery { const TABLE: &'static Table = IQUERY; } /// An encoder for URI fragment. #[derive(Clone, Copy)] pub struct Fragment(()); impl Encoder for Fragment { const TABLE: &'static Table = FRAGMENT; } /// An encoder for IRI fragment. #[derive(Clone, Copy)] pub struct IFragment(()); impl Encoder for IFragment { const TABLE: &'static Table = IFRAGMENT; } /// An encoder for URI data which preserves only [unreserved] characters /// and encodes the others. /// /// [unreserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 #[derive(Clone, Copy)] pub struct Data(()); impl Encoder for Data { const TABLE: &'static Table = &UNRESERVED.or_pct_encoded(); } /// An encoder for IRI data which preserves only [unreserved] characters /// and encodes the others. /// /// [unreserved]: https://datatracker.ietf.org/doc/html/rfc3987#section-2.1 #[derive(Clone, Copy)] pub struct IData(()); impl Encoder for IData { const TABLE: &'static Table = &UNRESERVED.or_pct_encoded().or_ucschar(); } fluent-uri-0.4.1/src/pct_enc/estring.rs000064400000000000000000000234711046102023000161520ustar 00000000000000use super::{Assert, EStr, Encoder}; use crate::{pct_enc::Encode, utf8::Utf8Chunks}; use alloc::{borrow::ToOwned, string::String}; use core::{borrow::Borrow, cmp::Ordering, fmt, hash, marker::PhantomData, ops::Deref}; /// A percent-encoded, growable string. /// /// The borrowed counterpart of `EString` is [`EStr`]. /// See its documentation for the meaning of the type parameter `E`. /// /// # Comparison /// /// `EString`s are compared [lexicographically](Ord#lexicographical-comparison) /// by their byte values. Normalization is **not** performed prior to comparison. 
/// /// # Examples /// /// Encode key-value pairs to a query string and use it to build a URI reference: /// /// ``` /// use fluent_uri::{ /// pct_enc::{ /// encoder::{Data, Query}, /// EStr, EString, Encoder, Table, /// }, /// UriRef, /// }; /// /// let pairs = [("name", "张三"), ("speech", "¡Olé!")]; /// let mut buf = EString::::new(); /// for (k, v) in pairs { /// if !buf.is_empty() { /// buf.push('&'); /// } /// /// // WARNING: Absolutely do not confuse data with delimiters! /// // Use `Data` (or `IData`) to encode data contained in a URI /// // (or an IRI) unless you know what you're doing! /// buf.encode_str::(k); /// buf.push('='); /// buf.encode_str::(v); /// } /// /// assert_eq!(buf, "name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"); /// /// let uri_ref = UriRef::builder() /// .path(EStr::EMPTY) /// .query(&buf) /// .build() /// .unwrap(); /// assert_eq!(uri_ref.as_str(), "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"); /// ``` /// /// Encode a path whose segments may contain the slash (`'/'`) character /// by using a custom sub-encoder: /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EString, Encoder, Table}; /// /// struct PathSegment; /// /// impl Encoder for PathSegment { /// const TABLE: &'static Table = &Path::TABLE.sub(&Table::new(b"/")); /// } /// /// let mut path = EString::::new(); /// path.push('/'); /// path.encode_str::("foo/bar"); /// /// assert_eq!(path, "/foo%2Fbar"); /// ``` #[derive(Clone, Default)] pub struct EString { pub(crate) buf: String, encoder: PhantomData, } impl Deref for EString { type Target = EStr; fn deref(&self) -> &EStr { EStr::new_validated(&self.buf) } } impl EString { pub(crate) fn new_validated(buf: String) -> Self { Self { buf, encoder: PhantomData, } } /// Creates a new empty `EString`. #[must_use] pub fn new() -> Self { Self::new_validated(String::new()) } /// Creates a new empty `EString` with at least the specified capacity. 
#[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        Self::new_validated(String::with_capacity(capacity))
    }

    /// Coerces to an `EStr` slice.
    #[must_use]
    pub fn as_estr(&self) -> &EStr<E> {
        self
    }

    /// Returns this `EString`'s capacity, in bytes.
    #[must_use]
    pub fn capacity(&self) -> usize {
        self.buf.capacity()
    }

    /// Encodes a string with a sub-encoder and appends the result onto the end of this `EString`.
    ///
    /// A character will be preserved if `SubE::TABLE` [allows] it and percent-encoded otherwise.
    ///
    /// In most cases, use [`Data`] (for URI) or [`IData`] (for IRI) as the sub-encoder.
    /// When using other sub-encoders, make sure that `SubE::TABLE` does not [allow][allows]
    /// the component delimiters that delimit the data.
    ///
    /// Note that this method will **not** encode `U+0020` (space) as `U+002B` (+).
    ///
    /// [allows]: super::Table::allows
    /// [`Data`]: super::encoder::Data
    /// [`IData`]: super::encoder::IData
    ///
    /// # Panics
    ///
    /// Panics at compile time if `SubE` is not a [sub-encoder](Encoder#sub-encoders) of `E`,
    /// or if `SubE::TABLE` does not [allow percent-encoded octets].
    ///
    /// [allow percent-encoded octets]: super::Table::allows_pct_encoded
    pub fn encode_str<SubE: Encoder>(&mut self, s: &str) {
        () = Assert::<SubE, E>::L_IS_SUB_ENCODER_OF_R;
        () = EStr::<SubE>::ASSERT_ALLOWS_PCT_ENCODED;

        for chunk in Encode::new(SubE::TABLE, s) {
            self.buf.push_str(chunk.as_str());
        }
    }

    /// Encodes a byte sequence with a sub-encoder and appends the result onto the end of this `EString`.
    ///
    /// A byte will be preserved if it is part of a UTF-8-encoded character
    /// that `SubE::TABLE` [allows] and percent-encoded otherwise.
    ///
    /// In most cases, use [`Data`] (for URI) or [`IData`] (for IRI) as the sub-encoder.
    /// When using other sub-encoders, make sure that `SubE::TABLE` does not [allow][allows]
    /// the component delimiters that delimit the data.
    ///
    /// Note that this method will **not** encode `0x20` (space) as `U+002B` (+).
    ///
    /// If you need to encode a string, use [`encode_str`][Self::encode_str] instead.
    ///
    /// [allows]: super::Table::allows
    /// [`Data`]: super::encoder::Data
    /// [`IData`]: super::encoder::IData
    ///
    /// # Deprecation
    ///
    /// This method is deprecated because percent-encoding non-UTF-8 bytes is
    /// a non-standard operation. If you're developing a new protocol, use
    /// other encodings such as Base64 instead. If you absolutely must, here's
    /// a workaround:
    ///
    /// ```
    /// use fluent_uri::pct_enc::{encoder::Path, EStr, EString};
    ///
    /// let mut buf = EString::<Path>::new();
    ///
    /// for chunk in b"D\xFCrst".utf8_chunks() {
    ///     buf.encode_str::<Path>(chunk.valid());
    ///     for &x in chunk.invalid() {
    ///         buf.push_estr(EStr::encode_byte(x));
    ///     }
    /// }
    ///
    /// assert_eq!(buf, "D%FCrst");
    /// ```
    ///
    /// # Panics
    ///
    /// Panics at compile time if `SubE` is not a [sub-encoder](Encoder#sub-encoders) of `E`,
    /// or if `SubE::TABLE` does not [allow percent-encoded octets].
    ///
    /// [allow percent-encoded octets]: super::Table::allows_pct_encoded
    #[deprecated = "use `<[u8]>::utf8_chunks`, `EString::encode_str`, `EStr::encode_byte`, and `EString::push_estr` instead"]
    pub fn encode_bytes<SubE: Encoder>(&mut self, bytes: &[u8]) {
        () = Assert::<SubE, E>::L_IS_SUB_ENCODER_OF_R;
        () = EStr::<SubE>::ASSERT_ALLOWS_PCT_ENCODED;

        for chunk in Utf8Chunks::new(bytes) {
            for chunk in Encode::new(SubE::TABLE, chunk.valid()) {
                self.buf.push_str(chunk.as_str());
            }
            for &x in chunk.invalid() {
                self.buf.push_str(super::encode_byte(x));
            }
        }
    }

    /// Appends an unencoded character onto the end of this `EString`.
    ///
    /// # Panics
    ///
    /// Panics if `E::TABLE` does not [allow] the character.
    ///
    /// [allow]: super::Table::allows
    pub fn push(&mut self, ch: char) {
        assert!(E::TABLE.allows(ch), "table does not allow the char");
        self.buf.push(ch);
    }

    /// Appends an `EStr` slice onto the end of this `EString`.
    pub fn push_estr(&mut self, s: &EStr<E>) {
        self.buf.push_str(s.as_str());
    }

    /// Truncates this `EString`, removing all contents.
pub fn clear(&mut self) { self.buf.clear(); } /// Consumes this `EString` and yields the underlying `String`. #[must_use] pub fn into_string(self) -> String { self.buf } } impl AsRef> for EString { fn as_ref(&self) -> &EStr { self } } impl AsRef for EString { fn as_ref(&self) -> &str { &self.buf } } impl Borrow> for EString { fn borrow(&self) -> &EStr { self } } impl From<&EStr> for EString { fn from(s: &EStr) -> Self { s.to_owned() } } impl PartialEq for EString { fn eq(&self, other: &Self) -> bool { self.as_str() == other.as_str() } } impl PartialEq> for EString { fn eq(&self, other: &EStr) -> bool { self.as_str() == other.as_str() } } impl PartialEq> for EStr { fn eq(&self, other: &EString) -> bool { self.as_str() == other.as_str() } } impl PartialEq<&EStr> for EString { fn eq(&self, other: &&EStr) -> bool { self.as_str() == other.as_str() } } impl PartialEq> for &EStr { fn eq(&self, other: &EString) -> bool { self.as_str() == other.as_str() } } impl PartialEq for EString { fn eq(&self, other: &str) -> bool { self.as_str() == other } } impl PartialEq> for str { fn eq(&self, other: &EString) -> bool { self == other.as_str() } } impl PartialEq<&str> for EString { fn eq(&self, other: &&str) -> bool { self.as_str() == *other } } impl PartialEq> for &str { fn eq(&self, other: &EString) -> bool { *self == other.as_str() } } impl Eq for EString {} impl hash::Hash for EString { fn hash(&self, state: &mut H) { self.buf.hash(state); } } impl PartialOrd for EString { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for EString { fn cmp(&self, other: &Self) -> Ordering { self.inner.cmp(&other.inner) } } impl fmt::Debug for EString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.as_str().fmt(f) } } impl fmt::Display for EString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.as_str().fmt(f) } } fluent-uri-0.4.1/src/pct_enc/mod.rs000064400000000000000000000636461046102023000152660ustar 00000000000000//! 
Percent-encoding utilities. pub mod encoder; #[cfg(feature = "alloc")] mod estring; pub(crate) mod table; #[cfg(feature = "alloc")] pub use estring::EString; pub use table::Table; use crate::imp::PathEncoder; use core::{cmp::Ordering, hash, iter::FusedIterator, marker::PhantomData, str}; use ref_cast::{ref_cast_custom, RefCastCustom}; #[cfg(feature = "alloc")] use alloc::{ borrow::{Cow, ToOwned}, string::String, vec::Vec, }; /// A trait used by [`EStr`] and [`EString`] to specify the table used for encoding. /// /// # Sub-encoders /// /// A sub-encoder `SubE` of `E` is an encoder such that `SubE::TABLE` is a [subset] of `E::TABLE`. /// /// [subset]: Table::is_subset pub trait Encoder: 'static { /// The table used for encoding. const TABLE: &'static Table; } /// Percent-encoded string slices. /// /// The owned counterpart of `EStr` is [`EString`]. See its documentation /// if you want to build a percent-encoded string from scratch. /// /// # Type parameter /// /// The `EStr` type is parameterized over a type `E` that implements [`Encoder`]. /// The associated constant `E::TABLE` of type [`Table`] specifies the byte patterns /// allowed in a string. In short, the underlying byte sequence of an `EStr` slice /// can be formed by joining any number of the following byte sequences: /// /// - `ch.encode_utf8(&mut [0; 4])` where `E::TABLE.allows(ch)`. /// - `[b'%', hi, lo]` where `E::TABLE.allows_pct_encoded() && hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()`. /// /// # Comparison /// /// `EStr` slices are compared [lexicographically](Ord#lexicographical-comparison) /// by their byte values. Normalization is **not** performed prior to comparison. 
/// /// # Examples /// /// Parse key-value pairs from a query string into a hash map: /// /// ``` /// use fluent_uri::{pct_enc::EStr, UriRef}; /// use std::collections::HashMap; /// /// let s = "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21"; /// let query = UriRef::parse(s)?.query().unwrap(); /// let map: HashMap<_, _> = query /// .split('&') /// .map(|s| s.split_once('=').unwrap_or((s, EStr::EMPTY))) /// .map(|(k, v)| (k.decode().to_string_lossy(), v.decode().to_string_lossy())) /// .collect(); /// assert_eq!(map["name"], "张三"); /// assert_eq!(map["speech"], "¡Olé!"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[derive(RefCastCustom)] #[repr(transparent)] pub struct EStr { encoder: PhantomData, inner: str, } #[cfg(feature = "alloc")] struct Assert { _marker: PhantomData<(L, R)>, } #[cfg(feature = "alloc")] impl Assert { const L_IS_SUB_ENCODER_OF_R: () = assert!(L::TABLE.is_subset(R::TABLE), "not a sub-encoder"); } impl EStr { const ASSERT_ALLOWS_PCT_ENCODED: () = assert!( E::TABLE.allows_pct_encoded(), "table does not allow percent-encoded octets" ); /// Converts a string slice to an `EStr` slice assuming validity. #[ref_cast_custom] pub(crate) const fn new_validated(s: &str) -> &Self; /// An empty `EStr` slice. pub const EMPTY: &'static Self = Self::new_validated(""); pub(crate) fn cast(&self) -> &EStr { EStr::new_validated(&self.inner) } /// Converts a string slice to an `EStr` slice. /// /// # Panics /// /// Panics if the string is not properly encoded with `E`. /// For a non-panicking variant, use [`new`](Self::new). #[must_use] pub const fn new_or_panic(s: &str) -> &Self { match Self::new(s) { Some(s) => s, None => panic!("improperly encoded string"), } } /// Converts a string slice to an `EStr` slice, returning `None` if the conversion fails. 
#[must_use] pub const fn new(s: &str) -> Option<&Self> { if E::TABLE.validate(s.as_bytes()) { Some(Self::new_validated(s)) } else { None } } /// Creates an `EStr` slice containing a single percent-encoded octet representing the given byte. /// /// # Panics /// /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets]. /// /// [allow percent-encoded octets]: Table::allows_pct_encoded /// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// assert_eq!(EStr::::encode_byte(b'1'), "%31"); /// ``` #[must_use] pub fn encode_byte(x: u8) -> &'static Self { () = Self::ASSERT_ALLOWS_PCT_ENCODED; Self::new_validated(encode_byte(x)) } /// Yields the underlying string slice. #[must_use] pub fn as_str(&self) -> &str { &self.inner } /// Returns the length of the `EStr` slice in bytes. #[must_use] pub fn len(&self) -> usize { self.inner.len() } /// Checks whether the `EStr` slice is empty. #[must_use] pub fn is_empty(&self) -> bool { self.inner.is_empty() } /// Upcasts the `EStr` slice to associate it with the given super-encoder. /// /// # Panics /// /// Panics at compile time if `E` is not a [sub-encoder](Encoder#sub-encoders) of `SuperE`. /// /// # Example /// /// ``` /// use fluent_uri::pct_enc::{encoder::{IPath, Path}, EStr}; /// /// let path = EStr::::new_or_panic("foo"); /// let path: &EStr = path.upcast(); /// ``` #[cfg(fluent_uri_unstable)] #[must_use] pub fn upcast(&self) -> &EStr { () = Assert::::L_IS_SUB_ENCODER_OF_R; EStr::new_validated(self.as_str()) } /// Checks whether the `EStr` slice is unencoded, i.e., does not contain `'%'`. 
/// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// assert!(EStr::::new_or_panic("Hello!").is_unencoded()); /// assert!(!EStr::::new_or_panic("%C2%A1Hola%21").is_unencoded()); /// ``` #[cfg(fluent_uri_unstable)] #[must_use] pub fn is_unencoded(&self) -> bool { !(E::TABLE.allows_pct_encoded() && self.inner.contains('%')) } /// Returns an iterator used to decode the `EStr` slice. /// /// Always **split before decoding**, as otherwise the data may be /// mistaken for component delimiters. /// /// Note that the iterator will **not** decode `U+002B` (+) as `0x20` (space). /// /// # Panics /// /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets]. /// /// [allow percent-encoded octets]: Table::allows_pct_encoded /// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// let dec = EStr::::new_or_panic("%C2%A1Hola%21").decode(); /// assert_eq!(*dec.clone().to_bytes(), [0xc2, 0xa1, 0x48, 0x6f, 0x6c, 0x61, 0x21]); /// assert_eq!(dec.to_string().unwrap(), "¡Hola!"); /// ``` pub fn decode(&self) -> Decode<'_> { () = Self::ASSERT_ALLOWS_PCT_ENCODED; Decode::new(&self.inner) } /// Returns an iterator over subslices of the `EStr` slice separated by the given delimiter. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. 
/// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 /// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// assert!(EStr::::new_or_panic("a,b,c").split(',').eq(["a", "b", "c"])); /// assert!(EStr::::new_or_panic(",").split(',').eq(["", ""])); /// assert!(EStr::::EMPTY.split(',').eq([""])); /// ``` pub fn split(&self, delim: char) -> Split<'_, E> { assert!( delim.is_ascii() && table::RESERVED.allows(delim), "splitting with non-reserved character" ); Split { inner: self.inner.split(delim), encoder: PhantomData, } } /// Splits the `EStr` slice on the first occurrence of the given delimiter and /// returns prefix before delimiter and suffix after delimiter. /// /// Returns `None` if the delimiter is not found. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. /// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 /// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// assert_eq!( /// EStr::::new_or_panic("foo;bar;baz").split_once(';'), /// Some((EStr::new_or_panic("foo"), EStr::new_or_panic("bar;baz"))) /// ); /// /// assert_eq!(EStr::::new_or_panic("foo").split_once(';'), None); /// ``` #[must_use] pub fn split_once(&self, delim: char) -> Option<(&Self, &Self)> { assert!( delim.is_ascii() && table::RESERVED.allows(delim), "splitting with non-reserved character" ); self.inner .split_once(delim) .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b))) } /// Splits the `EStr` slice on the last occurrence of the given delimiter and /// returns prefix before delimiter and suffix after delimiter. /// /// Returns `None` if the delimiter is not found. /// /// # Panics /// /// Panics if the delimiter is not a [reserved] character. 
/// /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2 /// /// # Examples /// /// ``` /// use fluent_uri::pct_enc::{encoder::Path, EStr}; /// /// assert_eq!( /// EStr::::new_or_panic("foo;bar;baz").rsplit_once(';'), /// Some((EStr::new_or_panic("foo;bar"), EStr::new_or_panic("baz"))) /// ); /// /// assert_eq!(EStr::::new_or_panic("foo").rsplit_once(';'), None); /// ``` #[must_use] pub fn rsplit_once(&self, delim: char) -> Option<(&Self, &Self)> { assert!( delim.is_ascii() && table::RESERVED.allows(delim), "splitting with non-reserved character" ); self.inner .rsplit_once(delim) .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b))) } } impl AsRef for EStr { fn as_ref(&self) -> &Self { self } } impl AsRef for EStr { fn as_ref(&self) -> &str { &self.inner } } impl PartialEq for EStr { fn eq(&self, other: &Self) -> bool { self.inner == other.inner } } impl PartialEq for EStr { fn eq(&self, other: &str) -> bool { &self.inner == other } } impl PartialEq> for str { fn eq(&self, other: &EStr) -> bool { self == &other.inner } } impl Eq for EStr {} impl hash::Hash for EStr { fn hash(&self, state: &mut H) { self.inner.hash(state); } } impl PartialOrd for EStr { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for EStr { fn cmp(&self, other: &Self) -> Ordering { self.inner.cmp(&other.inner) } } impl Default for &EStr { /// Creates an empty `EStr` slice. fn default() -> Self { EStr::EMPTY } } #[cfg(feature = "alloc")] impl ToOwned for EStr { type Owned = EString; fn to_owned(&self) -> EString { EString::new_validated(self.inner.to_owned()) } fn clone_into(&self, target: &mut EString) { self.inner.clone_into(&mut target.buf); } } /// Extension methods for the [path] component. /// /// [path]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.3 impl EStr { /// Checks whether the path is absolute, i.e., starting with `'/'`. 
/// Builds a 256-entry lookup table from an ASCII hexadecimal digit to its value.
///
/// Entries for bytes that are not hexadecimal digits hold `0xff`. When `hi`
/// is `true` the digit value is stored in the high nibble (shifted left by
/// four bits), otherwise in the low nibble.
const fn gen_octet_table(hi: bool) -> [u8; 256] {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    let shift = if hi { 4 } else { 0 };
    let mut table = [0xff; 256];
    let mut nibble = 0;
    while nibble < UPPER_HEX.len() {
        let digit = UPPER_HEX[nibble];
        let value = (nibble as u8) << shift;
        table[digit as usize] = value;
        // Lowercasing is a no-op for `0`-`9` and yields `a`-`f` for `A`-`F`.
        table[digit.to_ascii_lowercase() as usize] = value;
        nibble += 1;
    }
    table
}

const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true);
pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false);

/// Decodes a percent-encoded octet, assuming that the bytes are hexadecimal.
///
/// `hi` and `lo` are the two ASCII hexadecimal digits following the `%`.
pub(crate) fn decode_octet(hi: u8, lo: u8) -> u8 {
    debug_assert!(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit());
    let high_nibble = OCTET_TABLE_HI[usize::from(hi)];
    let low_nibble = OCTET_TABLE_LO[usize::from(lo)];
    high_nibble | low_nibble
}

/// An iterator used to decode an [`EStr`] slice.
///
/// This struct is created by [`EStr::decode`]. Normally you'll use the methods below
/// instead of iterating over a `Decode` manually, unless you need precise control
/// over allocation.
///
/// See the [`DecodedChunk`] type for documentation of the items yielded by this iterator.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decode<'a> {
    source: &'a str,
}

/// An item returned by the [`Decode`] iterator.
#[derive(Clone, Copy, Debug)]
pub enum DecodedChunk<'a> {
    /// An unencoded subslice.
    Unencoded(&'a str),
    /// A percent-encoded octet, decoded (for example, `"%20"` decoded as `0x20`).
    PctDecoded(u8),
}

impl<'a> Decode<'a> {
    /// Wraps the remaining, not yet decoded source string.
    pub(crate) fn new(source: &'a str) -> Self {
        Self { source }
    }

    /// Consumes and returns the non-empty run of text before the next `'%'`.
    ///
    /// Returns `None`, consuming nothing, when the source is empty or starts
    /// with `'%'`.
    fn next_if_unencoded(&mut self) -> Option<&'a str> {
        let run_len = self.source.find('%').unwrap_or(self.source.len());
        if run_len == 0 {
            return None;
        }
        let (run, rest) = self.source.split_at(run_len);
        self.source = rest;
        Some(run)
    }
}

impl<'a> Iterator for Decode<'a> {
    type Item = DecodedChunk<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.source.is_empty() {
            return None;
        }
        if let Some(run) = self.next_if_unencoded() {
            return Some(DecodedChunk::Unencoded(run));
        }

        // The source now starts with `'%'`; take the three-byte `%XX` triplet.
        let (triplet, rest) = self.source.split_at(3);
        self.source = rest;
        let digits = triplet.as_bytes();
        Some(DecodedChunk::PctDecoded(decode_octet(digits[1], digits[2])))
    }
}

impl FusedIterator for Decode<'_> {}
let mut buf = [0; 32]; let mut len = 0; 'decode: for chunk in self { match chunk { DecodedChunk::Unencoded(s) => { if len > 0 { for chunk in Utf8Chunks::new(&buf[..len]) { handle_chunk(DecodedUtf8Chunk::Decoded { valid: chunk.valid(), invalid: chunk.invalid(), }); } len = 0; } handle_chunk(DecodedUtf8Chunk::Unencoded(s)); } DecodedChunk::PctDecoded(x) => { buf[len] = x; len += 1; if len >= buf.len() { for chunk in Utf8Chunks::new(&buf[..len]) { if chunk.incomplete() { handle_chunk(DecodedUtf8Chunk::Decoded { valid: chunk.valid(), invalid: &[], }); let invalid_len = chunk.invalid().len(); buf.copy_within(len - invalid_len..len, 0); len = invalid_len; continue 'decode; } handle_chunk(DecodedUtf8Chunk::Decoded { valid: chunk.valid(), invalid: chunk.invalid(), }); } len = 0; } } } } for chunk in Utf8Chunks::new(&buf[..len]) { handle_chunk(DecodedUtf8Chunk::Decoded { valid: chunk.valid(), invalid: chunk.invalid(), }); } } fn decoded_len(&self) -> usize { self.source.len() - self.source.bytes().filter(|&x| x == b'%').count() * 2 } fn borrow_all_or_prep_buf(&mut self) -> Result<&'a str, String> { if let Some(s) = self.next_if_unencoded() { if self.source.is_empty() { return Ok(s); } let mut buf = String::with_capacity(s.len() + self.decoded_len()); buf.push_str(s); Err(buf) } else { Err(String::with_capacity(self.decoded_len())) } } /// Decodes the slice to bytes. /// /// This method allocates only when the slice contains any percent-encoded octet. #[must_use] pub fn to_bytes(mut self) -> Cow<'a, [u8]> { if self.source.is_empty() { return Cow::Borrowed(&[]); } let mut buf = match self.borrow_all_or_prep_buf() { Ok(s) => return Cow::Borrowed(s.as_bytes()), Err(buf) => buf.into_bytes(), }; for chunk in self { match chunk { DecodedChunk::Unencoded(s) => buf.extend_from_slice(s.as_bytes()), DecodedChunk::PctDecoded(s) => buf.push(s), } } Cow::Owned(buf) } /// Attempts to decode the slice to a string. 
/// /// This method allocates only when the slice contains any percent-encoded octet. /// /// # Errors /// /// Returns `Err` containing the decoded bytes if they are not valid UTF-8. pub fn to_string(mut self) -> Result, Vec> { if self.source.is_empty() { return Ok(Cow::Borrowed("")); } let mut buf = match self.borrow_all_or_prep_buf() { Ok(s) => return Ok(Cow::Borrowed(s)), Err(buf) => Ok::<_, Vec>(buf), }; self.decode_utf8(|chunk| match chunk { DecodedUtf8Chunk::Unencoded(s) => match &mut buf { Ok(string) => string.push_str(s), Err(vec) => vec.extend_from_slice(s.as_bytes()), }, DecodedUtf8Chunk::Decoded { valid, invalid } => match &mut buf { Ok(string) => { string.push_str(valid); if !invalid.is_empty() { let mut vec = core::mem::take(string).into_bytes(); vec.extend_from_slice(invalid); buf = Err(vec); } } Err(vec) => { vec.extend_from_slice(valid.as_bytes()); vec.extend_from_slice(invalid); } }, }); match buf { Ok(buf) => Ok(Cow::Owned(buf)), Err(buf) => Err(buf), } } /// Decodes the slice to a string, replacing any invalid UTF-8 sequences with /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. /// /// [U+FFFD]: char::REPLACEMENT_CHARACTER /// /// This method allocates only when the slice contains any percent-encoded octet. 
/// Returns the percent-encoded form of a byte, for example `"%20"` for `0x20`.
///
/// Hexadecimal digits are uppercase. The result is a slice of a table built
/// at compile time, so no allocation or formatting happens at run time.
pub(crate) fn encode_byte(x: u8) -> &'static str {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    /// Lays out the 256 triplets `%00`, `%01`, ..., `%FF` back to back.
    const fn build_triplets() -> [u8; 256 * 3] {
        // Every triplet starts with '%'; only the two digit slots are rewritten.
        let mut triplets = [b'%'; 256 * 3];
        let mut byte = 0;
        while byte < 256 {
            triplets[byte * 3 + 1] = UPPER_HEX[byte / 16];
            triplets[byte * 3 + 2] = UPPER_HEX[byte % 16];
            byte += 1;
        }
        triplets
    }

    const TRIPLET_BYTES: &[u8; 256 * 3] = &build_triplets();
    const TRIPLETS: &str = match str::from_utf8(TRIPLET_BYTES) {
        Ok(s) => s,
        Err(_) => unreachable!(),
    };

    let start = usize::from(x) * 3;
    &TRIPLETS[start..start + 3]
}
#[cfg(feature = "alloc")]
impl<'t, 's> Iterator for Encode<'t, 's> {
    type Item = EncodedChunk<'s>;

    /// Yields the next chunk of the encoded output.
    ///
    /// State kept between calls: `source[..enc_len]` is the UTF-8 encoding of
    /// a disallowed character whose bytes are emitted one by one as
    /// percent-encoded triplets, and `enc_i` is the index of the next byte of
    /// that character to emit.
    fn next(&mut self) -> Option<Self::Item> {
        // Still inside a disallowed character: emit its next byte.
        if self.enc_i < self.enc_len {
            let s = encode_byte(self.source.as_bytes()[self.enc_i]);
            self.enc_i += 1;
            return Some(EncodedChunk::PctEncoded(s));
        }

        // Drop the fully emitted character (if any) from the source.
        self.source = &self.source[self.enc_len..];
        self.enc_len = 0;

        if self.source.is_empty() {
            return None;
        }

        // The byte length of the offending character comes from
        // `char::len_utf8`, which needs neither `CharIndices::offset` nor
        // pointer arithmetic.
        let disallowed = self
            .source
            .char_indices()
            .find(|&(_, ch)| !self.table.allows(ch));

        match disallowed {
            // The source starts with a disallowed character: emit its first
            // byte now and remember the rest.
            Some((0, ch)) => {
                self.enc_len = ch.len_utf8();
                self.enc_i = 1;
                let s = encode_byte(self.source.as_bytes()[0]);
                Some(EncodedChunk::PctEncoded(s))
            }
            // Emit the allowed run in front of the disallowed character.
            Some((i, ch)) => {
                let (run, rest) = self.source.split_at(i);
                self.source = rest;
                self.enc_len = ch.len_utf8();
                self.enc_i = 0;
                Some(EncodedChunk::Unencoded(run))
            }
            // Everything that remains is allowed.
            None => {
                let run = core::mem::take(&mut self.source);
                self.enc_len = 0;
                self.enc_i = 0;
                Some(EncodedChunk::Unencoded(run))
            }
        }
    }
}

#[cfg(feature = "alloc")]
impl FusedIterator for Encode<'_, '_> {}
/// Checks whether a code point matches the `ucschar` ABNF rule from RFC 3987.
///
/// The rule covers `U+00A0..=U+D7FF`, `U+F900..=U+FDCF`, `U+FDF0..=U+FFEF`,
/// planes 1 through 13 without the last two code points of each plane, and
/// `U+E1000..=U+EFFFD`.
const fn is_ucschar(x: u32) -> bool {
    matches!(x, 0xa0..=0xd7ff | 0xf900..=0xfdcf | 0xfdf0..=0xffef)
        || (x >= 0x10000 && x <= 0xdffff && (x & 0xffff) <= 0xfffd)
        || (x >= 0xe1000 && x <= 0xefffd)
}

/// Checks whether a code point matches the `iprivate` ABNF rule from RFC 3987.
///
/// The rule covers `U+E000..=U+F8FF`, `U+F0000..=U+FFFFD` and
/// `U+100000..=U+10FFFD`. The explicit upper bound matters because the
/// argument is a plain `u32`: without it, any value above `0x10FFFD` whose low
/// 16 bits are at most `0xFFFD` (for example `0x110000`) would be accepted.
const fn is_iprivate(x: u32) -> bool {
    matches!(x, 0xe000..=0xf8ff) || (x >= 0xf0000 && x <= 0x10fffd && (x & 0xffff) <= 0xfffd)
}
/// /// Returns a new table that allows all the byte patterns allowed /// by `self` or by `other`. #[must_use] pub const fn or(mut self, other: &Self) -> Self { let mut i = 0; while i < TABLE_LEN { self.table[i] |= other.table[i]; i += 1; } self } /// Marks this table as allowing percent-encoded octets. #[must_use] pub const fn or_pct_encoded(mut self) -> Self { self.table[INDEX_PCT_ENCODED] = true; self } /// Marks this table as allowing characters matching the [`ucschar`] /// ABNF rule from RFC 3987. /// /// [`ucschar`]: https://datatracker.ietf.org/doc/html/rfc3987#section-2.2 #[must_use] pub const fn or_ucschar(mut self) -> Self { self.table[INDEX_UCSCHAR] = true; self } /// Marks this table as allowing characters matching the [`iprivate`] /// ABNF rule from RFC 3987. /// /// [`iprivate`]: https://datatracker.ietf.org/doc/html/rfc3987#section-2.2 #[must_use] pub const fn or_iprivate(mut self) -> Self { self.table[INDEX_IPRIVATE] = true; self } /// Subtracts from this table. /// /// Returns a new table that allows all the byte patterns allowed /// by `self` but not allowed by `other`. #[must_use] pub const fn sub(mut self, other: &Self) -> Self { let mut i = 0; while i < TABLE_LEN { self.table[i] &= !other.table[i]; i += 1; } self } /// Checks whether the table is a subset of another, i.e., `other` /// allows at least all the byte patterns allowed by `self`. 
#[must_use] pub const fn is_subset(&self, other: &Self) -> bool { let mut i = 0; while i < TABLE_LEN { if self.table[i] & !other.table[i] { return false; } i += 1; } true } #[inline] pub(crate) const fn allows_ascii(&self, x: u8) -> bool { self.table[x as usize] } #[inline] pub(crate) const fn allows_non_ascii(&self) -> bool { self.table[INDEX_UCSCHAR] | self.table[INDEX_IPRIVATE] } pub(crate) const fn allows_code_point(&self, x: u32) -> bool { if x < 128 { self.table[x as usize] } else { (self.table[INDEX_UCSCHAR] && is_ucschar(x)) || (self.table[INDEX_IPRIVATE] && is_iprivate(x)) } } /// Checks whether the given unencoded character is allowed by the table. #[inline] #[must_use] pub const fn allows(&self, ch: char) -> bool { self.allows_code_point(ch as u32) } /// Checks whether percent-encoded octets are allowed by the table. #[inline] #[must_use] pub const fn allows_pct_encoded(&self) -> bool { self.table[INDEX_PCT_ENCODED] } /// Validates the given string with the table. pub(crate) const fn validate(&self, s: &[u8]) -> bool { let mut i = 0; let allow_pct_encoded = self.allows_pct_encoded(); let allow_non_ascii = self.allows_non_ascii(); while i < s.len() { let x = s[i]; if allow_pct_encoded && x == b'%' { if i + 2 >= s.len() { return false; } let (hi, lo) = (s[i + 1], s[i + 2]); if !(HEXDIG.allows_ascii(hi) & HEXDIG.allows_ascii(lo)) { return false; } i += 3; } else if allow_non_ascii { let (x, len) = utf8::next_code_point(s, i); if !self.allows_code_point(x) { return false; } i += len; } else { if !self.allows_ascii(x) { return false; } i += 1; } } true } } const fn new(bytes: &[u8]) -> Table { Table::new(bytes) } // Rules from RFC 3986: /// `ALPHA = %x41-5A / %x61-7A` pub const ALPHA: &Table = &new(b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); /// `DIGIT = %x30-39` pub const DIGIT: &Table = &new(b"0123456789"); /// `HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"` pub const HEXDIG: &Table = &DIGIT.or(&new(b"ABCDEFabcdef")); /// `scheme = ALPHA 
// Each table below records only *which characters* may occur in the named
// rule; repetition counts and ordering from the ABNF are not encoded in a
// table.

/// `scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`
pub const SCHEME: &Table = &ALPHA.or(DIGIT).or(&new(b"+-."));

/// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )`
pub const USERINFO: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":")).or_pct_encoded();

/// `IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )`
///
/// The table holds the characters of the trailing
/// `1*( unreserved / sub-delims / ":" )` part; percent-encoded octets are
/// not allowed.
pub const IPV_FUTURE: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":"));

/// `reg-name = *( unreserved / pct-encoded / sub-delims )`
pub const REG_NAME: &Table = &UNRESERVED.or(SUB_DELIMS).or_pct_encoded();

/// `path = *( pchar / "/" )`
pub const PATH: &Table = &PCHAR.or(&new(b"/"));

/// `segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )`
pub const SEGMENT_NZ_NC: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b"@")).or_pct_encoded();

/// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
pub const PCHAR: &Table = &UNRESERVED.or(SUB_DELIMS).or(&new(b":@")).or_pct_encoded();

/// `query = *( pchar / "/" / "?" )`
pub const QUERY: &Table = &PCHAR.or(&new(b"/?"));

/// `fragment = *( pchar / "/" / "?" )`
///
/// Same character set as [`QUERY`].
pub const FRAGMENT: &Table = QUERY;

/// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
pub const UNRESERVED: &Table = &ALPHA.or(DIGIT).or(&new(b"-._~"));

/// `reserved = gen-delims / sub-delims`
pub const RESERVED: &Table = &GEN_DELIMS.or(SUB_DELIMS);

/// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
pub const GEN_DELIMS: &Table = &new(b":/?#[]@");
/// An error occurred when resolving a URI/IRI reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ResolveError {
    /// The base has a fragment.
    BaseWithFragment,
    /// The base has no authority and its path is rootless, but the reference
    /// is relative, is not empty and does not start with `'#'`.
    InvalidReferenceAgainstOpaqueBase,
    /// An underflow occurred in path resolution.
    ///
    /// Used only when [`Resolver::allow_path_underflow`] is set to `false`.
    PathUnderflow,
}

impl fmt::Display for ResolveError {
    /// Writes a fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match *self {
            Self::BaseWithFragment => "base should not have fragment",
            Self::InvalidReferenceAgainstOpaqueBase => {
                "when base has a rootless path and no authority, reference should either have scheme, be empty or start with '#'"
            }
            Self::PathUnderflow => "underflow occurred in path resolution",
        })
    }
}

#[cfg(feature = "impl-error")]
impl crate::Error for ResolveError {}
/// /// # Examples /// /// ``` /// use fluent_uri::{resolve::Resolver, Uri, UriRef}; /// /// let base = Uri::parse("http://example.com/foo/bar")?; /// let resolver = Resolver::with_base(base); /// /// assert_eq!(resolver.resolve(&UriRef::parse("baz")?).unwrap(), "http://example.com/foo/baz"); /// assert_eq!(resolver.resolve(&UriRef::parse("../baz")?).unwrap(), "http://example.com/baz"); /// assert_eq!(resolver.resolve(&UriRef::parse("?baz")?).unwrap(), "http://example.com/foo/bar?baz"); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` #[derive(Clone, Copy, Debug)] #[must_use] pub struct Resolver { base: R, allow_path_underflow: bool, } impl Resolver where R::Val: Bos, { /// Creates a new `Resolver` with the given base and default configuration. pub fn with_base(base: R) -> Self { Self { base, allow_path_underflow: true, } } /// Sets whether to allow underflow in path resolution. /// /// This defaults to `true`. A value of `false` is a deviation from the /// reference resolution algorithm defined in /// [Section 5 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986/#section-5). /// /// # Examples /// /// ``` /// use fluent_uri::{resolve::{Resolver, ResolveError}, Uri, UriRef}; /// /// let base = Uri::parse("http://example.com/foo/bar")?; /// let resolver = Resolver::with_base(base).allow_path_underflow(false); /// /// assert_eq!(resolver.resolve(&UriRef::parse("../../baz")?).unwrap_err(), ResolveError::PathUnderflow); /// assert_eq!(resolver.resolve(&UriRef::parse("../../../baz")?).unwrap_err(), ResolveError::PathUnderflow); /// assert_eq!(resolver.resolve(&UriRef::parse("/../baz")?).unwrap_err(), ResolveError::PathUnderflow); /// # Ok::<_, fluent_uri::ParseError>(()) /// ``` pub fn allow_path_underflow(mut self, value: bool) -> Self { self.allow_path_underflow = value; self } /// Resolves the given reference against the configured base. /// /// See [`resolve_against`] for the exact behavior of this method. 
/// Resolves the reference `r` against `base`, returning the target string
/// together with the metadata describing its component boundaries.
///
/// The target components (`t_*`) are first selected from the reference and
/// the base, then written into a buffer whose capacity is computed up front.
/// When the target path is built from a base path and a relative reference
/// path, the two are not concatenated first; dot segments are removed while
/// the pieces are appended to the buffer.
///
/// # Errors
///
/// - [`ResolveError::BaseWithFragment`] if `base` has a fragment.
/// - [`ResolveError::InvalidReferenceAgainstOpaqueBase`] if `base` has no
///   authority and a rootless path while `r` has no scheme, is not empty and
///   does not start with `'#'`.
/// - [`ResolveError::PathUnderflow`] if dot-segment removal underflowed and
///   `allow_path_underflow` is `false`.
///
/// # Panics
///
/// Panics if `base` has no scheme.
pub(crate) fn resolve(
    base: RmrRef<'_, '_>,
    /* reference */ r: RmrRef<'_, '_>,
    allow_path_underflow: bool,
) -> Result<(String, Meta), ResolveError> {
    assert!(base.has_scheme());

    if base.has_fragment() {
        return Err(ResolveError::BaseWithFragment);
    }
    if !base.has_authority()
        && base.path().is_rootless()
        && !r.has_scheme()
        && !matches!(r.as_str().bytes().next(), None | Some(b'#'))
    {
        return Err(ResolveError::InvalidReferenceAgainstOpaqueBase);
    }

    // Target components; each is assigned exactly once on every branch below.
    // `t_path` is a pair: a first piece and an optional second piece that is
    // appended after it (used only for base path + relative reference path).
    let (t_scheme, t_authority, t_path, t_query, t_fragment);

    let r_scheme = r.scheme_opt();
    let r_authority = r.authority();
    let r_path = r.path();
    let r_query = r.query();
    let r_fragment = r.fragment();

    if let Some(r_scheme) = r_scheme {
        // The reference has a scheme: everything comes from the reference.
        t_scheme = r_scheme;
        t_authority = r_authority;
        t_path = (r_path.as_str(), None);
        t_query = r_query;
    } else {
        if r_authority.is_some() {
            // Only the scheme is inherited from the base.
            t_authority = r_authority;
            t_path = (r_path.as_str(), None);
            t_query = r_query;
        } else {
            if r_path.is_empty() {
                // Empty reference path: keep the base path, and the base
                // query too unless the reference brings its own.
                t_path = (base.path().as_str(), None);
                if r_query.is_some() {
                    t_query = r_query;
                } else {
                    t_query = base.query();
                }
            } else {
                if r_path.is_absolute() {
                    t_path = (r_path.as_str(), None);
                } else {
                    // An empty base path is treated as "/".
                    let base_path = base.path();
                    let base_path = if base_path.is_empty() {
                        "/"
                    } else {
                        base_path.as_str()
                    };

                    // Make sure that swapping the order of resolution and normalization
                    // does not change the result.
                    //
                    // The last base segment is dropped, unless it is a
                    // double-dot segment, in which case the whole base path
                    // is kept. The `unwrap` relies on the base path
                    // containing a '/' at this point.
                    let last_slash_i = base_path.rfind('/').unwrap();
                    let last_seg = &base_path[last_slash_i + 1..];
                    let base_path_stripped = match classify_segment(last_seg) {
                        SegKind::DoubleDot => base_path,
                        _ => &base_path[..=last_slash_i],
                    };

                    // Instead of merging the paths, remove dot segments incrementally.
                    t_path = (base_path_stripped, Some(r_path.as_str()));
                }
                t_query = r_query;
            }
            t_authority = base.authority();
        }
        t_scheme = base.scheme();
    }
    // The fragment always comes from the reference.
    t_fragment = r_fragment;

    // Calculate the output length.
    // The `+ 1`/`+ 2` terms account for the ':' after the scheme, the "//"
    // before the authority, the '?' and the '#'.
    let mut len = t_scheme.as_str().len() + 1;
    if let Some(authority) = t_authority {
        len += authority.as_str().len() + 2;
    }
    len += t_path.0.len() + t_path.1.map_or(0, |s| s.len());
    if let Some(query) = t_query {
        len += query.len() + 1;
    }
    if let Some(fragment) = t_fragment {
        len += fragment.len() + 1;
    }

    let mut buf = String::with_capacity(len);
    let mut meta = Meta::default();

    buf.push_str(t_scheme.as_str());
    meta.scheme_end = NonZeroUsize::new(buf.len());
    buf.push(':');

    if let Some(authority) = t_authority {
        let mut auth_meta = authority.meta();
        buf.push_str("//");

        // NOTE(review): the host bounds returned by `authority.meta()` look
        // like offsets relative to the start of the authority; they are
        // shifted by the authority's position in the output buffer.
        auth_meta.host_bounds.0 += buf.len();
        auth_meta.host_bounds.1 += buf.len();

        buf.push_str(authority.as_str());
        meta.auth_meta = Some(auth_meta);
    }

    let path_start = buf.len();
    meta.path_bounds.0 = path_start;

    if t_path.0.starts_with('/') {
        // Pass one or two pieces depending on whether a second piece exists.
        let path = [t_path.0, t_path.1.unwrap_or("")];
        let path = &path[..t_path.1.is_some() as usize + 1];
        let underflow_occurred = remove_dot_segments(&mut buf, path_start, path);
        if underflow_occurred && !allow_path_underflow {
            return Err(ResolveError::PathUnderflow);
        }
    } else {
        // Paths not starting with '/' are copied verbatim.
        buf.push_str(t_path.0);
    }

    // Close the loophole in the original algorithm.
    // Without an authority, a path starting with "//" is prefixed with "/."
    // (presumably so that the "//" cannot be read as introducing an
    // authority when the output is parsed again).
    if t_authority.is_none() && buf[path_start..].starts_with("//") {
        buf.insert_str(path_start, "/.");
    }

    meta.path_bounds.1 = buf.len();

    if let Some(query) = t_query {
        buf.push('?');
        buf.push_str(query.as_str());
        meta.query_end = NonZeroUsize::new(buf.len());
    }

    if let Some(fragment) = t_fragment {
        buf.push('#');
        buf.push_str(fragment.as_str());
    }

    // The precomputed length is an upper bound on the output length.
    debug_assert!(buf.len() <= len);

    Ok((buf, meta))
}

/// Appends the pieces of `path` to `buf` while removing dot segments.
///
/// `start` is the index in `buf` at which the path begins; the buffer is
/// never truncated below `start + 1`. Single-dot segments are skipped, and a
/// double-dot segment removes the last segment written so far. Returns
/// `true` if a double-dot segment was encountered when there was no segment
/// left to remove (an underflow); such a segment is otherwise ignored.
pub(crate) fn remove_dot_segments(buf: &mut String, start: usize, path: &[&str]) -> bool {
    let mut underflow_occurred = false;
    // Each `seg` keeps its trailing '/' (if any), so pushing it reproduces
    // the separator as well.
    for seg in path.iter().flat_map(|s| s.split_inclusive('/')) {
        let seg_stripped = seg.strip_suffix('/').unwrap_or(seg);
        match classify_segment(seg_stripped) {
            SegKind::Dot => {}
            SegKind::DoubleDot => {
                if buf.len() > start + 1 {
                    // Cut back to just after the '/' that precedes the last
                    // segment, ignoring the buffer's final character.
                    buf.truncate(buf[..buf.len() - 1].rfind('/').unwrap() + 1);
                } else {
                    underflow_occurred = true;
                }
            }
            SegKind::Normal => buf.push_str(seg),
        }
    }
    underflow_occurred
}

/// The kind of a path segment with respect to dot-segment removal.
enum SegKind {
    /// A single-dot segment: `.`, `%2E` or `%2e`.
    Dot,
    /// A double-dot segment: two dots, each either literal or percent-encoded.
    DoubleDot,
    /// Any other segment, including the empty segment.
    Normal,
}

/// Classifies a path segment (given without its trailing `'/'`).
///
/// A dot may be written literally or percent-encoded as `%2E`/`%2e`.
fn classify_segment(mut seg: &str) -> SegKind {
    if seg.is_empty() {
        return SegKind::Normal;
    }
    // Strip one leading dot, in any of its spellings.
    if let Some(rem) = seg.strip_prefix('.') {
        seg = rem;
    } else if let Some(rem) = seg.strip_prefix("%2E") {
        seg = rem;
    } else if let Some(rem) = seg.strip_prefix("%2e") {
        seg = rem;
    }
    // Nothing left after one dot: single dot. Exactly one more dot left:
    // double dot. A segment that did not start with a dot is unchanged here
    // and therefore cannot equal a lone dot, so it falls through to `Normal`.
    if seg.is_empty() {
        SegKind::Dot
    } else if seg == "." || seg == "%2E" || seg == "%2e" {
        SegKind::DoubleDot
    } else {
        SegKind::Normal
    }
}
/// Mask selecting the six payload bits of a UTF-8 continuation byte.
const CONT_MASK: u8 = 0b0011_1111;

/// Extracts the payload bits of the leading byte of a sequence, where
/// `width` is the number of high bits to discard in addition to the top one.
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask = 0x7F >> width;
    (byte & payload_mask) as u32
}

/// Appends the six payload bits of the continuation byte `byte` to `ch`.
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    (ch << 6) | payload
}

/// Decodes the code point starting at `bytes[i]`.
///
/// Returns the code point together with the number of bytes it occupies.
/// The bytes are not checked for well-formedness; the result is only
/// meaningful when `bytes[i..]` starts with a valid UTF-8 sequence.
///
/// # Panics
///
/// Panics if the sequence announced by the leading byte runs past the end of
/// `bytes`.
#[inline]
pub const fn next_code_point(bytes: &[u8], i: usize) -> (u32, usize) {
    let first = bytes[i];
    if first < 128 {
        return (first as u32, 1);
    }

    let lead_bits = utf8_first_byte(first, 2);
    let second = bytes[i + 1];
    if first < 0xE0 {
        return (utf8_acc_cont_byte(lead_bits, second), 2);
    }

    let third = bytes[i + 2];
    let tail_bits = utf8_acc_cont_byte((second & CONT_MASK) as u32, third);
    if first < 0xF0 {
        return ((lead_bits << 12) | tail_bits, 3);
    }

    let fourth = bytes[i + 3];
    (
        ((lead_bits & 7) << 18) | utf8_acc_cont_byte(tail_bits, fourth),
        4,
    )
}

/// Returns the length of the UTF-8 sequence announced by the leading byte
/// `b`, or 0 if `b` cannot start a sequence.
#[cfg(feature = "alloc")]
#[inline]
const fn utf8_char_width(b: u8) -> usize {
    match b {
        0x00..=0x7F => 1,
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // Continuation bytes (0x80..=0xBF), the overlong leads 0xC0/0xC1 and
        // 0xF5..=0xFF never start a sequence.
        _ => 0,
    }
}
/// An iterator splitting a byte slice into maximal valid UTF-8 runs, each
/// followed by the invalid bytes (if any) that ended the run.
#[cfg(feature = "alloc")]
pub struct Utf8Chunks<'a> {
    source: &'a [u8],
}

#[cfg(feature = "alloc")]
impl<'a> Utf8Chunks<'a> {
    /// Creates an iterator over the chunks of `bytes`.
    pub fn new(bytes: &'a [u8]) -> Self {
        Self { source: bytes }
    }
}

#[cfg(feature = "alloc")]
impl<'a> Iterator for Utf8Chunks<'a> {
    type Item = Utf8Chunk<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        const TAG_CONT_U8: u8 = 128;

        if self.source.is_empty() {
            return None;
        }

        let bytes = self.source;
        let mut incomplete = false;
        // Reads the byte at `i`. Past the end it records that the sequence
        // was cut short and yields 0, which is never a valid second or
        // continuation byte.
        let mut byte_at = |i: usize| match bytes.get(i) {
            Some(&b) => b,
            None => {
                incomplete = true;
                0
            }
        };

        let mut i = 0;
        let mut valid_up_to = 0;
        'scan: while i < bytes.len() {
            let lead = bytes[i];
            i += 1;

            if lead >= 128 {
                let width = utf8_char_width(lead);

                // The second byte has lead-specific bounds that rule out
                // overlong encodings, surrogates and values above U+10FFFF.
                let second_ok = match width {
                    2 => byte_at(i) & 192 == TAG_CONT_U8,
                    3 => matches!(
                        (lead, byte_at(i)),
                        (0xE0, 0xA0..=0xBF)
                            | (0xE1..=0xEC, 0x80..=0xBF)
                            | (0xED, 0x80..=0x9F)
                            | (0xEE..=0xEF, 0x80..=0xBF)
                    ),
                    4 => matches!(
                        (lead, byte_at(i)),
                        (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F)
                    ),
                    _ => false,
                };
                if !second_ok {
                    break;
                }
                i += 1;

                // Any further bytes must be plain continuation bytes.
                let mut remaining = width - 2;
                while remaining > 0 {
                    if byte_at(i) & 192 != TAG_CONT_U8 {
                        break 'scan;
                    }
                    i += 1;
                    remaining -= 1;
                }
            }

            valid_up_to = i;
        }

        // `inspected` is the valid run plus the bytes of the broken sequence
        // examined so far; the rest is left for the next call.
        let (inspected, rest) = bytes.split_at(i);
        self.source = rest;

        let (valid, invalid) = inspected.split_at(valid_up_to);
        Some(Utf8Chunk {
            valid: core::str::from_utf8(valid).unwrap(),
            invalid,
            incomplete,
        })
    }
}
/// Converting a URI to an IRI and normalizing it decodes percent-encoded
/// octets only where the expected strings below show a decoded character.
#[cfg(feature = "alloc")]
#[test]
fn uri_to_iri() {
    // `%E9` is kept percent-encoded in the expected output.
    let uri = Uri::parse("http://www.example.org/r%E9sum%E9.html").unwrap();
    assert_eq!(
        Iri::from(uri).normalize(),
        "http://www.example.org/r%E9sum%E9.html"
    );

    // `%C3%BC` is the UTF-8 encoding of 'ü' and is decoded.
    let uri = Uri::parse("http://www.example.org/D%C3%BCrst").unwrap();
    assert_eq!(Iri::from(uri).normalize(), "http://www.example.org/Dürst");

    // `%FC` is kept percent-encoded in the expected output.
    let uri = Uri::parse("http://www.example.org/D%FCrst").unwrap();
    assert_eq!(Iri::from(uri).normalize(), "http://www.example.org/D%FCrst");

    let uri = Uri::parse("http://xn--99zt52a.example.org/%e2%80%ae").unwrap();
    // TODO: Determine if we should implement the MUST in Section 4.1 of RFC 3987.
    // assert_eq!(
    //     uri.as_iri().normalize(),
    //     "http://xn--99zt52a.example.org/%E2%80%AE"
    // );
    // Current behavior: `%e2%80%ae` is decoded to U+202E.
    assert_eq!(
        Iri::from(uri).normalize(),
        "http://xn--99zt52a.example.org/\u{202e}"
    );
}

/// Checks the error reported by each failing conversion between the
/// URI/IRI (reference) types.
#[test]
fn convert_error() {
    // A relative reference has no scheme, so it is not a URI.
    let uri_ref = UriRef::parse("rel/ref").unwrap();
    let e = Uri::try_from(uri_ref).unwrap_err();
    assert_eq!(e, ConvertError::NoScheme);

    // Neither has the empty reference.
    let uri_ref = UriRef::parse("").unwrap();
    let e = Uri::try_from(uri_ref).unwrap_err();
    assert_eq!(e, ConvertError::NoScheme);

    // The first non-ASCII character follows the seven bytes of "http://".
    let iri = Iri::parse("http://你好.example.com/").unwrap();
    let e = Uri::try_from(iri).unwrap_err();
    assert_eq!(e, ConvertError::NotAscii { index: 7 });

    // With both problems present, converting to `Uri` reports the missing
    // scheme.
    let iri_ref = IriRef::parse("réf/rel").unwrap();
    let e = Uri::try_from(iri_ref).unwrap_err();
    assert_eq!(e, ConvertError::NoScheme);

    // 'é' starts at byte index 1.
    let e = UriRef::try_from(iri_ref).unwrap_err();
    assert_eq!(e, ConvertError::NotAscii { index: 1 });

    let e = Iri::try_from(iri_ref).unwrap_err();
    assert_eq!(e, ConvertError::NoScheme);
}
let r = UriRef::parse("%3a").unwrap(); assert_eq!(r.normalize(), "%3A"); // Uppercase letters in scheme and registered name. let r = UriRef::parse("HTTP://www.EXAMPLE.com/").unwrap(); assert_eq!(r.normalize(), "http://www.example.com/"); // Underflow in path resolution. let r = UriRef::parse("http://a/../../../g").unwrap(); assert_eq!(r.normalize(), "http://a/g"); // Percent-encoded dot segments. let r = UriRef::parse("http://a/b/c/%2E/%2E./%2e%2E/d").unwrap(); assert_eq!(r.normalize(), "http://a/d"); // Don't remove dot segments from relative reference or rootless path. let r = UriRef::parse("foo/../bar").unwrap(); assert_eq!(r.normalize(), "foo/../bar"); let r = UriRef::parse("/foo/../bar").unwrap(); assert_eq!(r.normalize(), "/foo/../bar"); let r = UriRef::parse("foo:bar/../baz").unwrap(); assert_eq!(r.normalize(), "foo:bar/../baz"); // Do remove dot segments for a URI with absolute path. let r = UriRef::parse("foo:/bar/./../baz").unwrap(); assert_eq!(r.normalize(), "foo:/baz"); // However, make sure that the output is a valid URI reference. let r = UriRef::parse("foo:/.//@@").unwrap(); assert_eq!(r.normalize(), "foo:/.//@@"); // Percent-encoded uppercase letters in registered name. let r = UriRef::parse("HTTP://%45XAMPLE.%43Om").unwrap(); assert_eq!(r.normalize(), "http://example.com"); // Percent-encoded unreserved characters. let r = UriRef::parse("%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F%50%51%52%53%54%55%56%57%58%59%5A%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F%70%71%72%73%74%75%76%77%78%79%7A%30%31%32%33%34%35%36%37%38%39%2D%2E%5F%7E").unwrap(); assert_eq!( r.normalize(), "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~" ); // Percent-encoded reserved characters. let r = UriRef::parse("%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D%25").unwrap(); assert_eq!(r.normalize(), r); // Builder example. 
let r = UriRef::parse("foo://user@example.com:8042/over/there?name=ferret#nose").unwrap(); assert_eq!(r.normalize(), r); // Normalization in all components. let r = UriRef::parse("FOO://%55se%72@EXamp%4ce%2ecom:8042/%4b%2f?%4c%2b#%24%4d").unwrap(); assert_eq!(r.normalize(), "foo://User@example.com:8042/K%2F?L%2B#%24M"); // Normal IPv4 address. let r = UriRef::parse("//127.0.0.1").unwrap(); assert_eq!(r.normalize(), "//127.0.0.1"); #[cfg(feature = "net")] assert!(matches!( r.normalize().authority().unwrap().host_parsed(), Host::Ipv4(Ipv4Addr::LOCALHOST) )); // Percent-encoded IPv4 address. let r = UriRef::parse("//127.0.0.%31").unwrap(); assert_eq!(r.normalize(), "//127.0.0.1"); #[cfg(feature = "net")] assert!(matches!( r.normalize().authority().unwrap().host_parsed(), Host::Ipv4(Ipv4Addr::LOCALHOST) )); // Normal IPv6 address. let r = UriRef::parse("//[::1]").unwrap(); assert_eq!(r.normalize(), "//[::1]"); #[cfg(feature = "net")] assert!(matches!( r.normalize().authority().unwrap().host_parsed(), Host::Ipv6(Ipv6Addr::LOCALHOST) )); // Verbose IPv6 address. let r = UriRef::parse("//[0000:0000:0000::1]").unwrap(); assert_eq!(r.normalize(), "//[::1]"); #[cfg(feature = "net")] assert!(matches!( r.normalize().authority().unwrap().host_parsed(), Host::Ipv6(Ipv6Addr::LOCALHOST) )); // IPv4-mapped IPv6 address. let r = UriRef::parse("//[0:0:0:0:0:ffff:192.0.2.1]").unwrap(); assert_eq!(r.normalize(), "//[::ffff:192.0.2.1]"); // Deprecated IPv4-compatible IPv6 address. let r = UriRef::parse("//[::192.0.2.1]").unwrap(); assert_eq!(r.normalize(), "//[::c000:201]"); // IPvFuture address. let r = UriRef::parse("//[v1FdE.AddR]").unwrap(); assert_eq!(r.normalize(), "//[v1fde.addr]"); } #[test] fn normalize_iri() { // Example from Section 5.3.2 of RFC 3987. let r = IriRef::parse("eXAMPLE://a/./b/../b/%63/%7bfoo%7d/ros%C3%A9").unwrap(); assert_eq!(r.normalize(), "example://a/b/c/%7Bfoo%7D/rosé"); // Encoded private character in query. 
let r = IriRef::parse("?%EE%80%80").unwrap(); assert_eq!(r.normalize(), "?\u{e000}"); } fluent-uri-0.4.1/tests/parse.rs000064400000000000000000000312111046102023000145400ustar 00000000000000#[cfg(feature = "net")] use core::net::{Ipv4Addr, Ipv6Addr}; use fluent_uri::{component::Host, pct_enc::EStr, ParseErrorKind, Uri, UriRef}; #[test] fn parse_absolute() { let r = UriRef::parse("file:///etc/hosts").unwrap(); assert_eq!(r.as_str(), "file:///etc/hosts"); assert_eq!(r.scheme().unwrap().as_str(), "file"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), ""); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), ""); assert!(matches!(a.host_parsed(), Host::RegName(n) if n.is_empty())); assert_eq!(a.port(), None); assert_eq!(r.path(), "/etc/hosts"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("ftp://ftp.is.co.za/rfc/rfc1808.txt").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "ftp"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "ftp.is.co.za"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "ftp.is.co.za"); assert!(matches!(a.host_parsed(), Host::RegName(name) if name == "ftp.is.co.za")); assert_eq!(a.port(), None); assert_eq!(r.path(), "/rfc/rfc1808.txt"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("http://www.ietf.org/rfc/rfc2396.txt").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "http"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "www.ietf.org"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "www.ietf.org"); assert!(matches!(a.host_parsed(), Host::RegName(name) if name == "www.ietf.org")); assert_eq!(a.port(), None); assert_eq!(r.path(), "/rfc/rfc2396.txt"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("ldap://[2001:db8::7]/c=GB?objectClass?one").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "ldap"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "[2001:db8::7]"); assert_eq!(a.userinfo(), 
None); assert_eq!(a.host(), "[2001:db8::7]"); #[cfg(feature = "net")] assert!(matches!( a.host_parsed(), Host::Ipv6(addr) if addr == Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 0x7) )); assert_eq!(a.port(), None); assert_eq!(r.path(), "/c=GB"); assert_eq!(r.query(), Some(EStr::new_or_panic("objectClass?one"))); assert_eq!(r.fragment(), None); let r = UriRef::parse("mailto:John.Doe@example.com").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "mailto"); assert!(r.authority().is_none()); assert_eq!(r.path(), "John.Doe@example.com"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("news:comp.infosystems.www.servers.unix").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "news"); assert!(r.authority().is_none()); assert_eq!(r.path(), "comp.infosystems.www.servers.unix"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("tel:+1-816-555-1212").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "tel"); assert!(r.authority().is_none()); assert_eq!(r.path(), "+1-816-555-1212"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("telnet://192.0.2.16:80/").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "telnet"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "192.0.2.16:80"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "192.0.2.16"); #[cfg(feature = "net")] assert!(matches!(a.host_parsed(), Host::Ipv4(addr) if addr == Ipv4Addr::new(192, 0, 2, 16))); assert_eq!(a.port(), Some(EStr::new_or_panic("80"))); assert_eq!(r.path(), "/"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("urn:oasis:names:specification:docbook:dtd:xml:4.1.2").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "urn"); assert!(r.authority().is_none()); assert_eq!(r.path(), "oasis:names:specification:docbook:dtd:xml:4.1.2"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = 
UriRef::parse("foo://example.com:8042/over/there?name=ferret#nose").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "foo"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "example.com:8042"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "example.com"); assert!(matches!(a.host_parsed(), Host::RegName(name) if name == "example.com")); assert_eq!(a.port(), Some(EStr::new_or_panic("8042"))); assert_eq!(r.path(), "/over/there"); assert_eq!(r.query(), Some(EStr::new_or_panic("name=ferret"))); assert_eq!(r.fragment(), Some(EStr::new_or_panic("nose"))); let r = UriRef::parse("ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "ftp"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "cnn.example.com&story=breaking_news@10.0.0.1"); assert_eq!( a.userinfo(), Some(EStr::new_or_panic("cnn.example.com&story=breaking_news")) ); assert_eq!(a.host(), "10.0.0.1"); #[cfg(feature = "net")] assert!(matches!(a.host_parsed(), Host::Ipv4(addr) if addr == Ipv4Addr::new(10, 0, 0, 1))); assert_eq!(a.port(), None); assert_eq!(r.path(), "/top_story.htm"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("http://[vFe.foo.bar]").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "http"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "[vFe.foo.bar]"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "[vFe.foo.bar]"); assert!(matches!(a.host_parsed(), Host::IpvFuture { .. 
})); assert_eq!(a.port(), None); assert_eq!(r.path(), ""); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("http://127.0.0.1:/").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "http"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "127.0.0.1"); #[cfg(feature = "net")] assert!(matches!(a.host_parsed(), Host::Ipv4(addr) if addr == Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some(EStr::EMPTY)); assert_eq!(r.path(), "/"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("http://127.0.0.1:8080/").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "http"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:8080"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "127.0.0.1"); #[cfg(feature = "net")] assert!(matches!(a.host_parsed(), Host::Ipv4(addr) if addr == Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some(EStr::new_or_panic("8080"))); assert_eq!(r.path(), "/"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("http://127.0.0.1:80808/").unwrap(); assert_eq!(r.scheme().unwrap().as_str(), "http"); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "127.0.0.1:80808"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "127.0.0.1"); #[cfg(feature = "net")] assert!(matches!(a.host_parsed(), Host::Ipv4(addr) if addr == Ipv4Addr::new(127, 0, 0, 1))); assert_eq!(a.port(), Some(EStr::new_or_panic("80808"))); assert_eq!(r.path(), "/"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); } #[test] fn parse_relative() { let r = UriRef::parse("").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), ""); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("foo.txt").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), "foo.txt"); 
assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse(".").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), "."); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("./this:that").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), "./this:that"); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("//example.com").unwrap(); assert!(r.scheme().is_none()); let a = r.authority().unwrap(); assert_eq!(a.as_str(), "example.com"); assert_eq!(a.userinfo(), None); assert_eq!(a.host(), "example.com"); assert!(matches!(a.host_parsed(), Host::RegName(name) if name == "example.com")); assert_eq!(a.port(), None); assert_eq!(r.path(), ""); assert_eq!(r.query(), None); assert_eq!(r.fragment(), None); let r = UriRef::parse("?query").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), ""); assert_eq!(r.query(), Some(EStr::new_or_panic("query"))); assert_eq!(r.fragment(), None); let r = UriRef::parse("#fragment").unwrap(); assert!(r.scheme().is_none()); assert!(r.authority().is_none()); assert_eq!(r.path(), ""); assert_eq!(r.query(), None); assert_eq!(r.fragment(), Some(EStr::new_or_panic("fragment"))); } use ParseErrorKind::*; #[test] fn parse_error_uri() { #[track_caller] fn fail(input: &str, index: usize, kind: ParseErrorKind) { let e = Uri::parse(input).unwrap_err(); assert_eq!(e.index(), index); assert_eq!(e.kind(), kind); } // No scheme fail("foo", 3, UnexpectedChar); // Empty scheme fail(":hello", 0, UnexpectedChar); // Scheme starts with non-letter fail("3ttp://a.com", 0, UnexpectedChar); // Unexpected char in scheme fail("exam=ple:foo", 4, UnexpectedChar); fail("(:", 0, UnexpectedChar); // Percent-encoded scheme fail("a%20:foo", 1, UnexpectedChar); } #[track_caller] fn fail(input: &str, index: usize, kind: ParseErrorKind) { let e = 
UriRef::parse(input).unwrap_err(); assert_eq!(e.index(), index); assert_eq!(e.kind(), kind); } #[test] fn parse_error_uri_ref() { // Empty scheme fail(":hello", 0, UnexpectedChar); // Scheme starts with non-letter fail("3ttp://a.com", 0, UnexpectedChar); // After rewriting the parser, the following two cases are interpreted as // containing colon in the first path segment of a relative reference. // Unexpected char in scheme fail("exam=ple:foo", 8, UnexpectedChar); fail("(:", 1, UnexpectedChar); // Percent-encoded scheme fail("a%20:foo", 4, UnexpectedChar); // Unexpected char in path fail("foo\\bar", 3, UnexpectedChar); // Non-hexadecimal percent-encoded octet fail("foo%xxd", 3, InvalidPctEncodedOctet); // Incomplete percent-encoded octet fail("text%a", 4, InvalidPctEncodedOctet); // A single percent fail("%", 0, InvalidPctEncodedOctet); // Non-decimal port fail("http://example.com:80ab", 21, UnexpectedChar); fail("http://user@example.com:80ab", 26, UnexpectedChar); // Multiple colons in authority fail("http://user:pass:example.com/", 16, UnexpectedChar); // Unclosed bracket fail("https://[::1/", 12, UnexpectedChar); // Not port after IP literal fail("https://[::1]wrong", 13, UnexpectedChar); // IP literal too short fail("http://[:]", 8, InvalidIpv6Addr); fail("http://[]", 8, UnexpectedChar); // Non-hexadecimal version in IPvFuture fail("http://[vG.addr]", 9, UnexpectedChar); // Empty version in IPvFuture fail("http://[v.addr]", 9, UnexpectedChar); // Empty address in IPvFuture fail("ftp://[vF.]", 10, UnexpectedChar); // Percent-encoded address in IPvFuture fail("ftp://[vF.%20]", 10, UnexpectedChar); // With zone identifier fail("ftp://[fe80::abcd%eth0]", 17, UnexpectedChar); // Invalid IPv6 address fail("example://[44:55::66::77]", 11, InvalidIpv6Addr); } #[test] fn strict_ip_addr() { let r = UriRef::parse("//127.0.0.001").unwrap(); let a = r.authority().unwrap(); assert!(matches!(a.host_parsed(), Host::RegName(_))); let r = UriRef::parse("//127.1").unwrap(); let 
a = r.authority().unwrap(); assert!(matches!(a.host_parsed(), Host::RegName(_))); let r = UriRef::parse("//127.00.00.1").unwrap(); let a = r.authority().unwrap(); assert!(matches!(a.host_parsed(), Host::RegName(_))); assert!(UriRef::parse("//[::1.1.1.1]").is_ok()); assert!(UriRef::parse("//[::ffff:1.1.1.1]").is_ok()); assert!(UriRef::parse("//[0000:0000:0000:0000:0000:0000:255.255.255.255]").is_ok()); fail("//[::01.1.1.1]", 3, InvalidIpv6Addr); fail("//[::00.1.1.1]", 3, InvalidIpv6Addr); } fluent-uri-0.4.1/tests/parse_ip.rs000064400000000000000000000106211046102023000152320ustar 00000000000000#![cfg(feature = "net")] use core::net::{Ipv4Addr, Ipv6Addr}; use fluent_uri::{component::Host, UriRef}; fn parse_v4(s: &str) -> Option { let s = format!("//{s}"); match UriRef::parse(&*s).ok()?.authority()?.host_parsed() { Host::Ipv4(addr) => Some(addr), _ => None, } } fn parse_v6(s: &str) -> Option { let s = format!("//[{s}]"); match UriRef::parse(&*s).ok()?.authority()?.host_parsed() { Host::Ipv6(addr) => Some(addr), _ => None, } } #[test] fn test_parse_v4() { assert_eq!(Some(Ipv4Addr::new(127, 0, 0, 1)), parse_v4("127.0.0.1")); assert_eq!( Some(Ipv4Addr::new(255, 255, 255, 255)), parse_v4("255.255.255.255") ); assert_eq!(Some(Ipv4Addr::new(0, 0, 0, 0)), parse_v4("0.0.0.0")); // out of range assert!(parse_v4("256.0.0.1").is_none()); // too short assert!(parse_v4("255.0.0").is_none()); // too long assert!(parse_v4("255.0.0.1.2").is_none()); // no number between dots assert!(parse_v4("255.0..1").is_none()); // octal assert!(parse_v4("255.0.0.01").is_none()); // octal zero assert!(parse_v4("255.0.0.00").is_none()); assert!(parse_v4("255.0.00.0").is_none()); // leading dot assert!(parse_v4(".0.0.0.0").is_none()); // trailing dot assert!(parse_v4("0.0.0.0.").is_none()); } #[test] fn test_parse_v6() { assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), parse_v6("0:0:0:0:0:0:0:0") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 5, 6, 7, 8)), 
parse_v6("1:02:003:0004:0005:006:07:8") ); assert_eq!(Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)), parse_v6("::1")); assert_eq!(Some(Ipv6Addr::new(1, 0, 0, 0, 0, 0, 0, 0)), parse_v6("1::")); assert_eq!(Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)), parse_v6("::")); assert_eq!( Some(Ipv6Addr::new(0x2a02, 0x6b8, 0, 0, 0, 0, 0x11, 0x11)), parse_v6("2a02:6b8::11:11") ); assert_eq!( Some(Ipv6Addr::new(0, 2, 3, 4, 5, 6, 7, 8)), parse_v6("::2:3:4:5:6:7:8") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 0, 6, 7, 8)), parse_v6("1:2:3:4::6:7:8") ); assert_eq!( Some(Ipv6Addr::new(1, 2, 3, 4, 5, 6, 7, 0)), parse_v6("1:2:3:4:5:6:7::") ); // only a colon assert!(parse_v6(":").is_none()); // too long group assert!(parse_v6("::00000").is_none()); // too short assert!(parse_v6("1:2:3:4:5:6:7").is_none()); // too long assert!(parse_v6("1:2:3:4:5:6:7:8:9").is_none()); // triple colon assert!(parse_v6("1:2:::6:7:8").is_none()); assert!(parse_v6("1:2:::").is_none()); assert!(parse_v6(":::6:7:8").is_none()); assert!(parse_v6(":::").is_none()); // two double colons assert!(parse_v6("1:2::6::8").is_none()); assert!(parse_v6("::6::8").is_none()); assert!(parse_v6("1:2::6::").is_none()); assert!(parse_v6("::2:6::").is_none()); // `::` indicating zero groups of zeros assert!(parse_v6("::1:2:3:4:5:6:7:8").is_none()); assert!(parse_v6("1:2:3:4::5:6:7:8").is_none()); assert!(parse_v6("1:2:3:4:5:6:7:8::").is_none()); // leading colon assert!(parse_v6(":1:2:3:4:5:6:7:8").is_none()); assert!(parse_v6(":1::1").is_none()); assert!(parse_v6(":1").is_none()); // trailing colon assert!(parse_v6("1:2:3:4:5:6:7:8:").is_none()); assert!(parse_v6("1::1:").is_none()); assert!(parse_v6("1:").is_none()); } #[test] fn test_parse_v4_in_v6() { assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 49152, 545)), parse_v6("::192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new(0, 0, 0, 0, 0, 0xFFFF, 49152, 545)), parse_v6("::FFFF:192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new(0x64, 0xff9b, 0, 0, 0, 0, 49152, 545)), 
parse_v6("64:ff9b::192.0.2.33") ); assert_eq!( Some(Ipv6Addr::new( 0x2001, 0xdb8, 0x122, 0xc000, 0x2, 0x2100, 49152, 545 )), parse_v6("2001:db8:122:c000:2:2100:192.0.2.33") ); // colon after v4 assert!(parse_v6("::127.0.0.1:").is_none()); // not enough groups assert!(parse_v6("1:2:3:4:5:127.0.0.1").is_none()); // too many groups assert!(parse_v6("1:2:3:4:5:6:7:127.0.0.1").is_none()); // triple colons before v4 assert!(parse_v6(":::4.4.4.4").is_none()); // no colon before v4 assert!(parse_v6("::ffff4.4.4.4").is_none()); } fluent-uri-0.4.1/tests/resolve.rs000064400000000000000000000110661046102023000151130ustar 00000000000000#![cfg(feature = "alloc")] use fluent_uri::{ resolve::{ResolveError, Resolver}, Uri, UriRef, }; trait Test { fn pass(&self, r: &str, res: &str); fn fail(&self, r: &str, err: ResolveError); } impl Test for Uri<&str> { #[track_caller] fn pass(&self, r: &str, expected: &str) { let r = UriRef::parse(r).unwrap(); for b in [true, false] { let resolver = Resolver::with_base(*self).allow_path_underflow(b); assert_eq!(resolver.resolve(&r).unwrap(), expected); } } #[track_caller] fn fail(&self, r: &str, expected: ResolveError) { let r = UriRef::parse(r).unwrap(); for b in [true, false] { let resolver = Resolver::with_base(*self).allow_path_underflow(b); assert_eq!(resolver.resolve(&r).unwrap_err(), expected); } } } #[test] fn resolve() { // Examples from Section 5.4 of RFC 3986. 
let base = Uri::parse("http://a/b/c/d;p?q").unwrap(); base.pass("g:h", "g:h"); base.pass("g", "http://a/b/c/g"); base.pass("./g", "http://a/b/c/g"); base.pass("g/", "http://a/b/c/g/"); base.pass("/g", "http://a/g"); base.pass("//g", "http://g"); base.pass("?y", "http://a/b/c/d;p?y"); base.pass("g?y", "http://a/b/c/g?y"); base.pass("#s", "http://a/b/c/d;p?q#s"); base.pass("g#s", "http://a/b/c/g#s"); base.pass("g?y#s", "http://a/b/c/g?y#s"); base.pass(";x", "http://a/b/c/;x"); base.pass("g;x", "http://a/b/c/g;x"); base.pass("g;x?y#s", "http://a/b/c/g;x?y#s"); base.pass("", "http://a/b/c/d;p?q"); base.pass(".", "http://a/b/c/"); base.pass("./", "http://a/b/c/"); base.pass("..", "http://a/b/"); base.pass("../", "http://a/b/"); base.pass("../g", "http://a/b/g"); base.pass("../..", "http://a/"); base.pass("../../", "http://a/"); base.pass("../../g", "http://a/g"); base.pass("/./g", "http://a/g"); base.pass("g.", "http://a/b/c/g."); base.pass(".g", "http://a/b/c/.g"); base.pass("g..", "http://a/b/c/g.."); base.pass("..g", "http://a/b/c/..g"); base.pass("./../g", "http://a/b/g"); base.pass("./g/.", "http://a/b/c/g/"); base.pass("g/./h", "http://a/b/c/g/h"); base.pass("g/../h", "http://a/b/c/h"); base.pass("g;x=1/./y", "http://a/b/c/g;x=1/y"); base.pass("g;x=1/../y", "http://a/b/c/y"); base.pass("g?y/./x", "http://a/b/c/g?y/./x"); base.pass("g?y/../x", "http://a/b/c/g?y/../x"); base.pass("g#s/./x", "http://a/b/c/g#s/./x"); base.pass("g#s/../x", "http://a/b/c/g#s/../x"); base.pass("http:g", "http:g"); // Non-hierarchical base URI. let base = Uri::parse("foo:bar").unwrap(); base.pass("", "foo:bar"); base.pass("#baz", "foo:bar#baz"); base.pass("http://example.com/", "http://example.com/"); base.pass("foo:baz", "foo:baz"); base.pass("bar:baz", "bar:baz"); let base = Uri::parse("foo:/").unwrap(); // The result would be "foo://@@" using the original algorithm. 
base.pass(".//@@", "foo:/.//@@"); let base = Uri::parse("foo:/bar/baz/.%2E/").unwrap(); // The result would be "foo:/bar/baz" using the original algorithm. base.pass("..", "foo:/"); let base = Uri::parse("foo:/bar/..").unwrap(); // The result would be "foo:/bar/" using the original algorithm. base.pass(".", "foo:/"); let base = base.normalize(); base.borrow().pass(".", "foo:/"); } #[test] fn resolve_error() { let base = Uri::parse("http://example.com/#title1").unwrap(); base.fail("foo", ResolveError::BaseWithFragment); let base = Uri::parse("foo:bar").unwrap(); base.fail("baz", ResolveError::InvalidReferenceAgainstOpaqueBase); base.fail("?baz", ResolveError::InvalidReferenceAgainstOpaqueBase); } #[test] fn resolve_underflow() { for (base, rs) in [ ( "http://a/b/c/d;p?q", ["../../../g", "../../../../g", "/../g"], ), ("foo:/..", ["", "?a", "#a"]), ] { let base = Uri::parse(base).unwrap(); for r in rs { let resolver = Resolver::with_base(base).allow_path_underflow(true); assert!(resolver.resolve(&UriRef::parse(r).unwrap()).is_ok()); let resolver = Resolver::with_base(base).allow_path_underflow(false); assert_eq!( resolver.resolve(&UriRef::parse(r).unwrap()).unwrap_err(), ResolveError::PathUnderflow ); } } let base = Uri::parse("foo:bar/..").unwrap(); base.pass("", "foo:bar/.."); base.fail("?a", ResolveError::InvalidReferenceAgainstOpaqueBase); base.pass("#a", "foo:bar/..#a"); } fluent-uri-0.4.1/tests/to_socket_addrs.rs000064400000000000000000000031001046102023000165710ustar 00000000000000#![cfg(all(feature = "net", feature = "std"))] use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV4, SocketAddrV6}; use fluent_uri::UriRef; #[test] fn test_to_socket_addrs() { let r = UriRef::parse("//127.0.0.1:81").unwrap(); assert!(r .authority() .unwrap() .socket_addrs(80) .unwrap() .eq([SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 81).into()])); let r = UriRef::parse("//127.0.0.1").unwrap(); assert!(r .authority() .unwrap() .socket_addrs(80) .unwrap() 
.eq([SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 80).into()])); let r = UriRef::parse("//[::1]").unwrap(); assert!(r .authority() .unwrap() .socket_addrs(80) .unwrap() .eq([SocketAddrV6::new(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1), 80, 0, 0).into()])); let r = UriRef::parse("//127.0.0.1:").unwrap(); assert!(r .authority() .unwrap() .socket_addrs(80) .unwrap() .eq([SocketAddrV4::new(Ipv4Addr::new(127, 0, 0, 1), 80).into()])); let r = UriRef::parse("//127.0.0.1:65537").unwrap(); assert_eq!( r.authority() .unwrap() .socket_addrs(80) .err() .unwrap() .to_string(), "invalid port value" ); let r = UriRef::parse("//[vF.whatever]").unwrap(); assert_eq!( r.authority() .unwrap() .socket_addrs(80) .err() .unwrap() .to_string(), "address mechanism not supported" ); }