subfeature-0.0.4/.cargo_vcs_info.json0000644000000001570000000000100132050ustar { "git": { "sha1": "6db45b582f1fe2b358b3553ef2bfa9e16369494f" }, "path_in_vcs": "crates/subfeature" }subfeature-0.0.4/.gitignore000064400000000000000000000000101046102023000137510ustar 00000000000000/target subfeature-0.0.4/Cargo.lock0000644000000056520000000000100111650ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "memchr" version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "proc-macro2" version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] [[package]] name = "regex" version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "serde" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ "serde_core", "serde_derive", ] [[package]] name = "serde_core" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "subfeature" version = "0.0.4" dependencies = [ "memchr", "regex", "serde", ] [[package]] name = "syn" version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" subfeature-0.0.4/Cargo.toml0000644000000024200000000000100111760ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2024" name = "subfeature" version = "0.0.4" authors = ["William Woodruff "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Subfeature handling and manipulation APIs" homepage = "https://docs.zizmor.sh" readme = "README.md" license = "MIT" repository = "https://github.com/zizmorcore/zizmor/tree/main/crates/subfeature" resolver = "2" [lib] name = "subfeature" path = "src/lib.rs" [dependencies.memchr] version = "2.7.6" [dependencies.regex] version = "1.12.1" [dependencies.serde] version = "1.0.228" features = ["derive"] [lints.clippy] dbg_macro = "warn" needless_lifetimes = "warn" print_stderr = "warn" print_stdout = "warn" todo = "warn" unimplemented = "warn" unwrap_used = "warn" use_debug = "warn" subfeature-0.0.4/Cargo.toml.orig000064400000000000000000000006451046102023000146660ustar 00000000000000[package] name = "subfeature" description = "Subfeature handling and manipulation APIs" repository = "https://github.com/zizmorcore/zizmor/tree/main/crates/subfeature" version = "0.0.4" readme = "README.md" authors.workspace = true homepage.workspace = true edition.workspace = true license.workspace = true [lints] workspace = true [dependencies] memchr.workspace = true regex.workspace = true serde.workspace = true subfeature-0.0.4/LICENSE000064400000000000000000000021251046102023000127770ustar 00000000000000The MIT License (MIT) Copyright (c) 2025 William Woodruff Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. subfeature-0.0.4/README.md000064400000000000000000000020741046102023000132540ustar 00000000000000# subfeature [![zizmor](https://img.shields.io/badge/%F0%9F%8C%88-zizmor-white?labelColor=white)](https://zizmor.sh/) [![CI](https://github.com/zizmorcore/zizmor/actions/workflows/ci.yml/badge.svg)](https://github.com/zizmorcore/zizmor/actions/workflows/ci.yml) [![Crates.io](https://img.shields.io/crates/v/subfeature)](https://crates.io/crates/subfeature) [![docs.rs](https://img.shields.io/docsrs/subfeature)](https://docs.rs/subfeature) [![GitHub Sponsors](https://img.shields.io/github/sponsors/woodruffw?style=flat&logo=githubsponsors&labelColor=white&color=white)](https://github.com/sponsors/woodruffw) [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?logo=discord&logoColor=white)](https://discord.com/invite/PGU3zGZuGG) Subfeature handling and manipulation APIs. A "subfeature" is a subset of a _feature_, which is zizmor's term for a syntactically relevant extract of a YAML document. This crate provides APIs for creating subfeatures and matching them against parent features. This crate is part of [zizmor](https://zizmor.sh). ## License MIT License. subfeature-0.0.4/src/lib.rs000064400000000000000000000157111046102023000137020ustar 00000000000000//! Subfeature handling and manipulation APIs. #![forbid(unsafe_code)] #![deny(missing_docs)] use std::sync::LazyLock; use serde::Serialize; /// Represents a subfeature's fragment.
/// /// This is used to locate a subfeature's exact location within a surrounding /// feature. #[derive(Serialize, Clone, Debug)] pub enum Fragment<'a> { /// A raw subfeature fragment. /// /// This is useful primarily for matching an exact fragment within /// a larger feature, e.g. a string literal. /// /// It *shouldn't* be used to match things like expressions, since they /// might contain whitespace that won't exactly match the surrounding /// feature. For that, [`Fragment::Regex`] is appropriate. Raw(&'a str), /// A regular expression for matching a subfeature. /// /// This is useful primarily for matching any kind of subfeature that /// might contain multiple lines, e.g. a multi-line GitHub Actions /// expression, since the subfeature's indentation won't necessarily match /// the surrounding feature's YAML-level indentation. Regex(#[serde(serialize_with = "Fragment::serialize_regex")] regex::bytes::Regex), } impl<'a> Fragment<'a> { fn serialize_regex(regex: ®ex::bytes::Regex, serializer: S) -> Result where S: serde::Serializer, { let pattern = regex.as_str(); serializer.serialize_str(pattern) } /// Create a new [`Fragment`] from the given string. /// /// The created fragment's behavior depends on whether the input /// contains newlines or not: if there are no newlines then the fragment /// is a "raw" fragment that gets matched verbatim. If there are newlines, /// then the fragment is a "regex" fragment that allows a degree of /// whitespace malleability to allow for matching against a YAML feature /// with its own syntactically relevant whitespace. pub fn new(fragment: &'a str) -> Self { if !fragment.contains('\n') { // Silly optimization: we don't need to build up a pattern for this // expression if it doesn't have any newlines. Fragment::Raw(fragment) } else { // We turn a spanned expression into a regular expression by // replacing all whitespace with `\\s+`. 
// // This is a ridiculous overapproximation of the actual difference // in expected whitespace, but it works well enough and saves // us having to walk the expression's nodes and build up a more // precise pattern manually (which ends up being nontrivial, // since our current AST doesn't preserve parentheses). // // This approach is not strictly correct, since it doesn't distinguish // between syntactical whitespace and whitespace within e.g. // string literals. let escaped = regex::escape(fragment); #[allow(clippy::unwrap_used)] static WHITESPACE: LazyLock = LazyLock::new(|| regex::Regex::new(r"\s+").unwrap()); let regex = WHITESPACE.replace_all(&escaped, "\\s+"); Fragment::Regex( regex::bytes::Regex::new(®ex) .expect("internal error: failed to compile fragment regex"), ) } } } impl<'doc> From<&'doc str> for Fragment<'doc> { fn from(fragment: &'doc str) -> Self { Self::new(fragment) } } /// Represents a `[start, end)` byte span for a source expression. #[derive(Copy, Clone, Debug, PartialEq)] pub struct Span { /// The start of the span, inclusive. pub start: usize, /// The end of the span, exclusive. pub end: usize, } impl Span { /// Adjust this span by the given bias. pub fn adjust(self, bias: usize) -> Self { Self { start: self.start + bias, end: self.end + bias, } } /// Returns the span as a range. pub fn as_range(&self) -> std::ops::Range { self.start..self.end } } impl From> for Span { fn from(range: std::ops::Range) -> Self { Self { start: range.start, end: range.end, } } } /// Represents a "subfeature" of a symbolic location, such as a substring /// within a YAML string. #[derive(Serialize, Clone, Debug)] pub struct Subfeature<'a> { /// A byte index after which the subfeature starts. /// /// This is a fuzzy anchor: we know our subfeature starts /// *somewhere* after this index, but we don't know exactly where it is /// in the original feature due to parsed whitespace. pub after: usize, /// The fragment of the subfeature. 
pub fragment: Fragment<'a>, } impl<'a> Subfeature<'a> { /// Create a new subfeature with the given `after` index and `fragment`. pub fn new(after: usize, fragment: impl Into>) -> Self { Self { after, fragment: fragment.into(), } } /// Locate this subfeature within the given feature. /// /// Returns the subfeature's span within the feature, or `None` if it /// can't be found. The returned span is relative to the feature's /// start. pub fn locate_within(&self, feature: &str) -> Option { // NOTE: Our inputs are always valid UTF-8 but `after` may not // be a valid UTF-8 codepoint index, so everything below operates // on a byte slice. // Why, you might ask, might `after` not be a valid codepoint index? // Because `after` is a fuzzy anchor: we know our subfeature starts // *somewhere* after `after`, but we don't know exactly where. // This happens because we have a rough sense of where the subfeature // is *after* YAML parsing, but we don't know exactly where it is // in the original YAML feature due to significant whitespace. let feature = feature.as_bytes(); let bias = self.after; let focus = &feature[bias..]; match &self.fragment { Fragment::Raw(fragment) => { memchr::memmem::find(focus, fragment.as_bytes()).map(|start| { let end = start + fragment.len(); Span::from(start..end).adjust(bias) }) } Fragment::Regex(regex) => regex .find(focus) .map(|m| Span::from(m.range()).adjust(bias)), } } } #[cfg(test)] mod tests { use crate::Fragment; #[test] fn test_fragment_from_context() { for (ctx, expected) in &[ ("foo.bar", "foo.bar"), ("foo . bar", "foo . bar"), ("foo['bar']", "foo['bar']"), ("foo [\n'bar'\n]", r"foo\s+\[\s+'bar'\s+\]"), ] { match Fragment::from(*ctx) { Fragment::Raw(actual) => assert_eq!(actual, *expected), Fragment::Regex(actual) => assert_eq!(actual.as_str(), *expected), } } } }