fuzzy-matcher-0.3.7/.cargo_vcs_info.json0000644000000001121373627341300137140ustar { "git": { "sha1": "ee73fa4559a9ea06e51b54b0a436590c911c18a1" } } fuzzy-matcher-0.3.7/.github/workflows/ci.yml010064400017510000164000000014031373627335600172410ustar 00000000000000name: Build & Test on: pull_request: push: branches: - master jobs: test: name: test runs-on: ${{matrix.os}} strategy: matrix: os: [ubuntu-latest, macOS-latest] rust: [stable, "1.31.1"] steps: - name: Checkout repository uses: actions/checkout@v1 with: fetch-depth: 1 - name: Install correct toolchain uses: actions-rs/toolchain@v1 with: toolchain: ${{ matrix.rust }} target: ${{ matrix.target }} override: true - name: Run cargo check uses: actions-rs/cargo@v1 with: command: check - name: Run tests run: cargo test --verbose - name: Run tests for compact feature run: cargo test --verbose --features compact fuzzy-matcher-0.3.7/.github/workflows/release.yml010064400017510000164000000012001373627335600202610ustar 00000000000000name: Release on: push: tags: - 'v*.*.*' jobs: release: name: release runs-on: [ubuntu-latest] steps: - name: Checkout repository uses: actions/checkout@v1 with: fetch-depth: 1 - name: Install correct toolchain uses: actions-rs/toolchain@v1 with: toolchain: stable override: true - name: Run cargo check uses: actions-rs/cargo@v1 with: command: check - name: login crates.io run: cargo login ${CRATES_IO_TOKEN} env: CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} - name: publish run: cargo publish fuzzy-matcher-0.3.7/.gitignore010064400017510000164000000000441373627335600145160ustar 00000000000000/target **/*.rs.bk Cargo.lock .idea fuzzy-matcher-0.3.7/Cargo.lock0000644000000032371373627341300117020ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
[[package]] name = "fuzzy-matcher" version = "0.3.7" dependencies = [ "termion", "thread_local", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.78" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa7087f49d294270db4e1928fc110c976cd4b9e5a16348e0a1df09afa99e6c98" [[package]] name = "numtoa" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" [[package]] name = "redox_syscall" version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_termios" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" dependencies = [ "redox_syscall", ] [[package]] name = "termion" version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c22cec9d8978d906be5ac94bceb5a010d885c626c4c8855721a4dbd20e3ac905" dependencies = [ "libc", "numtoa", "redox_syscall", "redox_termios", ] [[package]] name = "thread_local" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" dependencies = [ "lazy_static", ] fuzzy-matcher-0.3.7/Cargo.toml0000644000000020371373627341300117220ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file 
please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "fuzzy-matcher" version = "0.3.7" authors = ["Jinzhou Zhang "] description = "Fuzzy Matching Library" homepage = "https://github.com/lotabout/fuzzy-matcher" documentation = "https://docs.rs/fuzzy-matcher" readme = "README.md" keywords = ["fuzzy", "match", "text", "search"] license = "MIT" repository = "https://github.com/lotabout/fuzzy-matcher" [dependencies.thread_local] version = "1.0.0" [dev-dependencies.termion] version = "1.5.1" [features] compact = [] default = [] fuzzy-matcher-0.3.7/Cargo.toml.orig010064400017510000164000000010061373627335600154140ustar 00000000000000[package] name = "fuzzy-matcher" version = "0.3.7" authors = ["Jinzhou Zhang "] description = "Fuzzy Matching Library" documentation = "https://docs.rs/fuzzy-matcher" homepage = "https://github.com/lotabout/fuzzy-matcher" repository = "https://github.com/lotabout/fuzzy-matcher" readme = "README.md" keywords = ["fuzzy", "match", "text", "search"] license = "MIT" edition = "2018" [features] default = [] compact = [] [dependencies] thread_local = "1.0.0" [dev-dependencies] termion = "1.5.1" fuzzy-matcher-0.3.7/LICENSE010064400017510000164000000020701373627335600135340ustar 00000000000000The MIT License (MIT) Copyright (c) 2019 Jinzhou Zhang Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of 
the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. fuzzy-matcher-0.3.7/README.md010064400017510000164000000051361373627335600140140ustar 00000000000000[![Crates.io](https://img.shields.io/crates/v/fuzzy-matcher.svg)](https://crates.io/crates/fuzzy-matcher) # Fuzzy Matcher Fuzzy matching algorithm(s) in Rust! ## Usage In your Cargo.toml add the following: ```toml [dependencies] fuzzy-matcher = "*" ``` Here is a code example: ```rust use fuzzy_matcher::FuzzyMatcher; use fuzzy_matcher::skim::SkimMatcherV2; let matcher = SkimMatcherV2::default(); assert_eq!(None, matcher.fuzzy_match("abc", "abx")); assert!(matcher.fuzzy_match("axbycz", "abc").is_some()); assert!(matcher.fuzzy_match("axbycz", "xyz").is_some()); let (score, indices) = matcher.fuzzy_indices("axbycz", "abc").unwrap(); assert_eq!(indices, [0, 2, 4]); ``` - `fuzzy_match` only returns the score, while `fuzzy_indices` returns the matching indices as well. - Both functions return `None` if the pattern does not match. - The higher the score, the better the match. ## More examples Run `echo "axbycz" | cargo run --example fz "abc"` and check what happens. ## About the Algorithm ### Skim The skim algorithm is currently used by [skim](https://github.com/lotabout/skim), a fuzzy finder. #### Skim V2 - Just like fzf v2, the algorithm is based on the Smith-Waterman algorithm, which is normally used in DNA sequence alignment. - Also check out https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf for more details. - The time complexity is `O(mn)` where `m, n` are the lengths of the pattern and the input line. 
- Space complexity is `O(mn)` for `fuzzy_indices` and `O(2n)` for `fuzzy_match`, which compresses the dynamic-programming table. - The V2 matcher has an option to set the maximum number of elements in the score matrix; if `m*n` exceeds the limit, it falls back to a linear search. #### Skim V1 - It's based on Smith's post [Reverse Engineering Sublime Text’s Fuzzy Match](https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/) - The implementation here actually has some flaws and does not perform well in certain cases. - It's recommended to check out the original implementation in [C++](https://github.com/forrestthewoods/lib_fts/blob/master/code/fts_fuzzy_match.h) and [JavaScript](https://github.com/forrestthewoods/lib_fts/blob/master/code/fts_fuzzy_match.js) ### Clangd - The algorithm is based on [clangd's FuzzyMatch.cpp](https://github.com/MaskRay/ccls/blob/master/src/fuzzy_match.cc). - Also check out https://github.com/lewang/flx/issues/98 for some variants. - The time complexity is `O(mn)` where `m, n` are the lengths of the pattern and the input line. - Space complexity is `O(mn)` for `fuzzy_indices` and `O(2n)` for `fuzzy_match`, which compresses the dynamic-programming table. 
fuzzy-matcher-0.3.7/examples/fz.rs010064400017510000164000000036271373627335600153430ustar 00000000000000use fuzzy_matcher::clangd::ClangdMatcher; use fuzzy_matcher::skim::SkimMatcherV2; use fuzzy_matcher::FuzzyMatcher; use std::env; use std::io::{self, BufRead}; use std::process::exit; use termion::style::{Invert, Reset}; #[cfg(not(feature = "compact"))] type IndexType = usize; #[cfg(feature = "compact")] type IndexType = u32; pub fn main() { let args: Vec = env::args().collect(); // arg parsing (manually) let mut arg_iter = args.iter().skip(1); let mut pattern = "".to_string(); let mut algorithm = Some("skim"); while let Some(arg) = arg_iter.next() { if arg == "--algo" { algorithm = arg_iter.next().map(String::as_ref); } else { pattern = arg.to_string(); } } if &pattern == "" { eprintln!("Usage: echo | fz --algo [skim|clangd] "); exit(1); } let matcher: Box = match algorithm { Some("skim") | Some("skim_v2") => Box::new(SkimMatcherV2::default()), Some("clangd") => Box::new(ClangdMatcher::default()), _ => panic!("Algorithm not supported: {:?}", algorithm), }; let stdin = io::stdin(); for line in stdin.lock().lines() { if let Ok(line) = line { if let Some((score, indices)) = matcher.fuzzy_indices(&line, &pattern) { println!("{:8}: {}", score, wrap_matches(&line, &indices)); } } } } fn wrap_matches(line: &str, indices: &[IndexType]) -> String { let mut ret = String::new(); let mut peekable = indices.iter().peekable(); for (idx, ch) in line.chars().enumerate() { let next_id = **peekable.peek().unwrap_or(&&(line.len() as IndexType)); if next_id == (idx as IndexType) { ret.push_str(format!("{}{}{}", Invert, ch, Reset).as_str()); peekable.next(); } else { ret.push(ch); } } ret } fuzzy-matcher-0.3.7/src/clangd.rs010064400017510000164000000375051373627335600151270ustar 00000000000000///! The fuzzy matching algorithm used in clangd. ///! https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp ///! ///! # Example: ///! ```edition2018 ///! 
use fuzzy_matcher::FuzzyMatcher; ///! use fuzzy_matcher::clangd::ClangdMatcher; ///! ///! let matcher = ClangdMatcher::default(); ///! ///! assert_eq!(None, matcher.fuzzy_match("abc", "abx")); ///! assert!(matcher.fuzzy_match("axbycz", "abc").is_some()); ///! assert!(matcher.fuzzy_match("axbycz", "xyz").is_some()); ///! ///! let (score, indices) = matcher.fuzzy_indices("axbycz", "abc").unwrap(); ///! assert_eq!(indices, [0, 2, 4]); ///! ///! ``` ///! ///! Algorithm modified from ///! https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp ///! Also check: https://github.com/lewang/flx/issues/98 use crate::util::*; use crate::{FuzzyMatcher, IndexType, ScoreType}; use std::cell::RefCell; use std::cmp::max; use thread_local::CachedThreadLocal; #[derive(Eq, PartialEq, Debug, Copy, Clone)] enum CaseMatching { Respect, Ignore, Smart, } pub struct ClangdMatcher { case: CaseMatching, use_cache: bool, c_cache: CachedThreadLocal>>, // vector to store the characters of choice p_cache: CachedThreadLocal>>, // vector to store the characters of pattern } impl Default for ClangdMatcher { fn default() -> Self { Self { case: CaseMatching::Ignore, use_cache: true, c_cache: CachedThreadLocal::new(), p_cache: CachedThreadLocal::new(), } } } impl ClangdMatcher { pub fn ignore_case(mut self) -> Self { self.case = CaseMatching::Ignore; self } pub fn smart_case(mut self) -> Self { self.case = CaseMatching::Smart; self } pub fn respect_case(mut self) -> Self { self.case = CaseMatching::Respect; self } pub fn use_cache(mut self, use_cache: bool) -> Self { self.use_cache = use_cache; self } fn contains_upper(&self, string: &str) -> bool { for ch in string.chars() { if ch.is_ascii_uppercase() { return true; } } false } fn is_case_sensitive(&self, pattern: &str) -> bool { match self.case { CaseMatching::Respect => true, CaseMatching::Ignore => false, CaseMatching::Smart => self.contains_upper(pattern), } } } impl FuzzyMatcher for ClangdMatcher { fn 
fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { let case_sensitive = self.is_case_sensitive(pattern); let mut choice_chars = self .c_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); let mut pattern_chars = self .p_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); choice_chars.clear(); for char in choice.chars() { choice_chars.push(char); } pattern_chars.clear(); for char in pattern.chars() { pattern_chars.push(char); } if cheap_matches(&choice_chars, &pattern_chars, case_sensitive).is_none() { return None; } let num_pattern_chars = pattern_chars.len(); let num_choice_chars = choice_chars.len(); let dp = build_graph(&choice_chars, &pattern_chars, false, case_sensitive); // search backwards for the matched indices let mut indices_reverse = Vec::with_capacity(num_pattern_chars); let cell = dp[num_pattern_chars][num_choice_chars]; let (mut last_action, score) = if cell.match_score > cell.miss_score { (Action::Match, cell.match_score) } else { (Action::Miss, cell.miss_score) }; let mut row = num_pattern_chars; let mut col = num_choice_chars; while row > 0 || col > 0 { if last_action == Action::Match { indices_reverse.push((col - 1) as IndexType); } let cell = &dp[row][col]; if last_action == Action::Match { last_action = cell.last_action_match; row -= 1; col -= 1; } else { last_action = cell.last_action_miss; col -= 1; } } if !self.use_cache { // drop the allocated memory self.c_cache.get().map(|cell| cell.replace(vec![])); self.p_cache.get().map(|cell| cell.replace(vec![])); } indices_reverse.reverse(); Some((adjust_score(score, num_choice_chars), indices_reverse)) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { let case_sensitive = self.is_case_sensitive(pattern); let mut choice_chars = self .c_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); let mut pattern_chars = self .p_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); choice_chars.clear(); for char in choice.chars() { 
choice_chars.push(char); } pattern_chars.clear(); for char in pattern.chars() { pattern_chars.push(char); } if cheap_matches(&choice_chars, &pattern_chars, case_sensitive).is_none() { return None; } let num_pattern_chars = pattern_chars.len(); let num_choice_chars = choice_chars.len(); let dp = build_graph(&choice_chars, &pattern_chars, true, case_sensitive); let cell = dp[num_pattern_chars & 1][num_choice_chars]; let score = max(cell.match_score, cell.miss_score); if !self.use_cache { // drop the allocated memory self.c_cache.get().map(|cell| cell.replace(vec![])); self.p_cache.get().map(|cell| cell.replace(vec![])); } Some(adjust_score(score, num_choice_chars)) } } /// fuzzy match `line` with `pattern`, returning the score and indices of matches pub fn fuzzy_indices(line: &str, pattern: &str) -> Option<(ScoreType, Vec)> { ClangdMatcher::default() .ignore_case() .fuzzy_indices(line, pattern) } /// fuzzy match `line` with `pattern`, returning the score(the larger the better) on match pub fn fuzzy_match(line: &str, pattern: &str) -> Option { ClangdMatcher::default() .ignore_case() .fuzzy_match(line, pattern) } // checkout https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp // for the description fn build_graph( line: &[char], pattern: &[char], compressed: bool, case_sensitive: bool, ) -> Vec> { let num_line_chars = line.len(); let num_pattern_chars = pattern.len(); let max_rows = if compressed { 2 } else { num_pattern_chars + 1 }; let mut dp: Vec> = Vec::with_capacity(max_rows); for _ in 0..max_rows { dp.push(vec![Score::default(); num_line_chars + 1]); } dp[0][0].miss_score = 0; // first line for (idx, &ch) in line.iter().enumerate() { dp[0][idx + 1] = Score { miss_score: dp[0][idx].miss_score - skip_penalty(idx, ch, Action::Miss), last_action_miss: Action::Miss, match_score: AWFUL_SCORE, last_action_match: Action::Miss, }; } // build the matrix let mut pat_prev_ch = '\0'; for (pat_idx, &pat_ch) in pattern.iter().enumerate() { let 
current_row_idx = if compressed { (pat_idx + 1) & 1 } else { pat_idx + 1 }; let prev_row_idx = if compressed { pat_idx & 1 } else { pat_idx }; let mut line_prev_ch = '\0'; for (line_idx, &line_ch) in line.iter().enumerate() { if line_idx < pat_idx { line_prev_ch = line_ch; continue; } // what if we skip current line character? // we need to calculate the cases where the pre line character is matched/missed let pre_miss = &dp[current_row_idx][line_idx]; let mut match_miss_score = pre_miss.match_score; let mut miss_miss_score = pre_miss.miss_score; if pat_idx < num_pattern_chars - 1 { match_miss_score -= skip_penalty(line_idx, line_ch, Action::Match); miss_miss_score -= skip_penalty(line_idx, line_ch, Action::Miss); } let (miss_score, last_action_miss) = if match_miss_score > miss_miss_score { (match_miss_score, Action::Match) } else { (miss_miss_score, Action::Miss) }; // what if we want to match current line character? // so we need to calculate the cases where the pre pattern character is matched/missed let pre_match = &dp[prev_row_idx][line_idx]; let match_match_score = if allow_match(pat_ch, line_ch, case_sensitive) { pre_match.match_score + match_bonus( pat_idx, pat_ch, pat_prev_ch, line_idx, line_ch, line_prev_ch, Action::Match, ) } else { AWFUL_SCORE }; let miss_match_score = if allow_match(pat_ch, line_ch, case_sensitive) { pre_match.miss_score + match_bonus( pat_idx, pat_ch, pat_prev_ch, line_idx, line_ch, line_prev_ch, Action::Match, ) } else { AWFUL_SCORE }; let (match_score, last_action_match) = if match_match_score > miss_match_score { (match_match_score, Action::Match) } else { (miss_match_score, Action::Miss) }; dp[current_row_idx][line_idx + 1] = Score { miss_score, last_action_miss, match_score, last_action_match, }; line_prev_ch = line_ch; } pat_prev_ch = pat_ch; } dp } fn adjust_score(score: ScoreType, num_line_chars: usize) -> ScoreType { // line width will affect 10 scores score - (((num_line_chars + 1) as f64).ln().floor() as ScoreType) } const 
AWFUL_SCORE: ScoreType = -(1 << 30); #[derive(Debug, PartialEq, Clone, Copy)] enum Action { Miss, Match, } #[derive(Debug, Clone, Copy)] struct Score { pub last_action_miss: Action, pub last_action_match: Action, pub miss_score: ScoreType, pub match_score: ScoreType, } impl Default for Score { fn default() -> Self { Self { last_action_miss: Action::Miss, last_action_match: Action::Miss, miss_score: AWFUL_SCORE, match_score: AWFUL_SCORE, } } } fn skip_penalty(_ch_idx: usize, ch: char, last_action: Action) -> ScoreType { let mut score = 1; if last_action == Action::Match { // Non-consecutive match. score += 3; } if char_type_of(ch) == CharType::NonWord { // skip separator score += 6; } score } fn allow_match(pat_ch: char, line_ch: char, case_sensitive: bool) -> bool { char_equal(pat_ch, line_ch, case_sensitive) } fn match_bonus( pat_idx: usize, pat_ch: char, pat_prev_ch: char, line_idx: usize, line_ch: char, line_prev_ch: char, last_action: Action, ) -> ScoreType { let mut score = 10; let pat_role = char_role(pat_prev_ch, pat_ch); let line_role = char_role(line_prev_ch, line_ch); // Bonus: pattern so far is a (case-insensitive) prefix of the word. if pat_idx == line_idx { score += 10; } // Bonus: case match if pat_ch == line_ch { score += 8; } // Bonus: match header if line_role == CharRole::Head { score += 9; } // Bonus: a Head in the pattern aligns with one in the word. if pat_role == CharRole::Head && line_role == CharRole::Head { score += 10; } // Penalty: matching inside a segment (and previous char wasn't matched). if line_role == CharRole::Tail && pat_idx > 0 && last_action == Action::Miss { score -= 30; } // Penalty: a Head in the pattern matches in the middle of a word segment. if pat_role == CharRole::Head && line_role == CharRole::Tail { score -= 10; } // Penalty: matching the first pattern character in the middle of a segment. 
if pat_idx == 0 && line_role == CharRole::Tail { score -= 40; } score } #[cfg(test)] mod tests { use super::*; use crate::util::{assert_order, wrap_matches}; fn wrap_fuzzy_match(line: &str, pattern: &str) -> Option { let (_score, indices) = fuzzy_indices(line, pattern)?; Some(wrap_matches(line, &indices)) } #[test] fn test_match_or_not() { assert_eq!(None, fuzzy_match("abcdefaghi", "中")); assert_eq!(None, fuzzy_match("abc", "abx")); assert!(fuzzy_match("axbycz", "abc").is_some()); assert!(fuzzy_match("axbycz", "xyz").is_some()); assert_eq!("[a]x[b]y[c]z", &wrap_fuzzy_match("axbycz", "abc").unwrap()); assert_eq!("a[x]b[y]c[z]", &wrap_fuzzy_match("axbycz", "xyz").unwrap()); assert_eq!( "[H]ello, [世]界", &wrap_fuzzy_match("Hello, 世界", "H世").unwrap() ); } #[test] fn test_match_quality() { let matcher = ClangdMatcher::default(); // case assert_order(&matcher, "monad", &["monad", "Monad", "mONAD"]); // initials assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]); assert_order(&matcher, "CC", &["CamelCase", "camelCase", "camelcase"]); assert_order(&matcher, "cC", &["camelCase", "CamelCase", "camelcase"]); assert_order( &matcher, "cc", &[ "camel case", "camelCase", "camelcase", "CamelCase", "camel ace", ], ); assert_order( &matcher, "Da.Te", &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"], ); assert_order(&matcher, "foobar.h", &["foobar.h", "foo/bar.h"]); // prefix assert_order(&matcher, "is", &["isIEEE", "inSuf"]); // shorter assert_order(&matcher, "ma", &["map", "many", "maximum"]); assert_order(&matcher, "print", &["printf", "sprintf"]); // score(PRINT) = kMinScore assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]); // score(PRINT) > kMinScore assert_order(&matcher, "Int", &["int", "INT", "PRINT"]); } } #[allow(dead_code)] fn print_dp(line: &str, pattern: &str, dp: &[Vec]) { let num_line_chars = line.chars().count(); let num_pattern_chars = pattern.chars().count(); print!("\t"); for (idx, ch) in line.chars().enumerate() { 
print!("\t\t{}/{}", idx + 1, ch); } for (row_num, row) in dp.iter().enumerate().take(num_pattern_chars + 1) { print!("\n{}\t", row_num); for cell in row.iter().take(num_line_chars + 1) { print!( "({},{})/({},{})\t", cell.miss_score, if cell.last_action_miss == Action::Miss { 'X' } else { 'O' }, cell.match_score, if cell.last_action_match == Action::Miss { 'X' } else { 'O' } ); } } } fuzzy-matcher-0.3.7/src/lib.rs010064400017510000164000000013111373627335600144270ustar 00000000000000pub mod clangd; pub mod skim; mod util; #[cfg(not(feature = "compact"))] type IndexType = usize; #[cfg(not(feature = "compact"))] type ScoreType = i64; #[cfg(feature = "compact")] type IndexType = u32; #[cfg(feature = "compact")] type ScoreType = i32; pub trait FuzzyMatcher: Send + Sync { /// fuzzy match choice with pattern, and return the score & matched indices of characters fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)>; /// fuzzy match choice with pattern, and return the score of matching fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { self.fuzzy_indices(choice, pattern).map(|(score, _)| score) } } fuzzy-matcher-0.3.7/src/skim.rs010064400017510000164000001210541373627335600146330ustar 00000000000000#![allow(deprecated)] use std::cell::RefCell; use std::cmp::max; use std::fmt::Formatter; use thread_local::CachedThreadLocal; use crate::skim::Movement::{Match, Skip}; use crate::util::{char_equal, cheap_matches}; ///! The fuzzy matching algorithm used by skim ///! ///! # Example: ///! ```edition2018 ///! use fuzzy_matcher::FuzzyMatcher; ///! use fuzzy_matcher::skim::SkimMatcherV2; ///! ///! let matcher = SkimMatcherV2::default(); ///! assert_eq!(None, matcher.fuzzy_match("abc", "abx")); ///! assert!(matcher.fuzzy_match("axbycz", "abc").is_some()); ///! assert!(matcher.fuzzy_match("axbycz", "xyz").is_some()); ///! ///! let (score, indices) = matcher.fuzzy_indices("axbycz", "abc").unwrap(); ///! assert_eq!(indices, [0, 2, 4]); ///! 
``` use crate::{FuzzyMatcher, IndexType, ScoreType}; const BONUS_MATCHED: ScoreType = 4; const BONUS_CASE_MATCH: ScoreType = 4; const BONUS_UPPER_MATCH: ScoreType = 6; const BONUS_ADJACENCY: ScoreType = 10; const BONUS_SEPARATOR: ScoreType = 8; const BONUS_CAMEL: ScoreType = 8; const PENALTY_CASE_UNMATCHED: ScoreType = -1; const PENALTY_LEADING: ScoreType = -6; // penalty applied for every letter before the first match const PENALTY_MAX_LEADING: ScoreType = -18; // maxing penalty for leading letters const PENALTY_UNMATCHED: ScoreType = -2; #[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")] pub struct SkimMatcher {} impl Default for SkimMatcher { fn default() -> Self { Self {} } } /// The V1 matcher is based on ForrestTheWoods's post /// https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ /// /// V1 algorithm is deprecated, checkout `FuzzyMatcherV2` impl FuzzyMatcher for SkimMatcher { fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { fuzzy_indices(choice, pattern) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { fuzzy_match(choice, pattern) } } #[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")] pub fn fuzzy_match(choice: &str, pattern: &str) -> Option { if pattern.is_empty() { return Some(0); } let scores = build_graph(choice, pattern)?; let last_row = &scores[scores.len() - 1]; let (_, &MatchingStatus { final_score, .. }) = last_row .iter() .enumerate() .max_by_key(|&(_, x)| x.final_score) .expect("fuzzy_indices failed to iterate over last_row"); Some(final_score) } #[deprecated(since = "0.3.5", note = "Please use SkimMatcherV2 instead")] pub fn fuzzy_indices(choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { if pattern.is_empty() { return Some((0, Vec::new())); } let mut picked = vec![]; let scores = build_graph(choice, pattern)?; let last_row = &scores[scores.len() - 1]; let (mut next_col, &MatchingStatus { final_score, .. 
}) = last_row .iter() .enumerate() .max_by_key(|&(_, x)| x.final_score) .expect("fuzzy_indices failed to iterate over last_row"); let mut pat_idx = scores.len() as i64 - 1; while pat_idx >= 0 { let status = scores[pat_idx as usize][next_col]; next_col = status.back_ref as usize; picked.push(status.idx); pat_idx -= 1; } picked.reverse(); Some((final_score, picked)) } #[derive(Clone, Copy, Debug)] struct MatchingStatus { pub idx: IndexType, pub score: ScoreType, pub final_score: ScoreType, pub adj_num: IndexType, pub back_ref: IndexType, } impl Default for MatchingStatus { fn default() -> Self { MatchingStatus { idx: 0, score: 0, final_score: 0, adj_num: 1, back_ref: 0, } } } fn build_graph(choice: &str, pattern: &str) -> Option>> { let mut scores = vec![]; let mut match_start_idx = 0; // to ensure that the pushed char are able to match the pattern let mut pat_prev_ch = '\0'; // initialize the match positions and inline scores for (pat_idx, pat_ch) in pattern.chars().enumerate() { let mut vec = vec![]; let mut choice_prev_ch = '\0'; for (idx, ch) in choice.chars().enumerate() { if ch.to_ascii_lowercase() == pat_ch.to_ascii_lowercase() && idx >= match_start_idx { let score = fuzzy_score( ch, idx as IndexType, choice_prev_ch, pat_ch, pat_idx as IndexType, pat_prev_ch, ); vec.push(MatchingStatus { idx: idx as IndexType, score, final_score: score, adj_num: 1, back_ref: 0, }); } choice_prev_ch = ch; } if vec.is_empty() { // not matched return None; } match_start_idx = vec[0].idx as usize + 1; scores.push(vec); pat_prev_ch = pat_ch; } // calculate max scores considering adjacent characters for pat_idx in 1..scores.len() { let (first_half, last_half) = scores.split_at_mut(pat_idx); let prev_row = &first_half[first_half.len() - 1]; let cur_row = &mut last_half[0]; for idx in 0..cur_row.len() { let next = cur_row[idx]; let prev = if idx > 0 { cur_row[idx - 1] } else { MatchingStatus::default() }; let mut score_before_idx = prev.final_score - prev.score + next.score; 
score_before_idx += PENALTY_UNMATCHED * ((next.idx - prev.idx) as ScoreType); score_before_idx -= if prev.adj_num == 0 { BONUS_ADJACENCY } else { 0 }; let (back_ref, score, adj_num) = prev_row .iter() .enumerate() .take_while(|&(_, &MatchingStatus { idx, .. })| idx < next.idx) .skip_while(|&(_, &MatchingStatus { idx, .. })| idx < prev.idx) .map(|(back_ref, cur)| { let adj_num = next.idx - cur.idx - 1; let mut final_score = cur.final_score + next.score; final_score += if adj_num == 0 { BONUS_ADJACENCY } else { PENALTY_UNMATCHED * adj_num as ScoreType }; (back_ref, final_score, adj_num) }) .max_by_key(|&(_, x, _)| x) .unwrap_or((prev.back_ref as usize, score_before_idx, prev.adj_num)); cur_row[idx] = if idx > 0 && score < score_before_idx { MatchingStatus { final_score: score_before_idx, back_ref: prev.back_ref, adj_num, ..next } } else { MatchingStatus { final_score: score, back_ref: back_ref as IndexType, adj_num, ..next } }; } } Some(scores) } // judge how many scores the current index should get fn fuzzy_score( choice_ch: char, choice_idx: IndexType, choice_prev_ch: char, pat_ch: char, pat_idx: IndexType, _pat_prev_ch: char, ) -> ScoreType { let mut score = BONUS_MATCHED; let choice_prev_ch_type = CharType::of(choice_prev_ch); let choice_role = CharRole::of(choice_prev_ch, choice_ch); if pat_ch == choice_ch { if pat_ch.is_uppercase() { score += BONUS_UPPER_MATCH; } else { score += BONUS_CASE_MATCH; } } else { score += PENALTY_CASE_UNMATCHED; } // apply bonus for camelCases if choice_role == CharRole::Head || choice_role == CharRole::Break || choice_role == CharRole::Camel { score += BONUS_CAMEL; } // apply bonus for matches after a separator if choice_prev_ch_type == CharType::HardSep || choice_prev_ch_type == CharType::SoftSep { score += BONUS_SEPARATOR; } if pat_idx == 0 { score += max( (choice_idx as ScoreType) * PENALTY_LEADING, PENALTY_MAX_LEADING, ); } score } #[derive(Copy, Clone)] pub struct SkimScoreConfig { pub score_match: i32, pub gap_start: i32, pub 
gap_extension: i32, /// The first character in the typed pattern usually has more significance /// than the rest so it's important that it appears at special positions where /// bonus points are given. e.g. "to-go" vs. "ongoing" on "og" or on "ogo". /// The amount of the extra bonus should be limited so that the gap penalty is /// still respected. pub bonus_first_char_multiplier: i32, /// We prefer matches at the beginning of a word, but the bonus should not be /// too great to prevent the longer acronym matches from always winning over /// shorter fuzzy matches. The bonus point here was specifically chosen that /// the bonus is cancelled when the gap between the acronyms grows over /// 8 characters, which is approximately the average length of the words found /// in web2 dictionary and my file system. pub bonus_head: i32, /// Just like bonus_head, but its breakage of word is not that strong, so it should /// be slighter less then bonus_head pub bonus_break: i32, /// Edge-triggered bonus for matches in camelCase words. /// Compared to word-boundary case, they don't accompany single-character gaps /// (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly. pub bonus_camel: i32, /// Minimum bonus point given to characters in consecutive chunks. /// Note that bonus points for consecutive matches shouldn't have needed if we /// used fixed match score as in the original algorithm. 
pub bonus_consecutive: i32, /// Skim will match case-sensitively if the pattern contains ASCII upper case, /// If case of case insensitive match, the penalty will be given to case mismatch pub penalty_case_mismatch: i32, } impl Default for SkimScoreConfig { fn default() -> Self { let score_match = 16; let gap_start = -3; let gap_extension = -1; let bonus_first_char_multiplier = 2; Self { score_match, gap_start, gap_extension, bonus_first_char_multiplier, bonus_head: score_match / 2, bonus_break: score_match / 2 + gap_extension, bonus_camel: score_match / 2 + 2 * gap_extension, bonus_consecutive: -(gap_start + gap_extension), penalty_case_mismatch: gap_extension * 2, } } } #[derive(Debug, Copy, Clone, PartialEq)] enum Movement { Match, Skip, } /// Inner state of the score matrix // Implementation detail: tried to pad to 16B // will store the m and p matrix together #[derive(Clone)] struct MatrixCell { pub m_move: Movement, pub m_score: i32, pub p_move: Movement, pub p_score: i32, // The max score of align pattern[..i] & choice[..j] // temporary fields (make use the rest of the padding) pub matched: bool, pub bonus: i32, } const MATRIX_CELL_NEG_INFINITY: i32 = std::i16::MIN as i32; impl Default for MatrixCell { fn default() -> Self { Self { m_move: Skip, m_score: MATRIX_CELL_NEG_INFINITY, p_move: Skip, p_score: MATRIX_CELL_NEG_INFINITY, matched: false, bonus: 0, } } } impl MatrixCell { pub fn reset(&mut self) { self.m_move = Skip; self.m_score = MATRIX_CELL_NEG_INFINITY; self.p_move = Skip; self.p_score = MATRIX_CELL_NEG_INFINITY; self.bonus = 0; self.matched = false; } } /// Simulate a 1-D vector as 2-D matrix struct ScoreMatrix<'a> { matrix: &'a mut [MatrixCell], pub rows: usize, pub cols: usize, } impl<'a> ScoreMatrix<'a> { /// given a matrix, extend it to be (rows x cols) and fill in as init_val pub fn new(matrix: &'a mut Vec, rows: usize, cols: usize) -> Self { matrix.resize(rows * cols, MatrixCell::default()); ScoreMatrix { matrix, rows, cols } } #[inline] fn 
get_index(&self, row: usize, col: usize) -> usize { row * self.cols + col } fn get_row(&self, row: usize) -> &[MatrixCell] { let start = row * self.cols; &self.matrix[start..start + self.cols] } } impl<'a> std::ops::Index<(usize, usize)> for ScoreMatrix<'a> { type Output = MatrixCell; fn index(&self, index: (usize, usize)) -> &Self::Output { &self.matrix[self.get_index(index.0, index.1)] } } impl<'a> std::ops::IndexMut<(usize, usize)> for ScoreMatrix<'a> { fn index_mut(&mut self, index: (usize, usize)) -> &mut Self::Output { &mut self.matrix[self.get_index(index.0, index.1)] } } impl<'a> std::fmt::Debug for ScoreMatrix<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let _ = writeln!(f, "M score:"); for row in 0..self.rows { for col in 0..self.cols { let cell = &self[(row, col)]; write!( f, "{:4}/{} ", if cell.m_score == MATRIX_CELL_NEG_INFINITY { -999 } else { cell.m_score }, match cell.m_move { Match => 'M', Skip => 'S', } )?; } writeln!(f)?; } let _ = writeln!(f, "P score:"); for row in 0..self.rows { for col in 0..self.cols { let cell = &self[(row, col)]; write!( f, "{:4}/{} ", if cell.p_score == MATRIX_CELL_NEG_INFINITY { -999 } else { cell.p_score }, match cell.p_move { Match => 'M', Skip => 'S', } )?; } writeln!(f)?; } Ok(()) } } /// We categorize characters into types: /// /// - Empty(E): the start of string /// - Upper(U): the ascii upper case /// - lower(L): the ascii lower case & other unicode characters /// - number(N): ascii number /// - hard separator(S): clearly separate the content: ` ` `/` `\` `|` `(` `) `[` `]` `{` `}` /// - soft separator(s): other ascii punctuation, e.g. `!` `"` `#` `$`, ... #[derive(Debug, PartialEq, Copy, Clone)] enum CharType { Empty, Upper, Lower, Number, HardSep, SoftSep, } impl CharType { pub fn of(ch: char) -> Self { match ch { '\0' => CharType::Empty, ' ' | '/' | '\\' | '|' | '(' | ')' | '[' | ']' | '{' | '}' => CharType::HardSep, '!'..='\'' | '*'..='.' 
| ':'..='@' | '^'..='`' | '~' => CharType::SoftSep, '0'..='9' => CharType::Number, 'A'..='Z' => CharType::Upper, _ => CharType::Lower, } } } /// Ref: https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp /// /// /// ```text /// +-----------+--------------+-------+ /// | Example | Chars | Type | Role | /// +-----------+--------------+-------+ /// | (f)oo | ^fo | Ell | Head | /// | (F)oo | ^Fo | EUl | Head | /// | Foo/(B)ar | /Ba | SUl | Head | /// | Foo/(b)ar | /ba | Sll | Head | /// | Foo.(B)ar | .Ba | SUl | Break | /// | Foo(B)ar | oBa | lUl | Camel | /// | 123(B)ar | 3Ba | nUl | Camel | /// | F(o)oBar | Foo | Ull | Tail | /// | H(T)TP | HTT | UUU | Tail | /// | others | | | Tail | /// +-----------+--------------+-------+ #[derive(Debug, PartialEq, Copy, Clone)] enum CharRole { Head, Tail, Camel, Break, } impl CharRole { pub fn of(prev: char, cur: char) -> Self { Self::of_type(CharType::of(prev), CharType::of(cur)) } pub fn of_type(prev: CharType, cur: CharType) -> Self { match (prev, cur) { (CharType::Empty, _) | (CharType::HardSep, _) => CharRole::Head, (CharType::SoftSep, _) => CharRole::Break, (CharType::Lower, CharType::Upper) | (CharType::Number, CharType::Upper) => { CharRole::Camel } _ => CharRole::Tail, } } } #[derive(Eq, PartialEq, Debug, Copy, Clone)] enum CaseMatching { Respect, Ignore, Smart, } /// Fuzzy matching is a sub problem is sequence alignment. /// Specifically what we'd like to implement is sequence alignment with affine gap penalty. 
/// Ref: https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf
///
/// Given `pattern`(i) and `choice`(j), we'll maintain 2 score matrices:
///
/// ```text
/// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1])
/// M[i][j] = -infinity if p[i] and c[j] do not match
///
/// M[i][j] means the score of the best alignment of p[..=i] and c[..=j] ending with match/mismatch, e.g.:
///
/// c: [.........]b
/// p: [.........]b
///
/// So that p[..=i-1] and c[..=j-1] could be any alignment
///
/// P[i][j] = max(M[i][j-k]-gap(k)) for k in 1..j
///
/// P[i][j] means the score of the best alignment of p[..=i] and c[..=j] where c[j] is not matched.
/// So we need to search through all the previous matches, and calculate the gap.
///
///   (j-k)--.   j
/// c: [....]bcdef
/// p: [....]b----
///          i
/// ```
///
/// Note that the above is O(n^3) in the worst case. However, the above algorithm uses a general gap
/// penalty, whereas we use an affine gap: `gap = gap_start + k * gap_extend` where:
/// - `gap_start`: the cost of starting a gap
/// - `gap_extend`: the cost of extending a gap by one more space.
///
/// So we can optimize the algorithm to:
///
/// ```text
/// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1])
/// ```
///
/// Besides, since we are doing fuzzy matching, we'll prefer some patterns over others.
/// So we'll calculate an in-place bonus for each character, e.g. a bonus for camel cases.
/// /// In summary: /// /// ```text /// B[j] = in_place_bonus_of(j) /// M[i][j] = match(i, j) + max(M[i-1][j-1] + consecutive, P[i-1][j-1]) /// M[i][j] = -infinity if p[i] and c[j] do not match /// P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1]) /// ``` pub struct SkimMatcherV2 { debug: bool, score_config: SkimScoreConfig, element_limit: usize, case: CaseMatching, use_cache: bool, m_cache: CachedThreadLocal>>, c_cache: CachedThreadLocal>>, // vector to store the characters of choice p_cache: CachedThreadLocal>>, // vector to store the characters of pattern } impl Default for SkimMatcherV2 { fn default() -> Self { Self { debug: false, score_config: SkimScoreConfig::default(), element_limit: 0, case: CaseMatching::Smart, use_cache: true, m_cache: CachedThreadLocal::new(), c_cache: CachedThreadLocal::new(), p_cache: CachedThreadLocal::new(), } } } impl SkimMatcherV2 { pub fn score_config(mut self, score_config: SkimScoreConfig) -> Self { self.score_config = score_config; self } pub fn element_limit(mut self, elements: usize) -> Self { self.element_limit = elements; self } pub fn ignore_case(mut self) -> Self { self.case = CaseMatching::Ignore; self } pub fn smart_case(mut self) -> Self { self.case = CaseMatching::Smart; self } pub fn respect_case(mut self) -> Self { self.case = CaseMatching::Respect; self } pub fn use_cache(mut self, use_cache: bool) -> Self { self.use_cache = use_cache; self } pub fn debug(mut self, debug: bool) -> Self { self.debug = debug; self } /// Build the score matrix using the algorithm described above fn build_score_matrix( &self, m: &mut ScoreMatrix, choice: &[char], pattern: &[char], first_match_indices: &[usize], compressed: bool, case_sensitive: bool, ) { let mut in_place_bonuses = vec![0; m.cols]; self.build_in_place_bonus(choice, &mut in_place_bonuses); // need to reset M[row][first_match] & M[i][j-1] m[(0, 0)].reset(); for i in 1..m.rows { m[(i, first_match_indices[i - 1])].reset(); } for j in 0..m.cols { // 
p[0][j]: the score of best alignment of p[] and c[..=j] where c[j] is not matched m[(0, j)].reset(); m[(0, j)].p_score = self.score_config.gap_extension; } // update the matrix; for (i, &p_ch) in pattern.iter().enumerate() { let row = self.adjust_row_idx(i + 1, compressed); let row_prev = self.adjust_row_idx(i, compressed); let to_skip = first_match_indices[i]; for (j, &c_ch) in choice[to_skip..].iter().enumerate() { let col = to_skip + j + 1; let col_prev = to_skip + j; let idx_cur = m.get_index(row, col); let idx_last = m.get_index(row, col_prev); let idx_prev = m.get_index(row_prev, col_prev); // update M matrix // M[i][j] = match(i, j) + max(M[i-1][j-1], P[i-1][j-1]) if let Some(cur_match_score) = self.calculate_match_score(c_ch, p_ch, case_sensitive) { let prev_match_score = m.matrix[idx_prev].m_score; let prev_skip_score = m.matrix[idx_prev].p_score; let prev_match_bonus = m.matrix[idx_last].bonus; let in_place_bonus = in_place_bonuses[col]; let consecutive_bonus = max( prev_match_bonus, max(in_place_bonus, self.score_config.bonus_consecutive), ); m.matrix[idx_last].bonus = consecutive_bonus; let score_match = prev_match_score + consecutive_bonus; let score_skip = prev_skip_score + in_place_bonus; if score_match >= score_skip { m.matrix[idx_cur].m_score = score_match + cur_match_score as i32; m.matrix[idx_cur].m_move = Movement::Match; } else { m.matrix[idx_cur].m_score = score_skip + cur_match_score as i32; m.matrix[idx_cur].m_move = Movement::Skip; } } else { m.matrix[idx_cur].m_score = MATRIX_CELL_NEG_INFINITY; m.matrix[idx_cur].m_move = Movement::Skip; m.matrix[idx_cur].bonus = 0; } // update P matrix // P[i][j] = max(gap_start + gap_extend + M[i][j-1], gap_extend + P[i][j-1]) let prev_match_score = self.score_config.gap_start + self.score_config.gap_extension + m.matrix[idx_last].m_score; let prev_skip_score = self.score_config.gap_extension + m.matrix[idx_last].p_score; if prev_match_score >= prev_skip_score { m.matrix[idx_cur].p_score = 
prev_match_score; m.matrix[idx_cur].p_move = Movement::Match; } else { m.matrix[idx_cur].p_score = prev_skip_score; m.matrix[idx_cur].p_move = Movement::Skip; } } } } /// check bonus for start of camel case, etc. fn build_in_place_bonus(&self, choice: &[char], b: &mut [i32]) { let mut prev_ch = '\0'; for (j, &c_ch) in choice.iter().enumerate() { let prev_ch_type = CharType::of(prev_ch); let ch_type = CharType::of(c_ch); b[j + 1] = self.in_place_bonus(prev_ch_type, ch_type); prev_ch = c_ch; } if b.len() > 1 { b[1] *= self.score_config.bonus_first_char_multiplier; } } /// In case we don't need to backtrack the matching indices, we could use only 2 rows for the /// matrix, this function could be used to rotate accessing these two rows. fn adjust_row_idx(&self, row_idx: usize, compressed: bool) -> usize { if compressed { row_idx & 1 } else { row_idx } } /// Calculate the matching score of the characters /// return None if not matched. fn calculate_match_score(&self, c: char, p: char, case_sensitive: bool) -> Option { if !char_equal(c, p, case_sensitive) { return None; } let score = self.score_config.score_match; let mut bonus = 0; // penalty on case mismatch if !case_sensitive && p != c { bonus += self.score_config.penalty_case_mismatch; } Some(max(0, score + bonus) as u16) } #[inline] fn in_place_bonus(&self, prev_char_type: CharType, char_type: CharType) -> i32 { match CharRole::of_type(prev_char_type, char_type) { CharRole::Head => self.score_config.bonus_head, CharRole::Camel => self.score_config.bonus_camel, CharRole::Break => self.score_config.bonus_break, CharRole::Tail => 0, } } fn contains_upper(&self, string: &str) -> bool { for ch in string.chars() { if ch.is_ascii_uppercase() { return true; } } false } pub fn fuzzy( &self, choice: &str, pattern: &str, with_pos: bool, ) -> Option<(ScoreType, Vec)> { if pattern.is_empty() { return Some((0, Vec::new())); } let case_sensitive = match self.case { CaseMatching::Respect => true, CaseMatching::Ignore => false, 
CaseMatching::Smart => self.contains_upper(pattern), }; let compressed = !with_pos; // initialize the score matrix let mut m = self .m_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); let mut choice_chars = self .c_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); let mut pattern_chars = self .p_cache .get_or(|| RefCell::new(Vec::new())) .borrow_mut(); choice_chars.clear(); for char in choice.chars() { choice_chars.push(char); } pattern_chars.clear(); for char in pattern.chars() { pattern_chars.push(char); } let first_match_indices = cheap_matches(&choice_chars, &pattern_chars, case_sensitive)?; let cols = choice_chars.len() + 1; let num_char_pattern = pattern_chars.len(); let rows = if compressed { 2 } else { num_char_pattern + 1 }; if self.element_limit > 0 && self.element_limit < rows * cols { return self.simple_match( &choice_chars, &pattern_chars, &first_match_indices, case_sensitive, with_pos, ); } let mut m = ScoreMatrix::new(&mut m, rows, cols); self.build_score_matrix( &mut m, &choice_chars, &pattern_chars, &first_match_indices, compressed, case_sensitive, ); let first_col_of_last_row = first_match_indices[first_match_indices.len() - 1]; let last_row = m.get_row(self.adjust_row_idx(num_char_pattern, compressed)); let (pat_idx, &MatrixCell { m_score, .. }) = last_row[first_col_of_last_row..] 
.iter() .enumerate() .max_by_key(|&(_, x)| x.m_score) .map(|(idx, cell)| (idx + first_col_of_last_row, cell)) .expect("fuzzy_matcher failed to iterate over last_row"); let mut positions = if with_pos { Vec::with_capacity(num_char_pattern) } else { Vec::new() }; if with_pos { let mut i = m.rows - 1; let mut j = pat_idx; let mut track_m = true; let mut current_move = Match; let first_col_first_row = first_match_indices[0]; while i > 0 && j > first_col_first_row { if current_move == Match { positions.push((j - 1) as IndexType); } let cell = &m[(i, j)]; current_move = if track_m { cell.m_move } else { cell.p_move }; if track_m { i -= 1; } j -= 1; track_m = match current_move { Match => true, Skip => false, }; } positions.reverse(); } if self.debug { println!("Matrix:\n{:?}", m); } if !self.use_cache { // drop the allocated memory self.m_cache.get().map(|cell| cell.replace(vec![])); self.c_cache.get().map(|cell| cell.replace(vec![])); self.p_cache.get().map(|cell| cell.replace(vec![])); } Some((m_score as ScoreType, positions)) } pub fn simple_match( &self, choice: &[char], pattern: &[char], first_match_indices: &[usize], case_sensitive: bool, with_pos: bool, ) -> Option<(ScoreType, Vec)> { if pattern.len() <= 0 { return Some((0, Vec::new())); } else if pattern.len() == 1 { let match_idx = first_match_indices[0]; let prev_ch = if match_idx > 0 { choice[match_idx - 1] } else { '\0' }; let prev_ch_type = CharType::of(prev_ch); let ch_type = CharType::of(choice[match_idx]); let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type); return Some((in_place_bonus as ScoreType, vec![match_idx as IndexType])); } let mut start_idx = first_match_indices[0]; let end_idx = first_match_indices[first_match_indices.len() - 1]; let mut pattern_iter = pattern.iter().rev().peekable(); for (idx, &c) in choice[start_idx..=end_idx].iter().enumerate().rev() { match pattern_iter.peek() { Some(&&p) => { if char_equal(c, p, case_sensitive) { let _ = pattern_iter.next(); start_idx = idx; } 
} None => break, } } Some(self.calculate_score_with_pos( choice, pattern, start_idx, end_idx, case_sensitive, with_pos, )) } fn calculate_score_with_pos( &self, choice: &[char], pattern: &[char], start_idx: usize, end_idx: usize, case_sensitive: bool, with_pos: bool, ) -> (ScoreType, Vec) { let mut pos = Vec::new(); let choice_iter = choice[start_idx..=end_idx].iter().enumerate(); let mut pattern_iter = pattern.iter().enumerate().peekable(); // unfortunately we could not get the the character before the first character's(for performance) // so we tread them as NonWord let mut prev_ch = '\0'; let mut score: i32 = 0; let mut in_gap = false; let mut prev_match_bonus = 0; for (c_idx, &c) in choice_iter { let op = pattern_iter.peek(); if op.is_none() { break; } let prev_ch_type = CharType::of(prev_ch); let ch_type = CharType::of(c); let in_place_bonus = self.in_place_bonus(prev_ch_type, ch_type); let (_p_idx, &p) = *op.unwrap(); if let Some(match_score) = self.calculate_match_score(c, p, case_sensitive) { if with_pos { pos.push((c_idx + start_idx) as IndexType); } score += match_score as i32; let consecutive_bonus = max( prev_match_bonus, max(in_place_bonus, self.score_config.bonus_consecutive), ); prev_match_bonus = consecutive_bonus; if !in_gap { score += consecutive_bonus; } in_gap = false; let _ = pattern_iter.next(); } else { if !in_gap { score += self.score_config.gap_start; } score += self.score_config.gap_extension; in_gap = true; prev_match_bonus = 0; } prev_ch = c; } (score as ScoreType, pos) } } impl FuzzyMatcher for SkimMatcherV2 { fn fuzzy_indices(&self, choice: &str, pattern: &str) -> Option<(ScoreType, Vec)> { self.fuzzy(choice, pattern, true) } fn fuzzy_match(&self, choice: &str, pattern: &str) -> Option { self.fuzzy(choice, pattern, false).map(|(score, _)| score) } } #[cfg(test)] mod tests { use crate::util::{assert_order, wrap_matches}; use super::*; fn wrap_fuzzy_match(matcher: &dyn FuzzyMatcher, line: &str, pattern: &str) -> Option { let (_score, 
indices) = matcher.fuzzy_indices(line, pattern)?; println!("score: {:?}, indices: {:?}", _score, indices); Some(wrap_matches(line, &indices)) } #[test] fn test_match_or_not() { let matcher = SkimMatcherV2::default(); assert_eq!(Some(0), matcher.fuzzy_match("", "")); assert_eq!(Some(0), matcher.fuzzy_match("abcdefaghi", "")); assert_eq!(None, matcher.fuzzy_match("", "a")); assert_eq!(None, matcher.fuzzy_match("abcdefaghi", "中")); assert_eq!(None, matcher.fuzzy_match("abc", "abx")); assert!(matcher.fuzzy_match("axbycz", "abc").is_some()); assert!(matcher.fuzzy_match("axbycz", "xyz").is_some()); assert_eq!( "[a]x[b]y[c]z", &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap() ); assert_eq!( "a[x]b[y]c[z]", &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap() ); assert_eq!( "[H]ello, [世]界", &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap() ); } #[test] fn test_match_quality() { let matcher = SkimMatcherV2::default().ignore_case(); // initials assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]); assert_order(&matcher, "CC", &["CamelCase", "camelCase", "camelcase"]); assert_order(&matcher, "cC", &["camelCase", "CamelCase", "camelcase"]); assert_order( &matcher, "cc", &[ "camel case", "camelCase", "CamelCase", "camelcase", "camel ace", ], ); assert_order( &matcher, "Da.Te", &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.text"], ); // prefix assert_order(&matcher, "is", &["isIEEE", "inSuf"]); // shorter assert_order(&matcher, "ma", &["map", "many", "maximum"]); assert_order(&matcher, "print", &["printf", "sprintf"]); // score(PRINT) = kMinScore assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]); // score(PRINT) > kMinScore assert_order(&matcher, "Int", &["int", "INT", "PRINT"]); } fn simple_match( matcher: &SkimMatcherV2, choice: &str, pattern: &str, case_sensitive: bool, with_pos: bool, ) -> Option<(ScoreType, Vec)> { let choice: Vec = choice.chars().collect(); let pattern: Vec = pattern.chars().collect(); let first_match_indices = 
cheap_matches(&choice, &pattern, case_sensitive)?; matcher.simple_match( &choice, &pattern, &first_match_indices, case_sensitive, with_pos, ) } #[test] fn test_match_or_not_simple() { let matcher = SkimMatcherV2::default(); assert_eq!( simple_match(&matcher, "axbycz", "xyz", false, true) .unwrap() .1, vec![1, 3, 5] ); assert_eq!( simple_match(&matcher, "", "", false, false), Some((0, vec![])) ); assert_eq!( simple_match(&matcher, "abcdefaghi", "", false, false), Some((0, vec![])) ); assert_eq!(simple_match(&matcher, "", "a", false, false), None); assert_eq!( simple_match(&matcher, "abcdefaghi", "中", false, false), None ); assert_eq!(simple_match(&matcher, "abc", "abx", false, false), None); assert_eq!( simple_match(&matcher, "axbycz", "abc", false, true) .unwrap() .1, vec![0, 2, 4] ); assert_eq!( simple_match(&matcher, "axbycz", "xyz", false, true) .unwrap() .1, vec![1, 3, 5] ); assert_eq!( simple_match(&matcher, "Hello, 世界", "H世", false, true) .unwrap() .1, vec![0, 7] ); } #[test] fn test_match_or_not_v2() { let matcher = SkimMatcherV2::default().debug(true); assert_eq!(matcher.fuzzy_match("", ""), Some(0)); assert_eq!(matcher.fuzzy_match("abcdefaghi", ""), Some(0)); assert_eq!(matcher.fuzzy_match("", "a"), None); assert_eq!(matcher.fuzzy_match("abcdefaghi", "中"), None); assert_eq!(matcher.fuzzy_match("abc", "abx"), None); assert!(matcher.fuzzy_match("axbycz", "abc").is_some()); assert!(matcher.fuzzy_match("axbycz", "xyz").is_some()); assert_eq!( &wrap_fuzzy_match(&matcher, "axbycz", "abc").unwrap(), "[a]x[b]y[c]z" ); assert_eq!( &wrap_fuzzy_match(&matcher, "axbycz", "xyz").unwrap(), "a[x]b[y]c[z]" ); assert_eq!( &wrap_fuzzy_match(&matcher, "Hello, 世界", "H世").unwrap(), "[H]ello, [世]界" ); } #[test] fn test_case_option_v2() { let matcher = SkimMatcherV2::default().ignore_case(); assert!(matcher.fuzzy_match("aBc", "abc").is_some()); assert!(matcher.fuzzy_match("aBc", "aBc").is_some()); assert!(matcher.fuzzy_match("aBc", "aBC").is_some()); let matcher = 
SkimMatcherV2::default().respect_case(); assert!(matcher.fuzzy_match("aBc", "abc").is_none()); assert!(matcher.fuzzy_match("aBc", "aBc").is_some()); assert!(matcher.fuzzy_match("aBc", "aBC").is_none()); let matcher = SkimMatcherV2::default().smart_case(); assert!(matcher.fuzzy_match("aBc", "abc").is_some()); assert!(matcher.fuzzy_match("aBc", "aBc").is_some()); assert!(matcher.fuzzy_match("aBc", "aBC").is_none()); } #[test] fn test_matcher_quality_v2() { let matcher = SkimMatcherV2::default(); assert_order(&matcher, "ab", &["ab", "aoo_boo", "acb"]); assert_order( &matcher, "cc", &[ "camel case", "camelCase", "CamelCase", "camelcase", "camel ace", ], ); assert_order( &matcher, "Da.Te", &["Data.Text", "Data.Text.Lazy", "Data.Aeson.Encoding.Text"], ); assert_order(&matcher, "is", &["isIEEE", "inSuf"]); assert_order(&matcher, "ma", &["map", "many", "maximum"]); assert_order(&matcher, "print", &["printf", "sprintf"]); assert_order(&matcher, "ast", &["ast", "AST", "INT_FAST16_MAX"]); assert_order(&matcher, "int", &["int", "INT", "PRINT"]); } #[test] fn test_reuse_should_not_affect_indices() { let matcher = SkimMatcherV2::default(); let pattern = "139"; for num in 0..10000 { let choice = num.to_string(); if let Some((_score, indices)) = matcher.fuzzy_indices(&choice, pattern) { assert_eq!(indices.len(), 3); } } } } fuzzy-matcher-0.3.7/src/util.rs010064400017510000164000000076751373627335600146610ustar 00000000000000use crate::{FuzzyMatcher, IndexType, ScoreType}; pub fn cheap_matches( choice: &[char], pattern: &[char], case_sensitive: bool, ) -> Option> { let mut first_match_indices = vec![]; let mut pattern_iter = pattern.iter().peekable(); for (idx, &c) in choice.iter().enumerate() { match pattern_iter.peek() { Some(&&p) => { if char_equal(c, p, case_sensitive) { first_match_indices.push(idx); let _ = pattern_iter.next(); } } None => break, } } if pattern_iter.peek().is_none() { Some(first_match_indices) } else { None } } /// Given 2 character, check if they are equal 
(considering ascii case) /// e.g. ('a', 'A', true) => false /// e.g. ('a', 'A', false) => true #[inline] pub fn char_equal(a: char, b: char, case_sensitive: bool) -> bool { if case_sensitive { a == b } else { a.eq_ignore_ascii_case(&b) } } #[derive(Debug, PartialEq)] pub enum CharType { NonWord, Lower, Upper, Number, } #[inline] pub fn char_type_of(ch: char) -> CharType { if ch.is_lowercase() { CharType::Lower } else if ch.is_uppercase() { CharType::Upper } else if ch.is_numeric() { CharType::Number } else { CharType::NonWord } } #[derive(Debug, PartialEq)] pub enum CharRole { Tail, Head, } // checkout https://github.com/llvm-mirror/clang-tools-extra/blob/master/clangd/FuzzyMatch.cpp // The Role can be determined from the Type of a character and its neighbors: // // Example | Chars | Type | Role // ---------+--------------+----- // F(o)oBar | Foo | Ull | Tail // Foo(B)ar | oBa | lUl | Head // (f)oo | ^fo | Ell | Head // H(T)TP | HTT | UUU | Tail // // Curr= Empty Lower Upper Separ // Prev=Empty 0x00, 0xaa, 0xaa, 0xff, // At start, Lower|Upper->Head // Prev=Lower 0x00, 0x55, 0xaa, 0xff, // In word, Upper->Head;Lower->Tail // Prev=Upper 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail // Prev=Separ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at start pub fn char_role(prev: char, cur: char) -> CharRole { use self::CharRole::*; use self::CharType::*; match (char_type_of(prev), char_type_of(cur)) { (Lower, Upper) | (NonWord, Lower) | (NonWord, Upper) => Head, _ => Tail, } } #[allow(dead_code)] pub fn assert_order(matcher: &dyn FuzzyMatcher, pattern: &str, choices: &[&'static str]) { let result = filter_and_sort(matcher, pattern, choices); if result != choices { // debug print println!("pattern: {}", pattern); for &choice in choices.iter() { if let Some((score, indices)) = matcher.fuzzy_indices(choice, pattern) { println!("{}: {:?}", score, wrap_matches(choice, &indices)); } else { println!("NO MATCH for {}", choice); } } } assert_eq!(result, choices); } 
/// Score every entry of `lines` against `pattern` and return the matching
/// entries ordered from the highest score to the lowest.
///
/// Entries for which `matcher.fuzzy_match` returns `None` are dropped. The sort
/// is stable, so entries with equal scores keep their relative input order.
// Helper for `assert_order`; it may be unused in other build configurations.
#[allow(dead_code)]
pub fn filter_and_sort(
    matcher: &dyn FuzzyMatcher,
    pattern: &str,
    lines: &[&'static str],
) -> Vec<&'static str> {
    let mut scored: Vec<(ScoreType, &'static str)> = lines
        .iter()
        .filter_map(|&line| matcher.fuzzy_match(line, pattern).map(|score| (score, line)))
        .collect();

    // Sort descending through `Reverse` rather than by negating the score:
    // `-score` overflows (and panics in debug builds) for `ScoreType::MIN`.
    scored.sort_by_key(|&(score, _)| std::cmp::Reverse(score));

    scored.into_iter().map(|(_, line)| line).collect()
}

/// Return `line` with every character whose *character* index appears in
/// `indices` wrapped in square brackets, e.g. `("axbycz", [0, 2, 4])` becomes
/// `"[a]x[b]y[c]z"`.
///
/// `indices` is consumed front to back while walking `line`, so it is expected
/// to be in ascending order; an index that is not reached in that order is
/// never highlighted and also blocks the indices after it.
// Helper for tests and debug output; it may be unused in other build configurations.
#[allow(dead_code)]
pub fn wrap_matches(line: &str, indices: &[IndexType]) -> String {
    // Each highlighted character adds two bracket bytes to the output.
    let mut wrapped = String::with_capacity(line.len() + 2 * indices.len());
    let mut pending = indices.iter().peekable();

    for (idx, ch) in line.chars().enumerate() {
        let is_match = pending
            .peek()
            .map_or(false, |&&next| next == idx as IndexType);

        if is_match {
            wrapped.push('[');
            wrapped.push(ch);
            wrapped.push(']');
            pending.next();
        } else {
            wrapped.push(ch);
        }
    }

    wrapped
}