scan_fmt-0.2.6/.cargo_vcs_info.json0000644000000001120000000000000126240ustar { "git": { "sha1": "07abb97f8591d5923e998fd940f42503b6ef9a69" } } scan_fmt-0.2.6/.gitignore000064400000000000000000000000310000000000000133620ustar 00000000000000target Cargo.lock *~ doc scan_fmt-0.2.6/.travis.yml000064400000000000000000000000730000000000000135110ustar 00000000000000language: rust rust: - stable - beta - nightly scan_fmt-0.2.6/Cargo.toml0000644000000015540000000000000106350ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "scan_fmt" version = "0.2.6" authors = ["wlentz"] description = "A simple scanf()-like input for Rust" readme = "README.md" license = "MIT" repository = "https://github.com/wlentz/scan_fmt" [lib] name = "scan_fmt" path = "src/lib.rs" [dependencies.regex] version = "1" optional = true [features] default = ["regex", "std"] std = [] scan_fmt-0.2.6/Cargo.toml.orig000064400000000000000000000005470000000000000142750ustar 00000000000000[package] name = "scan_fmt" version = "0.2.6" authors = ["wlentz"] description = "A simple scanf()-like input for Rust" repository = "https://github.com/wlentz/scan_fmt" license = "MIT" readme = "README.md" [features] default = ["regex", "std"] std = [] [dependencies] regex = { version = "1", optional = true } [lib] name = "scan_fmt" path = "src/lib.rs" scan_fmt-0.2.6/LICENSE000064400000000000000000000020620000000000000124050ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 wlentz Permission is hereby granted, free of charge, to 
any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. scan_fmt-0.2.6/README.md000064400000000000000000000047660000000000000126740ustar 00000000000000# scan_fmt ![BuildStatus](https://travis-ci.org/wlentz/scan_fmt.svg?branch=master) scan_fmt provides a simple scanf()-like input for Rust. The goal is to make it easier to read data from a string or stdin. Currently the format string supports the following special sequences:
   {{ = escape for '{'
   }} = escape for '}'
   {} = return any value (until next whitespace)
   {d} = return base-10 decimal
   {x} = return hex (0xab or ab)
   {f} = return float
   {*d} = "*" as the first character means "match but don't return"
   {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
   {[...]} = return pattern.
     ^ inverts if it is the first character
     - is for ranges.  For a literal - put it at the start or end.
     To add a literal ] do "[]abc]"
   {e} = doesn't return a value, but matches end of line.  Use this if you
         don't want to ignore potential extra characters at end of input.
   Examples:
     {[0-9ab]} = match 0-9 or a or b
     {[^,.]} = match anything but , or .
   {/.../} = return regex inside of `//`.
     If there is a single capture group inside of the slashes then
     that group will make up the pattern.
   Examples:
     {/[0-9ab]/} = same as {[0-9ab]}, above
     {/a+/} = matches at least one `a`, greedily
     {/jj(a*)jj/} = matches any number of `a`s, but only if
       they're surrounded by two `j`s
### Examples ```rust #[macro_use] extern crate scan_fmt; use std::error::Error ; fn main() -> Result<(),Box<dyn Error>> { let (a,b,c) = scan_fmt!( "hello 0x12 345 bye", // input string "hello {x} {} {}", // format [hex u8], i32, String) ? ; // types of a-c assert_eq!( a, 0x12 ) ; assert_eq!( b, 345 ) ; assert_eq!( c, "bye" ) ; println!("Enter something like: 123-22"); let (c,d) = scanln_fmt!( "{d}-{d}", // format u16, u8) ? ; // types of c&d println!("Got {} and {}",c,d) ; // Note - currently scanln_fmt! just calls unwrap() on read_line() let (a,b) = scan_fmt_some!( "hello 12 345", // input string "hello {} {}", // format u8, i32) ; // types assert_eq!( a, Some(12) ) ; assert_eq!( b, Some(345) ) ; Ok(()) } ``` ### Limitations There is no compile-time warning if the number of {}'s in the format string doesn't match the number of return values. You'll just get None (or an Err from scan_fmt!) for extra return values. See src/lib.rs for more details. scan_fmt-0.2.6/src/lib.rs000064400000000000000000000246100000000000000133060ustar 00000000000000// Copyright 2015-2019 Will Lentz. // Licensed under the MIT license. //! This crate provides a simple sscanf()-like interface to extract //! data from strings and stdin. //! //! In version 0.2 scan_fmt! changed to return a Result. //! Use scan_fmt_some! for the 0.1.x behavior. //! //! To use this crate, do: //! //! ```ignore //! #[macro_use] extern crate scan_fmt; //! ``` //! //! Example to read from a string: //! //! ```rust //! # #[macro_use] extern crate scan_fmt; //! # fn main() { //! if let Ok((a,b)) = scan_fmt!( "-11 0x22", // input string //! "{d} {x}", // format //! i8, [hex u8]) { // types //! assert_eq!( a, -11 ) ; //! assert_eq!( b, 0x22 ) ; //! } //! //! let (a,b,c) = scan_fmt_some!( "hello 12 345 bye", // input string //! "hello {} {d} {}", // format //! u8, i32, String); // type of a-c Options //! assert_eq!( a, Some(12) ) ; //! assert_eq!( b, Some(345) ) ; //! assert_eq!( c, Some("bye".into()) ) ; //! # } //! ``` //! //! 
Special format_string tokens: //!
//!   {{ = escape for '{'
//!   }} = escape for '}'
//!   {} = return any value (until next whitespace)
//!   {d} = return base-10 decimal
//!   {x} = return hex (0xab or ab)
//!       = you must wrap the type in [hex type], e.g. "[hex u32]"
//!   {f} = return float
//!   {*d} = "*" as the first character means "match but don't return"
//!   {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
//!   {[...]} = return pattern.
//!     ^ inverts if it is the first character
//!     - is for ranges.  For a literal - put it at the start or end.
//!     To add a literal ] do "[]abc]"
//!   {e} = doesn't return a value, but matches end of line.  Use this if you
//!         don't want to ignore potential extra characters at end of input.
//!   Examples:
//!     {[0-9ab]} = match 0-9 or a or b
//!     {[^,.]} = match anything but , or .
//!     {/.../} = return regex inside of `//`. (if regex feature is installed)
//!      If there is a single capture group inside of the slashes then
//!      that group will make up the pattern.
//!   Examples:
//!     {/[0-9ab]/} = same as {[0-9ab]}, above
//!     {/a+/} = matches at least one `a`, greedily
//!     {/jj(a*)jj/} = matches any number of `a`s, but only if
//!       they're surrounded by two `j`s
//! 
//! //! Example to read from stdin: //! //! ```ignore //! # #[macro_use] extern crate scan_fmt; //! # use std::error::Error ; //! # fn main() -> Result<(),Box> { //! let (a,b) = scanln_fmt!( "{}-{}", u16, u8) ? ; //! println!("Got {} and {}",a,b); //! //! let (a,b) = scanln_fmt_some!( "{}-{}", // format //! u16, u8); // type of a&b Options //! match (a,b) { //! (Some(aa),Some(bb)) => println!("Got {} and {}",aa,bb), //! _ => println!("input error") //! } //! Ok(()) //! # } //! ``` //! //! ## LIMITATIONS: //! There are no compile-time checks to make sure the format //! strings matches the number of return arguments. Extra //! return values will be None or cause a Result error. //! //! Like sscanf(), whitespace (including \n) is largely ignored. //! //! Conversion to output values is done using parse::(). #![no_std] #[cfg(feature = "regex")] extern crate regex; #[cfg(any(test, doctest, feature = "std"))] extern crate std; #[macro_use] extern crate alloc; pub mod parse; #[macro_export] macro_rules! scan_fmt_help { ( wrap $res:expr, [hex $arg:tt] ) => { match $res.next() { Some(item) => $arg::from_str_radix(&item, 16).ok(), _ => None, } }; ( wrap $res:expr , $($arg1:tt)::* ) => { match $res.next() { Some(item) => item.parse::<$($arg1)::*>().ok(), _ => None, } }; ( no_wrap $err:ident, $res:expr, [hex $arg:tt] ) => { match $res.next() { Some(item) => { let ret = $arg::from_str_radix(&item, 16); if ret.is_err() { $err = "from_str_radix hex"; } ret.unwrap_or(0) } _ => { $err = "internal hex"; 0 } } }; ( no_wrap $err:ident, $res:expr , $($arg1:tt)::* ) => {{ // We need to return a value of type $($arg1)::* if parsing fails. // Is there a better way? 
let mut err = "0".parse::<$($arg1)::*>(); // most types if err.is_err() { err = "0.0.0.0".parse::<$($arg1)::*>(); // IpAddr } let err = err.unwrap(); match $res.next() { Some(item) => { let ret = item.parse::<$($arg1)::*>(); if(item == "") { $err = "match::none"; } else if ret.is_err() { $err = concat!("parse::", stringify!($($arg1)::*)); } ret.unwrap_or(err) } _ => { $err = concat!("internal ", stringify!($($arg1)::*)); err } } }}; } #[macro_export] macro_rules! scan_fmt_some { ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => { { let mut res = $crate::parse::scan( $instr, $fmt ) ; ($($crate::scan_fmt_help!(wrap res,$($args)::*)),*) } }; } #[macro_export] macro_rules! scan_fmt { ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => { { let mut err = "" ; let mut res = $crate::parse::scan( $instr, $fmt ) ; let result = ($($crate::scan_fmt_help!(no_wrap err,res,$($args)::*)),*) ; if err == "" { Ok(result) } else { Err($crate::parse::ScanError(err.into())) } } }; } #[cfg(feature = "std")] pub use std_features::*; #[cfg(feature = "std")] mod std_features { use std::string::String; pub fn get_input_unwrap() -> String { let mut input = String::new(); std::io::stdin().read_line(&mut input).unwrap(); input } /// (a,+) = scanln_fmt!( format_string, types,+ ) ///

Same as scan_fmt!(), but reads input string from stdin.

#[macro_export] macro_rules! scanln_fmt { ($($arg:tt)*) => {{ scan_fmt!(&$crate::get_input_unwrap(), $($arg)*) }} } /// (a,+) = scanln_fmt_some!( format_string, types,+ ) ///

Same as scan_fmt_some!(), but reads input string from stdin.

#[macro_export] macro_rules! scanln_fmt_some { ($($arg:tt)*) => {{ scan_fmt_some!(&$crate::get_input_unwrap(), $($arg)*) }} } } #[cfg(test)] use alloc::string::{String, ToString}; #[cfg(test)] use parse::ScanError; #[cfg(test)] macro_rules! assert_flt_eq { ($t:ident, $v1:expr, $v2:expr) => {{ assert!(($v1 - $v2).abs() <= 2.0 * std::$t::EPSILON); }}; } #[cfg(test)] fn ret_scan_all() -> Result<(), ScanError> { let (a, b) = scan_fmt!("1.2 e","{f} {x}",f32,[hex u32])?; assert_flt_eq!(f32, a, 1.2); assert_eq!(b, 14); Ok(()) } #[test] fn test_scan_all() { if let Ok(a) = scan_fmt!("hi1 3", "{} {d}", std::string::String, u32) { assert_eq!(a, ("hi1".to_string(), 3)); } else { assert!(false, "error 0"); } if let Ok((a, b, c)) = scan_fmt!("hi1 0xf -3","{} {x} {d}",String,[hex u32],i8) { assert_eq!(a, "hi1"); assert_eq!(b, 0xf); assert_eq!(c, -3); } else { assert!(false, "error 1"); } let a = scan_fmt!("hi1 f", "{} {d}", String, i32); assert!(a.is_err()); let a = ret_scan_all(); std::println!("{:?}", a); assert!(a.is_ok()); } #[test] fn test_plus_sign() { let a = scan_fmt_some!("+42", "{d}", i32); assert_eq!(a, Some(42)); let a = scan_fmt_some!("+42.0", "{f}", f64); assert_flt_eq!(f64, a.unwrap(), 42.0); } #[test] fn test_hex() { let (a, b, c) = scan_fmt_some!("DEV 0xab 0x1234", "{} {x} {x}", std::string::String, [hex u32], [hex u64]); assert_eq!(a, Some("DEV".into())); assert_eq!(b, Some(0xab)); assert_eq!(c, Some(0x1234)); } #[test] fn test_limited_data_range() { let (a, b, c) = scan_fmt_some!( "test{\t 1e9 \n bye 257} hi 22.7e-1", "test{{ {} bye {d}}} hi {f}", f64, u8, f32 ); assert_flt_eq!(f64, a.unwrap(), 1e9); assert_eq!(b, None); // 257 doesn't fit into a u8 assert_flt_eq!(f32, c.unwrap(), 2.27); } #[test] fn test_too_many_outputs() { let (a, b, c, d) = scan_fmt_some!("a_aa bb_b c", "{} {s} {}", String, String, String, String); assert_eq!(a.unwrap(), "a_aa"); assert_eq!(b.unwrap(), "bb_b"); assert_eq!(c.unwrap(), "c"); assert_eq!(d, None); } #[test] fn 
test_skip_assign() { let (a, b) = scan_fmt_some!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String); assert_eq!(a.unwrap(), "1 2 3"); assert_eq!(b.unwrap(), "6 7"); let a = scan_fmt!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String).unwrap(); assert_eq!(a.0, "1 2 3"); assert_eq!(a.1, "6 7"); } #[test] fn test_width_specifier() { let a = scan_fmt!("123ab71 2.1234", "{1d}{2d}{3x}{4d}{3f}", u8, u8, [hex u16], u16, f32) .unwrap(); assert_eq!(a.0, 1); assert_eq!(a.1, 23); assert_eq!(a.2, 0xab7); assert_eq!(a.3, 1); assert_flt_eq!(f32, a.4, 2.1); } #[test] fn test_err_equals() { let a = scan_fmt!("hi 123", "hi {d", u8); assert_eq!(a, Err(parse::ScanError("internal u8".to_string()))); } #[test] fn test_no_post_match_regex() { let a = scan_fmt!("74in", "{d}{/in/}", u8, String); assert_eq!(a, Ok((74, String::from("in")))); let a = scan_fmt!("74in", "{d}{/cm/}", u8, String); assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); } #[test] fn test_no_post_match() { let a = scan_fmt!("17in", "{d}in", u8); assert_eq!(a, Ok(17u8)); let a = scan_fmt!("17in", "{d}cm", u8); assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); } #[test] fn test_match_end() { let a = scan_fmt!("17in", "{d}in{e}", u8); assert_eq!(a, Ok(17u8)); let a = scan_fmt!("17ink", "{d}in{e}", u8); assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); } #[test] fn test_ip_addr() { let a = scan_fmt!("x 185.187.165.163 y", "x {} y", std::net::IpAddr); assert_eq!( a.unwrap(), std::net::IpAddr::V4(std::net::Ipv4Addr::new(185, 187, 165, 163)) ); } scan_fmt-0.2.6/src/parse.rs000064400000000000000000000430770000000000000136620ustar 00000000000000// Copyright 2015-2019 Will Lentz. // Licensed under the MIT license. 
use alloc::string::{String, ToString};

#[cfg(feature = "regex")]
use regex::Regex;

/// Kind of conversion requested by one `{...}` format token.
#[derive(Debug, PartialEq)]
enum FmtType {
    /// `{}` / `{s}`: everything up to whitespace or the token's end character.
    NonWhitespaceOrEnd,
    /// `{e}`: matches end of input, produces no value.
    OnlyEnd,
    /// `{[...]}`: characters from (or not from) a character list.
    Pattern,
    /// `{d}`: base-10 integer.
    Dec10,
    /// `{x}`: base-16 integer with optional `0x` prefix.
    Hex16,
    /// `{f}`: float.
    Flt,
    /// `{/.../}`: regular expression.
    #[cfg(feature = "regex")]
    Regex,
}

#[cfg(feature = "std")]
use std::error::Error;

use alloc::vec::Vec;
use core::fmt;

/// Error returned by `scan_fmt!`; wraps a short description of what failed.
#[derive(Debug, PartialEq)]
pub struct ScanError(pub String);

#[cfg(feature = "std")]
impl Error for ScanError {}

impl fmt::Display for ScanError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Scan error: {}", self.0)
    }
}

// Handle the following format strings:
// {}X -> everything until whitespace or next character 'X'
// {s} -> everything until whitespace
// {d} -> only base-10 integers
// {x} -> only unsigned base-16 integers.  Allow 0xfff or fff
// {f} -> only floats
// {*} -> get token, but don't assign it to output
// {[]} -> only search for given characters
//         starting with '^' negates everything
//         ranges with '-' work.  To include '-' put it at end or start
//         to include ']' put it at the start (or right after ^)
//         e.g., {[^,]} -> match everything until next comma

/// Cursor over a `Vec<char>` that makes it slightly easier to scan through.
struct VecScanner {
    data: Vec<char>,
    pos: usize,
    limit_pos: usize, // if non-0, then inc_limit() returns when 'pos' gets here
}

impl VecScanner {
    /// Creates a scanner positioned at the first character of `d`.
    fn new(d: Vec<char>) -> VecScanner {
        VecScanner {
            data: d,
            pos: 0,
            limit_pos: 0,
        }
    }

    /// Returns the current character.  Panics when the scanner is at the end,
    /// so callers must check `is_end()` (or the result of `inc()`) first.
    fn cur(&self) -> char {
        self.data[self.pos]
    }

    /// Returns the character `n` positions ahead, if there is one.
    fn peek(&self, n: usize) -> Option<char> {
        if self.pos + n < self.data.len() {
            Some(self.data[self.pos + n])
        } else {
            None
        }
    }

    /// True once every character has been consumed.
    fn is_end(&self) -> bool {
        self.pos >= self.data.len()
    }

    /// Advances one character; returns true if we have more data.
    fn inc(&mut self) -> bool {
        self.pos += 1;
        !self.is_end()
    }

    /// Sets the maximum position for `inc_limit()`; `None` removes the limit.
    fn start_inc_limit(&mut self, max_length: Option<usize>) {
        match max_length {
            Some(n) => {
                self.limit_pos = self.pos + n;
            }
            None => {
                self.limit_pos = 0;
            }
        }
    }

    /// True when a width limit is active and has been reached.
    fn hit_inc_limit(&self) -> bool {
        self.limit_pos > 0 && self.pos >= self.limit_pos
    }

    /// True when no further character may be consumed for the current token,
    /// either because the input ended or because the width limit was reached.
    fn at_stop(&self) -> bool {
        self.is_end() || self.hit_inc_limit()
    }

    /// Same as `inc()`, but also honors `start_inc_limit(max_length)`.
    fn inc_limit(&mut self) -> bool {
        self.pos += 1;
        !self.at_stop()
    }
}

/// Whitespace as understood by the scanner: space, tab, LF and CR.
fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\n' | '\r' => true,
        _ => false,
    }
}

/// Scans to past whitespace.  Returns false if end of input.
fn skip_whitespace(vs: &mut VecScanner) -> bool {
    while !vs.is_end() && is_whitespace(vs.cur()) {
        vs.inc();
    }
    !vs.is_end()
}

/// Parsed form of one `{...}` format token.
struct FmtResult {
    data_type: FmtType,
    /// Optional maximum field width (the "2" in `{2d}`).
    max_length: Option<usize>,
    /// False for `{*...}` tokens, which match but do not produce a value.
    store_result: bool,
    /// True when a `[...]` pattern starts with `^`.
    invert_char_list: bool,
    /// For `{}`: the format character following the token, which ends it.
    end_char: char,
    // Store pattern characters and ranges.  It might be worth
    // optimizing this if format strings are long.
    char_list: Vec<(char, char)>,
    #[cfg(feature = "regex")]
    regex: Option<Regex>,
}

/// Parses one format token.  See top-level docs for allowed formats.
///
/// Starts right after the opening '{' and consumes characters through the
/// final '}'.  Note that '{' and '}' can exist unescaped inside [].
/// Returns `None` when the token is malformed or the format string ends
/// before the token is complete.
fn get_format(fstr: &mut VecScanner) -> Option<FmtResult> {
    let mut res = FmtResult {
        data_type: FmtType::NonWhitespaceOrEnd,
        max_length: None,
        end_char: ' ',
        store_result: true,
        invert_char_list: false,
        char_list: vec![],
        #[cfg(feature = "regex")]
        regex: None,
    };

    if fstr.cur() == '*' {
        res.store_result = false;
        if !fstr.inc() {
            return None;
        }
    }

    if fstr.cur() == '}' {
        // plain `{}`: the next format character (if any) also ends the token
        if fstr.inc() {
            res.end_char = fstr.cur();
        }
        return Some(res);
    }

    // Read optional field width specifier (e.g., the "2" in {2d})
    let pos_start = fstr.pos;
    while fstr.cur().is_digit(10) {
        if !fstr.inc() {
            return None;
        }
    }
    if fstr.pos > pos_start {
        let max_length_string: String = fstr.data[pos_start..fstr.pos].iter().cloned().collect();
        res.max_length = max_length_string.parse::<usize>().ok();
    }

    match fstr.cur() {
        's' => { /* already FmtType::NonWhitespaceOrEnd */ }
        'e' => {
            res.data_type = FmtType::OnlyEnd;
        }
        'd' => {
            res.data_type = FmtType::Dec10;
        }
        'x' => {
            res.data_type = FmtType::Hex16;
        }
        'f' => {
            res.data_type = FmtType::Flt;
        }
        '[' => {
            res.data_type = FmtType::Pattern;
        }
        #[cfg(feature = "regex")]
        '/' => {
            res.data_type = FmtType::Regex;
        }
        _ => return None, // unexpected format
    }
    if !fstr.inc() {
        return None;
    }

    match res.data_type {
        FmtType::Pattern => handle_pattern(res, fstr),
        #[cfg(feature = "regex")]
        FmtType::Regex => handle_regex(res, fstr),
        _ => {
            if fstr.cur() != '}' {
                return None;
            }
            fstr.inc();
            Some(res)
        }
    }
}

/// Parses the body of a `[...]` pattern.  `fstr` is positioned on the first
/// character after '[' and is left just past the closing '}'.
fn handle_pattern(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
    res.data_type = FmtType::Pattern;

    if fstr.cur() == '^' {
        res.invert_char_list = true;
        if !fstr.inc() {
            return None;
        }
    }

    // a leading ']' or '-' is taken literally
    match fstr.cur() {
        ']' | '-' => {
            res.char_list.push((fstr.cur(), fstr.cur()));
            if !fstr.inc() {
                return None;
            }
        }
        _ => (),
    }

    // look for end of [] pattern
    while fstr.cur() != ']' {
        if fstr.peek(1) == Some('-') && fstr.peek(2) != Some(']') {
            let prev_char = fstr.cur();
            if !fstr.inc() {
                return None;
            } // go to '-'
            if !fstr.inc() {
                return None;
            } // go past '-'
            // add character range
            res.char_list.push((prev_char, fstr.cur()));
        } else {
            res.char_list.push((fstr.cur(), fstr.cur()));
        }
        if !fstr.inc() {
            return None;
        }
    }
    if !fstr.inc() {
        return None;
    } // go past ']'
    if fstr.cur() != '}' {
        return None;
    }
    fstr.inc(); // go past closing '}'

    Some(res)
}

/// Parses the body of a `/.../` regex token.  `fstr` is positioned on the
/// first character after the opening '/' and is left just past the closing
/// '}'.  The regex is anchored with a leading '^'.  Returns `None` when the
/// token is unterminated or the regex does not compile.
#[cfg(feature = "regex")]
fn handle_regex(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
    let start = fstr.pos;
    let mut escaped = false;
    // Find the first '/' that is not preceded by an unpaired backslash.
    loop {
        let c = fstr.cur();
        if c == '/' && !escaped {
            break;
        }
        // a backslash that is itself escaped does not escape what follows
        escaped = c == '\\' && !escaped;
        if !fstr.inc() {
            return None; // format ended before the closing '/'
        }
    }

    let mut pattern = String::from("^");
    pattern.extend(fstr.data[start..fstr.pos].iter().cloned());
    res.regex = Some(Regex::new(&pattern).ok()?);

    // consume close
    if !fstr.inc() {
        return None;
    }
    if fstr.cur() != '}' {
        return None;
    }
    fstr.inc();

    Some(res)
}

/// Advances past a base-10 integer, up to `max_length` characters.
fn scan_dec10(vs: &mut VecScanner, max_length: Option<usize>) {
    // look for [+-]{0,1}[0-9]+, up to max_length characters
    vs.start_inc_limit(max_length);
    scan_dec10_nest(vs);
}

// advance past base-10 decimal - assumes someone has called start_inc_limit()
fn scan_dec10_nest(vs: &mut VecScanner) {
    // look for [+-]{0,1}[0-9]+
    match vs.cur() {
        '+' | '-' => {
            if !vs.inc_limit() {
                return;
            }
        }
        _ => (),
    }

    while vs.cur().is_digit(10) {
        if !vs.inc_limit() {
            return;
        }
    }
}

// advance past base-16 hex
// look for (0x){0,1}[0-9a-fA-F]+
fn scan_hex16(vs: &mut VecScanner, max_length: Option<usize>) {
    vs.start_inc_limit(max_length);
    if vs.cur() == '0' {
        if !vs.inc_limit() {
            return;
        }
    }

    if vs.cur() == 'x' {
        if !vs.inc_limit() {
            return;
        }
    }

    while vs.cur().is_digit(16) {
        if !vs.inc_limit() {
            return;
        }
    }
}

// advance past float
// look for [+-]{0,1}[0-9]+
// then optional .[0-9]+
// then optional e[+-]{1}[0-9]+
fn scan_float(vs: &mut VecScanner, max_length: Option<usize>) {
    vs.start_inc_limit(max_length);
    scan_dec10_nest(vs);
    // The integer part may have consumed the rest of the input or the whole
    // field width; reading further would index past the end or overrun the
    // width.
    if vs.at_stop() {
        return;
    }
    if vs.cur() == '.' {
        if !vs.inc_limit() {
            return;
        }
        while vs.cur().is_digit(10) {
            if !vs.inc_limit() {
                return;
            }
        }
    }
    if vs.cur() == 'e' {
        if !vs.inc_limit() {
            return;
        }
        scan_dec10_nest(vs);
    }
}

// advance until 'end' or whitespace
fn scan_nonws_or_end(vs: &mut VecScanner, end: char) {
    while !is_whitespace(vs.cur()) && vs.cur() != end {
        if !vs.inc() {
            return;
        }
    }
}

// advance past pattern
fn scan_pattern(vs: &mut VecScanner, fmt: &mut FmtResult) {
    // if invert, scan until character not in char_list
    // else scan while character is in char_list
    loop {
        let c = vs.cur();
        let found = fmt
            .char_list
            .iter()
            .any(|&(start, end)| c >= start && c <= end);
        if found == fmt.invert_char_list {
            return;
        }
        if !vs.inc() {
            return;
        }
    }
}

/// Outcome of applying a regex token to the input.
#[cfg(feature = "regex")]
enum ReMatch {
    /// The regex has a first capture group that matched; `start` and `len`
    /// locate it in characters, relative to where the match began.
    Captured { start: usize, len: usize },
    /// No match, or a match without a first capture group.
    NoCapture,
}

/// Applies the token's regex at the current input position and advances
/// `vs.pos` past the entire match (not at all when nothing matches).
#[cfg(feature = "regex")]
fn scan_regex(vs: &mut VecScanner, fmt: &mut FmtResult) -> ReMatch {
    let re = fmt
        .regex
        .take()
        .expect("regex tokens always carry a compiled regex");
    let remainder: String = vs.data[vs.pos..].iter().cloned().collect();
    if let Some(caps) = re.captures(&remainder) {
        // regex offsets are in bytes; the scanner counts characters
        let whole_end = caps.get(0).unwrap().end();
        vs.pos += remainder[..whole_end].chars().count();
        if let Some(group) = caps.get(1) {
            return ReMatch::Captured {
                start: remainder[..group.start()].chars().count(),
                len: group.as_str().chars().count(),
            };
        }
    }
    ReMatch::NoCapture
}

/// Returns data matching the format from user input (else "").
///
/// `vs` must not be at the end of input; it is advanced past the token.
fn get_token(vs: &mut VecScanner, fmt: &mut FmtResult) -> String {
    let mut pos_start = vs.pos;
    match fmt.data_type {
        FmtType::OnlyEnd => {} // handled in scan()
        FmtType::NonWhitespaceOrEnd => scan_nonws_or_end(vs, fmt.end_char),
        FmtType::Dec10 => scan_dec10(vs, fmt.max_length),
        FmtType::Hex16 => scan_hex16(vs, fmt.max_length),
        FmtType::Flt => scan_float(vs, fmt.max_length),
        FmtType::Pattern => scan_pattern(vs, fmt),
        #[cfg(feature = "regex")]
        FmtType::Regex => {
            // if the regex has an internal group then we want to use the group
            // to select the substring, but either way the scan_regex function
            // will set pos to the end of the entire match consumed by the
            // regex
            if let ReMatch::Captured { start, len } = scan_regex(vs, fmt) {
                let from = pos_start + start;
                return vs.data[from..from + len].iter().cloned().collect();
            }
        }
    }
    if fmt.data_type == FmtType::Dec10 || fmt.data_type == FmtType::Flt {
        // parse won't accept "+" in front of numbers
        if vs.data[pos_start] == '+' {
            pos_start += 1;
        }
    }
    vs.data[pos_start..vs.pos].iter().cloned().collect()
}

// Extract String tokens from the input string based on
// the format string.  See lib.rs for more info.
// Returns an iterator of the String results.
pub fn scan(input_string: &str, format: &str) -> alloc::vec::IntoIter { let mut res: Vec = vec![]; let mut fmtstr = VecScanner::new(format.chars().collect()); let mut instr = VecScanner::new(input_string.chars().collect()); loop { let mut do_compare = true; if !skip_whitespace(&mut fmtstr) { break; } if !skip_whitespace(&mut instr) { break; } if fmtstr.cur() == '{' { if !fmtstr.inc() { break; } if fmtstr.cur() == '{' { // got an escaped {{ } else { let fmt = get_format(&mut fmtstr); let mut fmt = if let Some(fmt) = fmt { fmt } else { break; }; if fmt.data_type == FmtType::OnlyEnd && !instr.is_end() { // we didn't get an end of input where expected, so invalidate any matches return vec![String::from("")].into_iter(); } let data = get_token(&mut instr, &mut fmt); if fmt.store_result { if fmt.data_type == FmtType::Hex16 { let no_prefix = data.trim_start_matches("0x"); res.push(no_prefix.to_string()); } else { res.push(data); } } do_compare = false; } } else { if fmtstr.cur() == '}' { // handle escaped }} by skipping first '}' if !fmtstr.inc() { break; } } } if do_compare { if fmtstr.cur() != instr.cur() { return vec![String::from("")].into_iter(); // we had a non match! --> if we only break here we will return all matches found so far. // This will create a misbehaviour when there is something like `{d}in` as the in is not cared for. 
} if !fmtstr.inc() { break; } if !instr.inc() { break; } } } res.into_iter() } #[test] fn test_simple() { let mut res = scan(" data 42-12=30", "data {d}-{d}={d}"); assert_eq!(res.next().unwrap(), "42"); assert_eq!(res.next().unwrap(), "12"); assert_eq!(res.next().unwrap(), "30"); assert_eq!(res.next(), None); } #[test] fn test_plus_sign() { let mut res = scan("+42", "{d}"); assert_eq!(res.next().unwrap(), "42"); let mut res = scan("+42.7", "{f}"); assert_eq!(res.next().unwrap(), "42.7"); } #[test] fn test_complex() { let mut res = scan( "test{123 bye -456} hi -22.7e-1 +1.23fg", "test{{{d} bye {}}} hi {f} {f}", ); assert_eq!(res.next().unwrap(), "123"); assert_eq!(res.next().unwrap(), "-456"); assert_eq!(res.next().unwrap(), "-22.7e-1"); assert_eq!(res.next().unwrap(), "1.23"); assert_eq!(res.next(), None); } #[test] fn test_endline() { let mut res = scan("hi 15.7\r\n", "{} {}"); assert_eq!(res.next().unwrap(), "hi"); assert_eq!(res.next().unwrap(), "15.7"); } #[test] fn test_hex() { let mut res = scan("hi 0x15 ff fg", "hi {x} {x} {x}"); assert_eq!(res.next().unwrap(), "15"); assert_eq!(res.next().unwrap(), "ff"); assert_eq!(res.next().unwrap(), "f"); } #[test] fn test_string() { let mut res = scan("The quick brown fox", "{s}{s} {}n {s}x"); assert_eq!(res.next().unwrap(), "The"); assert_eq!(res.next().unwrap(), "quick"); assert_eq!(res.next().unwrap(), "brow"); assert_eq!(res.next().unwrap(), "fox"); } #[test] fn test_pattern() { let mut res = scan( "hi abcdefghijklmnop 0123456789", "hi {[a-l]}{[^a-l ]} {[01234-8]}{[9]}", ); assert_eq!(res.next().unwrap(), "abcdefghijkl"); assert_eq!(res.next().unwrap(), "mnop"); assert_eq!(res.next().unwrap(), "012345678"); assert_eq!(res.next().unwrap(), "9"); let mut res = scan("xyz 01234567λ89", "xyz {[40-3]}{*[65]}{[7-78-9λ]}"); assert_eq!(res.next().unwrap(), "01234"); assert_eq!(res.next().unwrap(), "7λ89"); } #[test] fn test_width() { let mut res = scan("01123fe071 432", "{2d}{3d}{4x}{2d} {3d}"); 
assert_eq!(res.next().unwrap(), "01"); assert_eq!(res.next().unwrap(), "123"); assert_eq!(res.next().unwrap(), "fe07"); assert_eq!(res.next().unwrap(), "1"); assert_eq!(res.next().unwrap(), "432"); } #[test] fn match_end() { let mut res = scan("12 hi", "{d} hi{e}"); assert_eq!(res.next().unwrap(), "12"); assert_eq!(res.next(), None); let mut res = scan("12 hi2", "{d} hi{e}"); assert_eq!(res.next().unwrap(), ""); } #[cfg(all(test, feature = "regex"))] mod test_regex { use super::scan; #[test] fn simple() { let mut res = scan("one (hello) two", "one ({/[^)]+/}) two"); assert_eq!(res.next().unwrap(), "hello"); } #[test] fn mixed_regex_and_pattern() { let mut res = scan("one ((hello)) two", r#"one ({/[^)]+\)?/}) two"#); assert_eq!(res.next().unwrap(), "(hello)"); } #[test] fn bad_pattern() { // note the extra close paren let mut scanner = scan("one (hello)) two", "one ({/[^)]+/}) two"); assert_eq!(scanner.next().unwrap(), ""); } #[test] fn uses_group_if_present() { let mut res = scan("one (((hello))) two", r#"one {/(\(.*\)) /}two"#); assert_eq!(res.next().unwrap(), "(((hello)))"); } #[test] fn unicode() { let mut res = scan("й", "{/.*/}"); assert_eq!(res.next().unwrap(), "й"); } }