scan_fmt-0.2.6/.cargo_vcs_info.json 0000644 00000000112 00000000000 0012624 0 ustar {
"git": {
"sha1": "07abb97f8591d5923e998fd940f42503b6ef9a69"
}
}
scan_fmt-0.2.6/.gitignore 0000644 0000000 0000000 00000000031 00000000000 0013362 0 ustar 0000000 0000000 target
Cargo.lock
*~
doc
scan_fmt-0.2.6/.travis.yml 0000644 0000000 0000000 00000000073 00000000000 0013511 0 ustar 0000000 0000000 language: rust
rust:
- stable
- beta
- nightly
scan_fmt-0.2.6/Cargo.toml 0000644 00000001554 00000000000 0010635 0 ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "scan_fmt"
version = "0.2.6"
authors = ["wlentz"]
description = "A simple scanf()-like input for Rust"
readme = "README.md"
license = "MIT"
repository = "https://github.com/wlentz/scan_fmt"
[lib]
name = "scan_fmt"
path = "src/lib.rs"
[dependencies.regex]
version = "1"
optional = true
[features]
default = ["regex", "std"]
std = []
scan_fmt-0.2.6/Cargo.toml.orig 0000644 0000000 0000000 00000000547 00000000000 0014275 0 ustar 0000000 0000000 [package]
name = "scan_fmt"
version = "0.2.6"
authors = ["wlentz"]
description = "A simple scanf()-like input for Rust"
repository = "https://github.com/wlentz/scan_fmt"
license = "MIT"
readme = "README.md"
[features]
default = ["regex", "std"]
std = []
[dependencies]
regex = { version = "1", optional = true }
[lib]
name = "scan_fmt"
path = "src/lib.rs"
scan_fmt-0.2.6/LICENSE 0000644 0000000 0000000 00000002062 00000000000 0012405 0 ustar 0000000 0000000 The MIT License (MIT)
Copyright (c) 2015 wlentz
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
scan_fmt-0.2.6/README.md 0000644 0000000 0000000 00000004766 00000000000 0012674 0 ustar 0000000 0000000 # scan_fmt 
scan_fmt provides a simple scanf()-like input for Rust. The goal is to make it easier to read data from a string or stdin.
Currently the format string supports the following special sequences:
{{ = escape for '{'
}} = escape for '}'
{} = return any value (until next whitespace)
{d} = return base-10 decimal
{x} = return hex (0xab or ab)
{f} = return float
{*d} = "*" as the first character means "match but don't return"
{2d} or {2x} or {2f} = limit the maximum width to 2. Any positive integer works.
{[...]} = return pattern.
^ inverts if it is the first character
- is for ranges. For a literal - put it at the start or end.
To add a literal ] do "[]abc]"
{e} = doesn't return a value, but matches end of line. Use this if you
don't want to ignore potential extra characters at end of input.
Examples:
{[0-9ab]} = match 0-9 or a or b
{[^,.]} = match anything but , or .
{/.../} = return regex inside of `//`.
If there is a single capture group inside of the slashes then
that group will make up the pattern.
Examples:
{/[0-9ab]/} = same as {[0-9ab]}, above
{/a+/} = matches at least one `a`, greedily
{/jj(a*)jj/} = matches any number of `a`s, but only if
they're surrounded by two `j`s
### Examples
```rust
#[macro_use] extern crate scan_fmt;
use std::error::Error ;
fn main() -> Result<(),Box<dyn Error>> {
let (a,b,c) = scan_fmt!( "hello 0x12 345 bye", // input string
"hello {x} {} {}", // format
[hex u8], i32, String) ? ; // type of a-c Options
assert_eq!( a, 0x12 ) ;
assert_eq!( b, 345 ) ;
assert_eq!( c, "bye" ) ;
println!("Enter something like: 123-22");
let (c,d) = scanln_fmt!( "{d}-{d}", // format
u16, u8) ? ; // type of a&b Options
println!("Got {} and {}",c,d) ;
// Note - currently scanln_fmt! just calls unwrap() on read_line()
let (a,b) = scan_fmt_some!( "hello 12 345", // input string
"hello {} {}", // format
u8, i32) ; // types
assert_eq!( a, Some(12) ) ;
assert_eq!( b, Some(345) ) ;
Ok(())
}
```
### Limitations
There is no compile-time warning if the number of {}'s in the format string doesn't match the number of return values. You'll just get None for extra return values. See src/lib.rs for more details.
scan_fmt-0.2.6/src/lib.rs 0000644 0000000 0000000 00000024610 00000000000 0013306 0 ustar 0000000 0000000 // Copyright 2015-2019 Will Lentz.
// Licensed under the MIT license.
//! This crate provides a simple sscanf()-like interface to extract
//! data from strings and stdin.
//!
//! In version 0.2 scan_fmt! changed to return a Result.
//! Use scan_fmt_some! for the 0.1.x behavior.
//!
//! To use this crate, do:
//!
//! ```ignore
//! #[macro_use] extern crate scan_fmt;
//! ```
//!
//! Example to read from a string:
//!
//! ```rust
//! # #[macro_use] extern crate scan_fmt;
//! # fn main() {
//! if let Ok((a,b)) = scan_fmt!( "-11 0x22", // input string
//! "{d} {x}", // format
//! i8, [hex u8]) { // types
//! assert_eq!( a, -11 ) ;
//! assert_eq!( b, 0x22 ) ;
//! }
//!
//! let (a,b,c) = scan_fmt_some!( "hello 12 345 bye", // input string
//! "hello {} {d} {}", // format
//! u8, i32, String); // type of a-c Options
//! assert_eq!( a, Some(12) ) ;
//! assert_eq!( b, Some(345) ) ;
//! assert_eq!( c, Some("bye".into()) ) ;
//! # }
//! ```
//!
//! Special format_string tokens:
//!
//! {{ = escape for '{'
//! }} = escape for '}'
//! {} = return any value (until next whitespace)
//! {d} = return base-10 decimal
//! {x} = return hex (0xab or ab)
//! = you must wrap the type in [hex type], e.g. "[hex u32]"
//! {f} = return float
//! {*d} = "*" as the first character means "match but don't return"
//! {2d} or {2x} or {2f} = limit the maximum width to 2. Any positive integer works.
//! {[...]} = return pattern.
//! ^ inverts if it is the first character
//! - is for ranges. For a literal - put it at the start or end.
//! To add a literal ] do "[]abc]"
//! {e} = doesn't return a value, but matches end of line. Use this if you
//! don't want to ignore potential extra characters at end of input.
//! Examples:
//! {[0-9ab]} = match 0-9 or a or b
//! {[^,.]} = match anything but , or .
//! {/.../} = return regex inside of `//`. (if regex feature is installed)
//! If there is a single capture group inside of the slashes then
//! that group will make up the pattern.
//! Examples:
//! {/[0-9ab]/} = same as {[0-9ab]}, above
//! {/a+/} = matches at least one `a`, greedily
//! {/jj(a*)jj/} = matches any number of `a`s, but only if
//! they're surrounded by two `j`s
//!
//!
//! Example to read from stdin:
//!
//! ```ignore
//! # #[macro_use] extern crate scan_fmt;
//! # use std::error::Error ;
//! # fn main() -> Result<(),Box<dyn Error>> {
//! let (a,b) = scanln_fmt!( "{}-{}", u16, u8) ? ;
//! println!("Got {} and {}",a,b);
//!
//! let (a,b) = scanln_fmt_some!( "{}-{}", // format
//! u16, u8); // type of a&b Options
//! match (a,b) {
//! (Some(aa),Some(bb)) => println!("Got {} and {}",aa,bb),
//! _ => println!("input error")
//! }
//! Ok(())
//! # }
//! ```
//!
//! ## LIMITATIONS:
//! There are no compile-time checks to make sure the format
//! strings matches the number of return arguments. Extra
//! return values will be None or cause a Result error.
//!
//! Like sscanf(), whitespace (including \n) is largely ignored.
//!
//! Conversion to output values is done using parse::<T>().
#![no_std]
#[cfg(feature = "regex")]
extern crate regex;
#[cfg(any(test, doctest, feature = "std"))]
extern crate std;
#[macro_use]
extern crate alloc;
pub mod parse;
/// Internal helper for `scan_fmt!` and `scan_fmt_some!`: pulls the next token
/// out of the iterator `$res` and converts it to the requested type.
///
/// * `wrap` arms evaluate to an `Option`: `None` when no token is left or the
///   conversion fails.
/// * `no_wrap` arms evaluate to a bare value and record the failure reason in
///   the caller's `$err` variable instead. On failure a placeholder value is
///   returned (`0` for hex types; for other types whatever `"0"` or, failing
///   that, `"0.0.0.0"` parses to — the macro panics if neither parses).
/// * `[hex T]` converts with `T::from_str_radix(.., 16)`; any other type is
///   converted with `str::parse`.
#[macro_export]
macro_rules! scan_fmt_help {
    ( wrap $res:expr, [hex $arg:tt] ) => {
        $res.next().and_then(|item| $arg::from_str_radix(&item, 16).ok())
    };
    ( wrap $res:expr , $($arg1:tt)::* ) => {
        $res.next().and_then(|item| item.parse::<$($arg1)::*>().ok())
    };
    ( no_wrap $err:ident, $res:expr, [hex $arg:tt] ) => {
        match $res.next().map(|item| $arg::from_str_radix(&item, 16)) {
            Some(Ok(value)) => value,
            Some(Err(_)) => {
                $err = "from_str_radix hex";
                0
            }
            None => {
                $err = "internal hex";
                0
            }
        }
    };
    ( no_wrap $err:ident, $res:expr , $($arg1:tt)::* ) => {{
        // A value of the target type is needed even when parsing fails:
        // "0" covers most types, "0.0.0.0" covers IpAddr.
        let fallback = "0"
            .parse::<$($arg1)::*>()
            .or_else(|_| "0.0.0.0".parse::<$($arg1)::*>())
            .unwrap();
        match $res.next() {
            None => {
                $err = concat!("internal ", stringify!($($arg1)::*));
                fallback
            }
            Some(item) => {
                let parsed = item.parse::<$($arg1)::*>();
                if item.is_empty() {
                    $err = "match::none";
                } else if parsed.is_err() {
                    $err = concat!("parse::", stringify!($($arg1)::*));
                }
                parsed.unwrap_or(fallback)
            }
        }
    }};
}
/// `(a,+) = scan_fmt_some!( input_string, format_string, types,+ )`
///
/// Scans `input_string` according to `format_string` and evaluates to one
/// `Option` per requested type (a tuple when several types are given). An
/// entry is `None` when its token is missing or cannot be converted.
#[macro_export]
macro_rules! scan_fmt_some {
    ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
        let mut tokens = $crate::parse::scan($instr, $fmt);
        ( $( $crate::scan_fmt_help!(wrap tokens, $($args)::*) ),* )
    }};
}
/// `(a,+) = scan_fmt!( input_string, format_string, types,+ )?`
///
/// Scans `input_string` according to `format_string` and evaluates to
/// `Ok(values)` when every requested value was found and converted, or to
/// `Err(ScanError)` carrying the reason recorded for the last failing value.
#[macro_export]
macro_rules! scan_fmt {
    ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
        // Stays empty unless one of the conversions below reports a failure.
        let mut failure = "";
        let mut tokens = $crate::parse::scan($instr, $fmt);
        let values = ( $( $crate::scan_fmt_help!(no_wrap failure, tokens, $($args)::*) ),* );
        match failure {
            "" => Ok(values),
            reason => Err($crate::parse::ScanError(reason.into())),
        }
    }};
}
#[cfg(feature = "std")]
pub use std_features::*;
#[cfg(feature = "std")]
mod std_features {
    use std::string::String;

    /// Reads one line from stdin and returns it.
    ///
    /// # Panics
    ///
    /// Panics if reading from stdin fails (the result of `read_line()` is
    /// unwrapped).
    pub fn get_input_unwrap() -> String {
        let mut input = String::new();
        std::io::stdin().read_line(&mut input).unwrap();
        input
    }

    /// (a,+) = scanln_fmt!( format_string, types,+ )
    /// Same as scan_fmt!(), but reads input string from stdin.
    // `$crate::` keeps the inner macro resolvable when a caller imports only
    // `scanln_fmt` by path instead of using `#[macro_use]`.
    #[macro_export]
    macro_rules! scanln_fmt {
        ($($arg:tt)*) => {{ $crate::scan_fmt!(&$crate::get_input_unwrap(), $($arg)*) }}
    }

    /// (a,+) = scanln_fmt_some!( format_string, types,+ )
    /// Same as scan_fmt_some!(), but reads input string from stdin.
    #[macro_export]
    macro_rules! scanln_fmt_some {
        ($($arg:tt)*) => {{ $crate::scan_fmt_some!(&$crate::get_input_unwrap(), $($arg)*) }}
    }
}
#[cfg(test)]
use alloc::string::{String, ToString};
#[cfg(test)]
use parse::ScanError;
// Test helper: asserts that two floats of type `$t` (f32 or f64) differ by at
// most twice the machine epsilon of that type.
#[cfg(test)]
macro_rules! assert_flt_eq {
    ($t:ident, $v1:expr, $v2:expr) => {{
        let tolerance = 2.0 * std::$t::EPSILON;
        assert!(($v1 - $v2).abs() <= tolerance);
    }};
}
// Test helper: exercises `scan_fmt!` in a function that returns the scan
// `Result`, so a scan failure surfaces as `Err` instead of a panic.
#[cfg(test)]
fn ret_scan_all() -> Result<(), ScanError> {
    scan_fmt!("1.2 e", "{f} {x}", f32, [hex u32]).map(|(float_val, hex_val)| {
        assert_flt_eq!(f32, float_val, 1.2);
        assert_eq!(hex_val, 14);
    })
}
#[test]
fn test_scan_all() {
    // String + unsigned decimal, with the String type given by its full path.
    match scan_fmt!("hi1 3", "{} {d}", std::string::String, u32) {
        Ok(pair) => assert_eq!(pair, ("hi1".to_string(), 3)),
        Err(_) => panic!("error 0"),
    }
    // String, hex and signed decimal in a single scan.
    match scan_fmt!("hi1 0xf -3", "{} {x} {d}", String, [hex u32], i8) {
        Ok((word, hex_val, signed)) => {
            assert_eq!(word, "hi1");
            assert_eq!(hex_val, 0xf);
            assert_eq!(signed, -3);
        }
        Err(_) => panic!("error 1"),
    }
    // "f" is not a base-10 integer, so the scan must report an error.
    let bad = scan_fmt!("hi1 f", "{} {d}", String, i32);
    assert!(bad.is_err());
    let outcome = ret_scan_all();
    std::println!("{:?}", outcome);
    assert!(outcome.is_ok());
}
// A leading '+' is accepted for both integers and floats.
#[test]
fn test_plus_sign() {
    let int_val = scan_fmt_some!("+42", "{d}", i32);
    assert_eq!(int_val, Some(42));
    let flt_val = scan_fmt_some!("+42.0", "{f}", f64);
    assert_flt_eq!(f64, flt_val.unwrap(), 42.0);
}
// `{x}` accepts a "0x" prefix and converts into the requested integer width.
#[test]
fn test_hex() {
    let parsed = scan_fmt_some!(
        "DEV 0xab 0x1234",
        "{} {x} {x}",
        std::string::String,
        [hex u32],
        [hex u64]
    );
    assert_eq!(parsed.0, Some("DEV".into()));
    assert_eq!(parsed.1, Some(0xab));
    assert_eq!(parsed.2, Some(0x1234));
}
// Values that do not fit the requested type come back as `None`, while the
// other values of the same scan are still returned.
#[test]
fn test_limited_data_range() {
    let (big, byte, small) = scan_fmt_some!(
        "test{\t 1e9 \n bye 257} hi 22.7e-1",
        "test{{ {} bye {d}}} hi {f}",
        f64,
        u8,
        f32
    );
    assert_flt_eq!(f64, big.unwrap(), 1e9);
    assert_eq!(byte, None); // 257 doesn't fit into a u8
    assert_flt_eq!(f32, small.unwrap(), 2.27);
}
// Asking for more outputs than the format has specifiers yields `None` for
// the extra ones.
#[test]
fn test_too_many_outputs() {
    let (first, second, third, extra) =
        scan_fmt_some!("a_aa bb_b c", "{} {s} {}", String, String, String, String);
    assert_eq!(first.unwrap(), "a_aa");
    assert_eq!(second.unwrap(), "bb_b");
    assert_eq!(third.unwrap(), "c");
    assert_eq!(extra, None);
}
// `{*...}` matches input but does not produce an output value.
#[test]
fn test_skip_assign() {
    let (head, tail) =
        scan_fmt_some!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String);
    assert_eq!(head.unwrap(), "1 2 3");
    assert_eq!(tail.unwrap(), "6 7");

    let (head, tail) =
        scan_fmt!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String).unwrap();
    assert_eq!(head, "1 2 3");
    assert_eq!(tail, "6 7");
}
// A numeric prefix such as `{2d}` caps how many characters a field consumes.
#[test]
fn test_width_specifier() {
    let (d1, d2, hex3, d4, flt3) = scan_fmt!(
        "123ab71 2.1234",
        "{1d}{2d}{3x}{4d}{3f}",
        u8,
        u8,
        [hex u16],
        u16,
        f32
    )
    .unwrap();
    assert_eq!(d1, 1);
    assert_eq!(d2, 23);
    assert_eq!(hex3, 0xab7);
    assert_eq!(d4, 1);
    assert_flt_eq!(f32, flt3, 2.1);
}
// An unterminated specifier ("{d") produces no token, which is reported as an
// "internal" error naming the requested type.
#[test]
fn test_err_equals() {
    let expected = Err(parse::ScanError("internal u8".to_string()));
    let outcome = scan_fmt!("hi 123", "hi {d", u8);
    assert_eq!(outcome, expected);
}
// `{/.../}` is only understood when the `regex` feature is enabled, so this
// test is compiled only with that feature.
#[cfg(feature = "regex")]
#[test]
fn test_no_post_match_regex() {
    // The regex matches the text right after the number.
    let a = scan_fmt!("74in", "{d}{/in/}", u8, String);
    assert_eq!(a, Ok((74, String::from("in"))));
    // The regex does not match, so the empty token is reported as an error.
    let a = scan_fmt!("74in", "{d}{/cm/}", u8, String);
    assert_eq!(a, Err(parse::ScanError("match::none".to_string())));
}
// Literal text after a specifier has to match the input as well.
#[test]
fn test_no_post_match() {
    let matching = scan_fmt!("17in", "{d}in", u8);
    assert_eq!(matching, Ok(17u8));

    let mismatching = scan_fmt!("17in", "{d}cm", u8);
    assert_eq!(
        mismatching,
        Err(parse::ScanError("match::none".to_string()))
    );
}
// `{e}` only matches at the end of the input.
#[test]
fn test_match_end() {
    let exact = scan_fmt!("17in", "{d}in{e}", u8);
    assert_eq!(exact, Ok(17u8));

    let trailing = scan_fmt!("17ink", "{d}in{e}", u8);
    assert_eq!(trailing, Err(parse::ScanError("match::none".to_string())));
}
// Types that cannot parse "0" (here `IpAddr`) still work with `scan_fmt!`.
#[test]
fn test_ip_addr() {
    let expected = std::net::IpAddr::V4(std::net::Ipv4Addr::new(185, 187, 165, 163));
    let scanned = scan_fmt!("x 185.187.165.163 y", "x {} y", std::net::IpAddr);
    assert_eq!(scanned.unwrap(), expected);
}
scan_fmt-0.2.6/src/parse.rs 0000644 0000000 0000000 00000043077 00000000000 0013662 0 ustar 0000000 0000000 // Copyright 2015-2019 Will Lentz.
// Licensed under the MIT license.
use alloc::string::{String, ToString};
#[cfg(feature = "regex")]
use regex::Regex;
/// Kind of token requested by a single `{...}` format specifier.
#[derive(Debug, PartialEq)]
enum FmtType {
    /// `{}` or `{s}`: everything up to whitespace (or the end character).
    NonWhitespaceOrEnd,
    /// `{e}`: matches only at the end of the input.
    OnlyEnd,
    /// `{[...]}`: characters from (or, with `^`, not from) a character list.
    Pattern,
    /// `{d}`: base-10 integer.
    Dec10,
    /// `{x}`: base-16 integer.
    Hex16,
    /// `{f}`: floating point number.
    Flt,
    /// `{/.../}`: regular expression.
    #[cfg(feature = "regex")]
    Regex,
}
#[cfg(feature = "std")]
use std::error::Error;
use alloc::vec::Vec;
use core::fmt;
/// Error returned by `scan_fmt!`; the wrapped string describes what failed.
#[derive(Debug, PartialEq)]
pub struct ScanError(pub String);

#[cfg(feature = "std")]
impl Error for ScanError {}

impl fmt::Display for ScanError {
    /// Writes the description prefixed with "Scan error: ".
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Scan error: ")?;
        f.write_str(&self.0)
    }
}
// Handle the following format strings:
// {}X -> everything until whitespace or next character 'X'
// {s} -> everything until whitespace
// {d} -> only base-10 integers
// {x} -> only unsigned base-16 integers. Allow 0xfff or fff
// {f} -> only floats
// {*} -> get token, but don't assign it to output
// {[]} -> only search for given characters
// starting with '^' negates everything
// ranges with '-' work. To include '-' put it at end or start
// to include ']' put it at the start (or right after ^)
// e.g., {[^,]} -> match everything until next comma
/// Cursor over a vector of `char`s, with an optional width limit that is
/// honored by `inc_limit()`.
struct VecScanner {
    data: Vec<char>,
    pos: usize,
    limit_pos: usize, // if non-0, then inc_limit() returns when 'pos' gets here
}

impl VecScanner {
    /// Creates a scanner positioned on the first character of `d`.
    fn new(d: Vec<char>) -> VecScanner {
        VecScanner {
            data: d,
            pos: 0,
            limit_pos: 0,
        }
    }

    /// Returns the character at the current position.
    ///
    /// Panics if the scanner is at (or past) the end of the data, so callers
    /// must check `is_end()` or the result of `inc()` / `inc_limit()` first.
    fn cur(&self) -> char {
        self.data[self.pos]
    }

    /// Returns the character `n` positions ahead, or `None` past the end.
    fn peek(&self, n: usize) -> Option<char> {
        self.data.get(self.pos + n).cloned()
    }

    /// True once the position has moved past the last character.
    fn is_end(&self) -> bool {
        self.pos >= self.data.len()
    }

    /// Advances one character. Returns true if we have more data.
    fn inc(&mut self) -> bool {
        self.pos += 1;
        !self.is_end()
    }

    /// Sets the maximum position for inc_limit(): `Some(n)` limits scanning to
    /// `n` characters from the current position, `None` removes the limit.
    fn start_inc_limit(&mut self, max_length: Option<usize>) {
        self.limit_pos = match max_length {
            Some(n) => self.pos + n,
            None => 0,
        };
    }

    /// True if a limit is active and the position has reached it.
    fn hit_inc_limit(&self) -> bool {
        self.limit_pos > 0 && self.pos >= self.limit_pos
    }

    /// Same as inc(), but also honors start_inc_limit(max_length): returns
    /// false when either the end of data or the limit has been reached.
    fn inc_limit(&mut self) -> bool {
        self.pos += 1;
        !(self.is_end() || self.hit_inc_limit())
    }
}
// True for the characters the scanner treats as whitespace:
// space, tab, newline and carriage return.
fn is_whitespace(c: char) -> bool {
    " \t\n\r".contains(c)
}
// Advance the scanner past any whitespace at the current position.
// Returns false if that reached the end of the data, true otherwise.
fn skip_whitespace(vs: &mut VecScanner) -> bool {
    while !vs.is_end() && is_whitespace(vs.cur()) {
        vs.inc();
    }
    !vs.is_end()
}
struct FmtResult {
data_type: FmtType,
max_length: Option,
store_result: bool,
invert_char_list: bool,
end_char: char,
// Store pattern characters and ranges. It might be worth
// optimizing this if format strings are long.
char_list: Vec<(char, char)>,
#[cfg(feature = "regex")]
regex: Option,
}
// See top-level docs for allowed formats.
// Starts right after opening '{'. Consumes characters to final }
// Note that '{' and '}' can exist unescaped inside [].
fn get_format(fstr: &mut VecScanner) -> Option {
let mut res = FmtResult {
data_type: FmtType::NonWhitespaceOrEnd,
max_length: None,
end_char: ' ',
store_result: true,
invert_char_list: false,
char_list: vec![],
#[cfg(feature = "regex")]
regex: None,
};
if fstr.cur() == '*' {
res.store_result = false;
if !fstr.inc() {
return None;
}
}
if fstr.cur() == '}' {
if fstr.inc() {
res.end_char = fstr.cur();
}
return Some(res);
}
// Read optional field width specifier (e.g., the "2" in {2d})
let pos_start = fstr.pos;
while fstr.cur().is_digit(10) {
if !fstr.inc() {
return None;
}
}
if fstr.pos > pos_start {
let max_length_string: String = fstr.data[pos_start..fstr.pos].iter().cloned().collect();
res.max_length = max_length_string.parse::().ok();
}
match fstr.cur() {
's' => { /* already FmtType::NonWhitespaceOrEnd */ }
'e' => {
res.data_type = FmtType::OnlyEnd;
}
'd' => {
res.data_type = FmtType::Dec10;
}
'x' => {
res.data_type = FmtType::Hex16;
}
'f' => {
res.data_type = FmtType::Flt;
}
'[' => {
res.data_type = FmtType::Pattern;
}
#[cfg(feature = "regex")]
'/' => {
res.data_type = FmtType::Regex;
}
_ => return None, // unexpected format
}
if !fstr.inc() {
return None;
}
match res.data_type {
FmtType::Pattern => handle_pattern(res, fstr),
#[cfg(feature = "regex")]
FmtType::Regex => handle_regex(res, fstr),
_ => {
if fstr.cur() != '}' {
return None;
}
fstr.inc();
Some(res)
}
}
}
fn handle_pattern(mut res: FmtResult, fstr: &mut VecScanner) -> Option {
// handle [] pattern
res.data_type = FmtType::Pattern;
if fstr.cur() == '^' {
res.invert_char_list = true;
if !fstr.inc() {
return None;
}
}
match fstr.cur() {
']' | '-' => {
res.char_list.push((fstr.cur(), fstr.cur()));
if !fstr.inc() {
return None;
}
}
_ => (),
}
// look for end of [] pattern
while fstr.cur() != ']' {
if fstr.peek(1) == Some('-') && fstr.peek(2) != Some(']') {
let prev_char = fstr.cur();
if !fstr.inc() {
break;
} // go to '-'
if !fstr.inc() {
break;
} // go past '-'
// add character range
res.char_list.push((prev_char, fstr.cur()));
} else {
res.char_list.push((fstr.cur(), fstr.cur()));
}
if !fstr.inc() {
return None;
}
}
if !fstr.inc() {
return None;
} // go past ']'
if fstr.cur() != '}' {
return None;
}
fstr.inc(); // go past closing '}'
Some(res)
}
// Parse the inside of a {/.../} specifier and compile it into res.regex.
// Starts right after the opening '/' and consumes through the closing '}'.
// The regex is anchored with '^' so it only matches at the current input
// position. Returns None if the closing '/' or '}' is missing or the regex
// does not compile.
#[cfg(feature = "regex")]
fn handle_regex(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
    let start = fstr.pos;
    // True when the previous character was an unescaped backslash, i.e. the
    // current character is escaped and cannot close the regex.
    let mut escaped = false;
    loop {
        if fstr.is_end() {
            // format string ended before the closing '/'
            return None;
        }
        let c = fstr.cur();
        if c == '/' && !escaped {
            break;
        }
        // "\\" is an escaped backslash: it must not escape what follows it.
        escaped = c == '\\' && !escaped;
        fstr.inc();
    }
    let substr = Some('^')
        .into_iter()
        .chain(fstr.data[start..fstr.pos].iter().cloned())
        .collect::<String>();
    if let Ok(re) = Regex::new(&substr) {
        res.regex = Some(re);
    } else {
        return None;
    }
    // consume close
    if !fstr.inc() {
        return None;
    }
    if fstr.cur() != '}' {
        return None;
    }
    fstr.inc();
    Some(res)
}
fn scan_dec10(vs: &mut VecScanner, max_length: Option) {
// look for [+-]{0,1}[0-9]+, up to max_length characters
vs.start_inc_limit(max_length);
scan_dec10_nest(vs);
}
// Advance past a base-10 integer: [+-]{0,1}[0-9]+
// Assumes someone has called start_inc_limit(); stops early when the end of
// the data or the width limit is reached.
fn scan_dec10_nest(vs: &mut VecScanner) {
    let sign = vs.cur();
    if (sign == '+' || sign == '-') && !vs.inc_limit() {
        return;
    }
    while vs.cur().is_digit(10) {
        if !vs.inc_limit() {
            return;
        }
    }
}
// advance past base-16 hex
// look for (0x){0,1}[0-9a-fA-F]+
fn scan_hex16(vs: &mut VecScanner, max_length: Option) {
vs.start_inc_limit(max_length);
if vs.cur() == '0' {
if !vs.inc_limit() {
return;
}
}
if vs.cur() == 'x' {
if !vs.inc_limit() {
return;
}
}
while vs.cur().is_digit(16) {
if !vs.inc_limit() {
return;
};
}
}
// advance past float
// look for [+-]{0,1}[0-9]+
// then optional .[0-9]+
// then optional e[+-]{1}[0-9]+
fn scan_float(vs: &mut VecScanner, max_length: Option) {
vs.start_inc_limit(max_length);
scan_dec10_nest(vs);
if vs.cur() == '.' {
if !vs.inc_limit() {
return;
}
while vs.cur().is_digit(10) {
if !vs.inc_limit() {
return;
}
}
}
if vs.cur() == 'e' {
if !vs.inc_limit() {
return;
}
scan_dec10_nest(vs);
}
}
// Advance until the current character is 'end' or whitespace, or until the
// data runs out.
fn scan_nonws_or_end(vs: &mut VecScanner, end: char) {
    loop {
        let c = vs.cur();
        if is_whitespace(c) || c == end || !vs.inc() {
            return;
        }
    }
}
// Advance past the characters accepted by a {[...]} pattern:
// without '^', scan while the character is in char_list;
// with '^', scan until a character in char_list is found.
fn scan_pattern(vs: &mut VecScanner, fmt: &mut FmtResult) {
    loop {
        let c = vs.cur();
        let listed = fmt.char_list.iter().any(|&(lo, hi)| lo <= c && c <= hi);
        if listed == fmt.invert_char_list || !vs.inc() {
            return;
        }
    }
}
// Outcome of scan_regex().
#[cfg(feature = "regex")]
enum ReMatch {
    // Capture group 1 took part in the match. 'start' and 'len' are counted in
    // characters; 'start' is relative to where the scan began.
    Captured { start: usize, len: usize },
    // No match, or a match without a capture group.
    NoCapture,
}

// Apply the specifier's regex at the current input position. On a match the
// scanner is moved past the whole match.
#[cfg(feature = "regex")]
fn scan_regex(vs: &mut VecScanner, fmt: &mut FmtResult) -> ReMatch {
    let re = fmt.regex.take().unwrap();
    let remainder = vs.data[vs.pos..].iter().cloned().collect::<String>();
    if let Some(mat) = re.captures(&remainder) {
        // The regex crate reports byte offsets into 'remainder', while the
        // scanner counts characters, so every offset is converted.
        vs.pos += remainder[..mat.get(0).unwrap().end()].chars().count();
        if let Some(cap) = mat.get(1) {
            return ReMatch::Captured {
                start: remainder[..cap.start()].chars().count(),
                len: cap.as_str().chars().count(),
            };
        }
    }
    ReMatch::NoCapture
}

// return data matching the format from user input (else "")
fn get_token(vs: &mut VecScanner, fmt: &mut FmtResult) -> String {
    let mut pos_start = vs.pos;
    match fmt.data_type {
        FmtType::OnlyEnd => {} // handled in scan()
        FmtType::NonWhitespaceOrEnd => scan_nonws_or_end(vs, fmt.end_char),
        FmtType::Dec10 => scan_dec10(vs, fmt.max_length),
        FmtType::Hex16 => scan_hex16(vs, fmt.max_length),
        FmtType::Flt => scan_float(vs, fmt.max_length),
        FmtType::Pattern => scan_pattern(vs, fmt),
        #[cfg(feature = "regex")]
        FmtType::Regex => {
            // if the regex has an internal group then we want to use the group
            // to select the substring, but either way the scan_regex function
            // will set pos to the end of the entire match consumed by the
            // regex
            match scan_regex(vs, fmt) {
                ReMatch::Captured { start, len } => {
                    // The group may begin after the start of the match
                    // (e.g. {/jj(a*)jj/}), so slice from its own start.
                    let from = pos_start + start;
                    return vs.data[from..from + len].iter().cloned().collect();
                }
                ReMatch::NoCapture => {}
            }
        }
    }
    if fmt.data_type == FmtType::Dec10 || fmt.data_type == FmtType::Flt {
        // parse won't accept "+" in front of numbers
        if vs.data[pos_start] == '+' {
            pos_start += 1;
        }
    }
    vs.data[pos_start..vs.pos].iter().cloned().collect()
}
// Extract String tokens from the input string based on
// the format string. See lib.rs for more info.
// Returns an iterator of the String results.
pub fn scan(input_string: &str, format: &str) -> alloc::vec::IntoIter {
let mut res: Vec = vec![];
let mut fmtstr = VecScanner::new(format.chars().collect());
let mut instr = VecScanner::new(input_string.chars().collect());
loop {
let mut do_compare = true;
if !skip_whitespace(&mut fmtstr) {
break;
}
if !skip_whitespace(&mut instr) {
break;
}
if fmtstr.cur() == '{' {
if !fmtstr.inc() {
break;
}
if fmtstr.cur() == '{' {
// got an escaped {{
} else {
let fmt = get_format(&mut fmtstr);
let mut fmt = if let Some(fmt) = fmt {
fmt
} else {
break;
};
if fmt.data_type == FmtType::OnlyEnd && !instr.is_end() {
// we didn't get an end of input where expected, so invalidate any matches
return vec![String::from("")].into_iter();
}
let data = get_token(&mut instr, &mut fmt);
if fmt.store_result {
if fmt.data_type == FmtType::Hex16 {
let no_prefix = data.trim_start_matches("0x");
res.push(no_prefix.to_string());
} else {
res.push(data);
}
}
do_compare = false;
}
} else {
if fmtstr.cur() == '}' {
// handle escaped }} by skipping first '}'
if !fmtstr.inc() {
break;
}
}
}
if do_compare {
if fmtstr.cur() != instr.cur() {
return vec![String::from("")].into_iter();
// we had a non match! --> if we only break here we will return all matches found so far.
// This will create a misbehaviour when there is something like `{d}in` as the in is not cared for.
}
if !fmtstr.inc() {
break;
}
if !instr.inc() {
break;
}
}
}
res.into_iter()
}
// Literal text and three decimal fields; nothing else is returned.
#[test]
fn test_simple() {
    let tokens: Vec<String> = scan(" data 42-12=30", "data {d}-{d}={d}").collect();
    assert_eq!(tokens, ["42", "12", "30"]);
}
// A leading '+' is stripped from decimal and float tokens.
#[test]
fn test_plus_sign() {
    let int_token = scan("+42", "{d}").next();
    assert_eq!(int_token, Some("42".to_string()));
    let flt_token = scan("+42.7", "{f}").next();
    assert_eq!(flt_token, Some("42.7".to_string()));
}
// Escaped braces, mixed specifiers and a float followed by other characters.
#[test]
fn test_complex() {
    let tokens: Vec<String> = scan(
        "test{123 bye -456} hi -22.7e-1 +1.23fg",
        "test{{{d} bye {}}} hi {f} {f}",
    )
    .collect();
    assert_eq!(tokens, ["123", "-456", "-22.7e-1", "1.23"]);
}
// A trailing "\r\n" is not part of the last token.
#[test]
fn test_endline() {
    let tokens: Vec<String> = scan("hi 15.7\r\n", "{} {}").take(2).collect();
    assert_eq!(tokens, ["hi", "15.7"]);
}
// Hex tokens lose their "0x" prefix and stop at the first non-hex character.
#[test]
fn test_hex() {
    let tokens: Vec<String> = scan("hi 0x15 ff fg", "hi {x} {x} {x}").take(3).collect();
    assert_eq!(tokens, ["15", "ff", "f"]);
}
// {s} stops at whitespace; {} also stops at the format character after it.
#[test]
fn test_string() {
    let tokens: Vec<String> = scan("The quick brown fox", "{s}{s} {}n {s}x")
        .take(4)
        .collect();
    assert_eq!(tokens, ["The", "quick", "brow", "fox"]);
}
// Character lists: ranges, inversion, skipped fields and non-ASCII members.
#[test]
fn test_pattern() {
    let tokens: Vec<String> = scan(
        "hi abcdefghijklmnop 0123456789",
        "hi {[a-l]}{[^a-l ]} {[01234-8]}{[9]}",
    )
    .take(4)
    .collect();
    assert_eq!(tokens, ["abcdefghijkl", "mnop", "012345678", "9"]);

    let tokens: Vec<String> = scan("xyz 01234567λ89", "xyz {[40-3]}{*[65]}{[7-78-9λ]}")
        .take(2)
        .collect();
    assert_eq!(tokens, ["01234", "7λ89"]);
}
// Width prefixes split adjacent fields that have no separator.
#[test]
fn test_width() {
    let tokens: Vec<String> = scan("01123fe071 432", "{2d}{3d}{4x}{2d} {3d}")
        .take(5)
        .collect();
    assert_eq!(tokens, ["01", "123", "fe07", "1", "432"]);
}
// {e} accepts the end of the input and invalidates the scan otherwise.
#[test]
fn match_end() {
    let tokens: Vec<String> = scan("12 hi", "{d} hi{e}").collect();
    assert_eq!(tokens, ["12"]);

    let first = scan("12 hi2", "{d} hi{e}").next();
    assert_eq!(first, Some("".to_string()));
}
// Tests for {/.../} specifiers; only built when the regex feature is enabled.
#[cfg(all(test, feature = "regex"))]
mod test_regex {
    use super::scan;

    // Returns the first token produced by scanning `input` with `format`.
    fn first_token(input: &str, format: &str) -> String {
        scan(input, format).next().unwrap()
    }

    #[test]
    fn simple() {
        assert_eq!(first_token("one (hello) two", "one ({/[^)]+/}) two"), "hello");
    }

    #[test]
    fn mixed_regex_and_pattern() {
        let token = first_token("one ((hello)) two", r#"one ({/[^)]+\)?/}) two"#);
        assert_eq!(token, "(hello)");
    }

    #[test]
    fn bad_pattern() {
        // note the extra close paren
        assert_eq!(first_token("one (hello)) two", "one ({/[^)]+/}) two"), "");
    }

    #[test]
    fn uses_group_if_present() {
        let token = first_token("one (((hello))) two", r#"one {/(\(.*\)) /}two"#);
        assert_eq!(token, "(((hello)))");
    }

    #[test]
    fn unicode() {
        assert_eq!(first_token("й", "{/.*/}"), "й");
    }
}