├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md └── src ├── lib.rs └── parse.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *~ 4 | doc 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "scan_fmt" 3 | version = "0.2.6" 4 | authors = ["wlentz"] 5 | description = "A simple scanf()-like input for Rust" 6 | repository = "https://github.com/wlentz/scan_fmt" 7 | license = "MIT" 8 | readme = "README.md" 9 | 10 | [features] 11 | default = ["regex", "std"] 12 | std = [] 13 | 14 | [dependencies] 15 | regex = { version = "1", optional = true } 16 | 17 | [lib] 18 | name = "scan_fmt" 19 | path = "src/lib.rs" 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 wlentz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # scan_fmt ![BuildStatus](https://travis-ci.org/wlentz/scan_fmt.svg?branch=master) 2 | scan_fmt provides a simple scanf()-like input for Rust. The goal is to make it easier to read data from a string or stdin. 3 | 4 | Currently the format string supports the following special sequences: 5 |
 6 |    {{ = escape for '{'
 7 |    }} = escape for '}'
 8 |    {} = return any value (until next whitespace)
 9 |    {d} = return base-10 decimal
10 |    {x} = return hex (0xab or ab)
11 |    {f} = return float
12 |    {*d} = "*" as the first character means "match but don't return"
13 |    {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
14 |    {[...]} = return pattern.
15 |      ^ inverts if it is the first character
16 |      - is for ranges.  For a literal - put it at the start or end.
17 |      To add a literal ] do "[]abc]"
18 |    {e} = doesn't return a value, but matches end of input.  Use this if you
19 |          don't want to ignore potential extra characters at end of input.
20 |    Examples:
21 |      {[0-9ab]} = match 0-9 or a or b
22 |      {[^,.]} = match anything but , or .
23 |    {/.../} = return regex inside of `//`.
24 |      If there is a single capture group inside of the slashes then
25 |      that group will make up the pattern.
26 |    Examples:
27 |      {/[0-9ab]/} = same as {[0-9ab]}, above
28 |      {/a+/} = matches at least one `a`, greedily
29 |      {/jj(a*)jj/} = matches any number of `a`s, but only if
30 |        they're surrounded by two `j`s
31 | 
32 | 33 | ### Examples 34 | ```rust 35 | #[macro_use] extern crate scan_fmt; 36 | use std::error::Error ; 37 | fn main() -> Result<(),Box> { 38 | let (a,b,c) = scan_fmt!( "hello 0x12 345 bye", // input string 39 | "hello {x} {} {}", // format 40 | [hex u8], i32, String) ? ; // type of a-c Options 41 | assert_eq!( a, 0x12 ) ; 42 | assert_eq!( b, 345 ) ; 43 | assert_eq!( c, "bye" ) ; 44 | 45 | println!("Enter something like: 123-22"); 46 | let (c,d) = scanln_fmt!( "{d}-{d}", // format 47 | u16, u8) ? ; // type of a&b Options 48 | println!("Got {} and {}",c,d) ; 49 | // Note - currently scanln_fmt! just calls unwrap() on read_line() 50 | 51 | let (a,b) = scan_fmt_some!( "hello 12 345", // input string 52 | "hello {} {}", // format 53 | u8, i32) ; // types 54 | assert_eq!( a, Some(12) ) ; 55 | assert_eq!( b, Some(345) ) ; 56 | Ok(()) 57 | } 58 | ``` 59 | 60 | ### Limitations 61 | There is no compile-time warning if the number of {}'s in the format string doesn't match the number of return values. You'll just get None for extra return values. See src/lib.rs for more details. 62 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Will Lentz. 2 | // Licensed under the MIT license. 3 | 4 | //! This crate provides a simple sscanf()-like interface to extract 5 | //! data from strings and stdin. 6 | //! 7 | //! In version 0.2 scan_fmt! changed to return a Result. 8 | //! Use scan_fmt_some! for the 0.1.x behavior. 9 | //! 10 | //! To use this crate, do: 11 | //! 12 | //! ```ignore 13 | //! #[macro_use] extern crate scan_fmt; 14 | //! ``` 15 | //! 16 | //! Example to read from a string: 17 | //! 18 | //! ```rust 19 | //! # #[macro_use] extern crate scan_fmt; 20 | //! # fn main() { 21 | //! if let Ok((a,b)) = scan_fmt!( "-11 0x22", // input string 22 | //! "{d} {x}", // format 23 | //! i8, [hex u8]) { // types 24 | //! 
assert_eq!( a, -11 ) ; 25 | //! assert_eq!( b, 0x22 ) ; 26 | //! } 27 | //! 28 | //! let (a,b,c) = scan_fmt_some!( "hello 12 345 bye", // input string 29 | //! "hello {} {d} {}", // format 30 | //! u8, i32, String); // type of a-c Options 31 | //! assert_eq!( a, Some(12) ) ; 32 | //! assert_eq!( b, Some(345) ) ; 33 | //! assert_eq!( c, Some("bye".into()) ) ; 34 | //! # } 35 | //! ``` 36 | //! 37 | //! Special format_string tokens: 38 | //!
 39 | //!   {{ = escape for '{'
 40 | //!   }} = escape for '}'
 41 | //!   {} = return any value (until next whitespace)
 42 | //!   {d} = return base-10 decimal
 43 | //!   {x} = return hex (0xab or ab)
 44 | //!       = you must wrap the type in [hex type], e.g. "[hex u32]"
 45 | //!   {f} = return float
 46 | //!   {*d} = "*" as the first character means "match but don't return"
 47 | //!   {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
 48 | //!   {[...]} = return pattern.
 49 | //!     ^ inverts if it is the first character
 50 | //!     - is for ranges.  For a literal - put it at the start or end.
 51 | //!     To add a literal ] do "[]abc]"
52 | //!   {e} = doesn't return a value, but matches end of input.  Use this if you
 53 | //!         don't want to ignore potential extra characters at end of input.
 54 | //!   Examples:
 55 | //!     {[0-9ab]} = match 0-9 or a or b
 56 | //!     {[^,.]} = match anything but , or .
57 | //!     {/.../} = return regex inside of `//`. (if the `regex` feature is enabled)
 58 | //!      If there is a single capture group inside of the slashes then
 59 | //!      that group will make up the pattern.
 60 | //!   Examples:
 61 | //!     {/[0-9ab]/} = same as {[0-9ab]}, above
 62 | //!     {/a+/} = matches at least one `a`, greedily
 63 | //!     {/jj(a*)jj/} = matches any number of `a`s, but only if
 64 | //!       they're surrounded by two `j`s
 65 | //! 
66 | //! 67 | //! Example to read from stdin: 68 | //! 69 | //! ```ignore 70 | //! # #[macro_use] extern crate scan_fmt; 71 | //! # use std::error::Error ; 72 | //! # fn main() -> Result<(),Box> { 73 | //! let (a,b) = scanln_fmt!( "{}-{}", u16, u8) ? ; 74 | //! println!("Got {} and {}",a,b); 75 | //! 76 | //! let (a,b) = scanln_fmt_some!( "{}-{}", // format 77 | //! u16, u8); // type of a&b Options 78 | //! match (a,b) { 79 | //! (Some(aa),Some(bb)) => println!("Got {} and {}",aa,bb), 80 | //! _ => println!("input error") 81 | //! } 82 | //! Ok(()) 83 | //! # } 84 | //! ``` 85 | //! 86 | //! ## LIMITATIONS: 87 | //! There are no compile-time checks to make sure the format 88 | //! strings matches the number of return arguments. Extra 89 | //! return values will be None or cause a Result error. 90 | //! 91 | //! Like sscanf(), whitespace (including \n) is largely ignored. 92 | //! 93 | //! Conversion to output values is done using parse::(). 94 | 95 | #![no_std] 96 | 97 | #[cfg(feature = "regex")] 98 | extern crate regex; 99 | 100 | #[cfg(any(test, doctest, feature = "std"))] 101 | extern crate std; 102 | 103 | #[macro_use] 104 | extern crate alloc; 105 | 106 | pub mod parse; 107 | 108 | #[macro_export] 109 | macro_rules! 
scan_fmt_help { 110 | ( wrap $res:expr, [hex $arg:tt] ) => { 111 | match $res.next() { 112 | Some(item) => $arg::from_str_radix(&item, 16).ok(), 113 | _ => None, 114 | } 115 | }; 116 | ( wrap $res:expr , $($arg1:tt)::* ) => { 117 | match $res.next() { 118 | Some(item) => item.parse::<$($arg1)::*>().ok(), 119 | _ => None, 120 | } 121 | }; 122 | ( no_wrap $err:ident, $res:expr, [hex $arg:tt] ) => { 123 | match $res.next() { 124 | Some(item) => { 125 | let ret = $arg::from_str_radix(&item, 16); 126 | if ret.is_err() { 127 | $err = "from_str_radix hex"; 128 | } 129 | ret.unwrap_or(0) 130 | } 131 | _ => { 132 | $err = "internal hex"; 133 | 0 134 | } 135 | } 136 | }; 137 | ( no_wrap $err:ident, $res:expr , $($arg1:tt)::* ) => {{ 138 | // We need to return a value of type $($arg1)::* if parsing fails. 139 | // Is there a better way? 140 | let mut err = "0".parse::<$($arg1)::*>(); // most types 141 | if err.is_err() { 142 | err = "0.0.0.0".parse::<$($arg1)::*>(); // IpAddr 143 | } 144 | let err = err.unwrap(); 145 | match $res.next() { 146 | Some(item) => { 147 | let ret = item.parse::<$($arg1)::*>(); 148 | if(item == "") { 149 | $err = "match::none"; 150 | } else if ret.is_err() { 151 | $err = concat!("parse::", stringify!($($arg1)::*)); 152 | } 153 | ret.unwrap_or(err) 154 | } 155 | _ => { 156 | $err = concat!("internal ", stringify!($($arg1)::*)); 157 | err 158 | } 159 | } 160 | }}; 161 | } 162 | 163 | #[macro_export] 164 | macro_rules! scan_fmt_some { 165 | ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => { 166 | { 167 | let mut res = $crate::parse::scan( $instr, $fmt ) ; 168 | ($($crate::scan_fmt_help!(wrap res,$($args)::*)),*) 169 | } 170 | }; 171 | } 172 | 173 | #[macro_export] 174 | macro_rules! 
scan_fmt { 175 | ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => { 176 | { 177 | let mut err = "" ; 178 | let mut res = $crate::parse::scan( $instr, $fmt ) ; 179 | let result = ($($crate::scan_fmt_help!(no_wrap err,res,$($args)::*)),*) ; 180 | if err == "" { 181 | Ok(result) 182 | } else { 183 | Err($crate::parse::ScanError(err.into())) 184 | } 185 | } 186 | }; 187 | } 188 | 189 | #[cfg(feature = "std")] 190 | pub use std_features::*; 191 | 192 | #[cfg(feature = "std")] 193 | mod std_features { 194 | use std::string::String; 195 | 196 | pub fn get_input_unwrap() -> String { 197 | let mut input = String::new(); 198 | std::io::stdin().read_line(&mut input).unwrap(); 199 | input 200 | } 201 | 202 | /// (a,+) = scanln_fmt!( format_string, types,+ ) 203 | ///

Same as scan_fmt!(), but reads input string from stdin.

204 | #[macro_export] 205 | macro_rules! scanln_fmt { 206 | ($($arg:tt)*) => {{ scan_fmt!(&$crate::get_input_unwrap(), $($arg)*) }} 207 | } 208 | 209 | /// (a,+) = scanln_fmt_some!( format_string, types,+ ) 210 | ///

Same as scan_fmt_some!(), but reads input string from stdin.

211 | #[macro_export] 212 | macro_rules! scanln_fmt_some { 213 | ($($arg:tt)*) => {{ scan_fmt_some!(&$crate::get_input_unwrap(), $($arg)*) }} 214 | } 215 | } 216 | 217 | #[cfg(test)] 218 | use alloc::string::{String, ToString}; 219 | #[cfg(test)] 220 | use parse::ScanError; 221 | 222 | #[cfg(test)] 223 | macro_rules! assert_flt_eq { 224 | ($t:ident, $v1:expr, $v2:expr) => {{ 225 | assert!(($v1 - $v2).abs() <= 2.0 * std::$t::EPSILON); 226 | }}; 227 | } 228 | 229 | #[cfg(test)] 230 | fn ret_scan_all() -> Result<(), ScanError> { 231 | let (a, b) = scan_fmt!("1.2 e","{f} {x}",f32,[hex u32])?; 232 | assert_flt_eq!(f32, a, 1.2); 233 | assert_eq!(b, 14); 234 | Ok(()) 235 | } 236 | 237 | #[test] 238 | fn test_scan_all() { 239 | if let Ok(a) = scan_fmt!("hi1 3", "{} {d}", std::string::String, u32) { 240 | assert_eq!(a, ("hi1".to_string(), 3)); 241 | } else { 242 | assert!(false, "error 0"); 243 | } 244 | if let Ok((a, b, c)) = scan_fmt!("hi1 0xf -3","{} {x} {d}",String,[hex u32],i8) { 245 | assert_eq!(a, "hi1"); 246 | assert_eq!(b, 0xf); 247 | assert_eq!(c, -3); 248 | } else { 249 | assert!(false, "error 1"); 250 | } 251 | let a = scan_fmt!("hi1 f", "{} {d}", String, i32); 252 | assert!(a.is_err()); 253 | let a = ret_scan_all(); 254 | std::println!("{:?}", a); 255 | assert!(a.is_ok()); 256 | } 257 | 258 | #[test] 259 | fn test_plus_sign() { 260 | let a = scan_fmt_some!("+42", "{d}", i32); 261 | assert_eq!(a, Some(42)); 262 | let a = scan_fmt_some!("+42.0", "{f}", f64); 263 | assert_flt_eq!(f64, a.unwrap(), 42.0); 264 | } 265 | 266 | #[test] 267 | fn test_hex() { 268 | let (a, b, c) = 269 | scan_fmt_some!("DEV 0xab 0x1234", "{} {x} {x}", std::string::String, [hex u32], [hex u64]); 270 | assert_eq!(a, Some("DEV".into())); 271 | assert_eq!(b, Some(0xab)); 272 | assert_eq!(c, Some(0x1234)); 273 | } 274 | 275 | #[test] 276 | fn test_limited_data_range() { 277 | let (a, b, c) = scan_fmt_some!( 278 | "test{\t 1e9 \n bye 257} hi 22.7e-1", 279 | "test{{ {} bye {d}}} hi {f}", 280 | 
f64, 281 | u8, 282 | f32 283 | ); 284 | assert_flt_eq!(f64, a.unwrap(), 1e9); 285 | assert_eq!(b, None); // 257 doesn't fit into a u8 286 | assert_flt_eq!(f32, c.unwrap(), 2.27); 287 | } 288 | 289 | #[test] 290 | fn test_too_many_outputs() { 291 | let (a, b, c, d) = scan_fmt_some!("a_aa bb_b c", "{} {s} {}", String, String, String, String); 292 | assert_eq!(a.unwrap(), "a_aa"); 293 | assert_eq!(b.unwrap(), "bb_b"); 294 | assert_eq!(c.unwrap(), "c"); 295 | assert_eq!(d, None); 296 | } 297 | 298 | #[test] 299 | fn test_skip_assign() { 300 | let (a, b) = scan_fmt_some!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String); 301 | assert_eq!(a.unwrap(), "1 2 3"); 302 | assert_eq!(b.unwrap(), "6 7"); 303 | let a = scan_fmt!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String).unwrap(); 304 | assert_eq!(a.0, "1 2 3"); 305 | assert_eq!(a.1, "6 7"); 306 | } 307 | 308 | #[test] 309 | fn test_width_specifier() { 310 | let a = scan_fmt!("123ab71 2.1234", 311 | "{1d}{2d}{3x}{4d}{3f}", 312 | u8, u8, [hex u16], u16, f32) 313 | .unwrap(); 314 | assert_eq!(a.0, 1); 315 | assert_eq!(a.1, 23); 316 | assert_eq!(a.2, 0xab7); 317 | assert_eq!(a.3, 1); 318 | assert_flt_eq!(f32, a.4, 2.1); 319 | } 320 | 321 | #[test] 322 | fn test_err_equals() { 323 | let a = scan_fmt!("hi 123", "hi {d", u8); 324 | assert_eq!(a, Err(parse::ScanError("internal u8".to_string()))); 325 | } 326 | 327 | #[test] 328 | fn test_no_post_match_regex() { 329 | let a = scan_fmt!("74in", "{d}{/in/}", u8, String); 330 | assert_eq!(a, Ok((74, String::from("in")))); 331 | let a = scan_fmt!("74in", "{d}{/cm/}", u8, String); 332 | assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); 333 | } 334 | 335 | #[test] 336 | fn test_no_post_match() { 337 | let a = scan_fmt!("17in", "{d}in", u8); 338 | assert_eq!(a, Ok(17u8)); 339 | 340 | let a = scan_fmt!("17in", "{d}cm", u8); 341 | assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); 342 | } 343 | 344 | #[test] 345 | fn test_match_end() { 346 | 
let a = scan_fmt!("17in", "{d}in{e}", u8); 347 | assert_eq!(a, Ok(17u8)); 348 | let a = scan_fmt!("17ink", "{d}in{e}", u8); 349 | assert_eq!(a, Err(parse::ScanError("match::none".to_string()))); 350 | } 351 | 352 | #[test] 353 | fn test_ip_addr() { 354 | let a = scan_fmt!("x 185.187.165.163 y", "x {} y", std::net::IpAddr); 355 | assert_eq!( 356 | a.unwrap(), 357 | std::net::IpAddr::V4(std::net::Ipv4Addr::new(185, 187, 165, 163)) 358 | ); 359 | } 360 | -------------------------------------------------------------------------------- /src/parse.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015-2019 Will Lentz. 2 | // Licensed under the MIT license. 3 | use alloc::string::{String, ToString}; 4 | 5 | #[cfg(feature = "regex")] 6 | use regex::Regex; 7 | 8 | #[derive(Debug, PartialEq)] 9 | enum FmtType { 10 | NonWhitespaceOrEnd, 11 | OnlyEnd, 12 | Pattern, 13 | Dec10, 14 | Hex16, 15 | Flt, 16 | #[cfg(feature = "regex")] 17 | Regex, 18 | } 19 | 20 | #[cfg(feature = "std")] 21 | use std::error::Error; 22 | 23 | use alloc::vec::Vec; 24 | use core::fmt; 25 | 26 | #[derive(Debug, PartialEq)] 27 | pub struct ScanError(pub String); 28 | 29 | #[cfg(feature = "std")] 30 | impl Error for ScanError {} 31 | 32 | impl fmt::Display for ScanError { 33 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 34 | write!(f, "Scan error: {}", self.0) 35 | } 36 | } 37 | 38 | // Handle the following format strings: 39 | // {}X -> everything until whitespace or next character 'X' 40 | // {s} -> everything until whitespace 41 | // {d} -> only base-10 integers 42 | // {x} -> only unsigned base-16 integers. Allow 0xfff or fff 43 | // {f} -> only floats 44 | // {*} -> get token, but don't assign it to output 45 | // {[]} -> only search for given characters 46 | // starting with '^' negates everything 47 | // ranges with '-' work. 
//       To include '-' put it at end or start
//       to include ']' put it at the start (or right after ^)
//       e.g., {[^,]} -> match everything until next comma

// Make it slightly easier to scan through a Vec<>
//
// `pos` is the index of the current character in `data`.  `limit_pos`
// implements the optional field width: if non-0, then inc_limit() returns
// false when 'pos' gets there.
struct VecScanner {
    data: Vec<char>,
    pos: usize,
    limit_pos: usize, // 0 means "no limit"
}

impl VecScanner {
    fn new(d: Vec<char>) -> VecScanner {
        VecScanner {
            data: d,
            pos: 0,
            limit_pos: 0,
        }
    }

    // Current character.  Once the cursor has moved past the last character
    // this returns '\0' instead of indexing out of bounds, so callers that
    // look at cur() right after a failed inc()/inc_limit() no longer panic.
    fn cur(&self) -> char {
        self.data.get(self.pos).copied().unwrap_or('\0')
    }

    // Character `n` positions after the current one, if there is one.
    fn peek(&self, n: usize) -> Option<char> {
        self.data.get(self.pos + n).copied()
    }

    fn is_end(&self) -> bool {
        self.pos >= self.data.len()
    }

    // returns true if we have more data
    fn inc(&mut self) -> bool {
        self.pos += 1;
        !self.is_end()
    }

    // set the maximum position for inc_limit()
    fn start_inc_limit(&mut self, max_length: Option<usize>) {
        self.limit_pos = match max_length {
            Some(n) => self.pos + n,
            None => 0,
        };
    }

    // true once a limit is active and 'pos' has reached it
    fn hit_inc_limit(&self) -> bool {
        self.limit_pos > 0 && self.pos >= self.limit_pos
    }

    // same as inc(), but also honors start_inc_limit(max_length)
    fn inc_limit(&mut self) -> bool {
        self.pos += 1;
        !(self.is_end() || self.hit_inc_limit())
    }
}

// Whitespace as understood by the scanner: space, tab, LF and CR only.
fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\n' | '\r' => true,
        _ => false,
    }
}

// scan to past whitespace.  Return false if end of input.
120 | fn skip_whitespace(vs: &mut VecScanner) -> bool { 121 | while !vs.is_end() { 122 | if is_whitespace(vs.cur()) { 123 | vs.inc(); 124 | } else { 125 | break; 126 | } 127 | } 128 | !vs.is_end() 129 | } 130 | 131 | struct FmtResult { 132 | data_type: FmtType, 133 | max_length: Option, 134 | store_result: bool, 135 | invert_char_list: bool, 136 | end_char: char, 137 | // Store pattern characters and ranges. It might be worth 138 | // optimizing this if format strings are long. 139 | char_list: Vec<(char, char)>, 140 | #[cfg(feature = "regex")] 141 | regex: Option, 142 | } 143 | 144 | // See top-level docs for allowed formats. 145 | // Starts right after opening '{'. Consumes characters to final } 146 | // Note that '{' and '}' can exist unescaped inside []. 147 | fn get_format(fstr: &mut VecScanner) -> Option { 148 | let mut res = FmtResult { 149 | data_type: FmtType::NonWhitespaceOrEnd, 150 | max_length: None, 151 | end_char: ' ', 152 | store_result: true, 153 | invert_char_list: false, 154 | char_list: vec![], 155 | #[cfg(feature = "regex")] 156 | regex: None, 157 | }; 158 | if fstr.cur() == '*' { 159 | res.store_result = false; 160 | if !fstr.inc() { 161 | return None; 162 | } 163 | } 164 | 165 | if fstr.cur() == '}' { 166 | if fstr.inc() { 167 | res.end_char = fstr.cur(); 168 | } 169 | return Some(res); 170 | } 171 | 172 | // Read optional field width specifier (e.g., the "2" in {2d}) 173 | let pos_start = fstr.pos; 174 | while fstr.cur().is_digit(10) { 175 | if !fstr.inc() { 176 | return None; 177 | } 178 | } 179 | if fstr.pos > pos_start { 180 | let max_length_string: String = fstr.data[pos_start..fstr.pos].iter().cloned().collect(); 181 | res.max_length = max_length_string.parse::().ok(); 182 | } 183 | 184 | match fstr.cur() { 185 | 's' => { /* already FmtType::NonWhitespaceOrEnd */ } 186 | 'e' => { 187 | res.data_type = FmtType::OnlyEnd; 188 | } 189 | 'd' => { 190 | res.data_type = FmtType::Dec10; 191 | } 192 | 'x' => { 193 | res.data_type = 
FmtType::Hex16; 194 | } 195 | 'f' => { 196 | res.data_type = FmtType::Flt; 197 | } 198 | '[' => { 199 | res.data_type = FmtType::Pattern; 200 | } 201 | #[cfg(feature = "regex")] 202 | '/' => { 203 | res.data_type = FmtType::Regex; 204 | } 205 | _ => return None, // unexpected format 206 | } 207 | if !fstr.inc() { 208 | return None; 209 | } 210 | 211 | match res.data_type { 212 | FmtType::Pattern => handle_pattern(res, fstr), 213 | #[cfg(feature = "regex")] 214 | FmtType::Regex => handle_regex(res, fstr), 215 | _ => { 216 | if fstr.cur() != '}' { 217 | return None; 218 | } 219 | fstr.inc(); 220 | Some(res) 221 | } 222 | } 223 | } 224 | 225 | fn handle_pattern(mut res: FmtResult, fstr: &mut VecScanner) -> Option { 226 | // handle [] pattern 227 | res.data_type = FmtType::Pattern; 228 | 229 | if fstr.cur() == '^' { 230 | res.invert_char_list = true; 231 | if !fstr.inc() { 232 | return None; 233 | } 234 | } 235 | 236 | match fstr.cur() { 237 | ']' | '-' => { 238 | res.char_list.push((fstr.cur(), fstr.cur())); 239 | if !fstr.inc() { 240 | return None; 241 | } 242 | } 243 | _ => (), 244 | } 245 | 246 | // look for end of [] pattern 247 | while fstr.cur() != ']' { 248 | if fstr.peek(1) == Some('-') && fstr.peek(2) != Some(']') { 249 | let prev_char = fstr.cur(); 250 | if !fstr.inc() { 251 | break; 252 | } // go to '-' 253 | if !fstr.inc() { 254 | break; 255 | } // go past '-' 256 | // add character range 257 | res.char_list.push((prev_char, fstr.cur())); 258 | } else { 259 | res.char_list.push((fstr.cur(), fstr.cur())); 260 | } 261 | if !fstr.inc() { 262 | return None; 263 | } 264 | } 265 | if !fstr.inc() { 266 | return None; 267 | } // go past ']' 268 | if fstr.cur() != '}' { 269 | return None; 270 | } 271 | fstr.inc(); // go past closing '}' 272 | 273 | Some(res) 274 | } 275 | 276 | #[cfg(feature = "regex")] 277 | fn handle_regex(mut res: FmtResult, fstr: &mut VecScanner) -> Option { 278 | let start = fstr.pos; 279 | let mut last_was_escape = false; 280 | while 
fstr.inc() { 281 | if fstr.cur() == '/' && !last_was_escape { 282 | break; 283 | } 284 | 285 | if fstr.cur() == '\\' { 286 | last_was_escape = true; 287 | } else { 288 | last_was_escape = false; 289 | } 290 | } 291 | if fstr.cur() != '/' { 292 | // invalid 293 | return None; 294 | } 295 | 296 | let substr = Some('^') 297 | .into_iter() 298 | .chain(fstr.data[start..fstr.pos].iter().cloned()) 299 | .collect::(); 300 | 301 | if let Ok(re) = Regex::new(&substr) { 302 | res.regex = Some(re); 303 | } else { 304 | return None; 305 | } 306 | 307 | // consume close 308 | fstr.inc(); 309 | if fstr.cur() != '}' { 310 | return None; 311 | } 312 | fstr.inc(); 313 | 314 | Some(res) 315 | } 316 | 317 | fn scan_dec10(vs: &mut VecScanner, max_length: Option) { 318 | // look for [+-]{0,1}[0-9]+, up to max_length characters 319 | vs.start_inc_limit(max_length); 320 | scan_dec10_nest(vs); 321 | } 322 | 323 | // advance past base-10 decimal - assumes someone has called start_inc_limit() 324 | fn scan_dec10_nest(vs: &mut VecScanner) { 325 | // look for [+-]{0,1}[0-9]+ 326 | match vs.cur() { 327 | '+' | '-' => { 328 | if !vs.inc_limit() { 329 | return; 330 | } 331 | } 332 | _ => (), 333 | } 334 | 335 | while vs.cur().is_digit(10) { 336 | if !vs.inc_limit() { 337 | return; 338 | } 339 | } 340 | } 341 | 342 | // advance past base-16 hex 343 | // look for (0x){0,1}[0-9a-fA-F]+ 344 | fn scan_hex16(vs: &mut VecScanner, max_length: Option) { 345 | vs.start_inc_limit(max_length); 346 | if vs.cur() == '0' { 347 | if !vs.inc_limit() { 348 | return; 349 | } 350 | } 351 | if vs.cur() == 'x' { 352 | if !vs.inc_limit() { 353 | return; 354 | } 355 | } 356 | while vs.cur().is_digit(16) { 357 | if !vs.inc_limit() { 358 | return; 359 | }; 360 | } 361 | } 362 | 363 | // advance past float 364 | // look for [+-]{0,1}[0-9]+ 365 | // then optional .[0-9]+ 366 | // then optional e[+-]{1}[0-9]+ 367 | fn scan_float(vs: &mut VecScanner, max_length: Option) { 368 | vs.start_inc_limit(max_length); 369 | 
scan_dec10_nest(vs); 370 | if vs.cur() == '.' { 371 | if !vs.inc_limit() { 372 | return; 373 | } 374 | while vs.cur().is_digit(10) { 375 | if !vs.inc_limit() { 376 | return; 377 | } 378 | } 379 | } 380 | if vs.cur() == 'e' { 381 | if !vs.inc_limit() { 382 | return; 383 | } 384 | scan_dec10_nest(vs); 385 | } 386 | } 387 | 388 | // advance until 'end' or whitespace 389 | fn scan_nonws_or_end(vs: &mut VecScanner, end: char) { 390 | while !is_whitespace(vs.cur()) && vs.cur() != end { 391 | if !vs.inc() { 392 | return; 393 | } 394 | } 395 | } 396 | 397 | // advance past pattern 398 | fn scan_pattern(vs: &mut VecScanner, fmt: &mut FmtResult) { 399 | // if invert, scan until character not in char_list 400 | // else scan while character is in char_list 401 | loop { 402 | let c = vs.cur(); 403 | let mut found = false; 404 | for &(start, end) in fmt.char_list.iter() { 405 | if c >= start && c <= end { 406 | found = true; 407 | break; 408 | } 409 | } 410 | if found == fmt.invert_char_list { 411 | return; 412 | } 413 | if !vs.inc() { 414 | return; 415 | } 416 | } 417 | } 418 | 419 | #[cfg(feature = "regex")] 420 | enum ReMatch { 421 | Captured { len: usize }, 422 | NoCapture, 423 | } 424 | 425 | #[cfg(feature = "regex")] 426 | fn scan_regex(vs: &mut VecScanner, fmt: &mut FmtResult) -> ReMatch { 427 | let re = fmt.regex.take().unwrap(); 428 | let remainder = vs.data[vs.pos..].iter().cloned().collect::(); 429 | if let Some(mat) = re.captures(&remainder) { 430 | vs.pos += remainder[..mat.get(0).unwrap().end()].chars().count(); 431 | if let Some(cap) = mat.get(1) { 432 | return ReMatch::Captured { len: cap.end() }; 433 | } 434 | } 435 | return ReMatch::NoCapture; 436 | } 437 | 438 | // return data matching the format from user input (else "") 439 | fn get_token(vs: &mut VecScanner, fmt: &mut FmtResult) -> String { 440 | let mut pos_start = vs.pos; 441 | match fmt.data_type { 442 | FmtType::OnlyEnd => {} // handled in scan() 443 | FmtType::NonWhitespaceOrEnd => 
scan_nonws_or_end(vs, fmt.end_char), 444 | FmtType::Dec10 => scan_dec10(vs, fmt.max_length), 445 | FmtType::Hex16 => scan_hex16(vs, fmt.max_length), 446 | FmtType::Flt => scan_float(vs, fmt.max_length), 447 | FmtType::Pattern => scan_pattern(vs, fmt), 448 | #[cfg(feature = "regex")] 449 | FmtType::Regex => { 450 | // if the regex has an internal group then we want to use the group 451 | // to select the substring, but either way the scan_regex function 452 | // will set pos to the end of the entire match consumed by the 453 | // regex 454 | match scan_regex(vs, fmt) { 455 | ReMatch::Captured { len } => { 456 | return vs.data[pos_start..pos_start + len] 457 | .iter() 458 | .cloned() 459 | .collect(); 460 | } 461 | ReMatch::NoCapture => {} 462 | } 463 | } 464 | } 465 | if fmt.data_type == FmtType::Dec10 || fmt.data_type == FmtType::Flt { 466 | // parse won't accept "+" in front of numbers 467 | if vs.data[pos_start] == '+' { 468 | pos_start += 1; 469 | } 470 | } 471 | vs.data[pos_start..vs.pos].iter().cloned().collect() 472 | } 473 | 474 | // Extract String tokens from the input string based on 475 | // the format string. See lib.rs for more info. 476 | // Returns an iterator of the String results. 
477 | pub fn scan(input_string: &str, format: &str) -> alloc::vec::IntoIter { 478 | let mut res: Vec = vec![]; 479 | let mut fmtstr = VecScanner::new(format.chars().collect()); 480 | let mut instr = VecScanner::new(input_string.chars().collect()); 481 | loop { 482 | let mut do_compare = true; 483 | if !skip_whitespace(&mut fmtstr) { 484 | break; 485 | } 486 | if !skip_whitespace(&mut instr) { 487 | break; 488 | } 489 | 490 | if fmtstr.cur() == '{' { 491 | if !fmtstr.inc() { 492 | break; 493 | } 494 | if fmtstr.cur() == '{' { 495 | // got an escaped {{ 496 | } else { 497 | let fmt = get_format(&mut fmtstr); 498 | let mut fmt = if let Some(fmt) = fmt { 499 | fmt 500 | } else { 501 | break; 502 | }; 503 | 504 | if fmt.data_type == FmtType::OnlyEnd && !instr.is_end() { 505 | // we didn't get an end of input where expected, so invalidate any matches 506 | return vec![String::from("")].into_iter(); 507 | } 508 | let data = get_token(&mut instr, &mut fmt); 509 | if fmt.store_result { 510 | if fmt.data_type == FmtType::Hex16 { 511 | let no_prefix = data.trim_start_matches("0x"); 512 | res.push(no_prefix.to_string()); 513 | } else { 514 | res.push(data); 515 | } 516 | } 517 | do_compare = false; 518 | } 519 | } else { 520 | if fmtstr.cur() == '}' { 521 | // handle escaped }} by skipping first '}' 522 | if !fmtstr.inc() { 523 | break; 524 | } 525 | } 526 | } 527 | if do_compare { 528 | if fmtstr.cur() != instr.cur() { 529 | return vec![String::from("")].into_iter(); 530 | // we had a non match! --> if we only break here we will return all matches found so far. 531 | // This will create a misbehaviour when there is something like `{d}in` as the in is not cared for. 
532 | } 533 | if !fmtstr.inc() { 534 | break; 535 | } 536 | if !instr.inc() { 537 | break; 538 | } 539 | } 540 | } 541 | res.into_iter() 542 | } 543 | 544 | #[test] 545 | fn test_simple() { 546 | let mut res = scan(" data 42-12=30", "data {d}-{d}={d}"); 547 | assert_eq!(res.next().unwrap(), "42"); 548 | assert_eq!(res.next().unwrap(), "12"); 549 | assert_eq!(res.next().unwrap(), "30"); 550 | assert_eq!(res.next(), None); 551 | } 552 | 553 | #[test] 554 | fn test_plus_sign() { 555 | let mut res = scan("+42", "{d}"); 556 | assert_eq!(res.next().unwrap(), "42"); 557 | let mut res = scan("+42.7", "{f}"); 558 | assert_eq!(res.next().unwrap(), "42.7"); 559 | } 560 | 561 | #[test] 562 | fn test_complex() { 563 | let mut res = scan( 564 | "test{123 bye -456} hi -22.7e-1 +1.23fg", 565 | "test{{{d} bye {}}} hi {f} {f}", 566 | ); 567 | assert_eq!(res.next().unwrap(), "123"); 568 | assert_eq!(res.next().unwrap(), "-456"); 569 | assert_eq!(res.next().unwrap(), "-22.7e-1"); 570 | assert_eq!(res.next().unwrap(), "1.23"); 571 | assert_eq!(res.next(), None); 572 | } 573 | 574 | #[test] 575 | fn test_endline() { 576 | let mut res = scan("hi 15.7\r\n", "{} {}"); 577 | assert_eq!(res.next().unwrap(), "hi"); 578 | assert_eq!(res.next().unwrap(), "15.7"); 579 | } 580 | 581 | #[test] 582 | fn test_hex() { 583 | let mut res = scan("hi 0x15 ff fg", "hi {x} {x} {x}"); 584 | assert_eq!(res.next().unwrap(), "15"); 585 | assert_eq!(res.next().unwrap(), "ff"); 586 | assert_eq!(res.next().unwrap(), "f"); 587 | } 588 | 589 | #[test] 590 | fn test_string() { 591 | let mut res = scan("The quick brown fox", "{s}{s} {}n {s}x"); 592 | assert_eq!(res.next().unwrap(), "The"); 593 | assert_eq!(res.next().unwrap(), "quick"); 594 | assert_eq!(res.next().unwrap(), "brow"); 595 | assert_eq!(res.next().unwrap(), "fox"); 596 | } 597 | 598 | #[test] 599 | fn test_pattern() { 600 | let mut res = scan( 601 | "hi abcdefghijklmnop 0123456789", 602 | "hi {[a-l]}{[^a-l ]} {[01234-8]}{[9]}", 603 | ); 604 | 
assert_eq!(res.next().unwrap(), "abcdefghijkl"); 605 | assert_eq!(res.next().unwrap(), "mnop"); 606 | assert_eq!(res.next().unwrap(), "012345678"); 607 | assert_eq!(res.next().unwrap(), "9"); 608 | 609 | let mut res = scan("xyz 01234567λ89", "xyz {[40-3]}{*[65]}{[7-78-9λ]}"); 610 | assert_eq!(res.next().unwrap(), "01234"); 611 | assert_eq!(res.next().unwrap(), "7λ89"); 612 | } 613 | 614 | #[test] 615 | fn test_width() { 616 | let mut res = scan("01123fe071 432", "{2d}{3d}{4x}{2d} {3d}"); 617 | assert_eq!(res.next().unwrap(), "01"); 618 | assert_eq!(res.next().unwrap(), "123"); 619 | assert_eq!(res.next().unwrap(), "fe07"); 620 | assert_eq!(res.next().unwrap(), "1"); 621 | assert_eq!(res.next().unwrap(), "432"); 622 | } 623 | 624 | #[test] 625 | fn match_end() { 626 | let mut res = scan("12 hi", "{d} hi{e}"); 627 | assert_eq!(res.next().unwrap(), "12"); 628 | assert_eq!(res.next(), None); 629 | let mut res = scan("12 hi2", "{d} hi{e}"); 630 | assert_eq!(res.next().unwrap(), ""); 631 | } 632 | 633 | #[cfg(all(test, feature = "regex"))] 634 | mod test_regex { 635 | use super::scan; 636 | 637 | #[test] 638 | fn simple() { 639 | let mut res = scan("one (hello) two", "one ({/[^)]+/}) two"); 640 | assert_eq!(res.next().unwrap(), "hello"); 641 | } 642 | 643 | #[test] 644 | fn mixed_regex_and_pattern() { 645 | let mut res = scan("one ((hello)) two", r#"one ({/[^)]+\)?/}) two"#); 646 | assert_eq!(res.next().unwrap(), "(hello)"); 647 | } 648 | 649 | #[test] 650 | fn bad_pattern() { 651 | // note the extra close paren 652 | let mut scanner = scan("one (hello)) two", "one ({/[^)]+/}) two"); 653 | assert_eq!(scanner.next().unwrap(), ""); 654 | } 655 | 656 | #[test] 657 | fn uses_group_if_present() { 658 | let mut res = scan("one (((hello))) two", r#"one {/(\(.*\)) /}two"#); 659 | assert_eq!(res.next().unwrap(), "(((hello)))"); 660 | } 661 | 662 | #[test] 663 | fn unicode() { 664 | let mut res = scan("й", "{/.*/}"); 665 | assert_eq!(res.next().unwrap(), "й"); 666 | } 667 | } 668 
| --------------------------------------------------------------------------------