├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
└── src
    ├── lib.rs
    └── parse.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | *~
4 | doc
5 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: rust
2 | rust:
3 |     - stable
4 |     - beta
5 |     - nightly
6 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "scan_fmt"
 3 | version = "0.2.6"
 4 | authors = ["wlentz"]
 5 | description = "A simple scanf()-like input for Rust"
 6 | repository = "https://github.com/wlentz/scan_fmt"
 7 | license = "MIT"
 8 | readme = "README.md"
 9 | 
10 | [features]
11 | default = ["regex", "std"]
12 | std = []
13 | 
14 | [dependencies]
15 | regex = { version = "1", optional = true }
16 | 
17 | [lib]
18 | name = "scan_fmt"
19 | path = "src/lib.rs"
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 wlentz
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # scan_fmt ![BuildStatus](https://travis-ci.org/wlentz/scan_fmt.svg?branch=master)
 2 | scan_fmt provides a simple scanf()-like input for Rust.  The goal is to make it easier to read data from a string or stdin.
 3 | 
 4 | Currently the format string supports the following special sequences:
 5 | <pre>
 6 |    {{ = escape for '{'
 7 |    }} = escape for '}'
 8 |    {} = return any value (until next whitespace)
 9 |    {d} = return base-10 decimal
10 |    {x} = return hex (0xab or ab)
11 |    {f} = return float
12 |    {*d} = "*" as the first character means "match but don't return"
13 |    {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
14 |    {[...]} = return pattern.
15 |      ^ inverts if it is the first character
16 |      - is for ranges.  For a literal - put it at the start or end.
17 |      To add a literal ] do "[]abc]"
18 |    {e} = doesn't return a value, but matches end of line.  Use this if you
19 |          don't want to ignore potential extra characters at end of input.
20 |    Examples:
21 |      {[0-9ab]} = match 0-9 or a or b
22 |      {[^,.]} = match anything but , or .
23 |    {/.../} = return regex inside of `//`.
24 |      If there is a single capture group inside of the slashes then
25 |      that group will make up the pattern.
26 |    Examples:
27 |      {/[0-9ab]/} = same as {[0-9ab]}, above
28 |      {/a+/} = matches at least one `a`, greedily
29 |      {/jj(a*)jj/} = matches any number of `a`s, but only if
30 |        they're surrounded by two `j`s
31 | </pre>
32 | 
33 | ### Examples
34 | ```rust
35 |  #[macro_use] extern crate scan_fmt;
36 |  use std::error::Error ;
37 |  fn main() -> Result<(),Box<dyn Error>> {
38 |    let (a,b,c) = scan_fmt!( "hello 0x12 345 bye",  // input string
39 |                             "hello {x} {} {}",     // format
40 |                             [hex u8], i32, String) ? ;   // types of a, b and c
41 |    assert_eq!( a, 0x12 ) ;
42 |    assert_eq!( b, 345 ) ;
43 |    assert_eq!( c, "bye" ) ;
44 | 
45 |    println!("Enter something like: 123-22");
46 |    let (c,d) = scanln_fmt!( "{d}-{d}", // format
47 |                             u16, u8) ? ;  // types of c and d
48 |    println!("Got {} and {}",c,d) ;
49 |    // Note - currently scanln_fmt! just calls unwrap() on read_line()
50 | 
51 |    let (a,b) = scan_fmt_some!( "hello 12 345", // input string
52 |                                "hello {} {}",   // format
53 |                                u8, i32) ;   // types
54 |    assert_eq!( a, Some(12) ) ;
55 |    assert_eq!( b, Some(345) ) ;
56 |    Ok(())
57 |   }
58 | ```
59 | 
60 | ### Limitations
61 | There is no compile-time warning if the number of {}'s in the format string doesn't match the number of return values.  Extra return values come back as None from scan_fmt_some! and make scan_fmt! return an Err.  See src/lib.rs for more details.
62 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2015-2019 Will Lentz.
  2 | // Licensed under the MIT license.
  3 | 
  4 | //! This crate provides a simple sscanf()-like interface to extract
  5 | //! data from strings and stdin.
  6 | //!
  7 | //! In version 0.2 scan_fmt! changed to return a Result.
  8 | //! Use scan_fmt_some! for the 0.1.x behavior.
  9 | //!
 10 | //! To use this crate, do:
 11 | //!
 12 | //! ```ignore
 13 | //! #[macro_use] extern crate scan_fmt;
 14 | //! ```
 15 | //!
 16 | //! Example to read from a string:
 17 | //!
 18 | //! ```rust
 19 | //! # #[macro_use] extern crate scan_fmt;
 20 | //! # fn main() {
 21 | //!   if let Ok((a,b)) = scan_fmt!( "-11 0x22", // input string
 22 | //!                                 "{d} {x}",  // format
 23 | //!                                 i8, [hex u8]) { // types
 24 | //!     assert_eq!( a, -11 ) ;
 25 | //!     assert_eq!( b, 0x22 ) ;
 26 | //!   }
 27 | //!
 28 | //!   let (a,b,c) = scan_fmt_some!( "hello 12 345 bye", // input string
 29 | //!                                 "hello {} {d} {}",  // format
 30 | //!                                 u8, i32, String);   // type of a-c Options
 31 | //!   assert_eq!( a, Some(12) ) ;
 32 | //!   assert_eq!( b, Some(345) ) ;
 33 | //!   assert_eq!( c, Some("bye".into()) ) ;
 34 | //! # }
 35 | //! ```
 36 | //!
 37 | //! Special format_string tokens:
 38 | //! <pre class="rust">
 39 | //!   {{ = escape for '{'
 40 | //!   }} = escape for '}'
 41 | //!   {} = return any value (until next whitespace)
 42 | //!   {d} = return base-10 decimal
 43 | //!   {x} = return hex (0xab or ab)
 44 | //!       = you must wrap the type in [hex type], e.g. "[hex u32]"
 45 | //!   {f} = return float
 46 | //!   {*d} = "*" as the first character means "match but don't return"
 47 | //!   {2d} or {2x} or {2f} = limit the maximum width to 2.  Any positive integer works.
 48 | //!   {[...]} = return pattern.
 49 | //!     ^ inverts if it is the first character
 50 | //!     - is for ranges.  For a literal - put it at the start or end.
 51 | //!     To add a literal ] do "[]abc]"
 52 | //!   {e} = doesn't return a value, but matches end of line.  Use this if you
 53 | //!         don't want to ignore potential extra characters at end of input.
 54 | //!   Examples:
 55 | //!     {[0-9ab]} = match 0-9 or a or b
 56 | //!     {[^,.]} = match anything but , or .
 57 | //!     {/.../} = return regex inside of `//`. (if the regex feature is enabled)
 58 | //!      If there is a single capture group inside of the slashes then
 59 | //!      that group will make up the pattern.
 60 | //!   Examples:
 61 | //!     {/[0-9ab]/} = same as {[0-9ab]}, above
 62 | //!     {/a+/} = matches at least one `a`, greedily
 63 | //!     {/jj(a*)jj/} = matches any number of `a`s, but only if
 64 | //!       they're surrounded by two `j`s
 65 | //! </pre>
 66 | //!
 67 | //! Example to read from stdin:
 68 | //!
 69 | //! ```ignore
 70 | //! # #[macro_use] extern crate scan_fmt;
 71 | //! # use std::error::Error ;
 72 | //! # fn main() -> Result<(),Box<dyn Error>> {
 73 | //!     let (a,b) = scanln_fmt!( "{}-{}", u16, u8) ? ;
 74 | //!     println!("Got {} and {}",a,b);
 75 | //!
 76 | //!     let (a,b) = scanln_fmt_some!( "{}-{}",   // format
 77 | //!                                  u16, u8);    // type of a&b Options
 78 | //!     match (a,b) {
 79 | //!       (Some(aa),Some(bb)) => println!("Got {} and {}",aa,bb),
 80 | //!       _ => println!("input error")
 81 | //!     }
 82 | //!     Ok(())
 83 | //! # }
 84 | //! ```
 85 | //!
 86 | //! ## LIMITATIONS:
 87 | //! There are no compile-time checks to make sure the number of fields in
 88 | //! the format string matches the number of return arguments.  Extra
 89 | //! return values will be None or cause a Result error.
 90 | //!
 91 | //! Like sscanf(), whitespace (including \n) is largely ignored.
 92 | //!
 93 | //! Conversion to output values is done using parse::<T>().
 94 | 
 95 | #![no_std]
 96 | 
 97 | #[cfg(feature = "regex")]
 98 | extern crate regex;
 99 | 
100 | #[cfg(any(test, doctest, feature = "std"))]
101 | extern crate std;
102 | 
103 | #[macro_use]
104 | extern crate alloc;
105 | 
106 | pub mod parse;
107 | 
108 | /// Internal helper for the scanning macros: takes the next token from `$res`
109 | /// and converts it.  `wrap` arms yield an `Option`; `no_wrap` arms yield a bare
110 | /// value and store a short reason in `$err` when the token is missing or bad.
111 | #[macro_export]
112 | macro_rules! scan_fmt_help {
113 |     // `[hex T]` -> Option<T>, converted with `T::from_str_radix(_, 16)`.
114 |     ( wrap $res:expr, [hex $arg:tt] ) => {
115 |         $res.next().and_then(|item| $arg::from_str_radix(&item, 16).ok())
116 |     };
117 |     // Any other type path -> Option<T>, converted with `parse::<T>()`.
118 |     ( wrap $res:expr , $($arg1:tt)::* ) => {
119 |         $res.next().and_then(|item| item.parse::<$($arg1)::*>().ok())
120 |     };
121 |     // `[hex T]` -> T; yields 0 and sets `$err` when there is no usable token.
122 |     ( no_wrap $err:ident, $res:expr, [hex $arg:tt] ) => {
123 |         match $res.next() {
124 |             None => {
125 |                 $err = "internal hex";
126 |                 0
127 |             }
128 |             Some(item) => match $arg::from_str_radix(&item, 16) {
129 |                 Ok(value) => value,
130 |                 Err(_) => {
131 |                     $err = "from_str_radix hex";
132 |                     0
133 |                 }
134 |             },
135 |         }
136 |     };
137 |     // Any other type path -> T.  A placeholder of type T is needed for the
138 |     // failure case; it comes from parsing "0" (most types) or, failing that,
139 |     // "0.0.0.0" (IpAddr).  Panics if T can be parsed from neither.
140 |     ( no_wrap $err:ident, $res:expr , $($arg1:tt)::* ) => {{
141 |         let fallback = "0"
142 |             .parse::<$($arg1)::*>()
143 |             .or_else(|_| "0.0.0.0".parse::<$($arg1)::*>())
144 |             .unwrap();
145 |         match $res.next() {
146 |             None => {
147 |                 $err = concat!("internal ", stringify!($($arg1)::*));
148 |                 fallback
149 |             }
150 |             Some(item) => {
151 |                 let parsed = item.parse::<$($arg1)::*>();
152 |                 if item.is_empty() {
153 |                     $err = "match::none";
154 |                 } else if parsed.is_err() {
155 |                     $err = concat!("parse::", stringify!($($arg1)::*));
156 |                 }
157 |                 parsed.unwrap_or(fallback)
158 |             }
159 |         }
160 |     }};
161 | }
162 | 
163 | /// Scans `$instr` against `$fmt` and returns one `Option<T>` per listed type
164 | /// (a tuple when several types are given); `None` marks a missing or bad field.
165 | #[macro_export]
166 | macro_rules! scan_fmt_some {
167 |     ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
168 |         let mut tokens = $crate::parse::scan($instr, $fmt);
169 |         ( $( $crate::scan_fmt_help!(wrap tokens, $($args)::*) ),* )
170 |     }};
171 | }
172 | 
173 | /// Scans `$instr` against `$fmt`.  Returns `Ok` with the converted value(s), or
174 | /// `Err(ScanError)` describing the last field that was missing or unparsable.
175 | #[macro_export]
176 | macro_rules! scan_fmt {
177 |     ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
178 |         let mut failure = "";
179 |         let mut tokens = $crate::parse::scan($instr, $fmt);
180 |         let values = ( $( $crate::scan_fmt_help!(no_wrap failure, tokens, $($args)::*) ),* );
181 |         if failure.is_empty() {
182 |             Ok(values)
183 |         } else {
184 |             Err($crate::parse::ScanError(failure.into()))
185 |         }
186 |     }};
187 | }
188 | 
189 | #[cfg(feature = "std")]
190 | pub use std_features::*;
191 | 
192 | #[cfg(feature = "std")]
193 | mod std_features {
194 |     use std::string::String;
195 |     /// Reads one line from stdin and returns it, line terminator included.
196 |     /// Panics if the read fails.  Used by the `scanln_fmt*` macros.
197 |     pub fn get_input_unwrap() -> String {
198 |         let mut input = String::new();
199 |         std::io::stdin().read_line(&mut input).unwrap();
200 |         input
201 |     }
202 | 
203 |     /// (a,+) = scanln_fmt!( format_string, types,+ )
204 |     /// <p>Same as scan_fmt!(), but reads input string from stdin.</p>
205 |     #[macro_export]
206 |     macro_rules! scanln_fmt {
207 |         ($($arg:tt)*) => {{ $crate::scan_fmt!(&$crate::get_input_unwrap(), $($arg)*) }}
208 |     }
209 |     /// (a,+) = scanln_fmt_some!( format_string, types,+ )
210 |     /// <p>Same as scan_fmt_some!(), but reads input string from stdin.</p>
211 |     #[macro_export]
212 |     macro_rules! scanln_fmt_some {
213 |         ($($arg:tt)*) => {{ $crate::scan_fmt_some!(&$crate::get_input_unwrap(), $($arg)*) }}
214 |     }
215 | }
216 | 
217 | #[cfg(test)]
218 | use alloc::string::{String, ToString};
219 | #[cfg(test)]
220 | use parse::ScanError;
221 | 
222 | // Test helper: asserts that floats `$v1` and `$v2` (of type `$t`, i.e. f32
223 | // or f64) differ by no more than twice that type's EPSILON.
224 | #[cfg(test)]
225 | macro_rules! assert_flt_eq {
226 |     ($t:ident, $v1:expr, $v2:expr) => {{ assert!(2.0 * std::$t::EPSILON >= ($v1 - $v2).abs()); }};
227 | }
228 | 
229 | #[cfg(test)] // helper for test_scan_all: checks that `?` works on scan_fmt!
230 | fn ret_scan_all() -> Result<(), ScanError> {
231 |     let (flt, hex) = scan_fmt!("1.2 e", "{f} {x}", f32, [hex u32])?;
232 |     assert_flt_eq!(f32, flt, 1.2);
233 |     assert_eq!(hex, 0xe); // "e" is read as a hex digit
234 |     Ok(())
235 | }
236 | 
237 | // scan_fmt! end to end: mixed fields, a failed conversion, and `?` propagation.
238 | #[test]
239 | fn test_scan_all() {
240 |     match scan_fmt!("hi1 3", "{} {d}", std::string::String, u32) {
241 |         Ok(pair) => assert_eq!(pair, ("hi1".to_string(), 3)),
242 |         Err(e) => panic!("error 0: {}", e),
243 |     }
244 |     match scan_fmt!("hi1 0xf -3", "{} {x} {d}", String, [hex u32], i8) {
245 |         Ok((word, hex, dec)) => {
246 |             assert_eq!(word, "hi1");
247 |             assert_eq!(hex, 0xf);
248 |             assert_eq!(dec, -3);
249 |         }
250 |         Err(e) => panic!("error 1: {}", e),
251 |     }
252 |     assert!(scan_fmt!("hi1 f", "{} {d}", String, i32).is_err());
253 |     let propagated = ret_scan_all();
254 |     std::println!("{:?}", propagated);
255 |     assert!(propagated.is_ok());
256 | }
257 | 
258 | // A leading '+' must be accepted for both integer and float fields.
259 | #[test]
260 | fn test_plus_sign() {
261 |     assert_eq!(scan_fmt_some!("+42", "{d}", i32), Some(42));
262 |     let float = scan_fmt_some!("+42.0", "{f}", f64);
263 |     assert_flt_eq!(f64, float.unwrap(), 42.0);
264 | }
265 | 
266 | // `{x}` fields with a "0x" prefix convert into the requested integer widths.
267 | #[test]
268 | fn test_hex() {
269 |     let fields = scan_fmt_some!("DEV 0xab 0x1234", "{} {x} {x}", String, [hex u32], [hex u64]);
270 |     assert_eq!(fields.0, Some("DEV".into()));
271 |     assert_eq!(fields.1, Some(0xab));
272 |     assert_eq!(fields.2, Some(0x1234));
273 | }
274 | 
275 | // Escaped braces in the format, assorted whitespace in the input, an
276 | // exponent-style float, and a value that overflows its target type.
277 | #[test]
278 | fn test_limited_data_range() {
279 |     let input = "test{\t 1e9 \n bye 257} hi  22.7e-1";
280 |     let format = "test{{ {} bye {d}}} hi {f}";
281 |     let (big, overflow, small) = scan_fmt_some!(input, format, f64, u8, f32);
282 |     // `{}` picks up the text "1e9", which is then converted by parse::<f64>()
283 |     assert_flt_eq!(f64, big.unwrap(), 1e9);
284 |     // 257 doesn't fit into a u8, so the conversion yields None
285 |     assert_eq!(overflow, None);
286 |     assert_flt_eq!(f32, small.unwrap(), 2.27);
287 | }
288 | 
289 | // Requesting more outputs than the format string has fields is not an
290 | // error for scan_fmt_some!: the surplus outputs simply come back as None.
291 | #[test]
292 | fn test_too_many_outputs() {
293 |     let got = scan_fmt_some!("a_aa bb_b c", "{} {s} {}", String, String, String, String);
294 |     let text = |s: &str| Some(s.to_string());
295 |     assert_eq!(got, (text("a_aa"), text("bb_b"), text("c"), None));
296 | }
297 | 
298 | // A `*` field is matched but produces no output, in both macro flavours.
299 | #[test]
300 | fn test_skip_assign() {
301 |     let (input, format) = ("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}");
302 |     let (first, last) = scan_fmt_some!(input, format, String, String);
303 |     assert_eq!((first.unwrap(), last.unwrap()), ("1 2 3".to_string(), "6 7".to_string()));
304 |     let (first, last) = scan_fmt!(input, format, String, String).unwrap();
305 |     assert_eq!((first.as_str(), last.as_str()), ("1 2 3", "6 7"));
306 | }
307 | 
308 | // Width prefixes cap how many characters each field may consume, giving
309 | // "1" | "23" | "ab7" | "1" | "2.1" (the rest of "2.1234" is left over).
310 | #[test]
311 | fn test_width_specifier() {
312 |     let format = "{1d}{2d}{3x}{4d}{3f}";
313 |     let (d1, d2, hex, d3, flt) =
314 |         scan_fmt!("123ab71 2.1234", format, u8, u8, [hex u16], u16, f32).unwrap();
315 |     assert_eq!((d1, d2), (1, 23));
316 |     assert_eq!(hex, 0xab7);
317 |     assert_eq!(d3, 1);
318 |     assert_flt_eq!(f32, flt, 2.1);
319 | }
320 | 
321 | #[test]
322 | fn test_err_equals() {
323 |     let a = scan_fmt!("hi 123", "hi {d", u8);
324 |     assert_eq!(a, Err(parse::ScanError("internal u8".to_string())));
325 | }
326 | 
327 | #[test]
328 | #[cfg(feature = "regex")] // `{/.../}` is only understood when the regex feature is on
329 | fn test_no_post_match_regex() {
330 |     assert_eq!(scan_fmt!("74in", "{d}{/in/}", u8, String), Ok((74, String::from("in"))));
331 |     let unmatched = scan_fmt!("74in", "{d}{/cm/}", u8, String);
332 |     assert_eq!(unmatched, Err(ScanError("match::none".to_string())));
333 | }
334 | 
335 | // Literal text after a field must match the input; a mismatch is reported
336 | // as "match::none".
337 | #[test]
338 | fn test_no_post_match() {
339 |     assert_eq!(scan_fmt!("17in", "{d}in", u8), Ok(17u8));
340 |     let mismatch = scan_fmt!("17in", "{d}cm", u8);
341 |     assert_eq!(mismatch, Err(ScanError("match::none".to_string())));
342 | }
343 | 
344 | #[test]
345 | fn test_match_end() {
346 |     let a = scan_fmt!("17in", "{d}in{e}", u8);
347 |     assert_eq!(a, Ok(17u8));
348 |     let a = scan_fmt!("17ink", "{d}in{e}", u8);
349 |     assert_eq!(a, Err(parse::ScanError("match::none".to_string())));
350 | }
351 | 
352 | // IpAddr cannot be parsed from "0", so this exercises the "0.0.0.0"
353 | // placeholder path inside scan_fmt_help!.
354 | #[test]
355 | fn test_ip_addr() {
356 |     use std::net::{IpAddr, Ipv4Addr};
357 |     let expected = IpAddr::V4(Ipv4Addr::new(185, 187, 165, 163));
358 |     assert_eq!(scan_fmt!("x 185.187.165.163 y", "x {} y", std::net::IpAddr), Ok(expected));
359 | }
360 | 


--------------------------------------------------------------------------------
/src/parse.rs:
--------------------------------------------------------------------------------
  1 | // Copyright 2015-2019 Will Lentz.
  2 | // Licensed under the MIT license.
  3 | use alloc::string::{String, ToString};
  4 | 
  5 | #[cfg(feature = "regex")]
  6 | use regex::Regex;
  7 | 
  8 | #[derive(Debug, PartialEq)]
  9 | enum FmtType { // kind of field selected by the type character of a `{...}` token
 10 |     NonWhitespaceOrEnd, // `{}` or `{s}`
 11 |     OnlyEnd,            // `{e}`
 12 |     Pattern,            // `{[...]}`
 13 |     Dec10,              // `{d}`
 14 |     Hex16,              // `{x}`
 15 |     Flt,                // `{f}`
 16 |     #[cfg(feature = "regex")]
 17 |     Regex, // `{/.../}`
 18 | }
 19 | 
 20 | #[cfg(feature = "std")]
 21 | use std::error::Error;
 22 | 
 23 | use alloc::vec::Vec;
 24 | use core::fmt;
 25 | 
 26 | #[derive(Debug, PartialEq)]
 27 | pub struct ScanError(pub String);
 28 | 
 29 | #[cfg(feature = "std")]
 30 | impl Error for ScanError {}
 31 | 
 32 | impl fmt::Display for ScanError {
 33 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 34 |         write!(f, "Scan error: {}", self.0)
 35 |     }
 36 | }
 37 | 
 38 | // Handle the following format strings:
 39 | // {}X -> everything until whitespace or next character 'X'
 40 | // {s} -> everything until whitespace
 41 | // {d} -> only base-10 integers
 42 | // {x} -> only unsigned base-16 integers.  Allow 0xfff or fff
 43 | // {f} -> only floats
 44 | // {*} -> get token, but don't assign it to output
 45 | // {[]} -> only search for given characters
 46 | //         starting with '^' negates everything
 47 | //         ranges with '-' work.  To include '-' put it at end or start
 48 | //         to include ']' put it at the start (or right after ^)
 49 | //  e.g., {[^,]} -> match everything until next comma
 50 | 
 51 | // Cursor over the characters of a string (scan() builds one for the
 52 | // format string and one for the input), with an optional width limit
 53 | // used by the numeric scanners.
 54 | struct VecScanner {
 55 |     data: Vec<char>,
 56 |     pos: usize,       // index of the current character in `data`
 57 |     limit_pos: usize, // if non-0, inc_limit() reports "stop" once `pos` gets here
 58 | }
 59 | 
 60 | impl VecScanner {
 61 |     // Wraps `d` with the cursor on the first character and no width limit.
 62 |     fn new(d: Vec<char>) -> VecScanner {
 63 |         VecScanner {
 64 |             limit_pos: 0,
 65 |             pos: 0,
 66 |             data: d,
 67 |         }
 68 |     }
 69 | 
 70 |     // Character under the cursor.  Plain indexing: panics when the cursor
 71 |     // is at or past the end of the data.
 72 |     fn cur(&self) -> char {
 73 |         self.data[self.pos]
 74 |     }
 75 | 
 76 |     // Character `n` positions after the cursor, or None if there is none.
 77 |     fn peek(&self, n: usize) -> Option<char> {
 78 |         self.data.get(self.pos + n).cloned()
 79 |     }
 80 | 
 81 |     // True once the cursor has moved past the last character.
 82 |     fn is_end(&self) -> bool {
 83 |         self.data.len() <= self.pos
 84 |     }
 85 | 
 86 |     // Advances by one character; returns true if we have more data.
 87 |     fn inc(&mut self) -> bool {
 88 |         self.pos += 1;
 89 |         self.pos < self.data.len()
 90 |     }
 91 | 
 92 |     // Sets the maximum position for inc_limit(): Some(n) allows `n` more
 93 |     // characters from the current position, None removes the limit.
 94 |     fn start_inc_limit(&mut self, max_length: Option<usize>) {
 95 |         self.limit_pos = max_length.map_or(0, |n| self.pos + n);
 96 |     }
 97 | 
 98 |     // True when a limit is set (non-0) and the cursor has reached it.
 99 |     fn hit_inc_limit(&mut self) -> bool {
100 |         self.limit_pos != 0 && self.limit_pos <= self.pos
101 |     }
102 | 
103 |     // Same as inc(), but also returns false once the limit set by
104 |     // start_inc_limit() has been reached.
105 |     fn inc_limit(&mut self) -> bool {
106 |         let more_data = self.inc();
107 |         let at_limit = self.hit_inc_limit();
108 |         more_data && !at_limit
109 |     }
110 | }
111 | 
112 | // True for the four characters this scanner treats as whitespace:
113 | // space, tab, line feed and carriage return.
114 | fn is_whitespace(c: char) -> bool {
115 |     const BLANKS: [char; 4] = [' ', '\t', '\n', '\r'];
116 |     BLANKS.contains(&c)
117 | }
118 | 
119 | // Moves `vs` forward past any run of whitespace under the cursor.
120 | //
121 | // Returns true if the cursor then rests on a non-whitespace character,
122 | // false if the end of the data was reached instead.  Safe to call when
123 | // `vs` is already at the end.
124 | fn skip_whitespace(vs: &mut VecScanner) -> bool {
125 |     while !vs.is_end() && is_whitespace(vs.cur()) {
126 |         vs.inc();
127 |     }
128 |     !vs.is_end()
129 | }
130 | 
131 | struct FmtResult { // parsed form of one `{...}` token of the format string
132 |     data_type: FmtType,
133 |     max_length: Option<usize>, // optional field width, e.g. the "2" in {2d}
134 |     store_result: bool,        // false when the token starts with '*'
135 |     invert_char_list: bool,    // true when a [...] pattern starts with '^'
136 |     end_char: char, // for `{}`: the format character right after the '}' (' ' if none)
137 |     // Store pattern characters and ranges.  It might be worth
138 |     // optimizing this if format strings are long.
139 |     char_list: Vec<(char, char)>,
140 |     #[cfg(feature = "regex")]
141 |     regex: Option<Regex>,
142 | }
143 | 
144 | // Parses one `{...}` token of the format string.  See top-level docs for
145 | // allowed formats.
146 | //
147 | // Starts right after the opening '{' (the caller must make sure a character
148 | // is there) and consumes characters up to and including the final '}'.
149 | // Returns None if the token is malformed or the format string ends before
150 | // the token is complete.
151 | // Note that '{' and '}' can exist unescaped inside [].
152 | fn get_format(fstr: &mut VecScanner) -> Option<FmtResult> {
153 |     let mut token = FmtResult {
154 |         data_type: FmtType::NonWhitespaceOrEnd,
155 |         max_length: None,
156 |         store_result: true,
157 |         invert_char_list: false,
158 |         end_char: ' ',
159 |         char_list: Vec::new(),
160 |         #[cfg(feature = "regex")]
161 |         regex: None,
162 |     };
163 | 
164 |     // A leading '*' means "match but don't return".
165 |     if fstr.cur() == '*' {
166 |         token.store_result = false;
167 |         if !fstr.inc() {
168 |             return None;
169 |         }
170 |     }
171 | 
172 |     // Bare `{}` or `{*}`: if anything follows the token, remember that
173 |     // character so the scan can stop on it as well as on whitespace.
174 |     if fstr.cur() == '}' {
175 |         if fstr.inc() {
176 |             token.end_char = fstr.cur();
177 |         }
178 |         return Some(token);
179 |     }
180 | 
181 |     // Optional field width specifier (e.g., the "2" in {2d}).
182 |     let width_start = fstr.pos;
183 |     while fstr.cur().is_digit(10) {
184 |         if !fstr.inc() {
185 |             return None;
186 |         }
187 |     }
188 |     if width_start < fstr.pos {
189 |         let digits: String = fstr.data[width_start..fstr.pos].iter().collect();
190 |         // A width too large for usize fails to parse and is silently dropped.
191 |         token.max_length = digits.parse::<usize>().ok();
192 |     }
193 | 
194 |     // The type character selects the kind of field.
195 |     token.data_type = match fstr.cur() {
196 |         's' => FmtType::NonWhitespaceOrEnd,
197 |         'e' => FmtType::OnlyEnd,
198 |         'd' => FmtType::Dec10,
199 |         'x' => FmtType::Hex16,
200 |         'f' => FmtType::Flt,
201 |         '[' => FmtType::Pattern,
202 |         #[cfg(feature = "regex")]
203 |         '/' => FmtType::Regex,
204 |         _ => return None, // unexpected format
205 |     };
206 |     // Step past the type character; a token cut off here is malformed.
207 |     if !fstr.inc() {
208 |         return None;
209 |     }
210 | 
211 |     match token.data_type {
212 |         // Patterns and regexes parse their own body and the closing '}'.
213 |         FmtType::Pattern => handle_pattern(token, fstr),
214 |         #[cfg(feature = "regex")]
215 |         FmtType::Regex => handle_regex(token, fstr),
216 |         // Every other type must be closed right away.
217 |         _ if fstr.cur() != '}' => None,
218 |         _ => {
219 |             fstr.inc(); // go past closing '}'
220 |             Some(token)
221 |         }
222 |     }
223 | }
224 | 
225 | // Parses the body of a `{[...]}` token into `res.char_list`.
226 | //
227 | // `fstr` must sit on the first character after '['.  A leading '^' inverts
228 | // the set; a ']' or '-' directly after that is taken literally; `a-z` adds
229 | // a range unless the '-' is the last character before ']'.  Every entry is
230 | // stored as a (low, high) pair.  On success the cursor is left just past
231 | // the closing '}'.  Returns None if the format string ends early or the
232 | // ']' is not followed by '}'.
233 | fn handle_pattern(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
234 |     res.data_type = FmtType::Pattern;
235 | 
236 |     if fstr.cur() == '^' {
237 |         res.invert_char_list = true;
238 |         if !fstr.inc() {
239 |             return None;
240 |         }
241 |     }
242 | 
243 |     let first = fstr.cur();
244 |     if first == ']' || first == '-' {
245 |         res.char_list.push((first, first));
246 |         if !fstr.inc() {
247 |             return None;
248 |         }
249 |     }
250 | 
251 |     // look for end of [] pattern
252 |     while fstr.cur() != ']' {
253 |         let low = fstr.cur();
254 |         let is_range = fstr.peek(1) == Some('-') && fstr.peek(2) != Some(']');
255 |         // For a range, step onto the '-' and then past it.  Running out of
256 |         // format string here leaves the loop; the inc() after the loop then
257 |         // fails and the token is rejected.
258 |         if is_range && !(fstr.inc() && fstr.inc()) {
259 |             break;
260 |         }
261 |         // Single characters are stored as the range (c, c).
262 |         res.char_list.push((low, fstr.cur()));
263 |         if !fstr.inc() {
264 |             return None;
265 |         }
266 |     }
267 |     // go past ']', then require and consume the closing '}'
268 |     if !fstr.inc() || fstr.cur() != '}' {
269 |         return None;
270 |     }
271 |     fstr.inc();
272 | 
273 |     Some(res)
274 | }
275 | 
276 | // Parses the body of a `{/.../}` token: compiles the regex and stores it in
277 | // `res.regex`.
278 | //
279 | // Starts on the first character after the opening '/' and, on success,
280 | // leaves the cursor just past the closing '}'.  The regex ends at the first
281 | // '/' that is not escaped by a backslash.  Returns None if that '/' or the
282 | // '}' after it is missing, or if the text is not a valid regex.
283 | #[cfg(feature = "regex")]
284 | fn handle_regex(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
285 |     let start = fstr.pos;
286 |     let mut escaped = false;
287 |     loop {
288 |         // Check for the end before every cur(): an unterminated regex
289 |         // must be rejected, not index past the format string.
290 |         if fstr.is_end() {
291 |             return None;
292 |         }
293 |         match fstr.cur() {
294 |             '/' if !escaped => break,
295 |             // A backslash escapes the next character, unless it is itself
296 |             // escaped (as in `\\/`, where the '/' does end the regex).
297 |             '\\' if !escaped => escaped = true,
298 |             _ => escaped = false,
299 |         }
300 |         fstr.inc();
301 |     }
302 |     // Anchor at the start: the regex must match right at the cursor.
303 |     let pattern: String = Some('^')
304 |         .into_iter()
305 |         .chain(fstr.data[start..fstr.pos].iter().cloned())
306 |         .collect();
307 |     res.regex = Some(Regex::new(&pattern).ok()?);
308 | 
309 |     // Step over the closing '/'; the token must then end with '}'.
310 |     if !fstr.inc() || fstr.cur() != '}' {
311 |         return None;
312 |     }
313 |     fstr.inc();
314 |     Some(res)
315 | }
316 | 
317 | fn scan_dec10(vs: &mut VecScanner, max_length: Option<usize>) {
318 |     // advance past [+-]{0,1}[0-9]+, consuming at most max_length characters (None = no cap)
319 |     vs.start_inc_limit(max_length);
320 |     scan_dec10_nest(vs);
321 | }
322 | 
323 | // Advances past an optionally signed run of decimal digits,
324 | // [+-]{0,1}[0-9]*, stopping early at the end of the data or at the width
325 | // limit.  Assumes someone has called start_inc_limit() and that `vs` is
326 | // not at the end of the data on entry.  The caller is not told why the
327 | // scan stopped.
328 | fn scan_dec10_nest(vs: &mut VecScanner) {
329 |     // optional sign
330 |     let signed = vs.cur() == '+' || vs.cur() == '-';
331 |     if signed && !vs.inc_limit() {
332 |         return;
333 |     }
334 | 
335 |     // Digits.  inc_limit() returning true means the cursor is still on a
336 |     // valid character, so the next cur() stays in bounds.
337 |     while vs.cur().is_digit(10) && vs.inc_limit() {
338 |         // the loop condition does all the work
339 |     }
340 | }
341 | 
342 | // Advances past a base-16 number, (0x){0,1}[0-9a-fA-F]*, consuming at
343 | // most `max_length` characters when a width is given (None = no cap).
344 | //
345 | // The '0' and the 'x' of the prefix are tested one after the other and
346 | // independently, so a leading 'x' without '0' is stepped over as well.
347 | // `vs` must not be at the end of the data on entry.
348 | fn scan_hex16(vs: &mut VecScanner, max_length: Option<usize>) {
349 |     vs.start_inc_limit(max_length);
350 | 
351 |     // optional prefix
352 |     for &prefix_char in ['0', 'x'].iter() {
353 |         if vs.cur() == prefix_char && !vs.inc_limit() {
354 |             return;
355 |         }
356 |     }
357 | 
358 |     // hex digits, up to a non-digit, the end of the data or the width
359 |     // limit (inc_limit() is false in the latter two cases)
360 |     while vs.cur().is_digit(16) && vs.inc_limit() {}
361 | }
362 | 
363 | // Advances past a float: [+-]{0,1}[0-9]*, then optional .[0-9]*, then
364 | // optional e[+-]{0,1}[0-9]*, consuming at most `max_length` characters.
365 | // scan_dec10_nest() does not report why it stopped, so end-of-data and the
366 | // width limit are re-checked after the integer part; otherwise "42" would
367 | // make cur() index past the data and {2f} on "12.5" would eat the '.'.
368 | fn scan_float(vs: &mut VecScanner, max_length: Option<usize>) {
369 |     vs.start_inc_limit(max_length);
370 |     scan_dec10_nest(vs); // integer part
371 |     if vs.is_end() || vs.hit_inc_limit() {
372 |         return;
373 |     }
374 |     if vs.cur() == '.' {
375 |         let mut more = vs.inc_limit();
376 |         while more && vs.cur().is_digit(10) {
377 |             more = vs.inc_limit(); // fractional digits
378 |         }
379 |         if !more {
380 |             return;
381 |         }
382 |     }
383 |     if vs.cur() == 'e' && vs.inc_limit() {
384 |         scan_dec10_nest(vs); // exponent: optional sign, then digits
385 |     }
386 | }
387 | 
388 | // Advances until the cursor rests on whitespace or on `end`, or until the
389 | // data runs out.  get_token() passes the token's `end_char` as `end`.
390 | // `vs` must not be at the end of the data on entry.
391 | fn scan_nonws_or_end(vs: &mut VecScanner, end: char) {
392 |     let stops_here = |c: char| c == end || is_whitespace(c);
393 |     // inc() returns false once the data is exhausted, which ends the loop
394 |     while !stops_here(vs.cur()) && vs.inc() {}
395 | }
396 | 
397 | // Advances across the characters accepted by a `{[...]}` pattern.
398 | //
399 | // A character is "listed" when it lies inside any (low, high) pair of
400 | // `fmt.char_list`.  Scanning continues while characters are listed or,
401 | // with `fmt.invert_char_list` set, while they are not.  It stops on the
402 | // first rejected character or when the data runs out.  `vs` must not be
403 | // at the end of the data on entry.
404 | fn scan_pattern(vs: &mut VecScanner, fmt: &mut FmtResult) {
405 |     let want_listed = !fmt.invert_char_list;
406 |     loop {
407 |         let c = vs.cur();
408 |         let listed = fmt
409 |             .char_list
410 |             .iter()
411 |             .any(|&(low, high)| low <= c && c <= high);
412 |         // stop on a rejected character, or when inc() reports no more data
413 |         if listed != want_listed || !vs.inc() {
414 |             return;
415 |         }
416 |     }
417 | }
418 | 
419 | // Outcome of scan_regex(); offsets are in chars, relative to where the scan began.
420 | #[cfg(feature = "regex")]
421 | enum ReMatch {
422 |     Captured { start: usize, len: usize }, // span of capture group 1
423 |     NoCapture, // no match, or a match in which group 1 did not take part
424 | }
425 | 
426 | // Applies the token's regex at the cursor and moves `vs` past the whole
427 | // match (the cursor stays put when nothing matches).
428 | #[cfg(feature = "regex")]
429 | fn scan_regex(vs: &mut VecScanner, fmt: &mut FmtResult) -> ReMatch {
430 |     let re = fmt.regex.take().unwrap();
431 |     let remainder = vs.data[vs.pos..].iter().cloned().collect::<String>();
432 |     if let Some(caps) = re.captures(&remainder) {
433 |         // the regex crate reports byte offsets; the scanner counts chars
434 |         vs.pos += remainder[..caps.get(0).unwrap().end()].chars().count();
435 |         if let Some(group) = caps.get(1) {
436 |             return ReMatch::Captured {
437 |                 start: remainder[..group.start()].chars().count(),
438 |                 len: group.as_str().chars().count(),
439 |             };
440 |         }
441 |     }
442 |     ReMatch::NoCapture
443 | }
444 | 
445 | // return data matching the format from user input (else "")
446 | fn get_token(vs: &mut VecScanner, fmt: &mut FmtResult) -> String {
447 |     let mut pos_start = vs.pos;
448 |     match fmt.data_type {
449 |         FmtType::OnlyEnd => {} // handled in scan()
450 |         FmtType::NonWhitespaceOrEnd => scan_nonws_or_end(vs, fmt.end_char),
451 |         FmtType::Dec10 => scan_dec10(vs, fmt.max_length),
452 |         FmtType::Hex16 => scan_hex16(vs, fmt.max_length),
453 |         FmtType::Flt => scan_float(vs, fmt.max_length),
454 |         FmtType::Pattern => scan_pattern(vs, fmt),
455 |         #[cfg(feature = "regex")]
456 |         FmtType::Regex => {
457 |             // With a capture group, only the group's own text is returned
458 |             // (not whatever precedes it inside the match), but scan_regex()
459 |             // has still moved the cursor past the entire match.
460 |             if let ReMatch::Captured { start, len } = scan_regex(vs, fmt) {
461 |                 let from = pos_start + start;
462 |                 return vs.data[from..from + len].iter().cloned().collect();
463 |             }
464 |         }
465 |     }
466 |     if fmt.data_type == FmtType::Dec10 || fmt.data_type == FmtType::Flt {
467 |         // parse<i32/f32> won't accept "+" in front of numbers
468 |         if vs.data.get(pos_start) == Some(&'+') {
469 |             pos_start += 1;
470 |         }
471 |     }
472 |     vs.data[pos_start..vs.pos].iter().cloned().collect()
473 | }
473 | 
474 | // Extract String tokens from the input string based on
475 | // the format string.  See lib.rs for more info.
476 | // Returns an iterator of the String results.
477 | pub fn scan(input_string: &str, format: &str) -> alloc::vec::IntoIter<String> {
478 |     let mut res: Vec<String> = vec![];
479 |     let mut fmtstr = VecScanner::new(format.chars().collect());
480 |     let mut instr = VecScanner::new(input_string.chars().collect());
481 |     loop {
482 |         let mut do_compare = true;
483 |         if !skip_whitespace(&mut fmtstr) {
484 |             break;
485 |         }
486 |         if !skip_whitespace(&mut instr) {
487 |             break;
488 |         }
489 | 
490 |         if fmtstr.cur() == '{' {
491 |             if !fmtstr.inc() {
492 |                 break;
493 |             }
494 |             if fmtstr.cur() == '{' {
495 |                 // got an escaped {{
496 |             } else {
497 |                 let fmt = get_format(&mut fmtstr);
498 |                 let mut fmt = if let Some(fmt) = fmt {
499 |                     fmt
500 |                 } else {
501 |                     break;
502 |                 };
503 | 
504 |                 if fmt.data_type == FmtType::OnlyEnd && !instr.is_end() {
505 |                     // we didn't get an end of input where expected, so invalidate any matches
506 |                     return vec![String::from("")].into_iter();
507 |                 }
508 |                 let data = get_token(&mut instr, &mut fmt);
509 |                 if fmt.store_result {
510 |                     if fmt.data_type == FmtType::Hex16 {
511 |                         let no_prefix = data.trim_start_matches("0x");
512 |                         res.push(no_prefix.to_string());
513 |                     } else {
514 |                         res.push(data);
515 |                     }
516 |                 }
517 |                 do_compare = false;
518 |             }
519 |         } else {
520 |             if fmtstr.cur() == '}' {
521 |                 // handle escaped }} by skipping first '}'
522 |                 if !fmtstr.inc() {
523 |                     break;
524 |                 }
525 |             }
526 |         }
527 |         if do_compare {
528 |             if fmtstr.cur() != instr.cur() {
529 |                 return vec![String::from("")].into_iter();
530 |                 // we had a non match! --> if we only break here we will return all matches found so far.
531 |                 // This will create a misbehaviour when there is something like `{d}in` as the in is not cared for.
532 |             }
533 |             if !fmtstr.inc() {
534 |                 break;
535 |             }
536 |             if !instr.inc() {
537 |                 break;
538 |             }
539 |         }
540 |     }
541 |     res.into_iter()
542 | }
543 | 
#[test]
fn test_simple() {
    // literal text is matched, each {d} yields one decimal token, nothing else
    let tokens: Vec<String> = scan(" data 42-12=30", "data {d}-{d}={d}").collect();
    assert_eq!(tokens, ["42", "12", "30"]);
}
552 | 
#[test]
fn test_plus_sign() {
    // a leading '+' is accepted but not part of the returned token
    let int_token = scan("+42", "{d}").next();
    assert_eq!(int_token, Some(String::from("42")));

    let float_token = scan("+42.7", "{f}").next();
    assert_eq!(float_token, Some(String::from("42.7")));
}
560 | 
#[test]
fn test_complex() {
    // "{{" and "}}" in the format stand for literal braces in the input
    let input = "test{123  bye -456} hi  -22.7e-1 +1.23fg";
    let format = "test{{{d} bye {}}} hi {f} {f}";
    let tokens: Vec<String> = scan(input, format).collect();
    assert_eq!(tokens, ["123", "-456", "-22.7e-1", "1.23"]);
}
573 | 
#[test]
fn test_endline() {
    // the trailing "\r\n" must not end up in the last token
    let tokens: Vec<String> = scan("hi 15.7\r\n", "{} {}").take(2).collect();
    assert_eq!(tokens, ["hi", "15.7"]);
}
580 | 
#[test]
fn test_hex() {
    // "0x" prefix is stripped; scanning stops at the first non-hex character
    let tokens: Vec<String> = scan("hi 0x15 ff fg", "hi {x} {x} {x}")
        .take(3)
        .collect();
    assert_eq!(tokens, ["15", "ff", "f"]);
}
588 | 
#[test]
fn test_string() {
    // a field followed by a literal stops at that literal ("brow" before 'n')
    let tokens: Vec<String> = scan("The quick brown fox", "{s}{s} {}n {s}x")
        .take(4)
        .collect();
    assert_eq!(tokens, ["The", "quick", "brow", "fox"]);
}
597 | 
#[test]
fn test_pattern() {
    // plain and inverted ([^...]) character lists, with ranges and single chars
    let tokens: Vec<String> = scan(
        "hi abcdefghijklmnop 0123456789",
        "hi {[a-l]}{[^a-l ]} {[01234-8]}{[9]}",
    )
    .take(4)
    .collect();
    assert_eq!(tokens, ["abcdefghijkl", "mnop", "012345678", "9"]);

    // the {*[65]} field consumes "56" without producing a result, and a
    // non-ASCII character can be listed in a pattern
    let tokens: Vec<String> = scan("xyz  01234567λ89", "xyz {[40-3]}{*[65]}{[7-78-9λ]}")
        .take(2)
        .collect();
    assert_eq!(tokens, ["01234", "7λ89"]);
}
613 | 
#[test]
fn test_width() {
    // a numeric width caps how many characters a field may consume
    let tokens: Vec<String> = scan("01123fe071 432", "{2d}{3d}{4x}{2d} {3d}")
        .take(5)
        .collect();
    assert_eq!(tokens, ["01", "123", "fe07", "1", "432"]);
}
623 | 
#[test]
fn match_end() {
    // {e} is satisfied when the input ends exactly there
    let complete: Vec<String> = scan("12 hi", "{d} hi{e}").collect();
    assert_eq!(complete, ["12"]);

    // leftover input where {e} expects the end invalidates the whole scan
    let leftover = scan("12 hi2", "{d} hi{e}").next();
    assert_eq!(leftover, Some(String::new()));
}
632 | 
// Tests for the `{/regex/}` field; only built when the "regex" feature is on.
#[cfg(all(test, feature = "regex"))]
mod test_regex {
    use super::scan;

    #[test]
    fn simple() {
        let first = scan("one (hello) two", "one ({/[^)]+/}) two").next();
        assert_eq!(first.unwrap(), "hello");
    }

    #[test]
    fn mixed_regex_and_pattern() {
        let first = scan("one ((hello)) two", r#"one ({/[^)]+\)?/}) two"#).next();
        assert_eq!(first.unwrap(), "(hello)");
    }

    #[test]
    fn bad_pattern() {
        // note the extra close paren
        let first = scan("one (hello)) two", "one ({/[^)]+/}) two").next();
        assert_eq!(first.unwrap(), "");
    }

    #[test]
    fn uses_group_if_present() {
        // the regex also consumes the trailing space, but only the group is returned
        let first = scan("one (((hello))) two", r#"one {/(\(.*\)) /}two"#).next();
        assert_eq!(first.unwrap(), "(((hello)))");
    }

    #[test]
    fn unicode() {
        let first = scan("й", "{/.*/}").next();
        assert_eq!(first.unwrap(), "й");
    }
}
668 | 


--------------------------------------------------------------------------------