├── .gitignore
├── .travis.yml
├── Cargo.toml
├── LICENSE
├── README.md
└── src
├── lib.rs
└── parse.rs
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | *~
4 | doc
5 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: rust
2 | rust:
3 | - stable
4 | - beta
5 | - nightly
6 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "scan_fmt"
3 | version = "0.2.6"
4 | authors = ["wlentz"]
5 | description = "A simple scanf()-like input for Rust"
6 | repository = "https://github.com/wlentz/scan_fmt"
7 | license = "MIT"
8 | readme = "README.md"
9 |
10 | [features]
11 | default = ["regex", "std"]
12 | std = []
13 |
14 | [dependencies]
15 | regex = { version = "1", optional = true }
16 |
17 | [lib]
18 | name = "scan_fmt"
19 | path = "src/lib.rs"
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 wlentz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # scan_fmt 
2 | scan_fmt provides a simple scanf()-like input for Rust. The goal is to make it easier to read data from a string or stdin.
3 |
4 | Currently the format string supports the following special sequences:
5 |
6 | {{ = escape for '{'
7 | }} = escape for '}'
8 | {} = return any value (until next whitespace)
9 | {d} = return base-10 decimal
10 | {x} = return hex (0xab or ab)
11 | {f} = return float
12 | {*d} = "*" as the first character means "match but don't return"
13 | {2d} or {2x} or {2f} = limit the maximum width to 2. Any positive integer works.
14 | {[...]} = return pattern.
15 | ^ inverts if it is the first character
16 | - is for ranges. For a literal - put it at the start or end.
17 | To add a literal ] do "[]abc]"
18 | {e} = doesn't return a value, but matches end of line. Use this if you
19 | don't want to ignore potential extra characters at end of input.
20 | Examples:
21 | {[0-9ab]} = match 0-9 or a or b
22 | {[^,.]} = match anything but , or .
23 | {/.../} = return regex inside of `//`.
24 | If there is a single capture group inside of the slashes then
25 | that group will make up the pattern.
26 | Examples:
27 | {/[0-9ab]/} = same as {[0-9ab]}, above
28 | {/a+/} = matches at least one `a`, greedily
29 | {/jj(a*)jj/} = matches any number of `a`s, but only if
30 | they're surrounded by two `j`s
31 |
32 |
33 | ### Examples
34 | ```rust
35 | #[macro_use] extern crate scan_fmt;
36 | use std::error::Error ;
37 | fn main() -> Result<(),Box<dyn Error>> {
38 | let (a,b,c) = scan_fmt!( "hello 0x12 345 bye", // input string
39 | "hello {x} {} {}", // format
40 | [hex u8], i32, String) ? ; // types of a, b and c
41 | assert_eq!( a, 0x12 ) ;
42 | assert_eq!( b, 345 ) ;
43 | assert_eq!( c, "bye" ) ;
44 |
45 | println!("Enter something like: 123-22");
46 | let (c,d) = scanln_fmt!( "{d}-{d}", // format
47 | u16, u8) ? ; // types of c and d
48 | println!("Got {} and {}",c,d) ;
49 | // Note - currently scanln_fmt! just calls unwrap() on read_line()
50 |
51 | let (a,b) = scan_fmt_some!( "hello 12 345", // input string
52 | "hello {} {}", // format
53 | u8, i32) ; // types
54 | assert_eq!( a, Some(12) ) ;
55 | assert_eq!( b, Some(345) ) ;
56 | Ok(())
57 | }
58 | ```
59 |
60 | ### Limitations
61 | There is no compile-time warning if the number of {}'s in the format string doesn't match the number of return values. With `scan_fmt_some!` you'll just get `None` for the extra return values; with `scan_fmt!` you'll get an `Err`. See src/lib.rs for more details.
62 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Will Lentz.
2 | // Licensed under the MIT license.
3 |
4 | //! This crate provides a simple sscanf()-like interface to extract
5 | //! data from strings and stdin.
6 | //!
7 | //! In version 0.2 scan_fmt! changed to return a Result.
8 | //! Use scan_fmt_some! for the 0.1.x behavior.
9 | //!
10 | //! To use this crate, do:
11 | //!
12 | //! ```ignore
13 | //! #[macro_use] extern crate scan_fmt;
14 | //! ```
15 | //!
16 | //! Example to read from a string:
17 | //!
18 | //! ```rust
19 | //! # #[macro_use] extern crate scan_fmt;
20 | //! # fn main() {
21 | //! if let Ok((a,b)) = scan_fmt!( "-11 0x22", // input string
22 | //! "{d} {x}", // format
23 | //! i8, [hex u8]) { // types
24 | //! assert_eq!( a, -11 ) ;
25 | //! assert_eq!( b, 0x22 ) ;
26 | //! }
27 | //!
28 | //! let (a,b,c) = scan_fmt_some!( "hello 12 345 bye", // input string
29 | //! "hello {} {d} {}", // format
30 | //! u8, i32, String); // type of a-c Options
31 | //! assert_eq!( a, Some(12) ) ;
32 | //! assert_eq!( b, Some(345) ) ;
33 | //! assert_eq!( c, Some("bye".into()) ) ;
34 | //! # }
35 | //! ```
36 | //!
37 | //! Special format_string tokens:
38 | //!
39 | //! {{ = escape for '{'
40 | //! }} = escape for '}'
41 | //! {} = return any value (until next whitespace)
42 | //! {d} = return base-10 decimal
43 | //! {x} = return hex (0xab or ab)
44 | //! = you must wrap the type in [hex type], e.g. "[hex u32]"
45 | //! {f} = return float
46 | //! {*d} = "*" as the first character means "match but don't return"
47 | //! {2d} or {2x} or {2f} = limit the maximum width to 2. Any positive integer works.
48 | //! {[...]} = return pattern.
49 | //! ^ inverts if it is the first character
50 | //! - is for ranges. For a literal - put it at the start or end.
51 | //! To add a literal ] do "[]abc]"
52 | //! {e} = doesn't return a value, but matches end of line. Use this if you
53 | //! don't want to ignore potential extra characters at end of input.
54 | //! Examples:
55 | //! {[0-9ab]} = match 0-9 or a or b
56 | //! {[^,.]} = match anything but , or .
57 | //! {/.../} = return regex inside of `//`. (if regex feature is installed)
58 | //! If there is a single capture group inside of the slashes then
59 | //! that group will make up the pattern.
60 | //! Examples:
61 | //! {/[0-9ab]/} = same as {[0-9ab]}, above
62 | //! {/a+/} = matches at least one `a`, greedily
63 | //! {/jj(a*)jj/} = matches any number of `a`s, but only if
64 | //! they're surrounded by two `j`s
65 | //!
66 | //!
67 | //! Example to read from stdin:
68 | //!
69 | //! ```ignore
70 | //! # #[macro_use] extern crate scan_fmt;
71 | //! # use std::error::Error ;
72 | //! # fn main() -> Result<(),Box<dyn Error>> {
73 | //! let (a,b) = scanln_fmt!( "{}-{}", u16, u8) ? ;
74 | //! println!("Got {} and {}",a,b);
75 | //!
76 | //! let (a,b) = scanln_fmt_some!( "{}-{}", // format
77 | //! u16, u8); // type of a&b Options
78 | //! match (a,b) {
79 | //! (Some(aa),Some(bb)) => println!("Got {} and {}",aa,bb),
80 | //! _ => println!("input error")
81 | //! }
82 | //! Ok(())
83 | //! # }
84 | //! ```
85 | //!
86 | //! ## LIMITATIONS:
87 | //! There are no compile-time checks to make sure the format
88 | //! strings matches the number of return arguments. Extra
89 | //! return values will be None or cause a Result error.
90 | //!
91 | //! Like sscanf(), whitespace (including \n) is largely ignored.
92 | //!
93 | //! Conversion to output values is done using parse::<T>().
94 |
95 | #![no_std]
96 |
97 | #[cfg(feature = "regex")]
98 | extern crate regex;
99 |
100 | #[cfg(any(test, doctest, feature = "std"))]
101 | extern crate std;
102 |
103 | #[macro_use]
104 | extern crate alloc;
105 |
106 | pub mod parse;
107 |
/// Internal helper behind `scan_fmt!` and `scan_fmt_some!`.
///
/// Pulls the next token out of `$res` (an iterator yielding `String`s) and
/// converts it to the requested type.
///
/// * `wrap` arms evaluate to `Option<T>`: `None` when the iterator is
///   exhausted or the conversion fails.
/// * `no_wrap` arms evaluate to a bare `T`. On failure a short reason is
///   stored in `$err` and a placeholder value is returned instead.
///
/// `[hex T]` converts with `T::from_str_radix(.., 16)`; every other type is
/// converted with `str::parse::<T>()`.
///
/// The generic `no_wrap` arm builds its placeholder by parsing `"0"` and, if
/// that fails, `"0.0.0.0"`; it panics for types that accept neither string.
#[macro_export]
macro_rules! scan_fmt_help {
    ( wrap $res:expr, [hex $arg:tt] ) => {
        $res.next()
            .and_then(|item| $arg::from_str_radix(&item, 16).ok())
    };
    ( wrap $res:expr , $($arg1:tt)::* ) => {
        $res.next()
            .and_then(|item| item.parse::<$($arg1)::*>().ok())
    };
    ( no_wrap $err:ident, $res:expr, [hex $arg:tt] ) => {
        match $res.next() {
            None => {
                $err = "internal hex";
                0
            }
            Some(item) => match $arg::from_str_radix(&item, 16) {
                Ok(value) => value,
                Err(_) => {
                    $err = "from_str_radix hex";
                    0
                }
            },
        }
    };
    ( no_wrap $err:ident, $res:expr , $($arg1:tt)::* ) => {{
        // A value of the target type is needed even when parsing fails:
        // "0" covers most types, "0.0.0.0" covers IpAddr.
        let fallback = "0"
            .parse::<$($arg1)::*>()
            .or_else(|_| "0.0.0.0".parse::<$($arg1)::*>())
            .unwrap();
        match $res.next() {
            None => {
                $err = concat!("internal ", stringify!($($arg1)::*));
                fallback
            }
            Some(item) => {
                let parsed = item.parse::<$($arg1)::*>();
                if item.is_empty() {
                    // an empty token means the scanner found no match
                    $err = "match::none";
                } else if parsed.is_err() {
                    $err = concat!("parse::", stringify!($($arg1)::*));
                }
                parsed.unwrap_or(fallback)
            }
        }
    }};
}
162 |
/// `(a, ...) = scan_fmt_some!(input, format, types, ...)`
///
/// Scans `input` according to `format` and evaluates to one `Option<T>` per
/// requested type (a tuple when more than one type is given). A value is
/// `None` when there is no token left for it or its conversion fails.
#[macro_export]
macro_rules! scan_fmt_some {
    ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
        let mut tokens = $crate::parse::scan($instr, $fmt);
        ( $( $crate::scan_fmt_help!(wrap tokens, $($args)::*) ),* )
    }};
}
172 |
/// `Ok((a, ...)) = scan_fmt!(input, format, types, ...)`
///
/// Scans `input` according to `format` and evaluates to
/// `Result<(T, ...), ScanError>`. If any value is missing or fails to
/// convert, the result is an `Err` carrying the reason recorded for the
/// last failing value.
#[macro_export]
macro_rules! scan_fmt {
    ( $instr:expr, $fmt:expr, $($($args:tt)::*),* ) => {{
        // stays empty as long as every value converts
        let mut failure = "";
        let mut tokens = $crate::parse::scan($instr, $fmt);
        let values = ( $( $crate::scan_fmt_help!(no_wrap failure, tokens, $($args)::*) ),* );
        if failure.is_empty() {
            Ok(values)
        } else {
            Err($crate::parse::ScanError(failure.into()))
        }
    }};
}
188 |
189 | #[cfg(feature = "std")]
190 | pub use std_features::*;
191 |
#[cfg(feature = "std")]
mod std_features {
    use std::string::String;

    /// Reads one line from stdin and returns it (including any trailing
    /// newline that `read_line` stored).
    ///
    /// # Panics
    /// Panics if reading from stdin fails: the `read_line` result is
    /// unwrapped.
    pub fn get_input_unwrap() -> String {
        let mut input = String::new();
        std::io::stdin().read_line(&mut input).unwrap();
        input
    }

    /// (a,+) = scanln_fmt!( format_string, types,+ )
    /// Same as scan_fmt!(), but reads input string from stdin.
    #[macro_export]
    macro_rules! scanln_fmt {
        // `$crate::` keeps the inner macro resolvable even when the caller
        // imported only `scanln_fmt!`.
        ($($arg:tt)*) => {{ $crate::scan_fmt!(&$crate::get_input_unwrap(), $($arg)*) }}
    }

    /// (a,+) = scanln_fmt_some!( format_string, types,+ )
    /// Same as scan_fmt_some!(), but reads input string from stdin.
    #[macro_export]
    macro_rules! scanln_fmt_some {
        // `$crate::` keeps the inner macro resolvable even when the caller
        // imported only `scanln_fmt_some!`.
        ($($arg:tt)*) => {{ $crate::scan_fmt_some!(&$crate::get_input_unwrap(), $($arg)*) }}
    }
}
216 |
217 | #[cfg(test)]
218 | use alloc::string::{String, ToString};
219 | #[cfg(test)]
220 | use parse::ScanError;
221 |
/// Test helper: asserts that two floats of type `$t` (`f32` or `f64`)
/// differ by at most twice the type's machine epsilon.
#[cfg(test)]
macro_rules! assert_flt_eq {
    ($t:ident, $v1:expr, $v2:expr) => {{
        let delta: $t = $v1 - $v2;
        assert!(delta.abs() <= 2.0 * std::$t::EPSILON);
    }};
}
228 |
/// Test helper: checks that `scan_fmt!` composes with `?` inside a function
/// returning `Result<_, ScanError>`.
#[cfg(test)]
fn ret_scan_all() -> Result<(), ScanError> {
    let parsed = scan_fmt!("1.2 e", "{f} {x}", f32, [hex u32])?;
    assert_flt_eq!(f32, parsed.0, 1.2);
    assert_eq!(parsed.1, 14);
    Ok(())
}
236 |
/// `scan_fmt!` returns every value on success and an `Err` when a token
/// cannot be converted to the requested type.
#[test]
fn test_scan_all() {
    // `expect` replaces the former `if let .. else assert!(false, ..)` so a
    // failure reports the actual ScanError.
    let a = scan_fmt!("hi1 3", "{} {d}", std::string::String, u32).expect("error 0");
    assert_eq!(a, ("hi1".to_string(), 3));

    let (a, b, c) =
        scan_fmt!("hi1 0xf -3", "{} {x} {d}", String, [hex u32], i8).expect("error 1");
    assert_eq!(a, "hi1");
    assert_eq!(b, 0xf);
    assert_eq!(c, -3);

    // "f" is not a base-10 integer
    let a = scan_fmt!("hi1 f", "{} {d}", String, i32);
    assert!(a.is_err());

    let a = ret_scan_all();
    std::println!("{:?}", a);
    assert!(a.is_ok());
}
257 |
/// A leading '+' is accepted for both integers and floats.
#[test]
fn test_plus_sign() {
    let int_val = scan_fmt_some!("+42", "{d}", i32);
    assert_eq!(int_val, Some(42));

    let flt_val = scan_fmt_some!("+42.0", "{f}", f64);
    assert_flt_eq!(f64, flt_val.unwrap(), 42.0);
}
265 |
/// `{x}` accepts a `0x` prefix and converts through `[hex T]`.
#[test]
fn test_hex() {
    let (name, small, large) = scan_fmt_some!(
        "DEV 0xab 0x1234",
        "{} {x} {x}",
        std::string::String,
        [hex u32],
        [hex u64]
    );
    assert_eq!(name, Some(String::from("DEV")));
    assert_eq!(small, Some(0xab));
    assert_eq!(large, Some(0x1234));
}
274 |
/// A value that does not fit the target type becomes `None` without
/// affecting the values around it.
#[test]
fn test_limited_data_range() {
    let (big, overflow, small) = scan_fmt_some!(
        "test{\t 1e9 \n bye 257} hi  22.7e-1",
        "test{{ {} bye {d}}} hi {f}",
        f64,
        u8,
        f32
    );
    assert_flt_eq!(f64, big.unwrap(), 1e9);
    assert_eq!(overflow, None); // 257 doesn't fit into a u8
    assert_flt_eq!(f32, small.unwrap(), 2.27);
}
288 |
/// Requesting more values than the format provides yields `None` for the
/// surplus ones.
#[test]
fn test_too_many_outputs() {
    let (first, second, third, extra) =
        scan_fmt_some!("a_aa bb_b c", "{} {s} {}", String, String, String, String);
    assert_eq!(first, Some("a_aa".to_string()));
    assert_eq!(second, Some("bb_b".to_string()));
    assert_eq!(third, Some("c".to_string()));
    assert_eq!(extra, None);
}
297 |
/// `{*...}` matches a token but does not return it.
#[test]
fn test_skip_assign() {
    let (head, tail) =
        scan_fmt_some!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String);
    assert_eq!(head.unwrap(), "1 2 3");
    assert_eq!(tail.unwrap(), "6 7");

    let (head, tail) =
        scan_fmt!("1 2 3, 4 5, 6 7", "{[^,]},{*[^,]},{[^,]}", String, String).unwrap();
    assert_eq!(head, "1 2 3");
    assert_eq!(tail, "6 7");
}
307 |
/// Width specifiers cap how many characters each field consumes.
#[test]
fn test_width_specifier() {
    let (d1, d2, hex, d4, flt) = scan_fmt!(
        "123ab71 2.1234",
        "{1d}{2d}{3x}{4d}{3f}",
        u8, u8, [hex u16], u16, f32
    )
    .unwrap();
    assert_eq!(d1, 1);
    assert_eq!(d2, 23);
    assert_eq!(hex, 0xab7);
    assert_eq!(d4, 1);
    assert_flt_eq!(f32, flt, 2.1);
}
320 |
/// An unterminated `{d` yields no token, which is reported as an
/// "internal" error for the requested type.
#[test]
fn test_err_equals() {
    let outcome = scan_fmt!("hi 123", "hi {d", u8);
    let expected = Err(parse::ScanError("internal u8".to_string()));
    assert_eq!(outcome, expected);
}
326 |
/// A regex field that does not match is reported as "match::none".
#[test]
fn test_no_post_match_regex() {
    let matched = scan_fmt!("74in", "{d}{/in/}", u8, String);
    assert_eq!(matched, Ok((74, String::from("in"))));

    let unmatched = scan_fmt!("74in", "{d}{/cm/}", u8, String);
    assert_eq!(unmatched, Err(parse::ScanError("match::none".to_string())));
}
334 |
/// Literal text after a field must match the input, otherwise the whole
/// scan is reported as "match::none".
#[test]
fn test_no_post_match() {
    let matched = scan_fmt!("17in", "{d}in", u8);
    assert_eq!(matched, Ok(17u8));

    let unmatched = scan_fmt!("17in", "{d}cm", u8);
    assert_eq!(unmatched, Err(parse::ScanError("match::none".to_string())));
}
343 |
/// `{e}` rejects input that continues past the expected end.
#[test]
fn test_match_end() {
    let exact = scan_fmt!("17in", "{d}in{e}", u8);
    assert_eq!(exact, Ok(17u8));

    let trailing = scan_fmt!("17ink", "{d}in{e}", u8);
    assert_eq!(trailing, Err(parse::ScanError("match::none".to_string())));
}
351 |
/// Types that cannot parse "0" (here `IpAddr`) still work with `scan_fmt!`.
#[test]
fn test_ip_addr() {
    let expected = std::net::IpAddr::V4(std::net::Ipv4Addr::new(185, 187, 165, 163));
    let parsed = scan_fmt!("x 185.187.165.163 y", "x {} y", std::net::IpAddr);
    assert_eq!(parsed.unwrap(), expected);
}
360 |
--------------------------------------------------------------------------------
/src/parse.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2015-2019 Will Lentz.
2 | // Licensed under the MIT license.
3 | use alloc::string::{String, ToString};
4 |
5 | #[cfg(feature = "regex")]
6 | use regex::Regex;
7 |
/// Kind of token a single `{...}` format specifier asks for.
#[derive(PartialEq, Debug)]
enum FmtType {
    /// `{}` / `{s}`: everything up to whitespace (or the end character).
    NonWhitespaceOrEnd,
    /// `{e}`: matches only the end of the input.
    OnlyEnd,
    /// `{[...]}`: a character-set pattern.
    Pattern,
    /// `{d}`: base-10 integer.
    Dec10,
    /// `{x}`: base-16 integer.
    Hex16,
    /// `{f}`: float.
    Flt,
    /// `{/.../}`: regular expression.
    #[cfg(feature = "regex")]
    Regex,
}
19 |
20 | #[cfg(feature = "std")]
21 | use std::error::Error;
22 |
23 | use alloc::vec::Vec;
24 | use core::fmt;
25 |
/// Error returned by `scan_fmt!`; wraps a short description of what failed.
#[derive(Debug, PartialEq)]
pub struct ScanError(pub String);

#[cfg(feature = "std")]
impl Error for ScanError {}

impl fmt::Display for ScanError {
    /// Writes the description prefixed with "Scan error: ".
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Scan error: ")?;
        f.write_str(&self.0)
    }
}
37 |
38 | // Handle the following format strings:
39 | // {}X -> everything until whitespace or next character 'X'
40 | // {s} -> everything until whitespace
41 | // {d} -> only base-10 integers
42 | // {x} -> only unsigned base-16 integers. Allow 0xfff or fff
43 | // {f} -> only floats
44 | // {*} -> get token, but don't assign it to output
45 | // {[]} -> only search for given characters
46 | // starting with '^' negates everything
47 | // ranges with '-' work. To include '-' put it at end or start
48 | // to include ']' put it at the start (or right after ^)
49 | // e.g., {[^,]} -> match everything until next comma
50 |
/// Cursor over a vector of `char`s; makes it slightly easier to scan
/// through both the format string and the input.
struct VecScanner {
    data: Vec<char>,
    pos: usize,
    limit_pos: usize, // if non-0, then inc_limit() returns when 'pos' gets here
}

impl VecScanner {
    /// Creates a scanner positioned at the first character, with no limit.
    fn new(d: Vec<char>) -> VecScanner {
        VecScanner {
            data: d,
            pos: 0,
            limit_pos: 0,
        }
    }

    /// Returns the character at the current position.
    ///
    /// Panics if the scanner is at the end; callers must check `is_end()`
    /// or the result of `inc()` / `inc_limit()` first.
    fn cur(&self) -> char {
        self.data[self.pos]
    }

    /// Returns the character `n` positions ahead, or `None` past the end.
    fn peek(&self, n: usize) -> Option<char> {
        self.data.get(self.pos + n).cloned()
    }

    /// True once the position has moved past the last character.
    fn is_end(&self) -> bool {
        self.pos >= self.data.len()
    }

    /// Advances one character; returns true if we have more data.
    fn inc(&mut self) -> bool {
        self.pos += 1;
        !self.is_end()
    }

    /// Sets the maximum position for `inc_limit()`: `max_length` characters
    /// past the current position, or no limit for `None`.
    fn start_inc_limit(&mut self, max_length: Option<usize>) {
        self.limit_pos = match max_length {
            Some(n) => self.pos + n,
            None => 0,
        };
    }

    /// True when a limit is set (non-zero) and the position has reached it.
    fn hit_inc_limit(&self) -> bool {
        self.limit_pos > 0 && self.pos >= self.limit_pos
    }

    /// Same as `inc()`, but also returns false once the limit set by
    /// `start_inc_limit()` has been reached.
    fn inc_limit(&mut self) -> bool {
        self.pos += 1;
        !(self.is_end() || self.hit_inc_limit())
    }
}
111 |
/// True for the four characters the scanner treats as whitespace:
/// space, tab, line feed and carriage return.
fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
118 |
119 | // scan to past whitespace. Return false if end of input.
120 | fn skip_whitespace(vs: &mut VecScanner) -> bool {
121 | while !vs.is_end() {
122 | if is_whitespace(vs.cur()) {
123 | vs.inc();
124 | } else {
125 | break;
126 | }
127 | }
128 | !vs.is_end()
129 | }
130 |
131 | struct FmtResult {
132 | data_type: FmtType,
133 | max_length: Option,
134 | store_result: bool,
135 | invert_char_list: bool,
136 | end_char: char,
137 | // Store pattern characters and ranges. It might be worth
138 | // optimizing this if format strings are long.
139 | char_list: Vec<(char, char)>,
140 | #[cfg(feature = "regex")]
141 | regex: Option,
142 | }
143 |
144 | // See top-level docs for allowed formats.
145 | // Starts right after opening '{'. Consumes characters to final }
146 | // Note that '{' and '}' can exist unescaped inside [].
147 | fn get_format(fstr: &mut VecScanner) -> Option {
148 | let mut res = FmtResult {
149 | data_type: FmtType::NonWhitespaceOrEnd,
150 | max_length: None,
151 | end_char: ' ',
152 | store_result: true,
153 | invert_char_list: false,
154 | char_list: vec![],
155 | #[cfg(feature = "regex")]
156 | regex: None,
157 | };
158 | if fstr.cur() == '*' {
159 | res.store_result = false;
160 | if !fstr.inc() {
161 | return None;
162 | }
163 | }
164 |
165 | if fstr.cur() == '}' {
166 | if fstr.inc() {
167 | res.end_char = fstr.cur();
168 | }
169 | return Some(res);
170 | }
171 |
172 | // Read optional field width specifier (e.g., the "2" in {2d})
173 | let pos_start = fstr.pos;
174 | while fstr.cur().is_digit(10) {
175 | if !fstr.inc() {
176 | return None;
177 | }
178 | }
179 | if fstr.pos > pos_start {
180 | let max_length_string: String = fstr.data[pos_start..fstr.pos].iter().cloned().collect();
181 | res.max_length = max_length_string.parse::().ok();
182 | }
183 |
184 | match fstr.cur() {
185 | 's' => { /* already FmtType::NonWhitespaceOrEnd */ }
186 | 'e' => {
187 | res.data_type = FmtType::OnlyEnd;
188 | }
189 | 'd' => {
190 | res.data_type = FmtType::Dec10;
191 | }
192 | 'x' => {
193 | res.data_type = FmtType::Hex16;
194 | }
195 | 'f' => {
196 | res.data_type = FmtType::Flt;
197 | }
198 | '[' => {
199 | res.data_type = FmtType::Pattern;
200 | }
201 | #[cfg(feature = "regex")]
202 | '/' => {
203 | res.data_type = FmtType::Regex;
204 | }
205 | _ => return None, // unexpected format
206 | }
207 | if !fstr.inc() {
208 | return None;
209 | }
210 |
211 | match res.data_type {
212 | FmtType::Pattern => handle_pattern(res, fstr),
213 | #[cfg(feature = "regex")]
214 | FmtType::Regex => handle_regex(res, fstr),
215 | _ => {
216 | if fstr.cur() != '}' {
217 | return None;
218 | }
219 | fstr.inc();
220 | Some(res)
221 | }
222 | }
223 | }
224 |
225 | fn handle_pattern(mut res: FmtResult, fstr: &mut VecScanner) -> Option {
226 | // handle [] pattern
227 | res.data_type = FmtType::Pattern;
228 |
229 | if fstr.cur() == '^' {
230 | res.invert_char_list = true;
231 | if !fstr.inc() {
232 | return None;
233 | }
234 | }
235 |
236 | match fstr.cur() {
237 | ']' | '-' => {
238 | res.char_list.push((fstr.cur(), fstr.cur()));
239 | if !fstr.inc() {
240 | return None;
241 | }
242 | }
243 | _ => (),
244 | }
245 |
246 | // look for end of [] pattern
247 | while fstr.cur() != ']' {
248 | if fstr.peek(1) == Some('-') && fstr.peek(2) != Some(']') {
249 | let prev_char = fstr.cur();
250 | if !fstr.inc() {
251 | break;
252 | } // go to '-'
253 | if !fstr.inc() {
254 | break;
255 | } // go past '-'
256 | // add character range
257 | res.char_list.push((prev_char, fstr.cur()));
258 | } else {
259 | res.char_list.push((fstr.cur(), fstr.cur()));
260 | }
261 | if !fstr.inc() {
262 | return None;
263 | }
264 | }
265 | if !fstr.inc() {
266 | return None;
267 | } // go past ']'
268 | if fstr.cur() != '}' {
269 | return None;
270 | }
271 | fstr.inc(); // go past closing '}'
272 |
273 | Some(res)
274 | }
275 |
// Parse the body of a {/.../} specifier. Starts on the first character after
// the opening '/' and consumes through the closing '}'.
// The regex is anchored with '^' so it can only match at the current input
// position. Returns None (never panics) when the closing '/' or '}' is
// missing or the regex does not compile.
#[cfg(feature = "regex")]
fn handle_regex(mut res: FmtResult, fstr: &mut VecScanner) -> Option<FmtResult> {
    let start = fstr.pos;
    // The first character is never a terminator, but it may be the backslash
    // that escapes a '/' right after it.
    let mut last_was_escape = fstr.cur() == '\\';
    let mut found_close = false;
    while fstr.inc() {
        if fstr.cur() == '/' && !last_was_escape {
            found_close = true;
            break;
        }
        last_was_escape = fstr.cur() == '\\';
    }
    if !found_close {
        // ran off the end of the format string without a closing '/'
        return None;
    }

    let substr = Some('^')
        .into_iter()
        .chain(fstr.data[start..fstr.pos].iter().cloned())
        .collect::<String>();

    match Regex::new(&substr) {
        Ok(re) => res.regex = Some(re),
        Err(_) => return None,
    }

    // consume closing '/', then require '}'
    if !fstr.inc() || fstr.cur() != '}' {
        return None;
    }
    fstr.inc();

    Some(res)
}
316 |
317 | fn scan_dec10(vs: &mut VecScanner, max_length: Option) {
318 | // look for [+-]{0,1}[0-9]+, up to max_length characters
319 | vs.start_inc_limit(max_length);
320 | scan_dec10_nest(vs);
321 | }
322 |
323 | // advance past base-10 decimal - assumes someone has called start_inc_limit()
324 | fn scan_dec10_nest(vs: &mut VecScanner) {
325 | // look for [+-]{0,1}[0-9]+
326 | match vs.cur() {
327 | '+' | '-' => {
328 | if !vs.inc_limit() {
329 | return;
330 | }
331 | }
332 | _ => (),
333 | }
334 |
335 | while vs.cur().is_digit(10) {
336 | if !vs.inc_limit() {
337 | return;
338 | }
339 | }
340 | }
341 |
342 | // advance past base-16 hex
343 | // look for (0x){0,1}[0-9a-fA-F]+
344 | fn scan_hex16(vs: &mut VecScanner, max_length: Option) {
345 | vs.start_inc_limit(max_length);
346 | if vs.cur() == '0' {
347 | if !vs.inc_limit() {
348 | return;
349 | }
350 | }
351 | if vs.cur() == 'x' {
352 | if !vs.inc_limit() {
353 | return;
354 | }
355 | }
356 | while vs.cur().is_digit(16) {
357 | if !vs.inc_limit() {
358 | return;
359 | };
360 | }
361 | }
362 |
363 | // advance past float
364 | // look for [+-]{0,1}[0-9]+
365 | // then optional .[0-9]+
366 | // then optional e[+-]{1}[0-9]+
367 | fn scan_float(vs: &mut VecScanner, max_length: Option) {
368 | vs.start_inc_limit(max_length);
369 | scan_dec10_nest(vs);
370 | if vs.cur() == '.' {
371 | if !vs.inc_limit() {
372 | return;
373 | }
374 | while vs.cur().is_digit(10) {
375 | if !vs.inc_limit() {
376 | return;
377 | }
378 | }
379 | }
380 | if vs.cur() == 'e' {
381 | if !vs.inc_limit() {
382 | return;
383 | }
384 | scan_dec10_nest(vs);
385 | }
386 | }
387 |
388 | // advance until 'end' or whitespace
389 | fn scan_nonws_or_end(vs: &mut VecScanner, end: char) {
390 | while !is_whitespace(vs.cur()) && vs.cur() != end {
391 | if !vs.inc() {
392 | return;
393 | }
394 | }
395 | }
396 |
397 | // advance past pattern
398 | fn scan_pattern(vs: &mut VecScanner, fmt: &mut FmtResult) {
399 | // if invert, scan until character not in char_list
400 | // else scan while character is in char_list
401 | loop {
402 | let c = vs.cur();
403 | let mut found = false;
404 | for &(start, end) in fmt.char_list.iter() {
405 | if c >= start && c <= end {
406 | found = true;
407 | break;
408 | }
409 | }
410 | if found == fmt.invert_char_list {
411 | return;
412 | }
413 | if !vs.inc() {
414 | return;
415 | }
416 | }
417 | }
418 |
419 | #[cfg(feature = "regex")]
420 | enum ReMatch {
421 | Captured { len: usize },
422 | NoCapture,
423 | }
424 |
425 | #[cfg(feature = "regex")]
426 | fn scan_regex(vs: &mut VecScanner, fmt: &mut FmtResult) -> ReMatch {
427 | let re = fmt.regex.take().unwrap();
428 | let remainder = vs.data[vs.pos..].iter().cloned().collect::();
429 | if let Some(mat) = re.captures(&remainder) {
430 | vs.pos += remainder[..mat.get(0).unwrap().end()].chars().count();
431 | if let Some(cap) = mat.get(1) {
432 | return ReMatch::Captured { len: cap.end() };
433 | }
434 | }
435 | return ReMatch::NoCapture;
436 | }
437 |
438 | // return data matching the format from user input (else "")
439 | fn get_token(vs: &mut VecScanner, fmt: &mut FmtResult) -> String {
440 | let mut pos_start = vs.pos;
441 | match fmt.data_type {
442 | FmtType::OnlyEnd => {} // handled in scan()
443 | FmtType::NonWhitespaceOrEnd => scan_nonws_or_end(vs, fmt.end_char),
444 | FmtType::Dec10 => scan_dec10(vs, fmt.max_length),
445 | FmtType::Hex16 => scan_hex16(vs, fmt.max_length),
446 | FmtType::Flt => scan_float(vs, fmt.max_length),
447 | FmtType::Pattern => scan_pattern(vs, fmt),
448 | #[cfg(feature = "regex")]
449 | FmtType::Regex => {
450 | // if the regex has an internal group then we want to use the group
451 | // to select the substring, but either way the scan_regex function
452 | // will set pos to the end of the entire match consumed by the
453 | // regex
454 | match scan_regex(vs, fmt) {
455 | ReMatch::Captured { len } => {
456 | return vs.data[pos_start..pos_start + len]
457 | .iter()
458 | .cloned()
459 | .collect();
460 | }
461 | ReMatch::NoCapture => {}
462 | }
463 | }
464 | }
465 | if fmt.data_type == FmtType::Dec10 || fmt.data_type == FmtType::Flt {
466 | // parse won't accept "+" in front of numbers
467 | if vs.data[pos_start] == '+' {
468 | pos_start += 1;
469 | }
470 | }
471 | vs.data[pos_start..vs.pos].iter().cloned().collect()
472 | }
473 |
474 | // Extract String tokens from the input string based on
475 | // the format string. See lib.rs for more info.
476 | // Returns an iterator of the String results.
477 | pub fn scan(input_string: &str, format: &str) -> alloc::vec::IntoIter {
478 | let mut res: Vec = vec![];
479 | let mut fmtstr = VecScanner::new(format.chars().collect());
480 | let mut instr = VecScanner::new(input_string.chars().collect());
481 | loop {
482 | let mut do_compare = true;
483 | if !skip_whitespace(&mut fmtstr) {
484 | break;
485 | }
486 | if !skip_whitespace(&mut instr) {
487 | break;
488 | }
489 |
490 | if fmtstr.cur() == '{' {
491 | if !fmtstr.inc() {
492 | break;
493 | }
494 | if fmtstr.cur() == '{' {
495 | // got an escaped {{
496 | } else {
497 | let fmt = get_format(&mut fmtstr);
498 | let mut fmt = if let Some(fmt) = fmt {
499 | fmt
500 | } else {
501 | break;
502 | };
503 |
504 | if fmt.data_type == FmtType::OnlyEnd && !instr.is_end() {
505 | // we didn't get an end of input where expected, so invalidate any matches
506 | return vec![String::from("")].into_iter();
507 | }
508 | let data = get_token(&mut instr, &mut fmt);
509 | if fmt.store_result {
510 | if fmt.data_type == FmtType::Hex16 {
511 | let no_prefix = data.trim_start_matches("0x");
512 | res.push(no_prefix.to_string());
513 | } else {
514 | res.push(data);
515 | }
516 | }
517 | do_compare = false;
518 | }
519 | } else {
520 | if fmtstr.cur() == '}' {
521 | // handle escaped }} by skipping first '}'
522 | if !fmtstr.inc() {
523 | break;
524 | }
525 | }
526 | }
527 | if do_compare {
528 | if fmtstr.cur() != instr.cur() {
529 | return vec![String::from("")].into_iter();
530 | // we had a non match! --> if we only break here we will return all matches found so far.
531 | // This will create a misbehaviour when there is something like `{d}in` as the in is not cared for.
532 | }
533 | if !fmtstr.inc() {
534 | break;
535 | }
536 | if !instr.inc() {
537 | break;
538 | }
539 | }
540 | }
541 | res.into_iter()
542 | }
543 |
/// Literal text and `{d}` fields; nothing is left after the last token.
#[test]
fn test_simple() {
    let tokens: Vec<String> = scan("  data 42-12=30", "data {d}-{d}={d}").collect();
    assert_eq!(tokens, ["42", "12", "30"]);
}
552 |
/// A leading '+' is stripped from decimal and float tokens.
#[test]
fn test_plus_sign() {
    let dec = scan("+42", "{d}").next();
    assert_eq!(dec.unwrap(), "42");

    let flt = scan("+42.7", "{f}").next();
    assert_eq!(flt.unwrap(), "42.7");
}
560 |
/// Escaped braces, mixed field kinds and trailing junk after a float.
#[test]
fn test_complex() {
    let tokens: Vec<String> = scan(
        "test{123 bye -456} hi  -22.7e-1 +1.23fg",
        "test{{{d} bye {}}} hi {f} {f}",
    )
    .collect();
    assert_eq!(tokens, ["123", "-456", "-22.7e-1", "1.23"]);
}
573 |
/// A trailing "\r\n" is not part of the last token.
#[test]
fn test_endline() {
    let mut tokens = scan("hi 15.7\r\n", "{} {}");
    let first = tokens.next().unwrap();
    let second = tokens.next().unwrap();
    assert_eq!(first, "hi");
    assert_eq!(second, "15.7");
}
580 |
/// `{x}` drops the "0x" prefix and stops at the first non-hex character.
#[test]
fn test_hex() {
    let mut tokens = scan("hi 0x15 ff fg", "hi {x} {x} {x}");
    for expected in ["15", "ff", "f"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }
}
588 |
/// `{s}` stops at whitespace; `{}` also stops at the following format character.
#[test]
fn test_string() {
    let mut tokens = scan("The quick brown fox", "{s}{s} {}n {s}x");
    for expected in ["The", "quick", "brow", "fox"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }
}
597 |
/// Character-set patterns: ranges, inversion, skipped fields and non-ASCII.
#[test]
fn test_pattern() {
    let mut tokens = scan(
        "hi abcdefghijklmnop 0123456789",
        "hi {[a-l]}{[^a-l ]} {[01234-8]}{[9]}",
    );
    for expected in ["abcdefghijkl", "mnop", "012345678", "9"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }

    let mut tokens = scan("xyz 01234567λ89", "xyz {[40-3]}{*[65]}{[7-78-9λ]}");
    for expected in ["01234", "7λ89"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }
}
613 |
/// Width specifiers split adjacent digits into separate fields.
#[test]
fn test_width() {
    let mut tokens = scan("01123fe071 432", "{2d}{3d}{4x}{2d} {3d}");
    for expected in ["01", "123", "fe07", "1", "432"].iter() {
        assert_eq!(tokens.next().unwrap(), *expected);
    }
}
623 |
/// `{e}` passes at the end of input and invalidates the scan otherwise.
#[test]
fn match_end() {
    let tokens: Vec<String> = scan("12 hi", "{d} hi{e}").collect();
    assert_eq!(tokens, ["12"]);

    let first = scan("12 hi2", "{d} hi{e}").next();
    assert_eq!(first.unwrap(), "");
}
632 |
/// Tests for `{/.../}` specifiers (only built with the "regex" feature).
#[cfg(all(test, feature = "regex"))]
mod test_regex {
    use super::scan;

    /// First token produced by scanning `input` with `format`.
    fn first_token(input: &str, format: &str) -> String {
        scan(input, format).next().unwrap()
    }

    #[test]
    fn simple() {
        assert_eq!(first_token("one (hello) two", "one ({/[^)]+/}) two"), "hello");
    }

    #[test]
    fn mixed_regex_and_pattern() {
        let token = first_token("one ((hello)) two", r#"one ({/[^)]+\)?/}) two"#);
        assert_eq!(token, "(hello)");
    }

    #[test]
    fn bad_pattern() {
        // note the extra close paren
        let token = first_token("one (hello)) two", "one ({/[^)]+/}) two");
        assert_eq!(token, "");
    }

    #[test]
    fn uses_group_if_present() {
        let token = first_token("one (((hello))) two", r#"one {/(\(.*\)) /}two"#);
        assert_eq!(token, "(((hello)))");
    }

    #[test]
    fn unicode() {
        assert_eq!(first_token("й", "{/.*/}"), "й");
    }
}
668 |
--------------------------------------------------------------------------------