├── .bread.yml ├── .github ├── pull_request_template.md └── workflows │ └── copyright.yml ├── .gitignore ├── Cargo.toml ├── crates ├── structre │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── structre_proc_macros │ ├── Cargo.toml │ └── mod.rs └── structre_tests │ ├── Cargo.toml │ └── tests │ └── test.rs ├── license.txt └── readme.md /.bread.yml: -------------------------------------------------------------------------------- 1 | !v1 2 | weights: 3 | accounts: 4 | 1: 100 5 | projects: 6 | https://github.com/LukasKalbertodt/litrs/: 100 7 | https://github.com/dtolnay/proc-macro2: 100 8 | https://github.com/dtolnay/quote: 100 9 | https://github.com/dtolnay/syn: 100 10 | https://github.com/rust-lang/regex: 100 11 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | 4 | I agree that when the request is merged I assign the copyright of the request to the repository owner. 5 | -------------------------------------------------------------------------------- /.github/workflows/copyright.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: 3 | types: [opened, edited, synchronize] 4 | 5 | jobs: 6 | confirm_agreement: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - env: 11 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 12 | BODY: ${{ github.event.pull_request.body }} 13 | PR_ID: ${{ github.event.pull_request.number }} 14 | run: | 15 | set -xeu 16 | if ! grep -F "$(tail -n 1 .github/pull_request_template.md)" <(echo "$BODY"); then 17 | gh pr close --comment "All changes must include the provided agreement to the copyright assignment." 
--delete-branch "$PR_ID" 18 | fi 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | members = ["crates/*"] 4 | 5 | [workspace.package] 6 | version = "0.2.0" 7 | edition = "2021" 8 | license = "ISC" 9 | repository = "https://github.com/andrewbaxter/structre" 10 | readme = "readme.md" 11 | -------------------------------------------------------------------------------- /crates/structre/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "structre" 3 | description = "Static-checked parsing of regexes into structs" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | readme.workspace = true 9 | 10 | [dependencies] 11 | regex = { version = "1", default-features = false, features = ["std"] } 12 | structre_proc_macros = { path = "../structre_proc_macros", version = "=0.2.0" } 13 | -------------------------------------------------------------------------------- /crates/structre/src/lib.rs: -------------------------------------------------------------------------------- 1 | use { 2 | std::{ 3 | fmt::Display, 4 | }, 5 | }; 6 | pub use { 7 | structre_proc_macros::structre, 8 | regex, 9 | }; 10 | 11 | #[derive(Debug)] 12 | pub enum Error { 13 | NoMatch, 14 | Field { 15 | field: &'static str, 16 | error: String, 17 | }, 18 | } 19 | 20 | impl Display for Error { 21 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 22 | match self { 23 | Error::NoMatch => { 24 | format_args!("No match").fmt(f) 25 | }, 26 | Error::Field { field, error } => { 27 | 
format_args!("Error parsing field {}: {}", field, error).fmt(f) 28 | }, 29 | } 30 | } 31 | } 32 | 33 | impl std::error::Error for Error { } 34 | -------------------------------------------------------------------------------- /crates/structre_proc_macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "structre_proc_macros" 3 | description = "Static-checked parsing of regexes into structs (helper crate)" 4 | version.workspace = true 5 | edition.workspace = true 6 | license.workspace = true 7 | repository.workspace = true 8 | readme.workspace = true 9 | 10 | [lib] 11 | proc-macro = true 12 | path = "mod.rs" 13 | 14 | [dependencies] 15 | flowcontrol = "0.2" 16 | litrs = "0.4" 17 | proc-macro2 = "1" 18 | quote = "1" 19 | regex-syntax = "0.8" 20 | syn = "2" 21 | 22 | [dev-dependencies] 23 | genemichaels-lib = "0.5" 24 | -------------------------------------------------------------------------------- /crates/structre_proc_macros/mod.rs: -------------------------------------------------------------------------------- 1 | use { 2 | flowcontrol::{ 3 | shed, 4 | superif, 5 | }, 6 | litrs::StringLit, 7 | proc_macro2::{ 8 | Span, 9 | TokenStream, 10 | }, 11 | quote::{ 12 | quote, 13 | ToTokens, 14 | }, 15 | regex_syntax::ast::{ 16 | Ast as ReAst, 17 | GroupKind, 18 | }, 19 | std::{ 20 | collections::HashMap, 21 | }, 22 | syn::{ 23 | self, 24 | parse_macro_input, 25 | spanned::Spanned, 26 | DataEnum, 27 | DataStruct, 28 | Field, 29 | Ident, 30 | Type, 31 | }, 32 | }; 33 | 34 | struct ReUnnamedCapture { 35 | optional: bool, 36 | } 37 | 38 | struct ReNamedCapture { 39 | index: usize, 40 | optional: bool, 41 | } 42 | 43 | #[derive(Default)] 44 | struct ReFlatData { 45 | unnamed_captures: HashMap, 46 | named_captures: HashMap, 47 | } 48 | 49 | fn flatten_re(out: &mut ReFlatData, re: &ReAst, optional_context: bool) { 50 | match re { 51 | ReAst::Flags(_) => (), 52 | ReAst::Dot(_) => (), 53 | ReAst::Assertion(_) => 
(), 54 | ReAst::Empty(_) => (), 55 | ReAst::Literal(_) => (), 56 | ReAst::ClassUnicode(_) => (), 57 | ReAst::ClassPerl(_) => (), 58 | ReAst::ClassBracketed(_) => (), 59 | ReAst::Repetition(e) => flatten_re(out, &e.ast, optional_context || match &e.op.kind { 60 | regex_syntax::ast::RepetitionKind::ZeroOrOne => true, 61 | regex_syntax::ast::RepetitionKind::ZeroOrMore => true, 62 | regex_syntax::ast::RepetitionKind::OneOrMore => false, 63 | regex_syntax::ast::RepetitionKind::Range(r) => match r { 64 | regex_syntax::ast::RepetitionRange::Exactly(x) => *x == 0, 65 | regex_syntax::ast::RepetitionRange::AtLeast(x) => *x == 0, 66 | regex_syntax::ast::RepetitionRange::Bounded(x, _) => *x == 0, 67 | }, 68 | }), 69 | ReAst::Group(g) => match &g.kind { 70 | regex_syntax::ast::GroupKind::CaptureIndex(index) => { 71 | out.unnamed_captures.insert(*index as usize, ReUnnamedCapture { optional: optional_context }); 72 | }, 73 | regex_syntax::ast::GroupKind::CaptureName { name, .. } => { 74 | out.named_captures.insert(name.name.clone(), ReNamedCapture { 75 | index: name.index as usize, 76 | optional: optional_context, 77 | }); 78 | }, 79 | regex_syntax::ast::GroupKind::NonCapturing(_) => flatten_re(out, &g.ast, optional_context), 80 | }, 81 | ReAst::Concat(c) => { 82 | for c in &c.asts { 83 | flatten_re(out, c, optional_context); 84 | } 85 | }, 86 | ReAst::Alternation(a) => { 87 | for child in &a.asts { 88 | flatten_re(out, child, true); 89 | } 90 | }, 91 | } 92 | } 93 | 94 | // Someone please save me 95 | #[derive(Clone)] 96 | enum SimpleSimpleTypeType { 97 | TryFrom, 98 | FromStr, 99 | } 100 | 101 | #[derive(Clone)] 102 | struct SimpleSimpleType { 103 | span: Span, 104 | type_: TokenStream, 105 | typetype: SimpleSimpleTypeType, 106 | } 107 | 108 | #[derive(Clone)] 109 | enum SimpleType { 110 | Simple(SimpleSimpleType), 111 | Option(SimpleSimpleType), 112 | Tuple(Vec), 113 | } 114 | 115 | fn simple_simple_type(ty: &Type) -> SimpleSimpleType { 116 | let ty_tokens = 
ty.to_token_stream(); 117 | let typetype; 118 | match ty_tokens.to_string().as_str() { 119 | "u8" | 120 | "u16" | 121 | "u32" | 122 | "u64" | 123 | "u128" | 124 | "usize" | 125 | "i8" | 126 | "i16" | 127 | "i32" | 128 | "i64" | 129 | "i128" | 130 | "isize" | 131 | "f8" | 132 | "f16" | 133 | "f32" | 134 | "f64" | 135 | "f128" | 136 | "bool" | 137 | "char" | 138 | "std::net::IpAddr" | 139 | "net::IpAddr" | 140 | "IpAddr" | 141 | "std::net::Ipv4Addr" | 142 | "net::Ipv4Addr" | 143 | "Ipv4Addr" | 144 | "std::net::Ipv6Addr" | 145 | "net::Ipv6Addr" | 146 | "Ipv6Addr" | 147 | "std::net::SocketAddr" | 148 | "net::SocketAddr" | 149 | "SocketAddr" | 150 | "std::net::SocketAddrV4" | 151 | "net::SocketAddrV4" | 152 | "SocketAddrV4" | 153 | "std::net::SocketAddrV6" | 154 | "net::SocketAddrV6" | 155 | "SocketAddrV6" | 156 | "std::ffi::OsString" | 157 | "ffi::OsString" | 158 | "OsString" | 159 | "std::num::NonZero" | 160 | "std::num::NonZero" | 161 | "std::num::NonZero" | 162 | "std::num::NonZero" | 163 | "std::num::NonZero" | 164 | "std::num::NonZero" | 165 | "std::num::NonZero" | 166 | "std::num::NonZero" | 167 | "std::num::NonZero" | 168 | "std::num::NonZero" | 169 | "std::num::NonZero" | 170 | "std::num::NonZero" | 171 | "num::NonZero" | 172 | "num::NonZero" | 173 | "num::NonZero" | 174 | "num::NonZero" | 175 | "num::NonZero" | 176 | "num::NonZero" | 177 | "num::NonZero" | 178 | "num::NonZero" | 179 | "num::NonZero" | 180 | "num::NonZero" | 181 | "num::NonZero" | 182 | "num::NonZero" | 183 | "NonZero" | 184 | "NonZero" | 185 | "NonZero" | 186 | "NonZero" | 187 | "NonZero" | 188 | "NonZero" | 189 | "NonZero" | 190 | "NonZero" | 191 | "NonZero" | 192 | "NonZero" | 193 | "NonZero" | 194 | "NonZero" => { 195 | typetype = SimpleSimpleTypeType::FromStr; 196 | }, 197 | _ => { 198 | typetype = SimpleSimpleTypeType::TryFrom; 199 | }, 200 | } 201 | return SimpleSimpleType { 202 | span: ty.span(), 203 | type_: ty_tokens, 204 | typetype: typetype, 205 | }; 206 | } 207 | 208 | /// Reduce a 
type to a handleable, semantically meaningful type (i.e. 1-tuples are 209 | /// unwrapped, parens unwrapped, etc). 210 | fn simple_type(ty: &Type) -> Result { 211 | match ty { 212 | Type::Path(p) => { 213 | let opt = shed!{ 214 | if p.qself.is_some() { 215 | break None; 216 | } 217 | if p.path.segments.len() != 1 { 218 | break None; 219 | }; 220 | let seg = p.path.segments.get(0).unwrap(); 221 | if seg.ident != "Option" { 222 | break None; 223 | }; 224 | let syn::PathArguments::AngleBracketed(args) = &seg.arguments else { 225 | unreachable!(); 226 | }; 227 | let syn::GenericArgument::Type(arg_type) = args.args.get(0).unwrap() else { 228 | unreachable!(); 229 | }; 230 | break Some(arg_type); 231 | }; 232 | if let Some(opt_inner) = opt { 233 | return Ok(SimpleType::Option(simple_simple_type(opt_inner))); 234 | } else { 235 | return Ok(SimpleType::Simple(simple_simple_type(ty))); 236 | } 237 | }, 238 | Type::Paren(t) => { 239 | return Ok(simple_type(&t.elem)?); 240 | }, 241 | Type::Group(t) => { 242 | return Ok(simple_type(&t.elem)?); 243 | }, 244 | Type::Tuple(t) => { 245 | let mut children = vec![]; 246 | for e in &t.elems { 247 | children.push(simple_type(e)?); 248 | } 249 | return Ok(SimpleType::Tuple(children)); 250 | }, 251 | Type::Reference(_) => { 252 | return Ok(SimpleType::Simple(simple_simple_type(ty))); 253 | }, 254 | _ => { }, 255 | } 256 | return Err(syn::Error::new(ty.span(), "This type does not support parsing from regex")); 257 | } 258 | 259 | fn gen_from_capture(str_lifetime: &TokenStream, path: &str, t: &SimpleSimpleType, cap: TokenStream) -> TokenStream { 260 | let p = &t.type_; 261 | match &t.typetype { 262 | SimpleSimpleTypeType::TryFrom => { 263 | return quote!( 264 | < #p as std:: convert:: TryFrom <& #str_lifetime str >>:: try_from( 265 | #cap 266 | ).map_err(| e | structre:: Error:: Field { 267 | field: #path, 268 | error: e.to_string() 269 | }) ? 
270 | ); 271 | }, 272 | SimpleSimpleTypeType::FromStr => { 273 | return quote!(< #p as std:: str:: FromStr >:: from_str(#cap).map_err(| e | structre:: Error:: Field { 274 | field: #path, 275 | error: e.to_string() 276 | }) ?); 277 | }, 278 | } 279 | } 280 | 281 | fn gen_named_fields( 282 | str_lifetime: &TokenStream, 283 | re_flat: &mut ReFlatData, 284 | next_unnamed_index: &mut usize, 285 | path: &str, 286 | fields: &mut dyn Iterator, 287 | ) -> Result { 288 | let mut field_tokens = vec![]; 289 | for field in fields { 290 | let name = field.ident.as_ref().unwrap(); 291 | let name_str = name.to_string(); 292 | let path = format!("{}.{}", path, name_str); 293 | match simple_type(&field.ty)? { 294 | SimpleType::Option(p) => { 295 | let Some(cap) = re_flat.named_captures.remove(&name.to_string()) else { 296 | return Err( 297 | syn::Error::new(p.span, format!("No named capture `{}` for field `{}`", name_str, path)), 298 | ); 299 | }; 300 | let cap_index = cap.index; 301 | if !cap.optional { 302 | return Err( 303 | syn::Error::new(p.span, "Field is optional but corresponding capture always matches"), 304 | ); 305 | } 306 | let from_cap = gen_from_capture(str_lifetime, &path, &p, quote!(m.as_str())); 307 | field_tokens.push(quote!(#name: match captures.get(#cap_index) { 308 | Some(m) => Some(#from_cap), 309 | None => None, 310 | })); 311 | }, 312 | SimpleType::Tuple(p) => { 313 | let parse_tuple = 314 | gen_unnamed_fields( 315 | str_lifetime, 316 | re_flat, 317 | next_unnamed_index, 318 | &path, 319 | &mut p.iter().map(|x| x).cloned(), 320 | )?; 321 | field_tokens.push(quote!(#name:(#parse_tuple))); 322 | }, 323 | SimpleType::Simple(p) => { 324 | let Some(cap) = re_flat.named_captures.remove(&name.to_string()) else { 325 | return Err( 326 | syn::Error::new(field.span(), format!("No named capture `{}` for field `{}`", name_str, path)), 327 | ); 328 | }; 329 | let cap_index = cap.index; 330 | if cap.optional { 331 | return Err( 332 | syn::Error::new( 333 | field.span(), 
334 | "Field is not optional but corresponding capture optionally matches", 335 | ), 336 | ); 337 | } 338 | let from_cap = 339 | gen_from_capture(str_lifetime, &path, &p, quote!(captures.get(#cap_index).unwrap().as_str())); 340 | field_tokens.push(quote!(#name: #from_cap)); 341 | }, 342 | } 343 | } 344 | return Ok(quote!(#(#field_tokens,) *)); 345 | } 346 | 347 | fn gen_unnamed_fields( 348 | str_lifetime: &TokenStream, 349 | re_flat: &mut ReFlatData, 350 | next_unnamed_index: &mut usize, 351 | path: &str, 352 | fields: &mut dyn Iterator, 353 | ) -> Result { 354 | let mut out = vec![]; 355 | for (field_index, ty) in fields.enumerate() { 356 | let path = format!("{}.{}", path, field_index); 357 | match ty { 358 | SimpleType::Option(p) => { 359 | let cap_index = *next_unnamed_index; 360 | *next_unnamed_index += 1; 361 | let Some(cap) = re_flat.unnamed_captures.remove(&cap_index) else { 362 | return Err(syn::Error::new(p.span, format!("Missing unnamed capture for `{}`", path))); 363 | }; 364 | if !cap.optional { 365 | return Err( 366 | syn::Error::new(p.span, "Field is optional but corresponding capture always matches"), 367 | ); 368 | } 369 | let from_cap = gen_from_capture(str_lifetime, &path, &p, quote!(m.as_str())); 370 | out.push(quote!(match captures.get(#cap_index) { 371 | Some(m) => { 372 | Some(#from_cap) 373 | }, 374 | None => None, 375 | })); 376 | }, 377 | SimpleType::Tuple(p) => { 378 | let child = 379 | gen_unnamed_fields(str_lifetime, re_flat, next_unnamed_index, &path, &mut p.iter().cloned())?; 380 | out.push(quote!((#child))); 381 | }, 382 | SimpleType::Simple(p) => { 383 | let cap_index = *next_unnamed_index; 384 | *next_unnamed_index += 1; 385 | let Some(cap) = re_flat.unnamed_captures.remove(&cap_index) else { 386 | return Err(syn::Error::new(p.span, format!("Missing unnamed capture for `{}`", path))); 387 | }; 388 | if cap.optional { 389 | return Err( 390 | syn::Error::new(p.span, "Field is not optional but corresponding capture optionally 
matches"), 391 | ); 392 | } 393 | out.push( 394 | gen_from_capture(str_lifetime, &path, &p, quote!(captures.get(#cap_index).unwrap().as_str())), 395 | ); 396 | }, 397 | } 398 | } 399 | return Ok(quote!(#(#out,) *)); 400 | } 401 | 402 | fn gen_struct( 403 | str_lifetime: &TokenStream, 404 | re: &ReAst, 405 | ident: &Ident, 406 | struct_: &DataStruct, 407 | ) -> Result { 408 | let mut re_flat = ReFlatData::default(); 409 | flatten_re(&mut re_flat, re, false); 410 | let mut next_unnamed_index = 1; 411 | let path = ident.to_string(); 412 | let out = match &struct_.fields { 413 | syn::Fields::Named(n) => { 414 | let fields = 415 | gen_named_fields(str_lifetime, &mut re_flat, &mut next_unnamed_index, &path, &mut n.named.iter())?; 416 | quote!({ 417 | #fields 418 | }) 419 | }, 420 | syn::Fields::Unnamed(u) => { 421 | if !re_flat.named_captures.is_empty() { 422 | return Err( 423 | syn::Error::new( 424 | struct_.struct_token.span, 425 | "Tuples must have only unnamed captures, but named captures are present in the regex", 426 | ), 427 | ); 428 | } 429 | let field_tokens = gen_unnamed_fields(str_lifetime, &mut re_flat, &mut next_unnamed_index, &path, &mut { 430 | let mut out = vec![]; 431 | for n in &u.unnamed { 432 | out.push(simple_type(&n.ty)?); 433 | } 434 | out 435 | }.into_iter())?; 436 | quote!((#field_tokens)) 437 | }, 438 | syn::Fields::Unit => { 439 | quote!() 440 | }, 441 | }; 442 | if !re_flat.named_captures.is_empty() { 443 | return Err( 444 | syn::Error::new( 445 | struct_.struct_token.span, 446 | format!("Named captures never used: {:?}", re_flat.named_captures.keys()), 447 | ), 448 | ); 449 | } 450 | if !re_flat.unnamed_captures.is_empty() { 451 | return Err( 452 | syn::Error::new( 453 | struct_.struct_token.span, 454 | format!("Unnamed captures never used: {} remain", re_flat.unnamed_captures.len()), 455 | ), 456 | ); 457 | } 458 | return Ok(quote!(return Ok(#ident #out);)); 459 | } 460 | 461 | fn gen_enum( 462 | str_lifetime: &TokenStream, 463 | re: &ReAst, 
464 | ident: &Ident, 465 | enum_: &DataEnum, 466 | ) -> Result { 467 | // Find the topmost re alternatives (variants) 468 | fn find_re_variants<'a>(re: &'a ReAst) -> Result>, String> { 469 | match re { 470 | ReAst::Repetition(x) => { 471 | if find_re_variants(&x.ast)?.is_some() { 472 | return Err(format!("Reptition with regex alternates isn't supported: `{}`", re)); 473 | } else { 474 | return Ok(None); 475 | } 476 | }, 477 | ReAst::Group(x) => { 478 | match x.kind { 479 | GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => { 480 | return Err( 481 | format!( 482 | "All capturing groups must occur within alternates in an enum, but found a capturing group above an enum: `{}`", 483 | re 484 | ), 485 | ); 486 | }, 487 | GroupKind::NonCapturing(_) => { 488 | return find_re_variants(&x.ast); 489 | }, 490 | } 491 | }, 492 | ReAst::Concat(x) => { 493 | let mut found = None; 494 | for x in &x.asts { 495 | if let Some(new_found) = find_re_variants(x)? { 496 | if found.is_some() { 497 | return Err( 498 | format!( 499 | "Enums support only a single alternation in the corresponding regex but found multiple parallel alternates in regex: second = `{}`", 500 | re 501 | ), 502 | ); 503 | } else { 504 | found = Some(new_found); 505 | } 506 | } 507 | } 508 | return Ok(found); 509 | }, 510 | ReAst::Alternation(x) => { 511 | return Ok(Some(&x.asts)); 512 | }, 513 | ReAst::Empty(_) => return Ok(None), 514 | ReAst::Flags(_) => return Ok(None), 515 | ReAst::Literal(_) => return Ok(None), 516 | ReAst::Dot(_) => return Ok(None), 517 | ReAst::Assertion(_) => return Ok(None), 518 | ReAst::ClassUnicode(_) => Ok(None), 519 | ReAst::ClassPerl(_) => Ok(None), 520 | ReAst::ClassBracketed(_) => Ok(None), 521 | } 522 | } 523 | 524 | let Some(re_variants) = find_re_variants(re).map_err(|e| syn::Error::new(enum_.enum_token.span, e))? 
else { 525 | return Err(syn::Error::new(enum_.enum_token.span, "Regex doesn't contain any alternates (|)")); 526 | }; 527 | if re_variants.len() != enum_.variants.len() { 528 | return Err( 529 | syn::Error::new( 530 | enum_.enum_token.span, 531 | format!( 532 | "Regex alternate count and enum variant counts don't match: found {} regex alternates but {} enum variants", 533 | re_variants.len(), 534 | enum_.variants.len() 535 | ), 536 | ), 537 | ); 538 | } 539 | 540 | // Generate each variant code. 541 | let mut code_variants = vec![]; 542 | for re_variant in re_variants { 543 | let mut re_flat = ReFlatData::default(); 544 | flatten_re(&mut re_flat, re_variant, false); 545 | 546 | // Identify a key field (a named capture that will always be present) in each 547 | // variant to identify it. 548 | let (key, key_index) = shed!{ 549 | 'found_key _; 550 | for (k, v) in &re_flat.named_captures { 551 | if !v.optional { 552 | break 'found_key (k.clone(), v.index); 553 | } 554 | } 555 | return Err( 556 | syn::Error::new( 557 | enum_.enum_token.span, 558 | format!( 559 | "Regex alternatives must have at least one non-optional named capture to use as a key when parsing; this alternative has none: {}", 560 | re_variant 561 | ), 562 | ), 563 | ); 564 | }; 565 | 566 | // Find the corresponding enum variant and generate the field parser code 567 | let mut next_unnamed_index = 1; 568 | let parse_variant; 569 | shed!{ 570 | 'matched_enum_variant _; 571 | for enum_variant in &enum_.variants { 572 | let variant_ident = &enum_variant.ident; 573 | match &enum_variant.fields { 574 | syn::Fields::Named(fields) => { 575 | superif!({ 576 | for f in &fields.named { 577 | if f.ident.as_ref().unwrap().to_string() == key && 578 | // A tuple is actually multiple unnamed fields - the parent field name is unused 579 | !matches!(f.ty, Type::Tuple(_)) { 580 | break 'matched_named; 581 | } 582 | } 583 | } 'matched_named { 584 | let parse_fields = 585 | gen_named_fields( 586 | str_lifetime, 587 | &mut 
re_flat, 588 | &mut next_unnamed_index, 589 | &format!("{}::{}", ident, variant_ident), 590 | &mut fields.named.iter(), 591 | )?; 592 | parse_variant = quote!(#ident:: #variant_ident { 593 | #parse_fields 594 | }); 595 | break 'matched_enum_variant; 596 | }); 597 | }, 598 | syn::Fields::Unnamed(fields) => { 599 | superif!({ 600 | if fields.unnamed.len() == 1 { 601 | let root = simple_type(&fields.unnamed.get(0).unwrap().ty)?; 602 | let mut at = &root; 603 | loop { 604 | match at { 605 | SimpleType::Option(_) => break, 606 | SimpleType::Tuple(t) => { 607 | if t.len() == 1 { 608 | at = t.get(0).unwrap(); 609 | } else { 610 | break; 611 | } 612 | }, 613 | SimpleType::Simple(_) => { 614 | if key == variant_ident.to_string() { 615 | break 'match_unnamed; 616 | } else { 617 | break; 618 | } 619 | }, 620 | } 621 | } 622 | } else { 623 | return Err( 624 | syn::Error::new( 625 | enum_variant.span(), 626 | "Multi-field unnamed variants have no fields that can be used to discriminate when parsing", 627 | ), 628 | ); 629 | } 630 | } 'match_unnamed { 631 | let cap = re_flat.named_captures.remove(&key).unwrap(); 632 | let cap_index = cap.index; 633 | let field_ty = &fields.unnamed.get(0).as_ref().unwrap().ty; 634 | let path = format!("{}::{}", ident, variant_ident); 635 | parse_variant = 636 | quote!( 637 | #ident:: #variant_ident( 638 | < #field_ty as std:: convert:: TryFrom <& #str_lifetime str >>:: try_from( 639 | captures.get(#cap_index).unwrap().as_str() 640 | ).map_err(| e | structre:: Error:: Field { 641 | field: #path, 642 | error: e.to_string() 643 | }) ? 
644 | ) 645 | ); 646 | break 'matched_enum_variant; 647 | }); 648 | }, 649 | syn::Fields::Unit => return Err( 650 | syn::Error::new( 651 | enum_variant.span(), 652 | "Unit variants have no fields that can be used to discriminate when parsing", 653 | ), 654 | ), 655 | }; 656 | } 657 | return Err( 658 | syn::Error::new( 659 | enum_.enum_token.span, 660 | format!("No enum variant found matching key field [{}] in regex alternative: {}", key, re_variant), 661 | ), 662 | ); 663 | }; 664 | 665 | // Assemble code 666 | if !re_flat.named_captures.is_empty() { 667 | return Err( 668 | syn::Error::new( 669 | enum_.enum_token.span, 670 | format!("Named captures never used: {:?}", re_flat.named_captures.keys()), 671 | ), 672 | ); 673 | } 674 | if !re_flat.unnamed_captures.is_empty() { 675 | return Err( 676 | syn::Error::new( 677 | enum_.enum_token.span, 678 | format!("Unnamed captures never used: {} remain", re_flat.unnamed_captures.len()), 679 | ), 680 | ); 681 | } 682 | code_variants.push(quote!{ 683 | if captures.get(#key_index).is_some() { 684 | return Ok(#parse_variant); 685 | } 686 | }); 687 | } 688 | 689 | // Generate 690 | return Ok(quote!{ 691 | #(#code_variants) * unreachable !(); 692 | }); 693 | } 694 | 695 | fn parse_re(regex_raw: &str) -> Result { 696 | return regex_syntax::ast::parse::Parser::new().parse(regex_raw); 697 | } 698 | 699 | fn gen_root(regex_span: Span, regex_raw: &str, ast: syn::DeriveInput) -> Result { 700 | let re = parse_re(regex_raw).map_err(|e| syn::Error::new(regex_span, e.to_string()))?; 701 | let type_generics; 702 | if ast.generics.lt_token.is_some() { 703 | type_generics = ast.generics.to_token_stream(); 704 | } else { 705 | type_generics = quote!(); 706 | }; 707 | let no_lifetime; 708 | let str_lifetime; 709 | let impl_generics; 710 | superif!({ 711 | if !ast.generics.lt_token.is_some() { 712 | break 'no_lifetime; 713 | }; 714 | let Some(l) = ast.generics.lifetimes().next() else { 715 | break 'no_lifetime; 716 | }; 717 | no_lifetime = false; 
718 | str_lifetime = l.to_token_stream(); 719 | impl_generics = ast.generics.to_token_stream(); 720 | } 'no_lifetime { 721 | no_lifetime = true; 722 | str_lifetime = quote!('structre); 723 | impl_generics = quote!(< #str_lifetime >); 724 | }); 725 | let root; 726 | match &ast.data { 727 | syn::Data::Struct(d) => { 728 | root = gen_struct(&str_lifetime, &re, &ast.ident, d)?; 729 | }, 730 | syn::Data::Enum(d) => { 731 | root = gen_enum(&str_lifetime, &re, &ast.ident, d)?; 732 | }, 733 | syn::Data::Union(_) => return Err(syn::Error::new(ast.span(), "Union not supported")), 734 | }; 735 | let name = &ast.ident; 736 | let mut out = vec![ast.to_token_stream()]; 737 | out.push(quote!{ 738 | impl #impl_generics std:: convert:: TryFrom <& #str_lifetime str > for #name #type_generics { 739 | type Error = structre::Error; 740 | fn try_from(input:& #str_lifetime str) -> Result < Self, 741 | Self:: Error > { 742 | static RE: std::sync::OnceLock = std::sync::OnceLock::new(); 743 | let captures = RE.get_or_init( 744 | || structre:: regex:: Regex:: new(#regex_raw).unwrap() 745 | ).captures(input).ok_or(structre::Error::NoMatch) ?; 746 | #root 747 | } 748 | } 749 | }); 750 | if no_lifetime { 751 | out.push(quote!{ 752 | impl #impl_generics std:: str:: FromStr for #name #type_generics { 753 | type Err = structre::Error; 754 | 755 | fn from_str(input: &str) -> Result { 756 | return Self::try_from(input); 757 | } 758 | } 759 | }) 760 | } 761 | return Ok(TokenStream::from_iter(out)); 762 | } 763 | 764 | #[proc_macro_attribute] 765 | pub fn structre(args: proc_macro::TokenStream, body: proc_macro::TokenStream) -> proc_macro::TokenStream { 766 | // (Outside body because this macro actually generates compile_error `return`s and 767 | // needs to be in a function returning `proc_macro::TokenStream` directly) 768 | let ast = parse_macro_input!(body as syn::DeriveInput); 769 | match move || -> Result { 770 | let mut args = proc_macro2::TokenStream::from(args).into_iter(); 771 | let 
Some(first_arg) = args.next() else { 772 | panic!("structre() proc macro call missing regex argument!"); 773 | }; 774 | let regex_span = first_arg.span(); 775 | let regex_raw = match first_arg { 776 | proc_macro2::TokenTree::Literal(l) => match StringLit::try_from(&l) { 777 | Ok(l) => l.value().to_string(), 778 | Err(_) => panic!("First arg must be literal string, got {}", l), 779 | }, 780 | t => panic!("First arg must be literal, got {}", t), 781 | }; 782 | if let Some(next_arg) = args.next() { 783 | return Err(syn::Error::new(next_arg.span(), "Only takes one arg, got more than one")); 784 | } 785 | return Ok(gen_root(regex_span, &regex_raw, ast)?); 786 | }() { 787 | Ok(t) => { 788 | return t.into(); 789 | }, 790 | Err(e) => { 791 | return e.into_compile_error().into(); 792 | }, 793 | } 794 | } 795 | 796 | #[cfg(test)] 797 | mod tests { 798 | use { 799 | crate::{ 800 | gen_enum, 801 | gen_struct, 802 | parse_re, 803 | }, 804 | genemichaels_lib::FormatConfig, 805 | proc_macro2::TokenStream, 806 | quote::{ 807 | format_ident, 808 | quote, 809 | }, 810 | }; 811 | 812 | fn comp(got: TokenStream, expected: TokenStream) { 813 | let cfg = FormatConfig::default(); 814 | let try_format = |t: TokenStream| -> String { 815 | match genemichaels_lib::format_str(&quote!(fn x() { 816 | #t 817 | }).to_string(), &cfg) { 818 | Ok(s) => return s.rendered, 819 | Err(_) => return t.to_string(), 820 | } 821 | }; 822 | let got = try_format(got); 823 | let expected = try_format(expected); 824 | assert_eq!(got, expected, "Mismatch:\n\nGot:\n{}\n\nExpected:\n{}", got, expected); 825 | } 826 | 827 | fn comp_struct(re: &str, ident: &str, rust: TokenStream, expected: TokenStream) { 828 | comp( 829 | gen_struct( 830 | &quote!('zzz), 831 | &parse_re(re).unwrap(), 832 | &format_ident!("{}", ident), 833 | &match syn::parse2::<syn::DeriveInput>(rust).unwrap().data { 834 | syn::Data::Struct(d) => d, 835 | _ => unreachable!(), 836 | }, 837 | ).unwrap(), 838 | expected, 839 | ); 840 | } 841 | 842 | fn comp_enum(re: &str, ident:
&str, rust: TokenStream, expected: TokenStream) { 843 | comp( 844 | gen_enum( 845 | &quote!('zzz), 846 | &parse_re(re).unwrap(), 847 | &format_ident!("{}", ident), 848 | &match syn::parse2::<syn::DeriveInput>(rust).unwrap().data { 849 | syn::Data::Enum(d) => d, 850 | _ => unreachable!(), 851 | }, 852 | ).unwrap(), 853 | expected, 854 | ); 855 | } 856 | 857 | #[test] 858 | fn test_struct_unit() { 859 | comp_struct( 860 | //. . 861 | "a", 862 | "Parsed", 863 | quote!{ 864 | struct Parsed; 865 | }, 866 | quote!{ 867 | return Ok(Parsed); 868 | }, 869 | ); 870 | } 871 | 872 | #[test] 873 | fn test_struct_unnamed_1() { 874 | comp_struct( 875 | //. . 876 | "(a)", 877 | "Parsed", 878 | quote!{ 879 | struct Parsed(String); 880 | }, 881 | quote!{ 882 | return Ok( 883 | Parsed( 884 | <String as std::convert::TryFrom<&'zzz str>>::try_from( 885 | captures.get(1usize).unwrap().as_str(), 886 | ).map_err(|e| structre::Error::Field { 887 | field: "Parsed.0", 888 | error: e.to_string(), 889 | })?, 890 | ), 891 | ); 892 | }, 893 | ); 894 | } 895 | 896 | #[test] 897 | fn test_struct_unnamed_2() { 898 | comp_struct( 899 | //. . 900 | "(a)(b)", 901 | "Parsed", 902 | quote!( 903 | struct Parsed(String, usize); 904 | ), 905 | quote!{ 906 | return Ok( 907 | Parsed( 908 | <String as std::convert::TryFrom<&'zzz str>>::try_from( 909 | captures.get(1usize).unwrap().as_str(), 910 | ).map_err(|e| structre::Error::Field { 911 | field: "Parsed.0", 912 | error: e.to_string(), 913 | })?, 914 | <usize as std::str::FromStr>::from_str( 915 | captures.get(2usize).unwrap().as_str(), 916 | ).map_err(|e| structre::Error::Field { 917 | field: "Parsed.1", 918 | error: e.to_string(), 919 | })?, 920 | ), 921 | ); 922 | }, 923 | ); 924 | } 925 | 926 | #[test] 927 | fn test_struct_unnamed_tuple() { 928 | comp_struct( 929 | //. .
930 | "(a)(b)", 931 | "Parsed", 932 | quote!( 933 | struct Parsed((String, usize)); 934 | ), 935 | quote!{ 936 | return Ok( 937 | Parsed( 938 | ( 939 | <String as std::convert::TryFrom<&'zzz str>>::try_from( 940 | captures.get(1usize).unwrap().as_str(), 941 | ).map_err(|e| structre::Error::Field { 942 | field: "Parsed.0.0", 943 | error: e.to_string(), 944 | })?, 945 | <usize as std::str::FromStr>::from_str( 946 | captures.get(2usize).unwrap().as_str(), 947 | ).map_err(|e| structre::Error::Field { 948 | field: "Parsed.0.1", 949 | error: e.to_string(), 950 | })?, 951 | ), 952 | ), 953 | ); 954 | }, 955 | ); 956 | } 957 | 958 | #[test] 959 | fn test_struct_named() { 960 | comp_struct( 961 | //. . 962 | "(?P<a>a)(?P<b>b)", 963 | "Parsed", 964 | quote!( 965 | struct Parsed { 966 | b: usize, 967 | a: String, 968 | } 969 | ), 970 | quote!{ 971 | return Ok(Parsed { 972 | b: <usize as std::str::FromStr>::from_str( 973 | captures.get(2usize).unwrap().as_str(), 974 | ).map_err(|e| structre::Error::Field { 975 | field: "Parsed.b", 976 | error: e.to_string(), 977 | })?, 978 | a: <String as std::convert::TryFrom<&'zzz str>>::try_from( 979 | captures.get(1usize).unwrap().as_str(), 980 | ).map_err(|e| structre::Error::Field { 981 | field: "Parsed.a", 982 | error: e.to_string(), 983 | })?, 984 | }); 985 | }, 986 | ); 987 | } 988 | 989 | #[test] 990 | fn test_enum() { 991 | comp_enum( 992 | //. .
993 | "(?P<A>a)|(?P<b>b)", 994 | "Parsed", 995 | quote!( 996 | enum Parsed { 997 | A(String), 998 | B { 999 | b: String, 1000 | }, 1001 | } 1002 | ), 1003 | quote!{ 1004 | if captures.get(1usize).is_some() { 1005 | return Ok( 1006 | Parsed::A( 1007 | <String as std::convert::TryFrom<&'zzz str>>::try_from( 1008 | captures.get(1usize).unwrap().as_str(), 1009 | ).map_err(|e| structre::Error::Field { 1010 | field: "Parsed::A", 1011 | error: e.to_string(), 1012 | })?, 1013 | ), 1014 | ); 1015 | } 1016 | if captures.get(2usize).is_some() { 1017 | return Ok( 1018 | Parsed::B { 1019 | b: <String as std::convert::TryFrom<&'zzz str>>::try_from( 1020 | captures.get(2usize).unwrap().as_str(), 1021 | ).map_err(|e| structre::Error::Field { 1022 | field: "Parsed::B.b", 1023 | error: e.to_string(), 1024 | })?, 1025 | }, 1026 | ); 1027 | } 1028 | unreachable!(); 1029 | }, 1030 | ); 1031 | } 1032 | 1033 | #[test] 1034 | fn test_borrowed() { 1035 | comp_struct( 1036 | //. . 1037 | "(?<x>.*)", 1038 | "Parsed", 1039 | quote!( 1040 | struct Parsed<'a> { 1041 | x: &'a str, 1042 | } 1043 | ), 1044 | quote!{ 1045 | return Ok( 1046 | Parsed { 1047 | x: <&'a str as std::convert::TryFrom<&'zzz str>>::try_from( 1048 | captures.get(1usize).unwrap().as_str(), 1049 | ).map_err(|e| structre::Error::Field { 1050 | field: "Parsed.x", 1051 | error: e.to_string(), 1052 | })?, 1053 | }, 1054 | ); 1055 | }, 1056 | ) 1057 | } 1058 | } 1059 | -------------------------------------------------------------------------------- /crates/structre_tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "structre_tests" 3 | publish = false 4 | 5 | [dependencies] 6 | structre = { path = "../structre" } 7 | -------------------------------------------------------------------------------- /crates/structre_tests/tests/test.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | extern crate structre; 4 | 5 | use { 6 | std::convert::TryFrom, 7 | std::str::FromStr, 8 | structre::structre, 9 |
}; 10 | 11 | /* Two unnamed captures fill the tuple struct fields in order; the second field is a u32 and is checked against 44. */ #[test] 12 | fn match_() { 13 | #[structre("(a)(44)")] 14 | struct Parsed(String, u32); 15 | 16 | let v = Parsed::from_str("a44").unwrap(); 17 | assert_eq!(v.0, "a"); 18 | assert_eq!(v.1, 44); 19 | } 20 | 21 | /* Struct with named fields a and b filled from a two-capture pattern. NOTE(review): the capture names are not legible in this listing; presumably a and b - confirm. */ #[test] 22 | fn named() { 23 | #[structre("(?Pa)(?P44)")] 24 | struct Parsed { 25 | a: String, 26 | b: u32, 27 | } 28 | 29 | let v = Parsed::from_str("a44").unwrap(); 30 | assert_eq!(v.a, "a"); 31 | assert_eq!(v.b, 44); 32 | } 33 | 34 | /* A capture nested inside a non-capturing (?: ) group still fills tuple field 0. */ #[test] 35 | fn uncapture() { 36 | #[structre("(?:(a))")] 37 | struct Parsed(String); 38 | 39 | let v = Parsed::from_str("a").unwrap(); 40 | assert_eq!(v.0, "a"); 41 | } 42 | 43 | /* Same nesting inside a non-capturing group, this time filling a named field a. */ #[test] 44 | fn uncapture_named() { 45 | #[structre("(?:(?Pa))")] 46 | struct Parsed { 47 | a: String, 48 | } 49 | 50 | let v = Parsed::from_str("a").unwrap(); 51 | assert_eq!(v.a, "a"); 52 | } 53 | 54 | /* Capture group followed by ? feeding an Option field: parsing the input a is expected to give Some(a). */ #[test] 55 | fn test_struct_opt() { 56 | #[structre("(?Pa)?")] 57 | struct Parsed { 58 | a: Option, 59 | } 60 | 61 | let v = Parsed::from_str("a").unwrap(); 62 | assert_eq!(v.a.as_ref().map(|x| x.as_str()), Some("a")); 63 | } 64 | 65 | /* Two-alternative pattern parsed into an enum: the input a must produce the tuple variant A. */ #[test] 66 | fn test_enum() { 67 | #[structre("(?Pa)|(?Pb)")] 68 | #[derive(PartialEq, Eq, Debug)] 69 | enum Parsed { 70 | A(String), 71 | B { 72 | b: String, 73 | }, 74 | } 75 | 76 | let v = Parsed::from_str("a").unwrap(); 77 | assert_eq!(v, Parsed::A("a".to_string())); 78 | } 79 | 80 | /* Struct holding a str reference field with a lifetime parameter; it is built with try_from rather than from_str and the field is checked against the whole input. */ #[test] 81 | fn test_borrowed() { 82 | #[structre("(?.*)")] 83 | struct Parsed<'a> { 84 | x: &'a str, 85 | } 86 | 87 | let v = Parsed::try_from("abcd").unwrap(); 88 | assert_eq!(v.x, "abcd"); 89 | } 90 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | ISC License 2 | 3 | Copyright (c) 2024 Andrew Baxter 4 | 5 | Permission to use, copy, modify, and/or distribute this software for any 6 | purpose with or without fee is hereby granted, provided that the above 7 | copyright notice and this
permission notice appear in all copies. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 10 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 11 | AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 12 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 13 | LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 14 | OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 15 | PERFORMANCE OF THIS SOFTWARE. 16 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
crates.iodocs.rs
5 | 6 | Statically-checked regex parsing into structs/enums. 7 | 8 | This avoids common regex pitfalls like 9 | 10 | - Off-by-one capture indexes 11 | - Trying to get nonexistent captures 12 | - Desync of capture names in regex and the names used to fetch fields 13 | 14 | Note: This isn't like serde in that it doesn't work on arbitrary structs/enums. The struct/enum definition must be written to match the regex. 15 | 16 | # Installation 17 | 18 | ```sh 19 | cargo add structre 20 | ``` 21 | 22 | # Use 23 | 24 | Define a structure and use this macro to implement `TryFrom<&str>` (and `FromStr` if the type has no lifetimes): 25 | 26 | ``` 27 | #[structre("(?P<key>[^:]+): (?P<value>\\d+)")] 28 | struct KV { 29 | key: String, 30 | value: usize, 31 | } 32 | ``` 33 | 34 | ``` 35 | let m = KV::try_from("hi: 39393")?; 36 | ``` 37 | 38 | Both `try_from` and `from_str` return a result with error type `structre::Error`. The `structre::Error::Field` result only occurs if a field's `try_from` or `from_str` method fails - if all of your fields are strings, you can only get `structre::Error::NoMatch`. 39 | 40 | # Expressing regexes with types 41 | 42 | - Alternate (`|`) captures can be parsed as enums 43 | 44 | All variants must either 45 | 46 | - Have at least one non-optional uniquely-named field where the name matches a named capture: 47 | 48 | Ex: 49 | 50 | ```rust 51 | #[structre("(?<a_field>.*)|(?<b_field>.*)")] 52 | enum AOrB { 53 | A { 54 | a_field: String, 55 | }, 56 | B { 57 | b_field: String, 58 | } 59 | } 60 | ``` 61 | 62 | The enum variant is determined by the presence of either the `a_field` capture or `b_field` capture. 63 | 64 | - Be a 1-tuple with a non-optional type. In this case, the variant name must match a named capture: 65 | 66 | Ex: 67 | 68 | ```rust 69 | #[structre("(?<A>
.*)|(?<B>.*)")] 70 | enum AOrB { 71 | A(String), 72 | B(String), 73 | } 74 | ``` 75 | 76 | The enum variant is determined by the presence of either the `A` capture or `B` capture. 77 | 78 | - Non-alternate captures: 79 | 80 | All captures must correspond to a field. Named captures correspond to named fields, unnamed captures correspond to unnamed fields (ex: tuple elements). Repetitions, `?`, and `|` will make a capture optional so the corresponding field must also be optional. 81 | 82 | The following types are supported for fields: 83 | 84 | - Simple types: any type implementing `std::convert::TryFrom<&str>` 85 | 86 | This includes `&str` if you want non-allocating parsing. 87 | 88 | - Simple types: Standard library/core types that implement `std::str::FromStr` 89 | 90 | The standard library doesn't implement `TryFrom<&str>` for any core types currently so an internal database is used to (roughly) identify that a field has a core type and switch to `FromStr` for it. 91 | 92 | - Options with a simple type inside 93 | 94 | - Tuples with either options (as above) or simple types inside 95 | 96 | See the [tests](./crates/structre_tests/tests/test.rs) for some simple examples. 97 | 98 | # Limitations 99 | 100 | I was hoping to be able to ensure that the regex has valid characters for numbers, but due to the above and the difficulty of reasoning about the contents of regex ASTs I had to scrap that. 101 | 102 | Non-unicode parsing isn't currently supported. One issue is I couldn't find an ascii float parsing library. If this is important and you have a vision of how it could work please raise an issue! 103 | 104 | The regex is lazily compiled and stored statically. Originally I made the regex compilation manual and explicit, but this made the ergonomics much worse (managing parsers) and prevented things like implementing `FromStr`. In `0.1.0` I changed it to statically instantiate the regex. 
I'd be open to making this configurable in the future, either having an option to manually manage the compiled regex or else compiling on every parse for rarely used regexes. 105 | 106 | ~~String references and other reference types~~ Reference types are now supported via `TryFrom<&T>` starting in `0.2.0`! There was a large discussion 107 | at . In the end I decided to go with basing all use around `TryFrom<&str>` instead of `FromStr` with special cases: 108 | 109 | - Both approaches have special cases: The former has a database of standard library/core types that don't support `TryFrom<&str>` to switch to `FromStr`, the latter has carveouts for `&str` and possibly other types (`Cow`?) 110 | - I think the code for identifying special cases in the latter is more difficult; in the former, all the types are non-generic non-reference types, most without `::` paths 111 | - Hopefully `TryFrom<&str>` support will grow, and at some point the carveouts won't be needed - it seems to be the future-facing choice 112 | - `TryFrom<&str>` should allow users to wrap more types than `FromStr`, without needing annotations to explicitly switch the parsing method/trait, so it works better as an interop trait 113 | --------------------------------------------------------------------------------