├── .github ├── FUNDING.yml └── workflows │ └── dotnet.yml ├── .gitignore ├── LICENSE ├── README.md ├── RegExtract.Test ├── RegExtract.Test.csproj └── Usage.cs ├── RegExtract.sln ├── RegExtract ├── ExtractionPlan.cs ├── ExtractionPlanNode.cs ├── ExtractionPlanNodeTypes.cs ├── ExtractionPlanTypeWrapper.cs ├── RegExtract.csproj ├── RegExtractExtensions.cs └── RegexCaptureGroupTree.cs ├── doc ├── linqpad-samples │ ├── Diagnostic tools.linq │ ├── FileOrder.txt │ ├── Nested types.linq │ ├── Quick start.linq │ └── Using record types.linq └── spec.md ├── images ├── icon.cs └── icon.png └── tools └── ExtractionPlanInspector.linq /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: sblom 2 | -------------------------------------------------------------------------------- /.github/workflows/dotnet.yml: -------------------------------------------------------------------------------- 1 | name: dotnet 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | tags: ["*"] 7 | pull_request: 8 | branches: [ main ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | with: 16 | fetch-depth: 0 17 | - uses: actions/setup-dotnet@v1 18 | with: 19 | dotnet-version: '8.0.x' 20 | - uses: actions/setup-dotnet@v1 21 | with: 22 | dotnet-version: '3.1.x' 23 | - uses: gittools/actions/gitversion/setup@v0.9.7 24 | with: 25 | versionSpec: "5.x" 26 | - id: gitversion 27 | uses: gittools/actions/gitversion/execute@v0.9.7 28 | - name: Restore dependencies 29 | run: dotnet restore 30 | - name: Build 31 | run: dotnet build --configuration Release --no-restore 32 | - name: Test 33 | run: dotnet test --configuration Release --no-build --verbosity normal 34 | - run: | 35 | dotnet pack \ 36 | --include-source \ 37 | --include-symbols \ 38 | --configuration Release \ 39 | --no-build \ 40 | --no-restore \ 41 | -p:PackageVersion="${{ env.GitVersion_FullSemVer }}" \ 42 | RegExtract/RegExtract.csproj \ 43 | 
--output ${{ github.workspace }}/nugets/ 44 | - uses: actions/upload-artifact@v2 45 | with: 46 | name: nugets 47 | path: nugets 48 | 49 | nuget-push-dev: 50 | runs-on: ubuntu-latest 51 | if: github.ref == 'refs/heads/main' 52 | needs: build 53 | 54 | steps: 55 | - name: download artifact 56 | uses: actions/download-artifact@v2 57 | with: 58 | name: nugets 59 | 60 | - name: setup dotnet 61 | uses: actions/setup-dotnet@v1 62 | with: 63 | dotnet-version: '8.0.x' 64 | source-url: https://nuget.pkg.github.com/sblom/index.json 65 | env: 66 | NUGET_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 67 | 68 | - name: nuget push 69 | run: dotnet nuget push *.nupkg *.snupkg --skip-duplicate --api-key ${{ secrets.GITHUB_TOKEN }} 70 | 71 | nuget-push-prod: 72 | runs-on: ubuntu-latest 73 | if: startsWith(github.ref, 'refs/tags/') 74 | needs: build 75 | 76 | steps: 77 | - uses: actions/download-artifact@v2 78 | with: 79 | name: nugets 80 | 81 | - uses: actions/setup-dotnet@v1 82 | with: 83 | dotnet-version: '8.0.x' 84 | source-url: https://api.nuget.org/v3/index.json 85 | env: 86 | NUGET_AUTH_TOKEN: ${{ secrets.NUGET_API_KEY }} 87 | 88 | - run: dotnet nuget push *.nupkg *.snupkg --skip-duplicate --api-key ${{ secrets.NUGET_API_KEY }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 
2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 
192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. 
Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger 
configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT 2 | 3 | Copyright (c) Scott Blomquist 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RegExtract 2 | Quick and dirty idiomatic C# line parser that extracts text into practical data types. 3 | 4 | [![dotnet](https://github.com/sblom/RegExtract/workflows/dotnet/badge.svg)](https://github.com/sblom/RegExtract/actions) 5 | [![NuGet](https://img.shields.io/nuget/v/RegExtract.svg)](https://www.nuget.org/packages/RegExtract/) 6 | [![Downloads](https://img.shields.io/nuget/dt/RegExtract.svg)](https://www.nuget.org/packages/RegExtract/) 7 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 8 | 9 | ## Table of Contents 10 | - [RegExtract](#regextract) 11 | - [Table of Contents](#table-of-contents) 12 | - [Release History (newest first)](#release-history-newest-first) 13 | - [Using RegExtract](#using-regextract) 14 | - [Basic extraction to `ValueTuple`](#basic-extraction-to-valuetuple) 15 | - [Extracting from multiple input strings (`IEnumerable`)](#extracting-from-multiple-input-strings-ienumerablestring) 16 | - [Extracting to collections (such as `List`)](#extracting-to-collections-such-as-listt) 17 | - [Nullable types](#nullable-types) 18 | - [Nesting compound types (such as tuples) and collections](#nesting-compound-types-such-as-tuples-and-collections) 19 | - [Tuple that contains Collections](#tuple-that-contains-collections) 20 | - [Collection that contains a tuple](#collection-that-contains-a-tuple) 21 | - [Collection that contains a collection](#collection-that-contains-a-collection) 22 | - [Collections with more than one argument (including `Dictionary<,>`)](#collections-with-more-than-one-argument-including-dictionary) 23 | - [`record`s and other compound types](#records-and-other-compound-types) 24 | - [Extracting named capture groups to properties](#extracting-named-capture-groups-to-properties) 25 | - [Other supported types](#other-supported-types) 
26 | - [Including REGEXTRACT\_REGEX\_PATTERN templates on types](#including-regextract_regex_pattern-templates-on-types) 27 | - [Performance and troubleshooting](#performance-and-troubleshooting) 28 | - [Creating a re-usable `ExtractionPlan`](#creating-a-re-usable-extractionplan) 29 | - [Inspecting an extraction plan](#inspecting-an-extraction-plan) 30 | - [Regular Expression reference](#regular-expression-reference) 31 | - [History](#history) 32 | 33 | ## Release History (newest first) 34 | 35 | |Release Number |Release Date | Main Features | 36 | |--|--|--| 37 | | 3.0 | FUTURE ROADMAP | Source Generator support to eliminate run-time reflection | 38 | | 2.1 | December 15, 2023 | Added caching for up to 6x speedup<br>Made tuples less magic | 39 | | 2.0 | December 14, 2023 | Rewrote planning engine with better Collections support | 40 | | 1.0 | December 20, 2020 | First modern release with tree-based extraction planner | 41 | | 0.9 | early December 2020 | Pre-release prototypes | 42 | 
43 | History of pre-release versions 44 | 45 | |Release Number |Release Date | Main Features | 46 | |--|--|--| 47 | | 0.9.24 | December 2020 | Extraction planner fully operational | 48 | | 0.9.19 | December 2020 | Prototype extraction planner to support nested types | 49 | | 0.9.16 | December 2020 | Add support for REGEXTRACT_REGEX_PATTERN templates | 50 | | 0.9.11 | December 2020 | Add support for Enums | 51 | | 0.9.10 | December 2020 | More support for Lists | 52 | | 0.9.6 | December 2020 | Add support for Lists and Nullables | 53 | | 0.9.4 | December 2020 | Add support for named capture groups initializing properties | 54 | | 0.9.2 | December 2020 | Add positional records | 55 | | 0.9 | December 2020 | Extract capture groups to tuples, and that's all | 56 | 57 |
58 | 59 | # Using RegExtract 60 | 61 | ## Basic extraction to `ValueTuple` 62 | 63 | Let's say you have a string `2-10 c: abcdefghi`, consisting of two `int`s separated by a dash (-), a `char` followed by a colon (:), and a `string`. 64 | 65 | You could use the regular expression `@"(\d+)-(\d+) (.): (.*)"` to extract that into a tuple `(int min, int max, char ch, string str)`. 66 | Or you could use `@"((\d+)-(\d+)) (.): (.*)"` to extract into a nested tuple `((int min, int max) range, char ch, string str)`. 67 | 68 | > [!TIP] 69 | > If you need a primer on helpful regular expression syntax, see the [Regular Expression reference](#regular-expression-reference) section below. 70 | 71 | In C# code, those two examples would look like: 72 | 73 | ```cs 74 | using RegExtract; 75 | 76 | var input = "2-10 c: abcdefghi"; 77 | 78 | var flat_tuple = input.Extract<( int min, int max, char ch, string str)>(@"(\d+)-(\d+) (.): (.*)"); 79 | var nested_tuple = input.Extract<((int min, int max) range, char ch, string str)>(@"((\d+)-(\d+)) (.): (.*)"); 80 | ``` 81 | 82 | > [!NOTE] 83 | > The nesting of your capture groups (parts wrapped in `()`) in your regular expression must match the nesting of your type hierarchy. 84 | 85 | ## Extracting from multiple input strings (`IEnumerable<string>`) 86 | 87 | There are many variations on RegExtract extension methods, but there are two that you will use most often. 88 | The first one is the `.Extract<T>()` method demonstrated above. 89 | It's an extension method on `string`, and returns a single fully constructed instance of your type hierarchy `T`. 90 | The other one is very similar, but it accepts any `IEnumerable<string>`. 
91 | 92 | Here's an example of using the `IEnumerable<string>` extension method: 93 | 94 | ```cs 95 | using RegExtract; 96 | 97 | var inputs = new[] { 98 | "2-10 c: abcdefghi", 99 | "3-7 e: qwertyuiop" 100 | }; 101 | 102 | IEnumerable<(int,int,char,string)> results = 103 | inputs.Extract<( int min, int max, char ch, string str)>(@"(\d+)-(\d+) (.): (.*)"); 104 | ``` 105 | 106 | Notice that the actual `.Extract<>()` call looks nearly identical to the version that takes a single `string`. 107 | This makes it trivial to switch between extracting a single instance and extracting from each string in an `IEnumerable<string>`. 108 | 109 | > [!TIP] 110 | > These are the two most common RegExtract methods to use, but if you're going to be using the same extraction plan multiple times, you should first [create a reusable `ExtractionPlan`](#creating-a-re-usable-extractionplan), so that RegExtract only has to parse your regular expression and type hierarchy once. 111 | 112 | ## Extracting to collections (such as `List<T>`) 113 | 114 | In addition to arbitrarily long `ValueTuple`s as demonstrated above, RegExtract supports any collection type that works with [C#'s Collection Initializer syntax](https://learn.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/object-and-collection-initializers#collection-initializers) (commonly `List<>`, `HashSet<>`, and `Dictionary<,>`). 115 | 116 | > [!TIP] 117 | > C# collection initializers work with *any collection type that implements `IEnumerable` [the non-generic one, in particular] and has Add with the appropriate signature*. 118 | 119 | To extract a list, you should include a capture group in your regular expression that repeats. 
120 | For example, to break a sentence up into individual words, you can do something like: 121 | 122 | ```cs 123 | using RegExtract; 124 | 125 | var input = "The quick brown fox jumps over the lazy dog."; 126 | 127 | var words_with_trailing_spaces = input.Extract<List<string>>(@"(\w+ ?)+"); 128 | var words_without_spaces = input.Extract<List<string>>(@"((\w+) ?)+"); 129 | ``` 130 | 131 | Notice in the first example (`words_with_trailing_spaces`), there is only one capture group, and everything inside it is treated as part of the match. 132 | As a result, the strings in the List include trailing spaces (except for "dog", which stopped matching before the trailing period (.)). 133 | 134 | In the second example (`words_without_spaces`), an optional final set of parens was included immediately around `\w+`. 135 | As a result, the strings in the second list will only include the words themselves without trailing spaces. 136 | 137 | > [!NOTE] 138 | > As illustrated by the `words_without_spaces` example, you can always optionally include an extra capture group to capture only a relevant subpart inside the repeating capture group of any **Collection** type. 139 | > 140 | > This is useful if the repeated capture group includes optional separators such as spaces, commas, semicolons, etc., and it allows you to include only the interesting part without the separator. 141 | 142 | ## Nullable types 143 | 144 | Any time a type hierarchy expects a value but there's no corresponding Capture (because of an optional capture group, for example), RegExtract considers the extracted value to be `null`. 145 | For reference types, this works exactly how you'd expect. 146 | For value types, you'll get an `InvalidCastException ("Null object cannot be converted to a value type.")` unless you have marked the value type as nullable in your type hierarchy. 147 | 148 | > [!NOTE] 149 | > Collection types will always be constructed and will never be extracted as a `null`. 
150 | > Unlike a non-collection value with a missing Capture, a collection with no matches will simply be empty. 151 | 152 | An example of extracting to a nullable type (or not): 153 | 154 | ```csharp 155 | using RegExtract; 156 | 157 | // This will succeed because int? can be null. 158 | var nullable = "".Extract<int?>(@"(\d+)"); 159 | // ^Nullable 160 | 161 | // This will throw an exception because \d+ doesn't match anything 162 | // and the int value is required by the type system. 163 | var not_nullable = "".Extract<int>(@"(\d+)"); 164 | // ^Not nullable 165 | ``` 166 | 167 | > [!TIP] 168 | > You can use nullable types in combination with the regular expression alternation operator (`|`) to extract to a different type depending on the details of the match. 169 | 170 | An example of using nullable types to support regular expression alternation (`|`): 171 | ```csharp 172 | var (n,s) = "str".Extract<(int?,string)>(@"(\d+)|(.*)"); 173 | ``` 174 | 175 | ## Nesting compound types (such as tuples) and collections 176 | 177 | You'll frequently find that you need to nest a **compound type** (such as a tuple) inside a **collection** or that you need to nest a **collection** inside a **compound type**. 178 | RegExtract can handle arbitrarily deeply nested mixes of any supported data types. 179 | 180 | ### Tuple that contains Collections 181 | ```csharp 182 | using RegExtract; 183 | 184 | var input = "Item #1: 27 61 49 58 44 2 69 78"; 185 | 186 | var result = input.Extract<(int itemno, HashSet<int> set)>(@"Item #(\d+): (\d+ ?)+"); 187 | ``` 188 | 189 | > [!TIP] 190 | > As you can see in this example, a `HashSet<>` works just like a `List<>`. 191 | 192 | ### Collection that contains a tuple 193 | ```csharp 194 | using RegExtract; 195 | 196 | var input = "red 10, blue 25, green 12, yellow 19"; 197 | 198 | var result = input.Extract<List<(string, int)>>(@"((\w+) (\d+),? 
?)+"); 199 | ``` 200 | 201 | ### Collection that contains a collection 202 | ```csharp 203 | using RegExtract; 204 | 205 | var input = "The quick brown fox jumps over the lazy dog"; 206 | 207 | var result = input.Extract<List<List<char>>>(@"((\w)+ ?)+"); 208 | ``` 209 | 210 | ## Collections with more than one argument (including `Dictionary<,>`) 211 | 212 | C# collection initializers will work with `.Add()` methods that take more than one parameter, such as the `.Add(TKey key, TValue value)` that `Dictionary<,>` implements. 213 | RegExtract doesn't have the benefit of inferring generic type arguments from examples of parameters, however, since everything is a `string` before extraction. 214 | So, instead, RegExtract will only consider an `.Add()` method whose parameter types match the generic arguments `TKey` and `TValue`. 215 | 216 | Example using a `Dictionary<string, int>`: 217 | ```csharp 218 | using RegExtract; 219 | 220 | var input = "red 10, blue 25, green 12, yellow 19"; 221 | 222 | var result = input.Extract<Dictionary<string, int>>(@"((\w+) (\d+),? ?)+"); 223 | ``` 224 | 225 | > [!TIP] 226 | > RegExtract doesn't yet support having, for example, a capture group with the `value` before the `key`. 227 | > (They have to be in the order that the collection's `.Add()` method expects them.) 228 | > 229 | > You can work around this by capturing to a `List<(TValue value, TKey key)>` and then using `list.ToDictionary(vk => vk.key, vk => vk.value)` to convert to a `Dictionary<,>` that's organized the way you want. 230 | 231 | 232 | ## `record`s and other compound types 233 | 234 | You can build almost anything you need using `ValueTuple`s and `List<>`s, and for simple, ad hoc scenarios that's often where I begin and end. 
235 | 236 | However, when it comes time to extract inputs to more richly modeled types, you'll use RegExtract's support for types such as `record`s that have a single obvious constructor (some might say a [primary constructor!](https://learn.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/instance-constructors#primary-constructors)) with the number of parameters corresponding to the number of capture groups in your regular expression. 237 | (Strictly speaking, it doesn't have to be a record, and you don't have to use primary constructor syntax—absolutely any type with a constructor of the right shape is fine.) 238 | 239 | > [!NOTE] 240 | > For custom compound types such as `record`s or `struct`s or `class`es, RegExtract looks for a single public constructor that takes the same number of arguments as the number of capture groups nested inside the compound type's capture group. 241 | > 242 | > It then uses the types of the constructor arguments to determine what types to construct for the nested capture groups. 243 | 244 | Here's an example using a couple of nested `record` types and `List`s: 245 | ```csharp 246 | using RegExtract; 247 | 248 | var input = "Game 14: 9 green, 4 red; 6 blue, 1 red, 7 green; 3 blue, 5 green"; 249 | 250 | var game = input.Extract<Game>(@"Game (\d+): (((\d+) (\w+),? ?)+;? ?)+"); 251 | 252 | record Game(int id, List<Draw> draws); 253 | record Draw(List<(int count, string color)> colors); 254 | ``` 255 | 256 | ## Extracting named capture groups to properties 257 | 258 | All of the examples of compound types so far make use of constructors with positional semantics. 259 | RegExtract uses typical (non-named) capture groups as parameters destined for a tuple slot or a constructor parameter. 260 | 261 | Regular expressions also support named capture groups. 262 | They look like `(?<name>pattern_goes_here)`. 
263 | When RegExtract encounters a named capture group, the captures from it are used to call a property setter on the type being extracted after the type is fully constructed from (non-named) positional capture groups. 264 | 265 | A simple example: 266 | ```csharp 267 | using RegExtract; 268 | 269 | var input = 270 | 271 | var result = input.Extract