├── .editorconfig ├── .gitattributes ├── .gitignore ├── License.txt ├── Logo_64x64.png ├── README.md ├── SoftCircuits.Parsing.Helper.sln ├── SoftCircuits.Parsing.Helper ├── ParsePosition.cs ├── ParsingHelper.cs ├── SkipWhiteSpaceOption.cs └── SoftCircuits.Parsing.Helper.csproj └── TestParsingHelper ├── TestParsingHelper.cs └── TestParsingHelper.csproj /.editorconfig: -------------------------------------------------------------------------------- 1 | [*.cs] 2 | 3 | # CS1591: Missing XML comment for publicly visible type or member 4 | dotnet_diagnostic.CS1591.severity = silent 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is need for earlier builds of msysgit that does not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. 
An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Dd]ebugPublic/ 19 | [Rr]elease/ 20 | [Rr]eleases/ 21 | x64/ 22 | x86/ 23 | [Aa][Rr][Mm]/ 24 | [Aa][Rr][Mm]64/ 25 | bld/ 26 | [Bb]in/ 27 | [Oo]bj/ 28 | [Ll]og/ 29 | 30 | # Visual Studio 2015/2017 cache/options directory 31 | .vs/ 32 | # Uncomment if you have tasks that create the project's static files in wwwroot 33 | #wwwroot/ 34 | 35 | # Visual Studio 2017 auto generated files 36 | Generated\ Files/ 37 | 38 | # MSTest test Results 39 | [Tt]est[Rr]esult*/ 40 | [Bb]uild[Ll]og.* 41 | 42 | # NUNIT 43 | *.VisualState.xml 44 | TestResult.xml 45 | 46 | # Build Results of an ATL Project 47 | [Dd]ebugPS/ 48 | [Rr]eleasePS/ 49 | dlldata.c 50 | 51 | # Benchmark Results 52 | BenchmarkDotNet.Artifacts/ 53 | 54 | # .NET Core 55 | project.lock.json 56 | project.fragment.lock.json 57 | artifacts/ 58 | 59 | # StyleCop 60 | StyleCopReport.xml 61 | 62 | # Files built by Visual Studio 63 | *_i.c 64 | *_p.c 65 | *_h.h 66 | *.ilk 67 | *.meta 68 | *.obj 69 | *.iobj 70 | *.pch 71 | *.pdb 72 | *.ipdb 73 | *.pgc 74 | *.pgd 75 | *.rsp 76 | *.sbr 77 | *.tlb 78 | *.tli 79 | *.tlh 80 | *.tmp 81 | *.tmp_proj 82 | *_wpftmp.csproj 83 | *.log 84 | *.vspscc 85 | *.vssscc 86 | .builds 87 | *.pidb 88 | *.svclog 89 | *.scc 90 | 91 | # Chutzpah Test files 92 | _Chutzpah* 93 | 94 | # Visual C++ cache files 95 | ipch/ 96 | *.aps 97 | *.ncb 98 | *.opendb 99 | *.opensdf 100 | *.sdf 101 | *.cachefile 102 | *.VC.db 103 | *.VC.VC.opendb 104 | 105 | # Visual Studio profiler 106 | *.psess 107 | *.vsp 108 | *.vspx 109 | *.sap 110 | 111 | # Visual Studio Trace Files 112 | *.e2e 113 | 114 | # TFS 2012 Local Workspace 115 | $tf/ 116 | 117 | # Guidance Automation Toolkit 118 | *.gpState 119 
| 120 | # ReSharper is a .NET coding add-in 121 | _ReSharper*/ 122 | *.[Rr]e[Ss]harper 123 | *.DotSettings.user 124 | 125 | # JustCode is a .NET coding add-in 126 | .JustCode 127 | 128 | # TeamCity is a build add-in 129 | _TeamCity* 130 | 131 | # DotCover is a Code Coverage Tool 132 | *.dotCover 133 | 134 | # AxoCover is a Code Coverage Tool 135 | .axoCover/* 136 | !.axoCover/settings.json 137 | 138 | # Visual Studio code coverage results 139 | *.coverage 140 | *.coveragexml 141 | 142 | # NCrunch 143 | _NCrunch_* 144 | .*crunch*.local.xml 145 | nCrunchTemp_* 146 | 147 | # MightyMoose 148 | *.mm.* 149 | AutoTest.Net/ 150 | 151 | # Web workbench (sass) 152 | .sass-cache/ 153 | 154 | # Installshield output folder 155 | [Ee]xpress/ 156 | 157 | # DocProject is a documentation generator add-in 158 | DocProject/buildhelp/ 159 | DocProject/Help/*.HxT 160 | DocProject/Help/*.HxC 161 | DocProject/Help/*.hhc 162 | DocProject/Help/*.hhk 163 | DocProject/Help/*.hhp 164 | DocProject/Help/Html2 165 | DocProject/Help/html 166 | 167 | # Click-Once directory 168 | publish/ 169 | 170 | # Publish Web Output 171 | *.[Pp]ublish.xml 172 | *.azurePubxml 173 | # Note: Comment the next line if you want to checkin your web deploy settings, 174 | # but database connection strings (with potential passwords) will be unencrypted 175 | *.pubxml 176 | *.publishproj 177 | 178 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 179 | # checkin your Azure Web App publish settings, but sensitive information contained 180 | # in these scripts will be unencrypted 181 | PublishScripts/ 182 | 183 | # NuGet Packages 184 | *.nupkg 185 | # The packages folder can be ignored because of Package Restore 186 | **/[Pp]ackages/* 187 | # except build/, which is used as an MSBuild target. 
188 | !**/[Pp]ackages/build/ 189 | # Uncomment if necessary however generally it will be regenerated when needed 190 | #!**/[Pp]ackages/repositories.config 191 | # NuGet v3's project.json files produces more ignorable files 192 | *.nuget.props 193 | *.nuget.targets 194 | 195 | # Microsoft Azure Build Output 196 | csx/ 197 | *.build.csdef 198 | 199 | # Microsoft Azure Emulator 200 | ecf/ 201 | rcf/ 202 | 203 | # Windows Store app package directories and files 204 | AppPackages/ 205 | BundleArtifacts/ 206 | Package.StoreAssociation.xml 207 | _pkginfo.txt 208 | *.appx 209 | 210 | # Visual Studio cache files 211 | # files ending in .cache can be ignored 212 | *.[Cc]ache 213 | # but keep track of directories ending in .cache 214 | !?*.[Cc]ache/ 215 | 216 | # Others 217 | ClientBin/ 218 | ~$* 219 | *~ 220 | *.dbmdl 221 | *.dbproj.schemaview 222 | *.jfm 223 | *.pfx 224 | *.publishsettings 225 | orleans.codegen.cs 226 | 227 | # Including strong name files can present a security risk 228 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 229 | #*.snk 230 | 231 | # Since there are multiple workflows, uncomment next line to ignore bower_components 232 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 233 | #bower_components/ 234 | 235 | # RIA/Silverlight projects 236 | Generated_Code/ 237 | 238 | # Backup & report files from converting an old project file 239 | # to a newer Visual Studio version. 
Backup files are not needed, 240 | # because we have git ;-) 241 | _UpgradeReport_Files/ 242 | Backup*/ 243 | UpgradeLog*.XML 244 | UpgradeLog*.htm 245 | ServiceFabricBackup/ 246 | *.rptproj.bak 247 | 248 | # SQL Server files 249 | *.mdf 250 | *.ldf 251 | *.ndf 252 | 253 | # Business Intelligence projects 254 | *.rdl.data 255 | *.bim.layout 256 | *.bim_*.settings 257 | *.rptproj.rsuser 258 | *- Backup*.rdl 259 | 260 | # Microsoft Fakes 261 | FakesAssemblies/ 262 | 263 | # GhostDoc plugin setting file 264 | *.GhostDoc.xml 265 | 266 | # Node.js Tools for Visual Studio 267 | .ntvs_analysis.dat 268 | node_modules/ 269 | 270 | # Visual Studio 6 build log 271 | *.plg 272 | 273 | # Visual Studio 6 workspace options file 274 | *.opt 275 | 276 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 277 | *.vbw 278 | 279 | # Visual Studio LightSwitch build output 280 | **/*.HTMLClient/GeneratedArtifacts 281 | **/*.DesktopClient/GeneratedArtifacts 282 | **/*.DesktopClient/ModelManifest.xml 283 | **/*.Server/GeneratedArtifacts 284 | **/*.Server/ModelManifest.xml 285 | _Pvt_Extensions 286 | 287 | # Paket dependency manager 288 | .paket/paket.exe 289 | paket-files/ 290 | 291 | # FAKE - F# Make 292 | .fake/ 293 | 294 | # JetBrains Rider 295 | .idea/ 296 | *.sln.iml 297 | 298 | # CodeRush personal settings 299 | .cr/personal 300 | 301 | # Python Tools for Visual Studio (PTVS) 302 | __pycache__/ 303 | *.pyc 304 | 305 | # Cake - Uncomment if you are using it 306 | # tools/** 307 | # !tools/packages.config 308 | 309 | # Tabs Studio 310 | *.tss 311 | 312 | # Telerik's JustMock configuration file 313 | *.jmconfig 314 | 315 | # BizTalk build output 316 | *.btp.cs 317 | *.btm.cs 318 | *.odx.cs 319 | *.xsd.cs 320 | 321 | # OpenCover UI analysis results 322 | OpenCover/ 323 | 324 | # Azure Stream Analytics local run output 325 | ASALocalRun/ 326 | 327 | # MSBuild Binary and Structured Log 328 | *.binlog 329 | 330 | # NVidia Nsight GPU debugger configuration 
file 331 | *.nvuser 332 | 333 | # MFractors (Xamarin productivity tool) working folder 334 | .mfractor/ 335 | 336 | # Local History for Visual Studio 337 | .localhistory/ 338 | 339 | # BeatPulse healthcheck temp database 340 | healthchecksdb -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2024 Jonathan Wood 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Logo_64x64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SoftCircuits/ParsingHelper/75d25b83af07f5e438218b8d80d152eb6a44342c/Logo_64x64.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParsingHelper 2 | 3 | [![NuGet version (SoftCircuits.Parsing.Helper)](https://img.shields.io/nuget/v/SoftCircuits.Parsing.Helper.svg?style=flat-square)](https://www.nuget.org/packages/SoftCircuits.Parsing.Helper/) 4 | 5 | ``` 6 | Install-Package SoftCircuits.Parsing.Helper 7 | ``` 8 | 9 | ## Introduction 10 | 11 | `ParsingHelper` is a .NET class library that makes it much easier to parse text. The library tracks the current position within the text, ensures your code never accesses characters at an invalid index, and includes many methods that make parsing easier. The library makes your text-parsing code more concise and more robust. 12 | 13 | ## Getting Started 14 | 15 | To parse a string, call the `ParsingHelper` constructor with the string you want to parse. If the string argument is `null`, it will be safely treated as an empty string. The constructor initializes the class instance to parse the given string and sets the current position to the start of that string. 16 | 17 | Use the `Peek()` method to read the character at the current position without changing the current position. The `Peek()` method can optionally accept an integer argument that specifies the character position as the number of characters ahead of the current position. For example, `Peek(1)` would return the character that comes after the character at the current position. (Calling `Peek()` is equal to calling `Peek(0)`.) 
If the position specified is out of bounds for the current string, `Peek()` returns `ParsingHelper.NullChar` (equal to `'\0'`). 18 | 19 | Use the `Get()` method to read the character at the current position and then increment the current position to the next character. 20 | 21 | You can call the `Reset()` method to reset the current position back to the start of the string. The `Reset()` method accepts an optional string argument and, if supplied, will configure the class to begin parsing the new string. 22 | 23 | The `Text` property returns the string being parsed. And the `Index` property returns the current position within the string being parsed. Although you would normally use the navigation methods to change the `Index` value, you can set it directly. If you set the `Index` property to an invalid value, it will be adjusted so it is always in the range of 0 to `Text.Length`. 24 | 25 | The `EndOfText` property returns `true` when you have reached the end of the text. And the `Remaining` property returns the number of characters still to be parsed (calculated as `Text.Length - Index`). 26 | 27 | ```cs 28 | ParsingHelper helper = new ParsingHelper("The quick brown fox jumps over the lazy dog."); 29 | 30 | char c = helper.Peek(); // Returns 'T' 31 | c = helper.Get(); // Returns 'T' 32 | c = helper.Get(); // Returns 'h' 33 | 34 | helper.Reset(); // Returns to start of string 35 | 36 | string text = helper.Text; // Returns "The quick brown fox jumps over the lazy dog." 37 | int index = helper.Index; // Returns 0 38 | 39 | bool endOfText = helper.EndOfText; // Returns false 40 | int remaining = helper.Remaining; // Returns helper.Text.Length 41 | ``` 42 | 43 | ## Navigation 44 | 45 | To advance the parser to the next character position, use the `Next()` method. This method can also accept an optional argument that specifies the number of characters to advance. For example, if you pass `5`, the current position will be advanced five characters. 
(Calling `Next()` with no arguments is equal to calling `Next(1)`.) The argument to `Next()` can be a negative value if you want to move backwards. 46 | 47 | As an alternative to the `Next()` method, `ParsingHelper` overloads several operators that can be used as a shortcut to change the current position. These are demonstrated in the following example. 48 | 49 | ```cs 50 | helper++; // Same as helper.Next() 51 | helper--; // Same as helper.Next(-1) 52 | helper += 2; // Same as helper.Next(2) 53 | helper -= 2; // Same as helper.Next(-2) 54 | helper = helper + 3; // Same as helper.Next(3) 55 | helper = helper - 3; // Same as helper.Next(-3) 56 | int i = helper; // Same as i = helper.Index 57 | 58 | // Safely moves to the end of the text if you add a number that is too large 59 | helper += 1000000; 60 | 61 | // Safely moves to the start of the text if you subtract a number that is too large 62 | helper -= 1000000; 63 | ``` 64 | 65 | This simple example shows how you might print each character in the text being parsed. 66 | 67 | ```cs 68 | while (!helper.EndOfText) 69 | { 70 | Console.WriteLine(helper.Peek()); 71 | helper++; 72 | } 73 | ``` 74 | 75 | ## Tracking Line and Column Position 76 | 77 | For performance reasons, ParsingHelper does not track the current line and column values as it parses. However, you can use the `GetLineColumn()` method to calculate the line and column values that correspond to the current position. This is useful for providing more information when reporting parsing errors back to the end user. 78 | 79 | ## Skipping Over Characters 80 | 81 | To skip over a group of characters, you can use the `Skip()` method. This method accepts any number of `char` arguments (or a `char` array). It will advance the current position to the first character that is not one of the arguments. 82 | 83 | The following example would skip over any numeric digits. 
84 | 85 | ```cs 86 | helper.Skip('1', '2', '3', '4', '5', '6', '7', '8', '9', '0'); 87 | ``` 88 | 89 | The `SkipWhile()` method accepts a predicate that specifies when this method should stop skipping. The following example would skip over any characters that are not an equal sign: 90 | 91 | ```cs 92 | helper.SkipWhile(c => c != '='); 93 | ``` 94 | 95 | A common task when parsing is to skip over any whitespace characters. Use the `SkipWhiteSpace()` method to advance the current position to the next character that is not a white space character. 96 | 97 | The library has a number of other methods and overloads that support skipping over characters. 98 | 99 | ## Skipping to Characters 100 | 101 | In addition to skipping specified characters, the library also provides ways to advance to specified characters. 102 | 103 | The `SkipTo()` method advances to the next occurrence of the given string. 104 | 105 | ```cs 106 | helper.SkipTo("fox"); 107 | ``` 108 | 109 | This example advances the current position to the start of the next occurrence of `"fox"`. If no more occurrences are found, this method advances to the very end of the text and returns `false`. The `SkipTo()` method supports an optional `StringComparison` value to specify how characters should be compared. 110 | 111 | The `SkipTo()` method is overloaded to also accept any number of `char` arguments (or a `char` array). 112 | 113 | ```cs 114 | helper.SkipTo('x', 'y', 'z'); 115 | ``` 116 | 117 | This example will advance the current position to the first occurrence of any one of the specified characters. If none of the characters are found, this method advances to the end of the text and returns `false`. 118 | 119 | Use the `SkipToEndOfLine()` to advance the current position to the first character that is a new-line character (i.e., `'\r'` or `'\n'`). If neither of the characters are found, this method advances to the end of the text and returns `false`. 
Use the `SkipToNextLine()` method to advance the current position to the first character in the next line. If no next line is found, this method advances to the end of the text and returns `false`. 120 | 121 | The library also has a number of other methods and overloads for skipping to characters. 122 | 123 | ## Parsing Characters 124 | 125 | The `ParseWhile()` method accepts a predicate that specifies when this method should stop parsing. It works like the `SkipWhile()` method except that `ParseWhile()` will return the characters that were skipped. (Note that `SkipWhile()` is faster and should be used when you do not need the skipped characters.) 126 | 127 | The following example will parse all letters starting from the current position. 128 | 129 | ```cs 130 | string token = helper.ParseWhile(char.IsLetter); 131 | ``` 132 | 133 | The `ParseTo()` method parses characters until a delimiter character is found, and returns the characters that were parsed. There are two versions of this method: one takes a `params` array of characters that specify the delimiters, and the other accepts a predicate that returns true for characters that are delimiters. 134 | 135 | In addition, the library also defines the `ParseToken()` method. This method takes a list of delimiters and will skip all characters that are a delimiter, then parse all characters that are not a delimiter and return the parsed characters. Delimiters can be specified as character parameters, a character array, or a predicate that returns true if the given character is a delimiter. 136 | 137 | ```cs 138 | string token; 139 | token = helper.ParseToken(' ', '\t', '\r', '\n'); 140 | token = helper.ParseToken(char.IsWhiteSpace); 141 | ``` 142 | 143 | The library also has a number of other methods and overloads for parsing characters. 144 | 145 | Note: Methods that return a string also have an `AsSpan` version, which returns a `ReadOnlySpan<char>`. Use this version for fewer memory allocations and better performance. 
146 | 147 | ## Parsing Quoted Text 148 | 149 | You may have occasion to parse quoted text. In this case, you will probably want the quoted text (without the quotes). The `ParseQuotedText()` method makes this easy. 150 | 151 | Call this method with the current position at the first quote character. The method will use the character at the current position to determine what the quote character is. (So the quote character can be any character you choose.) 152 | 153 | This method will parse characters until the closing quote is found. If the closing quote is found, it will set the current position to the character after the closing quote and return the text within the quotes. If the closing quote is not found, it will return everything after the starting quote to the end of the string, and will advance the current position to the end of the string. 154 | 155 | If `ParseQuotedText()` encounters two quote characters together, it will interpret them as a single quote character and not the end of the quoted text. For example, consider the following: 156 | 157 | ```cs 158 | ParsingHelper helper = new ParsingHelper("One two \"three and \"\"four\"\"!"); 159 | helper.MoveTo('"'); 160 | string token = helper.ParseQuotedText(); 161 | ``` 162 | 163 | This example would set the `token` variable to `three and "four"`. The two pairs of quotes are each interpreted as one quote in the text and not the end of the quoted text. 164 | 165 | The `ParseQuotedText()` method has a second overload that allows you to specify the escape character (including no escape character), whether or not the escape character is included in the result, and whether or not the enclosing quotes are included in the result. 166 | 167 | ## Extracting Text 168 | 169 | It is common to want to extract text tokens as you parse them. You can use the `Extract()` method to do this. 
The `Extract()` method accepts two integer arguments that specify the 0-based position of the first character to be extracted and the 0-based position of the character that follows the last character to be extracted. 170 | 171 | ```cs 172 | string token = helper.Extract(start, end); 173 | ``` 174 | 175 | This method is overloaded with a version that only accepts one integer argument. The argument specifies the 0-based position of the first character to be extracted, and this method will extract everything from that position to the end of the text. 176 | 177 | Neither of these methods change the current position. 178 | 179 | ## Comparing Text 180 | 181 | Finally, you may need to test if a predefined string is equal to the text at the current location. The `MatchesCurrentPosition()` method tests this. It accepts a string argument and returns a Boolean value that indicates if the specified string matches the text starting at the current location. The `MatchesCurrentPosition()` method supports an optional `StringComparison` value to specify how characters should be compared. Note that while this method can be handy, it's less performant than most methods in this class. Any type of search function that works by calling this method at each successive position should be avoided where performance matters. 182 | 183 | ## Examples 184 | 185 | Here are a couple of examples to illustrate use of the library. 186 | 187 | #### Parse a Sentence into Words 188 | 189 | This example parses a sentence into words. This implementation only considers spaces and periods as word delimiters. But you could easily add more characters, or use the overload of `ParsingHelper.ParseTokens()` that accepts a lambda expression. 
190 | 191 | ```cs 192 | ParsingHelper helper = new ParsingHelper("The quick brown fox jumps over the lazy dog."); 193 | 194 | List<string> words = helper.ParseTokens(' ', '.').ToList(); 195 | 196 | CollectionAssert.AreEqual(new[] { 197 | "The", 198 | "quick", 199 | "brown", 200 | "fox", 201 | "jumps", 202 | "over", 203 | "the", 204 | "lazy", 205 | "dog" }, words); 206 | ``` 207 | 208 | #### Command Line 209 | 210 | This example parses a command line. It detects both arguments and flags (arguments preceded with `'-'` or `'/'`). It's okay with whitespace between the flag character and flag. And any argument or flag that contains whitespace can be enclosed in quotes. 211 | 212 | ```cs 213 | ParsingHelper helper = new ParsingHelper("app -v -f /d-o file1 \"file 2\""); 214 | List<string> arguments = new List<string>(); 215 | List<string> flags = new List<string>(); 216 | 217 | char[] flagCharacters = new char[] { '-', '/' }; 218 | string arg; 219 | bool isFlag = false; 220 | 221 | while (!helper.EndOfText) 222 | { 223 | // Skip any whitespace 224 | helper.SkipWhiteSpace(); 225 | // Is this a flag? 226 | if (flagCharacters.Contains(helper.Peek())) 227 | { 228 | isFlag = true; 229 | // Skip over flag character 230 | helper++; 231 | // Allow whitespace between flag character and flag 232 | helper.SkipWhiteSpace(); 233 | } 234 | else isFlag = false; 235 | // Parse item 236 | if (helper.Peek() == '"') 237 | arg = helper.ParseQuotedText(); 238 | else 239 | arg = helper.ParseWhile(c => !char.IsWhiteSpace(c) && !flagCharacters.Contains(c)); 240 | // Add argument to appropriate collection 241 | if (isFlag) 242 | flags.Add(arg); 243 | else 244 | arguments.Add(arg); 245 | } 246 | 247 | CollectionAssert.AreEqual(new[] { "app", "file1", "file 2" }, arguments); 248 | CollectionAssert.AreEqual(new[] { "v", "f", "d", "o" }, flags); 249 | ``` 250 | 251 | #### Regular Expressions 252 | 253 | This example uses a regular expression to find all the words in a string that start with the letter "J". 
254 | 255 | ```cs 256 | ParsingHelper helper = new ParsingHelper("Jim Jack Sally Jennifer Bob Gary Jonathan Bill"); 257 | 258 | IEnumerable results = helper.ParseTokensRegEx(@"\b[J]\w+"); 259 | 260 | CollectionAssert.AreEqual(new[] 261 | { 262 | "Jim", 263 | "Jack", 264 | "Jennifer", 265 | "Jonathan" 266 | }, results.ToList()); 267 | ``` 268 | 269 | -------------------------------------------------------------------------------- /SoftCircuits.Parsing.Helper.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.9.34616.47 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SoftCircuits.Parsing.Helper", "SoftCircuits.Parsing.Helper\SoftCircuits.Parsing.Helper.csproj", "{F76FC2B7-4BD6-4315-86C1-4434D2173649}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestParsingHelper", "TestParsingHelper\TestParsingHelper.csproj", "{FF09DC20-7D47-4DD3-9313-57C25959A863}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{F5F306A4-1004-4A4B-80CD-6E88F9A1DB41}" 11 | ProjectSection(SolutionItems) = preProject 12 | .editorconfig = .editorconfig 13 | EndProjectSection 14 | EndProject 15 | Global 16 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 17 | Debug|Any CPU = Debug|Any CPU 18 | Release|Any CPU = Release|Any CPU 19 | EndGlobalSection 20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 21 | {F76FC2B7-4BD6-4315-86C1-4434D2173649}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 22 | {F76FC2B7-4BD6-4315-86C1-4434D2173649}.Debug|Any CPU.Build.0 = Debug|Any CPU 23 | {F76FC2B7-4BD6-4315-86C1-4434D2173649}.Release|Any CPU.ActiveCfg = Release|Any CPU 24 | {F76FC2B7-4BD6-4315-86C1-4434D2173649}.Release|Any CPU.Build.0 = Release|Any CPU 25 | 
{FF09DC20-7D47-4DD3-9313-57C25959A863}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 26 | {FF09DC20-7D47-4DD3-9313-57C25959A863}.Debug|Any CPU.Build.0 = Debug|Any CPU 27 | {FF09DC20-7D47-4DD3-9313-57C25959A863}.Release|Any CPU.ActiveCfg = Release|Any CPU 28 | {FF09DC20-7D47-4DD3-9313-57C25959A863}.Release|Any CPU.Build.0 = Release|Any CPU 29 | EndGlobalSection 30 | GlobalSection(SolutionProperties) = preSolution 31 | HideSolutionNode = FALSE 32 | EndGlobalSection 33 | GlobalSection(ExtensibilityGlobals) = postSolution 34 | SolutionGuid = {F1934F17-FD72-459C-8B70-900B48F4B063} 35 | EndGlobalSection 36 | EndGlobal 37 | -------------------------------------------------------------------------------- /SoftCircuits.Parsing.Helper/ParsePosition.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2024 Jonathan Wood (www.softcircuits.com) 2 | // Licensed under the MIT license. 3 | // 4 | 5 | using System; 6 | 7 | namespace SoftCircuits.Parsing.Helper 8 | { 9 | /// <summary> 10 | /// Represents a line and column position within a string. 11 | /// </summary> 12 | [Obsolete("This class is deprecated and will be removed in a future version. Please use ParsePosition instead.")] 13 | public class ParsingPosition : ParsePosition { } 14 | 15 | /// <summary> 16 | /// Represents a line and column position within a string. 17 | /// </summary> 18 | public class ParsePosition 19 | { 20 | /// <summary> 21 | /// The 1-based line number for this position. 22 | /// </summary> 23 | public int Line { get; internal set; } 24 | 25 | /// <summary> 26 | /// The 1-based column number for this position. 27 | /// </summary> 28 | public int Column { get; internal set; } 29 | 30 | internal ParsePosition() 31 | { 32 | Line = 0; 33 | Column = 0; 34 | } 35 | 36 | /// <summary> 37 | /// Calculates the line and column position for the given text and index. 38 | /// </summary> 39 | /// <param name="text">The text to calculate the position for.</param> 40 | /// <param name="index">The position as an index into <paramref name="text"/>.</param> 41 | /// <returns>A <see cref="ParsePosition"/> with the calculated line and column 42 | /// positions.</returns> 
43 | public static ParsePosition CalculatePosition(string text, int index) 44 | { 45 | ParsingHelper helper = new(text); 46 | ParsePosition position = new(); 47 | int lineStartPos = 0; 48 | 49 | while (helper < index) 50 | { 51 | if (!helper.SkipToNextLine() || helper > index) 52 | break; 53 | position.Line++; 54 | lineStartPos = helper; 55 | } 56 | position.Line++; 57 | position.Column = (index - lineStartPos) + 1; 58 | return position; 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /SoftCircuits.Parsing.Helper/ParsingHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2024 Jonathan Wood (www.softcircuits.com) 2 | // Licensed under the MIT license. 3 | // 4 | 5 | using System; 6 | using System.Collections.Generic; 7 | using System.Diagnostics; 8 | using System.Diagnostics.CodeAnalysis; 9 | using System.Linq; 10 | using System.Runtime.CompilerServices; 11 | using System.Text; 12 | using System.Text.RegularExpressions; 13 | 14 | namespace SoftCircuits.Parsing.Helper 15 | { 16 | /// <summary> 17 | /// Low-level text parsing helper class. 18 | /// </summary> 19 | public class ParsingHelper 20 | { 21 | /// <summary> 22 | /// Characters that make up a line break. 23 | /// </summary> 24 | private static readonly char[] LineBreakCharacters = ['\r', '\n']; 25 | 26 | private int InternalIndex; 27 | 28 | /// <summary> 29 | /// Represents an invalid character. This character is returned when attempting to read 30 | /// a character at an invalid position. The character value is '\0'. 31 | /// </summary> 32 | public const char NullChar = '\0'; 33 | 34 | /// <summary> 35 | /// Specifies regular expression options used by all regular expression methods. 36 | /// </summary> 37 | public RegexOptions RegularExpressionOptions { get; set; } 38 | 39 | /// <summary> 40 | /// Returns the text currently being parsed. 41 | /// </summary> 42 | public string Text { get; private set; } 43 | 44 | /// 45 | /// Constructs a new instance. 
Sets the text to be parsed 46 | /// and sets the current position to the start of that text. 47 | /// 48 | /// The text to be parsed. Can be null. 49 | /// Specifies regular expression options used by 50 | /// all methods that use regular expressions. 51 | public ParsingHelper(string? text, RegexOptions regularExpressionOptions = RegexOptions.None) 52 | { 53 | RegularExpressionOptions = regularExpressionOptions; 54 | Reset(text); 55 | } 56 | 57 | /// 58 | /// Sets the text to be parsed and sets the current position to the start of that text. 59 | /// 60 | /// The text to be parsed. Can be null. 61 | #if !NETSTANDARD2_0 62 | [MemberNotNull(nameof(Text))] 63 | #endif 64 | public void Reset(string? text) 65 | { 66 | Text = text ?? string.Empty; 67 | InternalIndex = 0; 68 | } 69 | 70 | /// 71 | /// Sets the current position to the start of the current text. 72 | /// 73 | [MethodImpl(MethodImplOptions.AggressiveInlining)] 74 | public void Reset() 75 | { 76 | InternalIndex = 0; 77 | } 78 | 79 | /// 80 | /// Gets or sets the current position within the text being parsed. Safely 81 | /// handles attempts to set to an invalid position. 82 | /// 83 | public int Index 84 | { 85 | get => InternalIndex; 86 | set 87 | { 88 | InternalIndex = value; 89 | if (InternalIndex < 0) 90 | InternalIndex = 0; 91 | else if (InternalIndex > Text.Length) 92 | InternalIndex = Text.Length; 93 | } 94 | } 95 | 96 | /// 97 | /// Returns true if the current position is at the end of the text being parsed. 98 | /// Otherwise, false. 99 | /// 100 | public bool EndOfText => InternalIndex >= Text.Length; 101 | 102 | /// 103 | /// Returns the number of characters not yet parsed. This is equal to the length 104 | /// of the text being parsed, minus the current position. 105 | /// 106 | public int Remaining => Text.Length - InternalIndex; 107 | 108 | /// 109 | /// Returns the character at the current position, or 110 | /// if the current position was at the end of the text being parsed. 
///
/// The character at the current position.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char Peek()
{
    Debug.Assert(InternalIndex >= 0 && InternalIndex <= Text.Length);
    if (InternalIndex >= Text.Length)
    {
        return NullChar;
    }
    return Text[InternalIndex];
}

/// <summary>
/// Returns the character <paramref name="count"/> positions away from the current
/// position, or <see cref="NullChar"/> when that position lies outside the text.
/// The current position is not changed.
/// </summary>
/// <param name="count">Offset from the current position of the character to read.
/// May be a negative number.</param>
/// <returns>The character at the specified position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char Peek(int count)
{
    int target = InternalIndex + count;
    if (target < 0 || target >= Text.Length)
    {
        return NullChar;
    }
    return Text[target];
}

/// <summary>
/// Returns the character at the current position and then advances the current
/// position by one. Returns <see cref="NullChar"/>, without advancing, when the
/// current position is at the end of the text being parsed.
/// </summary>
/// <returns>The character at the current position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public char Get()
{
    Debug.Assert(InternalIndex >= 0 && InternalIndex <= Text.Length);
    if (InternalIndex >= Text.Length)
    {
        return NullChar;
    }
    char current = Text[InternalIndex];
    InternalIndex++;
    return current;
}

/// <summary>
/// Moves the current position ahead one character. Does nothing at the end of the text.
/// </summary>
public void Next()
{
    Debug.Assert(InternalIndex >= 0 && InternalIndex <= Text.Length);
    if (!EndOfText)
    {
        InternalIndex++;
    }
}

/// <summary>
/// Moves the current position ahead the specified number of characters.
/// </summary>
/// <param name="count">The number of characters to move ahead. Use negative numbers
/// to move backwards.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Next(int count)
{
    // Add in 64-bit space: InternalIndex + count can exceed int.MaxValue for a large
    // positive count, which previously wrapped negative and clamped to the START of
    // the text instead of the end.
    long target = (long)InternalIndex + count;
    if (target < 0)
        InternalIndex = 0;
    else if (target > Text.Length)
        InternalIndex = Text.Length;
    else
        InternalIndex = (int)target;
}

/// <summary>
/// Calculates the line and column information for the current position.
/// </summary>
/// <returns>A <see cref="ParsePosition"/> that represents the current position.</returns>
[Obsolete("This method is obsolete and will be removed in a future version. Please use GetLineColumn() instead.")]
public ParsePosition CalculatePosition() => ParsePosition.CalculatePosition(Text, Index);

/// <summary>
/// Calculates the line and column values that correspond to the current position.
/// </summary>
/// <returns>A <see cref="ParsePosition"/> that represents the current position.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ParsePosition GetLineColumn() => ParsePosition.CalculatePosition(Text, InternalIndex);

#region Skip characters

/// <summary>
/// Moves the current position to the next character for which
/// <paramref name="predicate"/> returns false, or to the end of the text.
/// </summary>
/// <param name="predicate">Function that tests each character and returns true
/// for each character that should be skipped.</param>
public void SkipWhile(Func<char, bool> predicate)
{
    Debug.Assert(InternalIndex >= 0 && InternalIndex <= Text.Length);
    while (InternalIndex < Text.Length && predicate(Text[InternalIndex]))
        InternalIndex++;
}

/// <summary>
/// Moves the current position to the next character that is not one of the specified
/// characters.
/// </summary>
/// <param name="chars">Characters to skip over.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Skip(params char[] chars) => SkipWhile(chars.Contains);

///
/// Moves the current position to the next character that is not a whitespace character.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void SkipWhiteSpace()
{
    SkipWhile(char.IsWhiteSpace);
}

/// <summary>
/// Skips whitespace characters other than line-break characters. When
/// <paramref name="option"/> is <see cref="SkipWhiteSpaceOption.StopAtNextLine"/> and the
/// position then rests on a line-break character, that line break is skipped as well.
/// </summary>
/// <param name="option">Specifies the condition under which skipping stops early.</param>
public void SkipWhiteSpace(SkipWhiteSpaceOption option)
{
    Debug.Assert(option is SkipWhiteSpaceOption.StopAtEol or SkipWhiteSpaceOption.StopAtNextLine);

    // Skip whitespace on the current line only; never step over '\r' or '\n' here.
    SkipWhile(c => !LineBreakCharacters.Contains(c) && char.IsWhiteSpace(c));

    if (option != SkipWhiteSpaceOption.StopAtNextLine)
    {
        return;
    }
    if (LineBreakCharacters.Contains(Peek()))
    {
        SkipLineBreak();
    }
}

/// <summary>
/// Moves the current position past the text that matches the given regular expression,
/// provided the match starts at the current position.
/// </summary>
/// <param name="regularExpression">The regular expression pattern to match.</param>
#if NET7_0_OR_GREATER
public void SkipRegEx([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression)
#else
public void SkipRegEx(string regularExpression)
#endif
{
    SkipRegEx(new Regex(regularExpression, RegularExpressionOptions));
}

/// <summary>
/// Moves the current position past any characters that match the given regular expression.
/// </summary>
/// <param name="regex">The regular expression pattern to match.</param>
public void SkipRegEx(Regex regex)
{
#if NETSTANDARD2_0
    if (regex == null)
        throw new ArgumentNullException(nameof(regex));
#else
    ArgumentNullException.ThrowIfNull(regex);
#endif

    int origin = InternalIndex;
    Match candidate = regex.Match(Text, origin);

    // Only a match that begins exactly at the current position is skipped.
    if (!candidate.Success || candidate.Index != origin)
    {
        return;
    }
    InternalIndex = origin + candidate.Length;
}

#endregion

#region Skip to characters

/// <summary>
/// Moves the current position to the next character that is one of the specified
/// characters and returns true. If none of them is found, the current position is
/// moved to the end of the text being parsed and false is returned.
/// </summary>
/// <param name="chars">Characters to skip to.</param>
/// <returns>True if any of the specified characters were found. Otherwise, false.</returns>
public bool SkipTo(params char[] chars)
{
    int found = Text.IndexOfAny(chars, InternalIndex);
    bool success = found >= 0;
    InternalIndex = success ? found : Text.Length;
    return success;
}

/// <summary>
/// Moves the current position to the next occurrence of the specified string and returns
/// true if a match was found. If the specified string is not found, this method
/// moves the current position to the end of the text being parsed and returns false.
/// </summary>
/// <param name="s">String to skip to.</param>
/// <param name="includeToken">If true and a match is found, the matching string is
/// also skipped.</param>
/// <returns>True if the specified string was found. Otherwise, false.</returns>
public bool SkipTo(string s, bool includeToken = false)
{
    // Use an ordinal search. The previous culture-sensitive String.IndexOf(string, int)
    // can miss "\n" inside "\r\n" on .NET 5+ (ICU), and a linguistic match is not
    // guaranteed to be s.Length characters long, which includeToken relies on.
    // Callers that want culture rules can use the StringComparison overload.
    return SkipTo(s, StringComparison.Ordinal, includeToken);
}

/// <summary>
/// Moves the current position to the next occurrence of the specified string and returns
/// true if a match was found. If the specified string is not found, this method
/// moves the current position to the end of the text being parsed and returns false.
/// </summary>
/// <remarks>
/// When <paramref name="includeToken"/> is true the position is advanced by
/// <c>s.Length</c>; with culture-sensitive comparisons the matched text may have a
/// different length.
/// </remarks>
/// <param name="s">String to skip to.</param>
/// <param name="comparison">One of the enumeration values that specifies the rules for
/// search.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also skipped.</param>
/// <returns>True if the specified string was found. Otherwise, false.</returns>
public bool SkipTo(string s, StringComparison comparison, bool includeToken = false)
{
    int found = Text.IndexOf(s, InternalIndex, comparison);
    if (found < 0)
    {
        InternalIndex = Text.Length;
        return false;
    }

    InternalIndex = includeToken ? found + s.Length : found;
    return true;
}

/// <summary>
/// Moves the current position to the start of the next text that matches the given regular
/// expression and returns true if a match was found. If no match is found, this method
/// moves the current position to the end of the text being parsed and returns false.
/// </summary>
/// <param name="regularExpression">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also skipped.</param>
/// <returns>True if a match was found.</returns>
#if NET7_0_OR_GREATER
public bool SkipToRegEx([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression, bool includeToken = false)
#else
public bool SkipToRegEx(string regularExpression, bool includeToken = false)
#endif
{
    // Build the expression with the helper-wide options and defer to the Regex overload.
    return SkipToRegEx(new Regex(regularExpression, RegularExpressionOptions), includeToken);
}

/// <summary>
/// Moves the current position to the start of the next text that matches
/// <paramref name="regex"/> and returns true. If nothing matches, the current position
/// is moved to the end of the text being parsed and false is returned.
/// </summary>
/// <param name="regex">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also skipped.</param>
/// <returns>True if a match was found.</returns>
public bool SkipToRegEx(Regex regex, bool includeToken = false)
{
#if NETSTANDARD2_0
    if (regex == null)
        throw new ArgumentNullException(nameof(regex));
#else
    ArgumentNullException.ThrowIfNull(regex);
#endif

    Match hit = regex.Match(Text, Index);
    if (!hit.Success)
    {
        InternalIndex = Text.Length;
        return false;
    }

    InternalIndex = includeToken ? hit.Index + hit.Length : hit.Index;
    return true;
}

/// <summary>
/// Moves the current position to the next line break character and returns true if a line-break
/// character was found. If no line break characters are found, this method moves to the end of
/// the text being parsed and returns false.
/// </summary>
/// <returns>True if a line break character was found. Otherwise, false.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool SkipToEndOfLine()
{
    return SkipTo(LineBreakCharacters);
}

/// <summary>
/// Moves the current position to the start of the next line. Returns true if a line-break
/// character was found; otherwise the position ends up at the end of the text being
/// parsed and false is returned.
/// </summary>
/// <returns>True if any more line break characters were found.</returns>
public bool SkipToNextLine()
{
    bool foundLineBreak = SkipToEndOfLine();   // stop on '\r' / '\n' or at end of text
    SkipLineBreak();                           // then step over the line break itself
    return foundLineBreak;
}

/// <summary>
/// Skips over a line break. The current position must be at the first line break
/// character or at the end of the text being parsed.
/// </summary>
private void SkipLineBreak()
{
    Debug.Assert(EndOfText || LineBreakCharacters.Contains(Peek()));

    if (!MatchesCurrentPosition(LineBreakCharacters))
    {
        // Not the full "\r\n" sequence: advance by at most one character.
        Next();
        return;
    }
    InternalIndex += LineBreakCharacters.Length;
}

#endregion

#region Parse characters

/// <summary>
/// Parses a single character and increments the current position. Returns an empty string
/// if the current position was at the end of the text being parsed.
/// </summary>
/// <returns>A string that contains the parsed character, or an empty string if the current
/// position was at the end of the text being parsed.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string ParseCharacter()
{
    return ParseCharacters(1);
}

#if !NETSTANDARD2_0
///
/// Parses a single character and increments the current position. Returns an empty span
/// if the current position was at the end of the text being parsed.
///
/// A read-only character span that contains the parsed character,
/// or an empty span if the current position was at the end of the text being parsed.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ReadOnlySpan<char> ParseCharacterAsSpan() => ParseCharactersAsSpan(1);
#endif

/// <summary>
/// Parses the specified number of characters starting at the current position and
/// advances the current position by the number of characters parsed. Fewer characters
/// are parsed if the end of the text is reached; a negative count parses nothing.
/// </summary>
/// <param name="count">The number of characters to parse.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseCharacters(int count)
{
    // Clamp the request to 0..Remaining.
    count = Math.Max(0, Math.Min(count, Remaining));
    int start = InternalIndex;
    InternalIndex += count;
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses the specified number of characters starting at the current position and
/// advances the current position by the number of characters parsed. Fewer characters
/// are parsed if the end of the text is reached; a negative count parses nothing.
/// </summary>
/// <param name="count">The number of characters to parse.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseCharactersAsSpan(int count)
{
    // Clamp the request to 0..Remaining.
    count = Math.Max(0, Math.Min(count, Remaining));
    int start = InternalIndex;
    InternalIndex += count;
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next character for which <paramref name="predicate"/>
/// returns false, and returns the parsed characters. Can return an empty string.
/// </summary>
/// <param name="predicate">Function to test each character. Should return true
/// for each character that should be parsed.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseWhile(Func<char, bool> predicate)
{
    int start = InternalIndex;
    SkipWhile(predicate);
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the next character for which <paramref name="predicate"/>
/// returns false, and returns the parsed characters. Can return an empty span.
/// </summary>
/// <param name="predicate">Function to test each character. Should return true
/// for each character that should be parsed.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseWhileAsSpan(Func<char, bool> predicate)
{
    int start = InternalIndex;
    SkipWhile(predicate);
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next character that is not contained in
/// <paramref name="chars"/>, and returns a string with the parsed characters.
/// Can return an empty string.
/// </summary>
/// <param name="chars">Characters to parse.</param>
/// <returns>A string with the parsed characters.</returns>
public string Parse(params char[] chars) => ParseWhile(chars.Contains);

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the next character that is not contained in
/// <paramref name="chars"/>, and returns a <see cref="ReadOnlySpan{T}"/>
/// with the parsed characters. Can return an empty span.
/// </summary>
/// <param name="chars">Characters to parse.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseAsSpan(params char[] chars) => ParseWhileAsSpan(chars.Contains);
#endif

///
/// Parses the next line of text and returns true if successful. Returns false if
/// the current position was at the end of the text being parsed. The current position is
/// moved past the line-break characters to the start of the following line.
///
/// Receives the parsed line.
/// True if successful; otherwise, false if the current position was at the end
/// of the text being parsed.
public bool ParseLine(out string line)
{
    if (EndOfText)
    {
        line = string.Empty;
        return false;
    }

    int start = InternalIndex;
    SkipToEndOfLine();
    // Capture the line before consuming its terminator so the line break is excluded.
    line = Extract(start, InternalIndex);
    SkipLineBreak();
    return true;
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses the next line of text and returns true if successful. Returns false if
/// the current position was at the end of the text being parsed. The current position is
/// moved past the line-break characters to the start of the following line.
/// </summary>
/// <param name="span">Receives the parsed line, or an empty span at the end of the text.</param>
/// <returns>True if successful; otherwise, false if the current position was at the end
/// of the text being parsed.</returns>
public bool ParseLine(out ReadOnlySpan<char> span)
{
    if (EndOfText)
    {
        span = string.Empty;
        return false;
    }

    int start = InternalIndex;
    SkipToEndOfLine();
    // Capture the line before consuming its terminator so the line break is excluded.
    span = ExtractAsSpan(start, InternalIndex);
    SkipLineBreak();
    return true;
}
#endif

/// <summary>
/// Parses quoted text. The character at the current position is assumed to be the starting quote
/// character. This method parses text up until the matching end quote character. Returns the parsed
/// text without the quotes and sets the current position to the character following the
/// end quote. If the text contains two quote characters together, the pair is handled as a
/// single quote literal and not the end of the quoted text.
/// </summary>
/// <returns>Returns the text within the quotes.</returns>
public string ParseQuotedText()
{
    // The character at the current position is both the quote and its own escape
    // (a doubled quote is a literal quote). That is exactly the escapeChar == quote
    // case of the overload below, which also returns an empty string at end of text.
    return ParseQuotedText(Peek());
}

/// <summary>
/// Parses quoted text with options. The character at the current position is assumed to be the
/// starting quote character. This method parses text up until the next matching end quote
/// character, returns the parsed text and sets the current position to the character
/// following the end quote. If no end quote exists, all remaining text is parsed.
/// </summary>
/// <param name="escapeChar">Specifies an escape character. If this character is immediately
/// followed by a quote character, the pair is handled as a single quote literal and not the end
/// of the quoted text. Set to null for no escape character, in which case the string is
/// terminated at the next quote character. This parameter can be the same as the quote
/// character.</param>
/// <param name="includeEscapeChar">Specifies if the escape characters should be included in
/// the returned string.</param>
/// <param name="includeQuotes">Specifies if the enclosing quotes should be included in the
/// returned string. The closing quote is added only if one was actually found.</param>
/// <returns>Returns the text within the quotes.</returns>
public string ParseQuotedText(char? escapeChar, bool includeEscapeChar = false, bool includeQuotes = false)
{
    if (EndOfText)
        return string.Empty;

    StringBuilder builder = new();

    // Get and skip quote character
    char quote = Get();

    // Set once a real terminating quote has been consumed. The previous test,
    // Peek(-1) == quote, also fired for the opening quote of a lone '"' and for an
    // escaped quote at the very end of unterminated text.
    bool closed = false;

    if (includeQuotes)
        builder.Append(quote);

    if (escapeChar == null)
    {
        // No escape character: the very next quote terminates the text
        builder.Append(ParseTo(quote));
        closed = !EndOfText;    // ParseTo stops on the quote or at end of text
        Next();
    }
    else if (escapeChar != quote)
    {
        // Custom escape character
        char escape = escapeChar.Value;
        while (!EndOfText)
        {
            // Parse to next quote or escape character
            builder.Append(ParseTo(quote, escape));
            if (EndOfText)
                break;          // Unterminated text

            char found = Peek();
            Next();
            if (found != escape)
            {
                closed = true;  // Unescaped quote ends the text
                break;
            }

            if (Peek() == quote)
            {
                // Quote following escape character treated as quote literal
                if (includeEscapeChar)
                    builder.Append(escape);
                builder.Append(quote);
                Next();
            }
            else
            {
                // Escape character not followed by a quote is ordinary text
                builder.Append(found);
            }
        }
    }
    else
    {
        // Quote doubles as the escape character
        while (!EndOfText)
        {
            // Parse to next quote
            builder.Append(ParseTo(quote));
            if (EndOfText)
                break;          // Unterminated text

            // Skip quote
            Next();
            if (Peek() != quote)
            {
                closed = true;  // Single quote ends the text
                break;
            }

            // Two consecutive quotes treated as quote literal
            if (includeEscapeChar)
                builder.Append(quote);
            builder.Append(quote);
            Next();
        }
    }

    // Add closing quote if requested and one was present in the text
    if (includeQuotes && closed)
        builder.Append(quote);

    return builder.ToString();
}

#endregion

#region Parse to characters

///
/// Parses characters until the next occurrence of any one of the specified characters and
/// returns a string with the parsed characters. If none of the specified characters are found,
/// this method parses all characters up to the end of the text being parsed. Can return an empty
/// string.
///
/// The characters that cause parsing to end.
/// A string with the parsed characters.
public string ParseTo(params char[] chars)
{
    int start = InternalIndex;
    SkipTo(chars);
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the next occurrence of any one of the specified characters and
/// returns a <see cref="ReadOnlySpan{T}"/> with the parsed characters. If none of the
/// specified characters are found, this method parses all characters up to the end of the
/// text being parsed. Can return an empty span.
/// </summary>
/// <param name="chars">The characters that cause parsing to end.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseToAsSpan(params char[] chars)
{
    int start = InternalIndex;
    SkipTo(chars);
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next occurrence of the specified string and returns a
/// string with the parsed characters. If the specified string is not found, this method parses
/// all characters to the end of the text being parsed. Can return an empty string.
/// </summary>
/// <param name="s">Text that causes parsing to end.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseTo(string s, bool includeToken = false)
{
    int start = InternalIndex;
    SkipTo(s, includeToken);
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the next occurrence of the specified string and returns a
/// <see cref="ReadOnlySpan{T}"/> with the parsed characters. If the specified string
/// is not found, this method parses all characters to the end of the text being parsed.
/// Can return an empty span.
/// </summary>
/// <param name="s">Text that causes parsing to end.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseToAsSpan(string s, bool includeToken = false)
{
    int start = InternalIndex;
    SkipTo(s, includeToken);
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next occurrence of the specified string and returns a
/// string with the parsed characters. If the specified string is not found, this method parses
/// all characters to the end of the text being parsed. Can return an empty string.
/// </summary>
/// <param name="s">Text that causes parsing to end.</param>
/// <param name="comparison">One of the enumeration values that specifies the rules for
/// comparing the specified string.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseTo(string s, StringComparison comparison, bool includeToken = false)
{
    int start = InternalIndex;
    SkipTo(s, comparison, includeToken);
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the next occurrence of the specified string and returns a
/// <see cref="ReadOnlySpan{T}"/> with the parsed characters. If the specified string
/// is not found, this method parses all characters to the end of the text being parsed.
/// Can return an empty span.
/// </summary>
/// <param name="s">Text that causes parsing to end.</param>
/// <param name="comparison">One of the enumeration values that specifies the rules for
/// comparing the specified string.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseToAsSpan(string s, StringComparison comparison, bool includeToken = false)
{
    int start = InternalIndex;
    SkipTo(s, comparison, includeToken);
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next occurrence of any one of the specified strings and
/// returns a string with the parsed characters. If none of the specified strings are found,
/// this method parses all characters up to the end of the text being parsed. Can return an
/// empty string.
/// </summary>
/// <remarks>
/// The term with the earliest match wins; when several terms match at the same
/// position, the one enumerated first is used.
/// </remarks>
/// <param name="terms">The strings that cause parsing to end.</param>
/// <param name="comparison">One of the enumeration values that specifies the rules for
/// comparing the specified string.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseTo(IEnumerable<string> terms, StringComparison comparison, bool includeToken = false)
{
    if (EndOfText)
        return string.Empty;

    int start = InternalIndex;
    int matchIndex = int.MaxValue;
    string? matchTerm = null;

    // Find the term whose first occurrence is closest to the current position
    foreach (string term in terms)
    {
        int i = Text.IndexOf(term, start, comparison);
        if (i >= 0 && i < matchIndex)
        {
            matchIndex = i;
            matchTerm = term;
        }
    }

    if (matchTerm != null)
    {
        InternalIndex = includeToken ? matchIndex + matchTerm.Length : matchIndex;
    }
    else
    {
        // No term found: consume the rest of the text, as documented and as the
        // single-string overloads do. Previously nothing was parsed and an empty
        // string was returned.
        InternalIndex = Text.Length;
    }
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
///
/// Parses characters until the next occurrence of any one of the specified strings and
/// returns a with the parsed characters. If none of the
/// specified strings are found, this method parses all character up to the end of the text
/// being parsed. Can return an empty span.
///
/// The strings that cause parsing to end.
/// One of the enumeration values that specifies the rules for
/// comparing the specified string.
/// If true and a match is found, the matching text is
/// also parsed.
/// A read-only character span with the parsed characters.
public ReadOnlySpan<char> ParseToAsSpan(IEnumerable<string> terms, StringComparison comparison, bool includeToken = false)
{
    if (EndOfText)
        return [];

    int start = InternalIndex;
    int matchIndex = int.MaxValue;
    string? matchTerm = null;

    // Find the term whose first occurrence is closest to the current position;
    // on a tie the term enumerated first wins.
    foreach (string term in terms)
    {
        int i = Text.IndexOf(term, start, comparison);
        if (i >= 0 && i < matchIndex)
        {
            matchIndex = i;
            matchTerm = term;
        }
    }

    if (matchTerm != null)
    {
        InternalIndex = includeToken ? matchIndex + matchTerm.Length : matchIndex;
    }
    else
    {
        // No term found: consume the rest of the text, as the summary for this method
        // states. Previously nothing was parsed and an empty span was returned.
        InternalIndex = Text.Length;
    }
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the start of the next token that matches the given regular
/// expression and returns a string with the parsed characters. If no match is found, this
/// method parses all characters to the end of the text being parsed. Can return an empty string.
/// </summary>
/// <param name="regularExpression">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A string with the parsed characters.</returns>
#if NET7_0_OR_GREATER
public string ParseToRegEx([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression, bool includeToken = false)
#else
public string ParseToRegEx(string regularExpression, bool includeToken = false)
#endif
{
    // Compile the pattern with the helper-wide options, then use the Regex overload.
    Regex regex = new(regularExpression, RegularExpressionOptions);
    return ParseToRegEx(regex, includeToken);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the start of the next token that matches the given regular
/// expression and returns a <see cref="ReadOnlySpan{T}"/> with the parsed characters.
/// If no match is found, this method parses all characters to the end of the text being
/// parsed. Can return an empty span.
/// </summary>
/// <param name="regularExpression">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
#if NET7_0_OR_GREATER
public ReadOnlySpan<char> ParseToRegExAsSpan([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression, bool includeToken = false)
#else
public ReadOnlySpan<char> ParseToRegExAsSpan(string regularExpression, bool includeToken = false)
#endif
{
    // Compile the pattern with the helper-wide options, then use the Regex overload.
    Regex regex = new(regularExpression, RegularExpressionOptions);
    return ParseToRegExAsSpan(regex, includeToken);
}
#endif

/// <summary>
/// Parses characters until the start of the next token that matches the given regular
/// expression and returns a string with the parsed characters. If no match is found, this
/// method parses all characters to the end of the text being parsed. Can return an empty string.
/// </summary>
/// <param name="regex">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A string with the parsed characters.</returns>
public string ParseToRegEx(Regex regex, bool includeToken = false)
{
#if NETSTANDARD2_0
    if (regex == null)
        throw new ArgumentNullException(nameof(regex));
#else
    ArgumentNullException.ThrowIfNull(regex);
#endif

    // Remember where we started, let SkipToRegEx move the position, then return
    // the text in between.
    int start = InternalIndex;
    SkipToRegEx(regex, includeToken);
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the start of the next token that matches the given regular
/// expression and returns a <see cref="ReadOnlySpan{T}"/> with the parsed characters.
/// If no match is found, this method parses all characters to the end of the text being parsed.
/// Can return an empty span.
/// </summary>
/// <param name="regex">A regular expression that the text must match.</param>
/// <param name="includeToken">If true and a match is found, the matching text is
/// also parsed.</param>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
public ReadOnlySpan<char> ParseToRegExAsSpan(Regex regex, bool includeToken = false)
{
    // This member is only compiled when NETSTANDARD2_0 is not defined, so no
    // NETSTANDARD2_0 fallback for the null check is needed here.
    ArgumentNullException.ThrowIfNull(regex);

    int start = InternalIndex;
    SkipToRegEx(regex, includeToken);
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the next line break character. If no line-break characters are found,
/// this method parses all characters to the end of the text being parsed.
/// </summary>
/// <returns>A string with the parsed characters.</returns>
public string ParseToEndOfLine()
{
    int start = InternalIndex;
    SkipToEndOfLine();
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
///
/// Parses characters until the next line break character. If no line-break characters are found,
/// this method parses all characters to the end of the text being parsed.
///
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseToEndOfLineAsSpan()
{
    int start = InternalIndex;
    SkipToEndOfLine();
    return ExtractAsSpan(start, InternalIndex);
}
#endif

/// <summary>
/// Parses characters until the start of the next line. If no more line break characters are
/// found, this method parses all characters to the end of the text being parsed.
/// </summary>
/// <returns>A string with the parsed characters.</returns>
public string ParseToNextLine()
{
    int start = InternalIndex;
    SkipToNextLine();
    return Extract(start, InternalIndex);
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses characters until the start of the next line. If no more line break characters are
/// found, this method parses all characters to the end of the text being parsed.
/// </summary>
/// <returns>A <see cref="ReadOnlySpan{T}"/> with the parsed characters.</returns>
public ReadOnlySpan<char> ParseToNextLineAsSpan()
{
    int start = InternalIndex;
    SkipToNextLine();
    return ExtractAsSpan(start, InternalIndex);
}
#endif

#endregion

#region Parse tokens

/// <summary>
/// Parses text using the specified delimiter characters. Skips any characters that are in the
/// list of delimiters, and then parses any characters that are not in the list of delimiters.
/// Returns the parsed characters.
/// </summary>
/// <param name="delimiters">Token delimiter characters.</param>
/// <returns>Returns the parsed token.</returns>
public string ParseToken(params char[] delimiters)
{
    // Step over leading delimiters, then read up to the next delimiter.
    Skip(delimiters);
    return ParseTo(delimiters);
}

#if !NETSTANDARD2_0
///
/// Parses text using the specified delimiter characters. Skips any characters that are in the
/// list of delimiters, and then parses any characters that are not in the list of delimiters.
/// Returns the parsed characters.
///
/// <param name="delimiters">Token delimiter characters.</param>
/// <returns>Returns the parsed token as a span.</returns>
public ReadOnlySpan<char> ParseTokenAsSpan(params char[] delimiters)
{
    // Step over leading delimiters, then read up to the next delimiter.
    Skip(delimiters);
    return ParseToAsSpan(delimiters);
}
#endif

/// <summary>
/// Parses text using the specified predicate to indicate delimiter characters. Skips any
/// characters for which <paramref name="predicate"/> returns true, and then parses any
/// characters for which <paramref name="predicate"/> returns false. Returns the parsed
/// characters.
/// </summary>
/// <param name="predicate">Function that returns true for token delimiter
/// characters.</param>
/// <returns>Returns the parsed token.</returns>
public string ParseToken(Func<char, bool> predicate)
{
    SkipWhile(predicate);
    // The token consists of the characters the predicate rejects.
    return ParseWhile(c => !predicate(c));
}

#if !NETSTANDARD2_0
/// <summary>
/// Parses text using the specified predicate to indicate delimiter characters. Skips any
/// characters for which <paramref name="predicate"/> returns true, and then parses any
/// characters for which <paramref name="predicate"/> returns false. Returns the parsed
/// characters.
/// </summary>
/// <param name="predicate">Function that returns true for token delimiter
/// characters.</param>
/// <returns>Returns the parsed token as a span.</returns>
public ReadOnlySpan<char> ParseTokenAsSpan(Func<char, bool> predicate)
{
    SkipWhile(predicate);
    // The token consists of the characters the predicate rejects.
    return ParseWhileAsSpan(c => !predicate(c));
}
#endif

/// <summary>
/// Parses text using a regular expression. Skips up to the start of the matching text, and then
/// parses the matching text. If no match is found, the current position is set to the end of
/// the text and an empty string is returned.
/// </summary>
/// <param name="regularExpression">A regular expression that the token must match.</param>
/// <returns>Returns the text of the matching token.</returns>
#if NET7_0_OR_GREATER
public string ParseTokenRegEx([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression)
#else
public string ParseTokenRegEx(string regularExpression)
#endif
{
    // A new Regex is constructed on every call (with RegularExpressionOptions) and
    // the work is delegated to the Regex overload.
    Regex regex = new(regularExpression, RegularExpressionOptions);
    return ParseTokenRegEx(regex);
}

/// <summary>
/// Parses text using a regular expression. Skips up to the start of the matching text, and then
/// parses the matching text. If no match is found, the current position is set to the end of
/// the text and an empty string is returned.
/// </summary>
/// <param name="regex">A regular expression that the token must match.</param>
/// <returns>Returns the text of the matching token.</returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
public string ParseTokenRegEx(Regex regex)
{
#if NETSTANDARD2_0
    if (regex == null)
        throw new ArgumentNullException(nameof(regex));
#else
    ArgumentNullException.ThrowIfNull(regex);
#endif

    // Search from the current position onward.
    Match match = regex.Match(Text, Index);
    if (match.Success)
    {
        // Position ends up immediately after the matched text.
        InternalIndex = match.Index + match.Length;
        return match.Value;
    }

    // No match: consume the rest of the text.
    InternalIndex = Text.Length;
    return string.Empty;
}

/// <summary>
/// This method has been deprecated. Please use <see cref="ParseTokens(char[])"/> instead.
/// </summary>
[Obsolete("This method has been deprecated and will be removed in a future version. Please use ParseTokens() instead.")]
public IEnumerable<string> ParseAllTokens(params char[] delimiters) => ParseTokens(delimiters);

/// <summary>
/// Parses and returns all tokens to the end of the text being parsed. The specified
/// characters indicate delimiter characters that are not part of a token.
/// </summary>
/// <param name="delimiters">Token delimiter characters.</param>
/// <returns>Returns the parsed tokens.</returns>
public IEnumerable<string> ParseTokens(params char[] delimiters)
{
    // Iterator method: nothing runs, and the current position does not move,
    // until the returned sequence is enumerated.
    Skip(delimiters);
    while (!EndOfText)
    {
        yield return ParseTo(delimiters);
        Skip(delimiters);
    }
}

/// <summary>
/// Parses and returns up to the specified number of tokens. The specified
/// characters indicate delimiter characters that are not part of a token.
/// </summary>
/// <param name="count">The maximum number of tokens to parse.</param>
/// <param name="delimiters">Token delimiter characters.</param>
/// <returns>Returns the parsed tokens.</returns>
public IEnumerable<string> ParseTokens(int count, params char[] delimiters)
{
    // Iterator method: nothing runs until the returned sequence is enumerated.
    Skip(delimiters);
    while (!EndOfText)
    {
        // Stop once the requested number of tokens has been produced.
        if (count-- <= 0)
            break;
        yield return ParseTo(delimiters);
        Skip(delimiters);
    }
}

/// <summary>
/// This method has been deprecated. Please use <see cref="ParseTokens(Func{char, bool})"/> instead.
/// </summary>
[Obsolete("This method has been deprecated and will be removed in a future version. Please use ParseTokens() instead.")]
public IEnumerable<string> ParseAllTokens(Func<char, bool> predicate) => ParseTokens(predicate);

/// <summary>
/// Parses and returns all tokens to the end of the text being parsed.
/// <paramref name="predicate"/> returns true for delimiter characters that are not part of a token.
/// </summary>
/// <param name="predicate">Function that returns true for token delimiter
/// characters.</param>
/// <returns>Returns the parsed tokens.</returns>
public IEnumerable<string> ParseTokens(Func<char, bool> predicate)
{
    // Iterator method: nothing runs until the returned sequence is enumerated.
    SkipWhile(predicate);
    while (!EndOfText)
    {
        // A token is the run of characters the predicate rejects.
        yield return ParseWhile(c => !predicate(c));
        SkipWhile(predicate);
    }
}

/// <summary>
/// Parses and returns up to the specified number of tokens.
/// <paramref name="predicate"/> returns true for delimiter characters that are not part of a token.
/// </summary>
/// <param name="count">Specifies the maximum number of tokens to parse.</param>
/// <param name="predicate">Function that returns true for token delimiter
/// characters.</param>
/// <returns>Returns the parsed tokens.</returns>
public IEnumerable<string> ParseTokens(int count, Func<char, bool> predicate)
{
    // Iterator method: nothing runs, and the current position does not move,
    // until the returned sequence is enumerated.
    SkipWhile(predicate);
    while (!EndOfText)
    {
        // Stop once the requested number of tokens has been produced.
        if (count-- <= 0)
            break;
        // A token is the run of characters the predicate rejects.
        yield return ParseWhile(c => !predicate(c));
        SkipWhile(predicate);
    }
}

/// <summary>
/// Parses all tokens that match the given regular expression and sets the current position to the end
/// of the last token. If no matches are found, the current position is set to the end of the text
/// and an empty collection is returned.
/// </summary>
/// <param name="regularExpression">A regular expression that the tokens must match.</param>
/// <returns>Returns the matching tokens.</returns>
#if NET7_0_OR_GREATER
public IEnumerable<string> ParseTokensRegEx([StringSyntax(StringSyntaxAttribute.Regex)] string regularExpression)
#else
public IEnumerable<string> ParseTokensRegEx(string regularExpression)
#endif
{
    // A new Regex is constructed on every call (with RegularExpressionOptions) and
    // the work is delegated to the Regex overload.
    Regex regex = new(regularExpression, RegularExpressionOptions);
    return ParseTokensRegEx(regex);
}

/// <summary>
/// Parses all tokens that match the given regular expression and sets the current position to the end
/// of the last token. If no matches are found, the current position is set to the end of the text
/// and an empty collection is returned.
/// </summary>
/// <param name="regex">A regular expression that the tokens must match.</param>
/// <returns>Returns the matching tokens.</returns>
public IEnumerable<string> ParseTokensRegEx(Regex regex)
{
    // Validate eagerly. Inside an iterator body this check would not run until the
    // caller started enumerating the result.
#if NETSTANDARD2_0
    if (regex == null)
        throw new ArgumentNullException(nameof(regex));
#else
    ArgumentNullException.ThrowIfNull(regex);
#endif

    return EnumerateMatches();

    // The search and the position update still happen when enumeration starts.
    IEnumerable<string> EnumerateMatches()
    {
        MatchCollection matches = regex.Matches(Text, Index);
        if (matches.Count > 0)
        {
            // Update current position
#if NETSTANDARD2_0
            Match lastMatch = matches[matches.Count - 1];
#else
            Match lastMatch = matches[^1];
#endif
            InternalIndex = lastMatch.Index + lastMatch.Length;
            // Return matches
            foreach (Match match in matches.Cast<Match>())
                yield return match.Value;
        }
        else InternalIndex = Text.Length;
    }
}

#endregion

#region Matches current position

/// <summary>
/// Compares the given character array to the characters starting at the current position
/// using a case-sensitive comparison.
/// </summary>
/// <param name="chars">Characters to compare.</param>
/// <returns>Returns true if the given characters match the characters at the current
/// position. Returns false otherwise.</returns>
public bool MatchesCurrentPosition(char[]? chars)
{
    // Null, empty, or longer than the remaining text can never match.
    if (chars == null || chars.Length == 0 || chars.Length > Remaining)
        return false;
    for (int i = 0; i < chars.Length; i++)
    {
        if (chars[i] != Text[InternalIndex + i])
            return false;
    }
    return true;
}

/// <summary>
/// Returns true if the given string matches the characters at the current position, or
/// false otherwise.
/// </summary>
/// <param name="s">String to compare.</param>
/// <returns>Returns true if the given string matches the characters at the current position,
/// or false otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool MatchesCurrentPosition(string? s) => s != null &&
    s.Length != 0 &&
    string.CompareOrdinal(Text, InternalIndex, s, 0, s.Length) == 0;

/// <summary>
/// Returns true if the given string matches the characters at the current position, or
/// false otherwise.
/// </summary>
/// <param name="s">String to compare.</param>
/// <param name="comparison">One of the enumeration values that specifies the rules to use in the
/// comparison.</param>
/// <returns>Returns true if the given string matches the characters at the current position,
/// or false otherwise.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool MatchesCurrentPosition(string? s, StringComparison comparison) => s != null &&
    s.Length != 0 &&
    string.Compare(Text, InternalIndex, s, 0, s.Length, comparison) == 0;

#endregion

#region Extraction

/// <summary>
/// Extracts a substring of the text being parsed. The substring includes all characters
/// from the <paramref name="start"/> position to the end of the text.
/// </summary>
/// <param name="start">0-based position of first character to be extracted.</param>
/// <returns>Returns the extracted string.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#if NETSTANDARD2_0
public string Extract(int start) => Text.Substring(start);
#else
public string Extract(int start) => Text[start..];
#endif

#if !NETSTANDARD2_0
/// <summary>
/// Extracts a span of the text being parsed. The span includes all characters
/// from the <paramref name="start"/> position to the end of the text.
/// </summary>
/// <param name="start">0-based position of first character to be extracted.</param>
/// <returns>Returns the extracted span.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ReadOnlySpan<char> ExtractAsSpan(int start) => Text.AsSpan(start);
#endif

///
/// Extracts a substring from the text being parsed.
///
/// 0-based position of first character to be extracted.
/// 0-based position of the character that follows the last
/// character to be extracted.
/// Returns the extracted string.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#if NETSTANDARD2_0
public string Extract(int start, int end) => Text.Substring(start, end - start);
#else
public string Extract(int start, int end) => Text[start..end];
#endif

#if !NETSTANDARD2_0
/// <summary>
/// Extracts a span from the text being parsed.
/// </summary>
/// <param name="start">0-based position of first character to be extracted.</param>
/// <param name="end">0-based position of the character that follows the last
/// character to be extracted.</param>
/// <returns>A span with the specified characters.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public ReadOnlySpan<char> ExtractAsSpan(int start, int end) => Text.AsSpan(start, end - start);

/// <summary>
/// Extracts a substring from the text being parsed.
/// </summary>
/// <param name="range">The range of characters to extract, resolved against the
/// length of the text being parsed.</param>
public string this[Range range]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // GetOffsetAndLength yields a start offset and a LENGTH, while Extract takes
        // an exclusive END position, so the end is offset + length.
        (int offset, int length) = range.GetOffsetAndLength(Text.Length);
        return Extract(offset, offset + length);
    }
}

/// <summary>
/// Gets the character at the specified index.
/// </summary>
/// <param name="index">Position of the character to return, resolved against the
/// length of the text being parsed.</param>
public char this[Index index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    // Resolve to a plain offset and reuse the bounds-checked int indexer.
    get => this[index.GetOffset(Text.Length)];
}
#endif

/// <summary>
/// Gets the character at the specified index. Returns <see cref="NullChar"/> if
/// <paramref name="index"/> is not valid.
/// </summary>
/// <param name="index">0-based position of the character to return.</param>
public char this[int index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get => (index >= 0 && index < Text.Length) ? Text[index] : NullChar;
}

#endregion

#region Operator overloads

/// <summary>
/// Converts a <see cref="ParsingHelper"/> to the value of its current position.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static implicit operator int(ParsingHelper helper) => helper.InternalIndex;

/// <summary>
/// Move the current position ahead one character.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ParsingHelper operator ++(ParsingHelper helper)
{
    // The operand itself is moved and returned; no copy is made.
    helper.Next(1);
    return helper;
}

/// <summary>
/// Move the current position back one character.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ParsingHelper operator --(ParsingHelper helper)
{
    // The operand itself is moved and returned; no copy is made.
    helper.Next(-1);
    return helper;
}

/// <summary>
/// Moves the current position ahead by the specified number of characters.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ParsingHelper operator +(ParsingHelper helper, int count)
{
    // The left operand itself is moved and returned; no copy is made.
    helper.Next(count);
    return helper;
}

/// <summary>
/// Moves the current position back by the specified number of characters.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ParsingHelper operator -(ParsingHelper helper, int count)
{
    // The left operand itself is moved and returned; no copy is made.
    helper.Next(-count);
    return helper;
}

#endregion

}
}

--------------------------------------------------------------------------------
/SoftCircuits.Parsing.Helper/SkipWhiteSpaceOption.cs:
--------------------------------------------------------------------------------
// Copyright (c) 2019-2024 Jonathan Wood (www.softcircuits.com)
// Licensed under the MIT license.
//

namespace SoftCircuits.Parsing.Helper
{
    ///
    /// Specifies options for the
    /// method.
10 | /// 11 | public enum SkipWhiteSpaceOption 12 | { 13 | /// 14 | /// Stop if a line break character is found. 15 | /// 16 | StopAtEol, 17 | 18 | /// 19 | /// Stop if the start of a new line (after a line break) is found. 20 | /// 21 | StopAtNextLine 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /SoftCircuits.Parsing.Helper/SoftCircuits.Parsing.Helper.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0;net8.0;net7.0;net6.0;netstandard2.0 5 | true 6 | annotations 7 | enable 8 | latest 9 | Jonathan Wood 10 | SoftCircuits 11 | ParsingHelper 12 | Copyright © 2019-2024 SoftCircuits 13 | en-US 14 | Added direct support for .NET 9.0; Code clean up. 15 | text parse parser parsing text-parser text-parsing parsing-helper .net csharp tokenizer 16 | git 17 | https://github.com/SoftCircuits/ParsingHelper 18 | License.txt 19 | A .NET class library that makes it easier to parse text. The library tracks the current position within the text, ensures your code never attempts to access a character at an invalid index, and includes many methods that make parsing easier. The library makes your text-parsing code more concise and more robust. Includes support for regular expressions. 20 | true 21 | true 22 | 5.2.0 23 | https://github.com/SoftCircuits/ParsingHelper 24 | Logo_64x64.png 25 | 26 | 4.0.1.0 27 | README.md 28 | SoftCircuits.ParsingHelper 29 | 30 | 31 | 32 | 33 | True 34 | 35 | 36 | 37 | True 38 | 39 | 40 | 41 | True 42 | \ 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /TestParsingHelper/TestParsingHelper.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019-2022 Jonathan Wood (www.softcircuits.com) 2 | // Licensed under the MIT license. 
3 | // 4 | using Microsoft.VisualStudio.TestTools.UnitTesting; 5 | using SoftCircuits.Parsing.Helper; 6 | using System; 7 | using System.Collections.Generic; 8 | using System.Linq; 9 | using System.Text.RegularExpressions; 10 | 11 | namespace TestParsingHelper 12 | { 13 | [TestClass] 14 | public class TestParsingHelper 15 | { 16 | [TestClass] 17 | public class ParsingHelperTests 18 | { 19 | private const string ShortTest = "Four score and seven years ago"; 20 | private const string LongTest = @"Four score and seven years ago our fathers brought forth on this continent, 21 | a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal. 22 | 23 | Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so 24 | dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a 25 | portion of that field, as a final resting place for those who here gave their lives that that nation might 26 | live. It is altogether fitting and proper that we should do this. 27 | 28 | But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. 29 | The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or 30 | detract. The world will little note, nor long remember what we say here, but it can never forget what they 31 | did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought 32 | here have thus far so nobly advanced. 
It is rather for us to be here dedicated to the great task remaining 33 | before us -- that from these honored dead we take increased devotion to that cause for which they gave the 34 | last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- 35 | that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the 36 | people, for the people, shall not perish from the earth."; 37 | 38 | [TestMethod] 39 | public void BasicTests() 40 | { 41 | ParsingHelper helper = new(ShortTest); 42 | 43 | // Initial state 44 | Assert.AreEqual('\0', ParsingHelper.NullChar); 45 | Assert.AreEqual(ShortTest, helper.Text); 46 | Assert.AreEqual(0, helper.Index); 47 | Assert.AreEqual(false, helper.EndOfText); 48 | Assert.AreEqual(ShortTest.Length, helper.Remaining); 49 | 50 | // Peek 51 | Assert.AreEqual('F', helper.Peek()); 52 | Assert.AreEqual('o', helper.Peek(1)); 53 | Assert.AreEqual('u', helper.Peek(2)); 54 | Assert.AreEqual('r', helper.Peek(3)); 55 | Assert.AreEqual(ParsingHelper.NullChar, helper.Peek(1000)); 56 | Assert.AreEqual(ParsingHelper.NullChar, helper.Peek(-1000)); 57 | Assert.AreEqual(0, helper.Index); 58 | 59 | // Get 60 | Assert.AreEqual('F', helper.Get()); 61 | Assert.AreEqual('o', helper.Get()); 62 | Assert.AreEqual('u', helper.Get()); 63 | Assert.AreEqual('r', helper.Get()); 64 | Assert.AreEqual(4, helper.Index); 65 | helper.Index = ShortTest.Length; 66 | Assert.AreEqual(ParsingHelper.NullChar, helper.Get()); 67 | 68 | // Next 69 | helper.Reset(); 70 | helper.Next(); 71 | Assert.AreEqual(1, helper.Index); 72 | Assert.AreEqual('o', helper.Peek()); 73 | helper.Next(2); 74 | Assert.AreEqual(3, helper.Index); 75 | Assert.AreEqual('r', helper.Peek()); 76 | helper.Next(-2); 77 | Assert.AreEqual(1, helper.Index); 78 | Assert.AreEqual('o', helper.Peek()); 79 | Assert.AreEqual(false, helper.EndOfText); 80 | Assert.AreEqual(ShortTest.Length - helper.Index, helper.Remaining); 81 | 82 | 
helper.Next(10000); 83 | Assert.AreEqual(helper.Text.Length, helper.Index); 84 | Assert.AreEqual(true, helper.EndOfText); 85 | Assert.AreEqual(0, helper.Remaining); 86 | helper.Next(-10000); 87 | Assert.AreEqual(0, helper.Index); 88 | Assert.AreEqual(false, helper.EndOfText); 89 | Assert.AreEqual(ShortTest.Length, helper.Remaining); 90 | 91 | helper.Index = 10000; 92 | Assert.AreEqual(ShortTest.Length, helper.Index); 93 | Assert.AreEqual(true, helper.EndOfText); 94 | Assert.AreEqual(0, helper.Remaining); 95 | helper.Index = -10000; 96 | Assert.AreEqual(0, helper.Index); 97 | Assert.AreEqual(false, helper.EndOfText); 98 | Assert.AreEqual(ShortTest.Length, helper.Remaining); 99 | 100 | helper.Index = 0; 101 | Assert.AreEqual(0, helper.Index); 102 | Assert.AreEqual(ShortTest.Length, helper.Remaining); 103 | Assert.AreEqual(false, helper.EndOfText); 104 | helper.Index = helper.Text.Length; 105 | Assert.AreEqual(helper.Text.Length, helper.Index); 106 | Assert.AreEqual(0, helper.Remaining); 107 | Assert.AreEqual(true, helper.EndOfText); 108 | helper.Index = 5; 109 | Assert.AreEqual(5, helper.Index); 110 | Assert.AreEqual(ShortTest.Length - 5, helper.Remaining); 111 | Assert.AreEqual(false, helper.EndOfText); 112 | 113 | helper.Reset(); 114 | Assert.AreEqual(0, helper.Index); 115 | Assert.AreEqual(ShortTest, helper.Text); 116 | 117 | helper.Reset(null); 118 | Assert.AreEqual(0, helper.Index); 119 | Assert.AreEqual(string.Empty, helper.Text); 120 | Assert.AreEqual(true, helper.EndOfText); 121 | Assert.AreEqual(0, helper.Remaining); 122 | } 123 | 124 | [TestMethod] 125 | public void SkipTests() 126 | { 127 | ParsingHelper helper = new(LongTest); 128 | 129 | // SkipTo 130 | Assert.IsTrue(helper.SkipTo("score")); 131 | Assert.AreEqual('s', helper.Peek()); 132 | Assert.AreEqual('c', helper.Peek(1)); 133 | helper.Reset(); 134 | Assert.IsTrue(helper.SkipTo("score", includeToken: true)); 135 | Assert.AreEqual(' ', helper.Peek()); 136 | Assert.AreEqual('a', helper.Peek(1)); 137 | 
helper.Reset(); 138 | Assert.IsTrue(helper.SkipTo("SCORE", StringComparison.OrdinalIgnoreCase)); 139 | Assert.AreEqual('s', helper.Peek()); 140 | Assert.AreEqual('c', helper.Peek(1)); 141 | helper.Reset(); 142 | Assert.IsTrue(helper.SkipTo('v')); 143 | Assert.AreEqual('v', helper.Peek()); 144 | Assert.AreEqual('e', helper.Peek(1)); 145 | Assert.IsFalse(helper.SkipTo("XxXxXxX")); 146 | Assert.AreEqual(LongTest.Length, helper.Index); 147 | Assert.AreEqual(true, helper.EndOfText); 148 | Assert.AreEqual(0, helper.Remaining); 149 | 150 | // SkipWhiteSpace 151 | helper.Reset(); 152 | Assert.IsTrue(helper.SkipTo(' ')); 153 | helper.SkipWhiteSpace(); 154 | Assert.AreEqual('s', helper.Peek()); 155 | 156 | // SkipWhiteSpace with options 157 | ParsingHelper helper2 = new(" \r\nxyz"); 158 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtEol); 159 | Assert.IsTrue(helper2.MatchesCurrentPosition("\r\nxyz")); 160 | helper2.Reset(); 161 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtNextLine); 162 | Assert.IsTrue(helper2.MatchesCurrentPosition("xyz")); 163 | helper2.Reset(" \rxyz "); 164 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtEol); 165 | Assert.IsTrue(helper2.MatchesCurrentPosition("\rxyz")); 166 | helper2.Reset(" \nxyz "); 167 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtNextLine); 168 | Assert.IsTrue(helper2.MatchesCurrentPosition("xyz")); 169 | helper2.Reset(" xyz"); 170 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtEol); 171 | Assert.IsTrue(helper2.MatchesCurrentPosition("xyz")); 172 | helper2.Reset(); 173 | helper2.SkipWhiteSpace(SkipWhiteSpaceOption.StopAtNextLine); 174 | Assert.IsTrue(helper2.MatchesCurrentPosition("xyz")); 175 | 176 | // SkipWhile 177 | helper.SkipWhile(c => "score".Contains(c)); 178 | Assert.AreEqual(' ', helper.Peek()); 179 | Assert.AreEqual('a', helper.Peek(1)); 180 | 181 | // SkipToNextLine/SkipToEndOfLine 182 | helper.Reset(); 183 | helper.SkipToEndOfLine(); 184 | Assert.AreEqual('\r', helper.Peek()); 185 | 
Assert.AreEqual('\n', helper.Peek(1)); 186 | helper.SkipToNextLine(); 187 | Assert.AreEqual('a', helper.Peek()); 188 | Assert.AreEqual(' ', helper.Peek(1)); 189 | helper.SkipToNextLine(); 190 | helper.SkipToNextLine(); 191 | Assert.AreEqual('N', helper.Peek()); 192 | Assert.AreEqual('o', helper.Peek(1)); 193 | 194 | // Skip 195 | helper.Skip('N', 'o', 'w', ' ', 'e'); 196 | Assert.AreEqual('a', helper.Peek()); 197 | Assert.AreEqual('r', helper.Peek(1)); 198 | } 199 | 200 | [TestMethod] 201 | public void ParseTests() 202 | { 203 | ParsingHelper helper = new(LongTest); 204 | 205 | Assert.IsTrue(helper.SkipTo("score")); 206 | Assert.AreEqual("score and seven years ago our ", helper.ParseTo("fathers")); 207 | Assert.AreEqual('f', helper.Peek()); 208 | 209 | helper.Reset(); 210 | Assert.IsTrue(helper.SkipTo("score")); 211 | Assert.AreEqual("score and seven years ago our ", helper.ParseTo("FATHERS", StringComparison.OrdinalIgnoreCase)); 212 | Assert.AreEqual('f', helper.Peek()); 213 | 214 | helper.Reset(); 215 | Assert.IsTrue(helper.SkipTo("score")); 216 | Assert.AreEqual("score and se", helper.ParseTo('v', 'X', 'Y', 'Z')); 217 | Assert.AreEqual('v', helper.Peek()); 218 | 219 | helper.Reset(); 220 | Assert.IsTrue(helper.SkipTo("score")); 221 | Assert.AreEqual("score", helper.Parse('e', 'r', 'o', 'c', 's')); 222 | Assert.AreEqual(' ', helper.Peek()); 223 | Assert.AreEqual(" ", helper.Parse(' ')); 224 | Assert.AreEqual('a', helper.Peek()); 225 | 226 | helper.Reset(); 227 | Assert.IsTrue(helper.SkipTo("score")); 228 | Assert.AreEqual("score and seven years ago our fathers brought forth on this continent", helper.ParseWhile(c => c != ',')); 229 | Assert.AreEqual(',', helper.Peek()); 230 | 231 | helper.Next(); // Skip comma 232 | Assert.AreEqual("a", helper.ParseToken(char.IsWhiteSpace)); 233 | Assert.AreEqual(' ', helper.Peek()); 234 | Assert.AreEqual('n', helper.Peek(1)); 235 | 236 | helper.Reset(); 237 | Assert.AreEqual("Four", helper.ParseToken(' ', '\r', '\n')); 238 | 
Assert.AreEqual(' ', helper.Peek()); 239 | 240 | string parseAllText = " \t\tthe \r\n\t\t rain in\t\t spain\r\n falls\r\nmainly on\tthe\r\nplain. "; 241 | string[] parseAllResults = [ "the", "rain", "in", "spain", "falls", "mainly", "on", "the", "plain" ]; 242 | 243 | helper.Reset(parseAllText); 244 | CollectionAssert.AreEqual(parseAllResults, helper.ParseTokens(' ', '\t', '\r', '\n', '.').ToList()); 245 | 246 | helper.Reset(); 247 | CollectionAssert.AreEqual(parseAllResults, helper.ParseTokens(c => " \t\r\n.".Contains(c)).ToList()); 248 | 249 | // ParseCharacter 250 | helper.Reset("abc"); 251 | Assert.AreEqual("a", helper.ParseCharacter()); 252 | Assert.AreEqual("b", helper.ParseCharacter()); 253 | Assert.AreEqual("c", helper.ParseCharacter()); 254 | Assert.AreEqual("", helper.ParseCharacter()); 255 | 256 | // ParseCharacters 257 | helper.Reset("abcdefg"); 258 | Assert.AreEqual("", helper.ParseCharacters(0)); 259 | Assert.AreEqual("", helper.ParseCharacters(-1)); 260 | Assert.AreEqual("a", helper.ParseCharacters(1)); 261 | Assert.AreEqual("bc", helper.ParseCharacters(2)); 262 | Assert.AreEqual("def", helper.ParseCharacters(3)); 263 | Assert.AreEqual("g", helper.ParseCharacters(10)); 264 | Assert.AreEqual("", helper.ParseCharacters(10)); 265 | 266 | // Parse to any string 267 | helper.Reset("abcdefg"); 268 | Assert.AreEqual("abc", helper.ParseTo([ "d", "ef", "g" ], StringComparison.Ordinal)); 269 | Assert.AreEqual("d", helper.ParseTo([ "d", "ef", "g" ], StringComparison.Ordinal, true)); 270 | Assert.AreEqual("ef", helper.ParseTo([ "d", "ef", "g" ], StringComparison.Ordinal, true)); 271 | Assert.AreEqual("g", helper.ParseTo([ "d", "ef", "g" ], StringComparison.Ordinal, true)); 272 | 273 | helper.Reset("abcd=ef=>g"); 274 | Assert.AreEqual("abcd=ef", helper.ParseTo([ "z", "=>", "x" ], StringComparison.Ordinal)); 275 | } 276 | 277 | [TestMethod] 278 | public void ParseSpanTests() 279 | { 280 | ParsingHelper helper = new(LongTest); 281 | 282 | 
Assert.IsTrue(helper.SkipTo("score")); 283 | Assert.AreEqual("score and seven years ago our ", helper.ParseToAsSpan("fathers").ToString()); 284 | Assert.AreEqual('f', helper.Peek()); 285 | 286 | helper.Reset(); 287 | Assert.IsTrue(helper.SkipTo("score")); 288 | Assert.AreEqual("score and seven years ago our ", helper.ParseToAsSpan("FATHERS", StringComparison.OrdinalIgnoreCase).ToString()); 289 | Assert.AreEqual('f', helper.Peek()); 290 | 291 | helper.Reset(); 292 | Assert.IsTrue(helper.SkipTo("score")); 293 | Assert.AreEqual("score and se", helper.ParseToAsSpan('v', 'X', 'Y', 'Z').ToString()); 294 | Assert.AreEqual('v', helper.Peek()); 295 | 296 | helper.Reset(); 297 | Assert.IsTrue(helper.SkipTo("score")); 298 | Assert.AreEqual("score", helper.ParseAsSpan('e', 'r', 'o', 'c', 's').ToString()); 299 | Assert.AreEqual(' ', helper.Peek()); 300 | Assert.AreEqual(" ", helper.ParseAsSpan(' ').ToString()); 301 | Assert.AreEqual('a', helper.Peek()); 302 | 303 | helper.Reset(); 304 | Assert.IsTrue(helper.SkipTo("score")); 305 | Assert.AreEqual("score and seven years ago our fathers brought forth on this continent", helper.ParseWhileAsSpan(c => c != ',').ToString()); 306 | Assert.AreEqual(',', helper.Peek()); 307 | 308 | helper.Next(); // Skip comma 309 | Assert.AreEqual("a", helper.ParseTokenAsSpan(char.IsWhiteSpace).ToString()); 310 | Assert.AreEqual(' ', helper.Peek()); 311 | Assert.AreEqual('n', helper.Peek(1)); 312 | 313 | helper.Reset(); 314 | Assert.AreEqual("Four", helper.ParseTokenAsSpan(' ', '\r', '\n').ToString()); 315 | Assert.AreEqual(' ', helper.Peek()); 316 | 317 | // ParseCharacter 318 | helper.Reset("abc"); 319 | Assert.AreEqual("a", helper.ParseCharacterAsSpan().ToString()); 320 | Assert.AreEqual("b", helper.ParseCharacterAsSpan().ToString()); 321 | Assert.AreEqual("c", helper.ParseCharacterAsSpan().ToString()); 322 | Assert.AreEqual("", helper.ParseCharacterAsSpan().ToString()); 323 | 324 | // ParseCharacters 325 | helper.Reset("abcdefg"); 326 | 
Assert.AreEqual("", helper.ParseCharactersAsSpan(0).ToString()); 327 | Assert.AreEqual("", helper.ParseCharactersAsSpan(-1).ToString()); 328 | Assert.AreEqual("a", helper.ParseCharactersAsSpan(1).ToString()); 329 | Assert.AreEqual("bc", helper.ParseCharactersAsSpan(2).ToString()); 330 | Assert.AreEqual("def", helper.ParseCharactersAsSpan(3).ToString()); 331 | Assert.AreEqual("g", helper.ParseCharactersAsSpan(10).ToString()); 332 | Assert.AreEqual("", helper.ParseCharactersAsSpan(10).ToString()); 333 | 334 | // Parse to any string 335 | helper.Reset("abcdefg"); 336 | Assert.AreEqual("abc", helper.ParseToAsSpan([ "d", "ef", "g" ], StringComparison.Ordinal).ToString()); 337 | Assert.AreEqual("d", helper.ParseToAsSpan([ "d", "ef", "g" ], StringComparison.Ordinal, true).ToString()); 338 | Assert.AreEqual("ef", helper.ParseToAsSpan([ "d", "ef", "g" ], StringComparison.Ordinal, true).ToString()); 339 | Assert.AreEqual("g", helper.ParseToAsSpan([ "d", "ef", "g" ], StringComparison.Ordinal, true).ToString()); 340 | 341 | helper.Reset("abcd=ef=>g"); 342 | Assert.AreEqual("abcd=ef", helper.ParseToAsSpan([ "z", "=>", "x" ], StringComparison.Ordinal).ToString()); 343 | } 344 | 345 | [TestMethod] 346 | public void QuotedTextTests() 347 | { 348 | // Quoted text 349 | ParsingHelper helper = new(" This is a \"test.\" "); 350 | Assert.IsTrue(helper.SkipTo('"')); 351 | Assert.AreEqual("test.", helper.ParseQuotedText()); 352 | Assert.AreEqual(' ', helper.Peek()); 353 | 354 | // Two quotes escapes 355 | helper = new ParsingHelper(" This is a \"te\"\"st.\" "); 356 | Assert.IsTrue(helper.SkipTo('"')); 357 | Assert.AreEqual("te\"st.", helper.ParseQuotedText()); 358 | Assert.AreEqual(' ', helper.Peek()); 359 | 360 | // No escape 361 | helper = new ParsingHelper(" This is a \"test.\" "); 362 | Assert.IsTrue(helper.SkipTo('"')); 363 | Assert.AreEqual("test.", helper.ParseQuotedText(null, false, false)); 364 | Assert.AreEqual(' ', helper.Peek()); 365 | 366 | // No escape, include escape 
character 367 | helper.Reset(); 368 | Assert.IsTrue(helper.SkipTo('"')); 369 | Assert.AreEqual("test.", helper.ParseQuotedText(null, true, false)); 370 | Assert.AreEqual(' ', helper.Peek()); 371 | 372 | // No escape, include enclosing quotes 373 | helper.Reset(); 374 | Assert.IsTrue(helper.SkipTo('"')); 375 | Assert.AreEqual("\"test.\"", helper.ParseQuotedText(null, false, true)); 376 | Assert.AreEqual(' ', helper.Peek()); 377 | 378 | // No escape, include escape character and enclosing quotes 379 | helper.Reset(); 380 | Assert.IsTrue(helper.SkipTo('"')); 381 | Assert.AreEqual("\"test.\"", helper.ParseQuotedText(null, true, true)); 382 | Assert.AreEqual(' ', helper.Peek()); 383 | 384 | // Explicit two quotes escapes 385 | helper = new ParsingHelper(" This is a \"te\"\"st.\" "); 386 | Assert.IsTrue(helper.SkipTo('"')); 387 | Assert.AreEqual("te\"st.", helper.ParseQuotedText('\"', false, false)); 388 | Assert.AreEqual(' ', helper.Peek()); 389 | 390 | // Explicit two quotes escapes, include escape character 391 | helper.Reset(); 392 | Assert.IsTrue(helper.SkipTo('"')); 393 | Assert.AreEqual("te\"\"st.", helper.ParseQuotedText('\"', true, false)); 394 | Assert.AreEqual(' ', helper.Peek()); 395 | 396 | // Explicit two quotes escapes, include enclosing quotes 397 | helper.Reset(); 398 | Assert.IsTrue(helper.SkipTo('"')); 399 | Assert.AreEqual("\"te\"st.\"", helper.ParseQuotedText('\"', false, true)); 400 | Assert.AreEqual(' ', helper.Peek()); 401 | 402 | // Explicit two quotes escapes, include escape character and enclosing quotes 403 | helper.Reset(); 404 | Assert.IsTrue(helper.SkipTo('"')); 405 | Assert.AreEqual("\"te\"\"st.\"", helper.ParseQuotedText('\"', true, true)); 406 | Assert.AreEqual(' ', helper.Peek()); 407 | 408 | // Custom escape 409 | helper = new ParsingHelper(" This is a \"te\\\"st.\" "); 410 | Assert.IsTrue(helper.SkipTo('"')); 411 | Assert.AreEqual("te\"st.", helper.ParseQuotedText('\\', false, false)); 412 | Assert.AreEqual(' ', helper.Peek()); 413 | 
414 | // Custom escape, include escape character 415 | helper.Reset(); 416 | Assert.IsTrue(helper.SkipTo('"')); 417 | Assert.AreEqual("te\\\"st.", helper.ParseQuotedText('\\', true, false)); 418 | Assert.AreEqual(' ', helper.Peek()); 419 | 420 | // Custom escape, include enclosing quotes 421 | helper.Reset(); 422 | Assert.IsTrue(helper.SkipTo('"')); 423 | Assert.AreEqual("\"te\"st.\"", helper.ParseQuotedText('\\', false, true)); 424 | Assert.AreEqual(' ', helper.Peek()); 425 | 426 | // Custom escape, include escape character and enclosing quotes 427 | helper.Reset(); 428 | Assert.IsTrue(helper.SkipTo('"')); 429 | Assert.AreEqual("\"te\\\"st.\"", helper.ParseQuotedText('\\', true, true)); 430 | Assert.AreEqual(' ', helper.Peek()); 431 | 432 | // Handles end of text 433 | helper.Reset(""); 434 | Assert.AreEqual(string.Empty, helper.ParseQuotedText('\"', true, true)); 435 | } 436 | 437 | [TestMethod] 438 | public void MatchesCurrentPositionTests() 439 | { 440 | ParsingHelper helper = new(LongTest); 441 | Assert.IsTrue(helper.SkipTo("consecrated it")); 442 | Assert.AreEqual(true, helper.MatchesCurrentPosition("consecrated it")); 443 | Assert.AreEqual(true, helper.MatchesCurrentPosition("CONSECRATED IT", StringComparison.OrdinalIgnoreCase)); 444 | Assert.AreEqual(false, helper.MatchesCurrentPosition(string.Empty)); 445 | Assert.AreEqual(false, helper.MatchesCurrentPosition(string.Empty, StringComparison.OrdinalIgnoreCase)); 446 | Assert.AreEqual(false, helper.MatchesCurrentPosition("consecrated_it")); 447 | Assert.AreEqual(false, helper.MatchesCurrentPosition("CONSECRATED_IT", StringComparison.OrdinalIgnoreCase)); 448 | 449 | Assert.AreEqual(true, helper.MatchesCurrentPosition((char[])['c', 'o', 'n', 's', 'e', 'c', 'r', 'a', 't', 'e', 'd', ' ', 'i', 't'])); 450 | Assert.AreEqual(false, helper.MatchesCurrentPosition((char[])['o', 'n', 's', 'e', 'c', 'r', 'a', 't', 'e', 'd', ' ', 'i', 't'])); 451 | 452 | helper.Index = LongTest.Length - 1; 453 | Assert.AreEqual(false, 
helper.MatchesCurrentPosition("consecrated it")); 454 | Assert.AreEqual(false, helper.MatchesCurrentPosition("CONSECRATED IT", StringComparison.OrdinalIgnoreCase)); 455 | helper.Index = LongTest.Length; 456 | Assert.AreEqual(false, helper.MatchesCurrentPosition("consecrated it")); 457 | Assert.AreEqual(false, helper.MatchesCurrentPosition("CONSECRATED IT", StringComparison.OrdinalIgnoreCase)); 458 | } 459 | 460 | [TestMethod] 461 | public void ExtractTests() 462 | { 463 | ParsingHelper helper = new(LongTest); 464 | string s = "consecrated it"; 465 | Assert.IsTrue(helper.SkipTo(s)); 466 | int start = helper.Index; 467 | helper.Next(s.Length); 468 | Assert.AreEqual(s, helper.Extract(start, helper.Index)); 469 | Assert.AreEqual(@"consecrated it, far above our poor power to add or 470 | detract. The world will little note, nor long remember what we say here, but it can never forget what they 471 | did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought 472 | here have thus far so nobly advanced. 
It is rather for us to be here dedicated to the great task remaining 473 | before us -- that from these honored dead we take increased devotion to that cause for which they gave the 474 | last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- 475 | that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the 476 | people, for the people, shall not perish from the earth.", helper.Extract(start)); 477 | Assert.AreEqual(LongTest, helper.Extract(0, LongTest.Length)); 478 | Assert.AreEqual("score", helper.Extract(5, 10)); 479 | Assert.AreNotEqual("score", helper.Extract(5, 11)); 480 | Assert.AreNotEqual("score", helper.Extract(4, 10)); 481 | Assert.AreEqual(string.Empty, helper.Extract(0, 0)); 482 | Assert.AreEqual(string.Empty, helper.Extract(LongTest.Length, LongTest.Length)); 483 | 484 | helper.Reset("abc"); 485 | Assert.AreEqual('a', helper[0]); 486 | Assert.AreEqual('b', helper[1]); 487 | Assert.AreEqual('c', helper[2]); 488 | Assert.AreEqual('b', helper[^2]); 489 | Assert.AreEqual(ParsingHelper.NullChar, helper[3]); 490 | Assert.AreEqual(ParsingHelper.NullChar, helper[-1]); 491 | 492 | helper.Reset(string.Empty); 493 | Assert.AreEqual(ParsingHelper.NullChar, helper[0]); 494 | Assert.AreEqual(ParsingHelper.NullChar, helper[3]); 495 | Assert.AreEqual(ParsingHelper.NullChar, helper[-1]); 496 | 497 | helper.Reset("abc"); 498 | Assert.AreEqual("a", helper[0..1]); 499 | Assert.AreEqual("ab", helper[0..^1]); 500 | Assert.AreEqual("ab", helper[0..2]); 501 | Assert.AreEqual("abc", helper[0..3]); 502 | Assert.ThrowsException(() => helper[0..4]); 503 | 504 | helper.Reset(string.Empty); 505 | Assert.ThrowsException(() => helper[0..1]); 506 | } 507 | 508 | [TestMethod] 509 | public void ExtractSpanTests() 510 | { 511 | ParsingHelper helper = new(LongTest); 512 | string s = "consecrated it"; 513 | Assert.IsTrue(helper.SkipTo(s)); 514 | int start = helper.Index; 515 | 
helper.Next(s.Length); 516 | Assert.AreEqual(s, helper.ExtractAsSpan(start, helper.Index).ToString()); 517 | Assert.AreEqual(@"consecrated it, far above our poor power to add or 518 | detract. The world will little note, nor long remember what we say here, but it can never forget what they 519 | did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought 520 | here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining 521 | before us -- that from these honored dead we take increased devotion to that cause for which they gave the 522 | last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- 523 | that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the 524 | people, for the people, shall not perish from the earth.", helper.ExtractAsSpan(start).ToString()); 525 | Assert.AreEqual(LongTest, helper.ExtractAsSpan(0, LongTest.Length).ToString()); 526 | Assert.AreEqual("score", helper.ExtractAsSpan(5, 10).ToString()); 527 | Assert.AreNotEqual("score", helper.ExtractAsSpan(5, 11).ToString()); 528 | Assert.AreNotEqual("score", helper.ExtractAsSpan(4, 10).ToString()); 529 | Assert.AreEqual(string.Empty, helper.ExtractAsSpan(0, 0).ToString()); 530 | Assert.AreEqual(string.Empty, helper.ExtractAsSpan(LongTest.Length, LongTest.Length).ToString()); 531 | 532 | helper.Reset("abc"); 533 | Assert.AreEqual('a', helper[0]); 534 | Assert.AreEqual('b', helper[1]); 535 | Assert.AreEqual('c', helper[2]); 536 | Assert.AreEqual('b', helper[^2]); 537 | Assert.AreEqual(ParsingHelper.NullChar, helper[3]); 538 | Assert.AreEqual(ParsingHelper.NullChar, helper[-1]); 539 | 540 | helper.Reset(string.Empty); 541 | Assert.AreEqual(ParsingHelper.NullChar, helper[0]); 542 | Assert.AreEqual(ParsingHelper.NullChar, helper[3]); 543 | Assert.AreEqual(ParsingHelper.NullChar, helper[-1]); 544 | 545 | 
helper.Reset("abc"); 546 | Assert.AreEqual("a", helper.ExtractAsSpan(0, 1).ToString()); 547 | Assert.AreEqual("ab", helper.ExtractAsSpan(0, helper.Text.Length - 1).ToString()); 548 | Assert.AreEqual("ab", helper.ExtractAsSpan(0, 2).ToString()); 549 | Assert.AreEqual("abc", helper.ExtractAsSpan(0, 3).ToString()); 550 | Assert.ThrowsException(() => helper.ExtractAsSpan(0, 4)); 551 | 552 | helper.Reset(string.Empty); 553 | Assert.ThrowsException(() => helper[0..1]); 554 | } 555 | 556 | [TestMethod] 557 | public void OperatorOverloadTests() 558 | { 559 | ParsingHelper helper = new(LongTest); 560 | 561 | for (int i = 0; !helper.EndOfText; i++, helper++) 562 | { 563 | Assert.AreEqual(i, helper.Index); 564 | Assert.AreEqual(LongTest[i], helper.Peek()); 565 | } 566 | 567 | helper.Reset(); 568 | helper++; 569 | Assert.AreEqual(1, helper); 570 | helper += 2; 571 | Assert.AreEqual(3, helper); 572 | #pragma warning disable IDE0054 // Use compound assignment 573 | helper = helper + 2; 574 | #pragma warning restore IDE0054 // Use compound assignment 575 | Assert.AreEqual(5, helper); 576 | helper -= 2; 577 | Assert.AreEqual(3, helper); 578 | #pragma warning disable IDE0054 // Use compound assignment 579 | helper = helper - 2; 580 | #pragma warning restore IDE0054 // Use compound assignment 581 | Assert.AreEqual(1, helper); 582 | helper--; 583 | Assert.AreEqual(0, helper); 584 | helper += 10000; 585 | Assert.AreEqual(LongTest.Length, helper); 586 | helper -= 10000; 587 | Assert.AreEqual(0, helper); 588 | } 589 | 590 | private static readonly string[] StringArray = ["summer", "side", "servant"]; 591 | 592 | [TestMethod] 593 | public void RegExTests() 594 | { 595 | string text = "summer side creature toothpaste dime wind harbor cake nail attention opinion railway horses garden alley quicksand knot servant fight form park polish toad rub hall"; 596 | ParsingHelper helper = new(text); 597 | string s = helper.ParseTokenRegEx(@"\b[d]\w+"); 598 | Assert.AreEqual("dime", s); 599 | 
Assert.AreEqual(36, helper.Index); 600 | 601 | helper.Reset(); 602 | IEnumerable results = helper.ParseTokensRegEx(@"\b[s]\w+"); 603 | CollectionAssert.AreEqual(StringArray, results.ToList()); 604 | Assert.AreEqual(127, helper.Index); 605 | 606 | helper.Reset(); 607 | s = helper.ParseTokenRegEx(@"\b[x]\w+"); 608 | Assert.AreEqual(string.Empty, s); 609 | Assert.AreEqual(text.Length, helper.Index); 610 | 611 | helper.Reset(); 612 | results = helper.ParseTokensRegEx(@"\b[x]\w+"); 613 | CollectionAssert.AreEqual(new List(), results.ToList()); 614 | Assert.AreEqual(text.Length, helper.Index); 615 | 616 | helper.Reset(); 617 | helper.SkipToRegEx(@"\b[a]\w+"); 618 | Assert.IsTrue(helper.MatchesCurrentPosition("attention")); 619 | 620 | s = helper.ParseToRegEx(@"\b[r]\w+"); 621 | Assert.AreEqual("attention opinion ", s); 622 | Assert.IsTrue(helper.MatchesCurrentPosition("railway ")); 623 | 624 | helper.Reset(); 625 | helper.SkipToRegEx(@"\b[a]\w+", true); 626 | Assert.IsTrue(helper.MatchesCurrentPosition(" opinion")); 627 | 628 | helper.Reset("Abc1234def5678ghi"); 629 | Assert.AreEqual(true, helper.SkipTo("123")); 630 | helper.SkipRegEx(@"\d+"); 631 | Assert.AreEqual('d', helper.Peek()); 632 | helper.SkipRegEx(@"[a-z]+"); 633 | Assert.AreEqual('5', helper.Peek()); 634 | 635 | // Test overloads that accept a Regex object 636 | 637 | Regex regex; 638 | helper.Reset(text); 639 | 640 | #pragma warning disable SYSLIB1045 // Convert to 'GeneratedRegexAttribute'. 
641 | 642 | regex = new(@"\b[d]\w+"); 643 | s = helper.ParseTokenRegEx(regex); 644 | Assert.AreEqual("dime", s); 645 | Assert.AreEqual(36, helper.Index); 646 | 647 | regex = new(@"\b[s]\w+"); 648 | helper.Reset(); 649 | results = helper.ParseTokensRegEx(regex); 650 | CollectionAssert.AreEqual(StringArray, results.ToList()); 651 | Assert.AreEqual(127, helper.Index); 652 | 653 | regex = new(@"\b[x]\w+"); 654 | helper.Reset(); 655 | s = helper.ParseTokenRegEx(regex); 656 | Assert.AreEqual(string.Empty, s); 657 | Assert.AreEqual(text.Length, helper.Index); 658 | 659 | regex = new(@"\b[x]\w+"); 660 | helper.Reset(); 661 | results = helper.ParseTokensRegEx(regex); 662 | CollectionAssert.AreEqual(new List(), results.ToList()); 663 | Assert.AreEqual(text.Length, helper.Index); 664 | 665 | regex = new(@"\b[a]\w+"); 666 | helper.Reset(); 667 | helper.SkipToRegEx(regex); 668 | Assert.IsTrue(helper.MatchesCurrentPosition("attention")); 669 | 670 | regex = new(@"\b[r]\w+"); 671 | s = helper.ParseToRegEx(regex); 672 | Assert.AreEqual("attention opinion ", s); 673 | Assert.IsTrue(helper.MatchesCurrentPosition("railway ")); 674 | 675 | regex = new(@"\b[a]\w+"); 676 | helper.Reset(); 677 | helper.SkipToRegEx(regex, true); 678 | Assert.IsTrue(helper.MatchesCurrentPosition(" opinion")); 679 | 680 | helper.Reset("Abc1234def5678ghi"); 681 | Assert.AreEqual(true, helper.SkipTo("123")); 682 | regex = new(@"\d+"); 683 | helper.SkipRegEx(regex); 684 | Assert.AreEqual('d', helper.Peek()); 685 | regex = new(@"[a-z]+"); 686 | helper.SkipRegEx(regex); 687 | Assert.AreEqual('5', helper.Peek()); 688 | 689 | #pragma warning restore SYSLIB1045 // Convert to 'GeneratedRegexAttribute'. 
690 | 691 | } 692 | 693 | [TestMethod] 694 | public void ParseLineTests() 695 | { 696 | List<(string, List)> tests = 697 | [ 698 | ("a", new List(["a"])), 699 | ("ab", new List(["ab"])), 700 | ("abc", new List(["abc"])), 701 | ("abc\r", new List(["abc"])), 702 | ("abc\r\n", new List(["abc"])), 703 | ("abc\r\nd", new List(["abc", "d"])), 704 | ("abc\r\nde", new List(["abc", "de"])), 705 | ("abc\r\ndef", new List(["abc", "def"])), 706 | ("abc\r\ndef\n", new List(["abc", "def"])), 707 | ("abc\r\ndef\n\r", new List(["abc", "def", ""])), 708 | ("abc\r\ndef\n\rg", new List(["abc", "def", "", "g"])), 709 | ("abc\r\ndef\n\rgh", new List(["abc", "def", "", "gh"])), 710 | ("abc\r\ndef\n\rghi", new List(["abc", "def", "", "ghi"])), 711 | ("abc\r\ndef\n\rghi\n", new List(["abc", "def", "", "ghi"])), 712 | ("abc\r\ndef\n\rghi\nx", new List(["abc", "def", "", "ghi", "x"])), 713 | ("abc\r\ndef\n\rghi\nxy", new List(["abc", "def", "", "ghi", "xy"])), 714 | ("abc\r\ndef\n\rghi\nxyz", new List(["abc", "def", "", "ghi", "xyz"])), 715 | ("abc\r\ndef\n\rghi\nxyz\r", new List(["abc", "def", "", "ghi", "xyz"])), 716 | ("abc\r\ndef\n\rghi\nxyz\r\r", new List(["abc", "def", "", "ghi", "xyz", ""])), 717 | ]; 718 | 719 | ParsingHelper helper = new(null); 720 | List lines = []; 721 | 722 | foreach (var test in tests) 723 | { 724 | helper.Reset(test.Item1); 725 | lines.Clear(); 726 | while (helper.ParseLine(out string line)) 727 | lines.Add(line); 728 | CollectionAssert.AreEqual(test.Item2, lines); 729 | } 730 | 731 | // Spans 732 | foreach (var test in tests) 733 | { 734 | helper.Reset(test.Item1); 735 | lines.Clear(); 736 | while (helper.ParseLine(out ReadOnlySpan span)) 737 | lines.Add(span.ToString()); 738 | CollectionAssert.AreEqual(test.Item2, lines); 739 | } 740 | } 741 | 742 | [TestMethod] 743 | public void ParsePositionTests() 744 | { 745 | ParsePosition pos; 746 | string text = "abc\r\ndef\rghi\nxyz\n"; 747 | 748 | List<(int Line, int Column)> values = 749 | [ 750 | (1, 1), // 0 751 
| (1, 2), // 1 752 | (1, 3), // 2 753 | (1, 4), // 3 754 | (1, 5), // 4 755 | (2, 1), // 5 756 | (2, 2), // 6 757 | (2, 3), // 7 758 | (2, 4), // 8 759 | (3, 1), // 9 760 | (3, 2), // 10 761 | (3, 3), // 11 762 | (3, 4), // 12 763 | (4, 1), // 13 764 | (4, 2), // 14 765 | (4, 3), // 15 766 | (4, 4), // 16 767 | (5, 1), // 17 768 | ]; 769 | 770 | for (int i = 0; i < values.Count; i++) 771 | { 772 | pos = ParsePosition.CalculatePosition(text, i); 773 | Assert.AreEqual(values[i].Line, pos.Line); 774 | Assert.AreEqual(values[i].Column, pos.Column); 775 | } 776 | 777 | ParsingHelper helper = new(text); 778 | helper.SkipTo("ghi"); 779 | pos = helper.GetLineColumn(); 780 | Assert.AreEqual(3, pos.Line); 781 | Assert.AreEqual(1, pos.Column); 782 | } 783 | } 784 | } 785 | } 786 | -------------------------------------------------------------------------------- /TestParsingHelper/TestParsingHelper.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net9.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | all 15 | runtime; build; native; contentfiles; analyzers; buildtransitive 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | --------------------------------------------------------------------------------