├── .codecov.yml ├── .editorconfig ├── .github ├── FUNDING.yml ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── build.yml │ └── release-drafter.yml ├── .gitignore ├── CodeCoverage.runsettings ├── License.txt ├── README.md ├── TurnerSoftware.SitemapTools.sln ├── images └── icon.png ├── src ├── Directory.Build.props └── TurnerSoftware.SitemapTools │ ├── ChangeFrequency.cs │ ├── Constants.cs │ ├── IsExternalInit.cs │ ├── Parser │ ├── ISitemapParser.cs │ ├── TextSitemapParser.cs │ └── XmlSitemapParser.cs │ ├── SitemapFile.cs │ ├── SitemapQuery.cs │ ├── SitemapType.cs │ └── TurnerSoftware.SitemapTools.csproj └── tests ├── Directory.Build.props └── TurnerSoftware.SitemapTools.Tests ├── Resources ├── another-indexed-sitemap.xml ├── another-text-sitemap.txt ├── basic-sitemap-WrongFormat.txt ├── basic-sitemap-WrongFormat.xml ├── basic-sitemap.xml ├── gzipped-sitemap.xml.gz ├── last-text-sitemap.txt ├── robots.txt ├── sitemap.xml └── text-sitemap.txt ├── Server └── Startup.cs ├── SitemapQueryTests.cs ├── TestBase.cs ├── TestConfiguration.cs ├── TextSitemapParserTests.cs ├── TurnerSoftware.SitemapTools.Tests.csproj └── XmlSitemapParserTests.cs /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: off -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # Based on the EditorConfig from Roslyn 2 | # top-most EditorConfig file 3 | root = true 4 | 5 | [*.cs] 6 | indent_style = tab 7 | 8 | # Sort using and Import directives with System.* appearing first 9 | dotnet_sort_system_directives_first = true 10 | # Avoid "this." and "Me." 
if not necessary 11 | dotnet_style_qualification_for_field = false:suggestion 12 | dotnet_style_qualification_for_property = false:suggestion 13 | dotnet_style_qualification_for_method = false:suggestion 14 | dotnet_style_qualification_for_event = false:suggestion 15 | 16 | # Use language keywords instead of framework type names for type references 17 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion 18 | dotnet_style_predefined_type_for_member_access = true:suggestion 19 | 20 | # Suggest more modern language features when available 21 | dotnet_style_object_initializer = true:suggestion 22 | dotnet_style_collection_initializer = true:suggestion 23 | dotnet_style_coalesce_expression = true:suggestion 24 | dotnet_style_null_propagation = true:suggestion 25 | dotnet_style_explicit_tuple_names = true:suggestion 26 | 27 | # Prefer "var" everywhere 28 | csharp_style_var_for_built_in_types = true:suggestion 29 | csharp_style_var_when_type_is_apparent = true:suggestion 30 | csharp_style_var_elsewhere = true:suggestion 31 | 32 | # Prefer method-like constructs to have a block body 33 | csharp_style_expression_bodied_methods = false:none 34 | csharp_style_expression_bodied_constructors = false:none 35 | csharp_style_expression_bodied_operators = false:none 36 | 37 | # Prefer property-like constructs to have an expression-body 38 | csharp_style_expression_bodied_properties = when_on_single_line:suggestion 39 | csharp_style_expression_bodied_indexers = true:none 40 | csharp_style_expression_bodied_accessors = when_on_single_line:suggestion 41 | 42 | # Suggest more modern language features when available 43 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 44 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion 45 | csharp_style_inlined_variable_declaration = true:suggestion 46 | csharp_style_throw_expression = true:suggestion 47 | csharp_style_conditional_delegate_call = true:suggestion 48 | 49 | # 
Newline settings 50 | csharp_new_line_before_open_brace = all 51 | csharp_new_line_before_else = true 52 | csharp_new_line_before_catch = true 53 | csharp_new_line_before_finally = true 54 | csharp_new_line_before_members_in_object_initializers = true 55 | csharp_new_line_before_members_in_anonymous_types = true 56 | 57 | # Misc 58 | csharp_space_after_keywords_in_control_flow_statements = true 59 | csharp_space_between_method_declaration_parameter_list_parentheses = false 60 | csharp_space_between_method_call_parameter_list_parentheses = false 61 | csharp_space_between_parentheses = false 62 | csharp_preserve_single_line_statements = false 63 | csharp_preserve_single_line_blocks = true 64 | csharp_indent_case_contents = true 65 | csharp_indent_switch_labels = true 66 | csharp_indent_labels = no_change 67 | 68 | # Custom naming conventions 69 | dotnet_naming_rule.non_field_members_must_be_capitalized.symbols = non_field_member_symbols 70 | dotnet_naming_symbols.non_field_member_symbols.applicable_kinds = property,method,event,delegate 71 | dotnet_naming_symbols.non_field_member_symbols.applicable_accessibilities = * 72 | 73 | dotnet_naming_rule.non_field_members_must_be_capitalized.style = pascal_case_style 74 | dotnet_naming_style.pascal_case_style.capitalization = pascal_case 75 | 76 | dotnet_naming_rule.non_field_members_must_be_capitalized.severity = suggestion -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: Turnerj -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: nuget 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | 
-------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: '$RESOLVED_VERSION' 2 | tag-template: '$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'bug' 11 | - 'bugfix' 12 | - title: '🧰 Maintenance' 13 | label: 14 | - 'dependencies' 15 | - 'maintenance' 16 | change-template: '- $TITLE by @$AUTHOR (#$NUMBER)' 17 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 18 | version-resolver: 19 | major: 20 | labels: 21 | - 'major' 22 | minor: 23 | labels: 24 | - 'minor' 25 | patch: 26 | labels: 27 | - 'patch' 28 | default: patch 29 | template: | 30 | ## Changes 31 | 32 | $CHANGES 33 | 34 | ## 👨🏼‍💻 Contributors 35 | 36 | $CONTRIBUTORS -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | release: 8 | types: [ published ] 9 | 10 | env: 11 | # Disable the .NET logo in the console output. 12 | DOTNET_NOLOGO: true 13 | # Disable the .NET first time experience to skip caching NuGet packages and speed up the build. 14 | DOTNET_SKIP_FIRST_TIME_EXPERIENCE: true 15 | # Disable sending .NET CLI telemetry to Microsoft. 
16 | DOTNET_CLI_TELEMETRY_OPTOUT: true 17 | 18 | BUILD_ARTIFACT_PATH: ${{github.workspace}}/build-artifacts 19 | 20 | jobs: 21 | 22 | build: 23 | name: Build ${{matrix.os}} 24 | runs-on: ${{matrix.os}} 25 | strategy: 26 | matrix: 27 | os: [ubuntu-latest, windows-latest, macOS-latest] 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v2 31 | - name: Setup dotnet SDKs 32 | uses: actions/setup-dotnet@v1.9.0 33 | with: 34 | dotnet-version: | 35 | 6.0.x 36 | 8.0.x 37 | - name: Install dependencies 38 | run: dotnet restore 39 | - name: Build 40 | run: dotnet build --no-restore -c Release /p:ContinuousIntegrationBuild=true 41 | - name: Test with Coverage 42 | run: dotnet test --no-restore --logger trx --results-directory ${{env.BUILD_ARTIFACT_PATH}}/coverage --collect "XPlat Code Coverage" --settings CodeCoverage.runsettings /p:SkipBuildVersioning=true 43 | - name: Pack 44 | run: dotnet pack --no-build -c Release /p:PackageOutputPath=${{env.BUILD_ARTIFACT_PATH}} /p:ContinuousIntegrationBuild=true 45 | - name: Publish artifacts 46 | uses: actions/upload-artifact@v2 47 | with: 48 | name: ${{matrix.os}} 49 | path: ${{env.BUILD_ARTIFACT_PATH}} 50 | 51 | coverage: 52 | name: Process code coverage 53 | runs-on: ubuntu-latest 54 | needs: build 55 | steps: 56 | - name: Checkout 57 | uses: actions/checkout@v2 58 | - name: Download coverage reports 59 | uses: actions/download-artifact@v2 60 | - name: Install ReportGenerator tool 61 | run: dotnet tool install -g dotnet-reportgenerator-globaltool 62 | - name: Prepare coverage reports 63 | run: reportgenerator -reports:*/coverage/*/coverage.cobertura.xml -targetdir:./ -reporttypes:Cobertura 64 | - name: Upload coverage report 65 | uses: codecov/codecov-action@v4.3.0 66 | with: 67 | file: Cobertura.xml 68 | fail_ci_if_error: false 69 | - name: Save combined coverage report as artifact 70 | uses: actions/upload-artifact@v2 71 | with: 72 | name: coverage-report 73 | path: Cobertura.xml 74 | 75 | push-to-github-packages: 76 | 
name: 'Push GitHub Packages' 77 | needs: build 78 | if: github.ref == 'refs/heads/main' || github.event_name == 'release' 79 | environment: 80 | name: 'GitHub Packages' 81 | url: https://github.com/TurnerSoftware/SitemapTools/packages 82 | permissions: 83 | packages: write 84 | runs-on: ubuntu-latest 85 | steps: 86 | - name: 'Download build' 87 | uses: actions/download-artifact@v2 88 | with: 89 | name: 'ubuntu-latest' 90 | - name: 'Add NuGet source' 91 | run: dotnet nuget add source https://nuget.pkg.github.com/TurnerSoftware/index.json --name GitHub --username Turnerj --password ${{secrets.GITHUB_TOKEN}} --store-password-in-clear-text 92 | - name: 'Upload NuGet package' 93 | run: dotnet nuget push *.nupkg --api-key ${{secrets.GH_PACKAGE_REGISTRY_API_KEY}} --source GitHub --skip-duplicate 94 | 95 | push-to-nuget: 96 | name: 'Push NuGet Packages' 97 | needs: build 98 | if: github.event_name == 'release' 99 | environment: 100 | name: 'NuGet' 101 | url: https://www.nuget.org/packages/TurnerSoftware.SitemapTools 102 | runs-on: ubuntu-latest 103 | steps: 104 | - name: 'Download build' 105 | uses: actions/download-artifact@v2 106 | with: 107 | name: 'ubuntu-latest' 108 | - name: 'Upload NuGet package and symbols' 109 | run: dotnet nuget push *.nupkg --source https://api.nuget.org/v3/index.json --skip-duplicate --api-key ${{secrets.NUGET_API_KEY}} -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | update_release_draft: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: release-drafter/release-drafter@v5 13 | env: 14 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015/2017 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # Visual Studio 2017 auto generated files 33 | Generated\ Files/ 34 | 35 | # MSTest test Results 36 | [Tt]est[Rr]esult*/ 37 | [Bb]uild[Ll]og.* 38 | 39 | # NUNIT 40 | *.VisualState.xml 41 | TestResult.xml 42 | 43 | # Build Results of an ATL Project 44 | [Dd]ebugPS/ 45 | [Rr]eleasePS/ 46 | dlldata.c 47 | 48 | # Benchmark Results 49 | BenchmarkDotNet.Artifacts/ 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | # StyleCop 58 | StyleCopReport.xml 59 | 60 | # Files built by Visual Studio 61 | *_i.c 62 | *_p.c 63 | *_i.h 64 | *.ilk 65 | *.meta 66 | *.obj 67 | *.iobj 68 | *.pch 69 | *.pdb 70 | *.ipdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | *.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local 
Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 
185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary however generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produces more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | 332 | /build-artifacts -------------------------------------------------------------------------------- /CodeCoverage.runsettings: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | cobertura 8 | [TurnerSoftware.SitemapTools.Tests]* 9 | [TurnerSoftware.SitemapTools]* 10 | Obsolete,GeneratedCodeAttribute,CompilerGeneratedAttribute 11 | true 12 | true 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Turner Software 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | ![Icon](images/icon.png) 4 | # Sitemap Tools 5 | 6 | A sitemap (sitemap.xml) querying and parsing library for .NET 7 | 8 | ![Build](https://img.shields.io/github/actions/workflow/status/TurnerSoftware/sitemaptools/build.yml?branch=main) 9 | [![Codecov](https://img.shields.io/codecov/c/github/turnersoftware/sitemaptools/main.svg)](https://codecov.io/gh/TurnerSoftware/SitemapTools) 10 | [![NuGet](https://img.shields.io/nuget/v/TurnerSoftware.SitemapTools.svg)](https://www.nuget.org/packages/TurnerSoftware.SitemapTools) 11 |
12 | 13 | ## Key features 14 | - Parses both XML sitemaps and [sitemap index files](http://www.sitemaps.org/protocol.html#index) 15 | - Handles GZ-compressed XML sitemaps 16 | - Supports TXT sitemaps 17 | 18 | 19 | ## Licensing and Support 20 | 21 | Sitemap Tools is licensed under the MIT license. It is free to use in personal and commercial projects. 22 | 23 | There are [support plans](https://turnersoftware.com.au/support-plans) available that cover all active [Turner Software OSS projects](https://github.com/TurnerSoftware). 24 | Support plans provide private email support, expert usage advice for our projects, priority bug fixes and more. 25 | These support plans help fund our OSS commitments to provide better software for everyone. 26 | 27 | 28 | ## Notes 29 | - Does not enforce sitemap standards [as described at sitemaps.org](http://www.sitemaps.org/protocol.html) 30 | - Does not validate the sitemaps 31 | - Does not support RSS sitemaps 32 | 33 | ## Example 34 | ```csharp 35 | using TurnerSoftware.SitemapTools; 36 | 37 | var sitemapQuery = new SitemapQuery(); 38 | var sitemapEntries = await sitemapQuery.GetAllSitemapsForDomainAsync("example.org"); 39 | ``` -------------------------------------------------------------------------------- /TurnerSoftware.SitemapTools.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31808.319 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "global", "global", "{B79203C9-7B50-4C91-A0AD-EAA6FBAABD53}" 7 | ProjectSection(SolutionItems) = preProject 8 | .codecov.yml = .codecov.yml 9 | .editorconfig = .editorconfig 10 | .gitignore = .gitignore 11 | CodeCoverage.runsettings = CodeCoverage.runsettings 12 | License.txt = License.txt 13 | README.md = README.md 14 | EndProjectSection 15 | EndProject 16 | 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TurnerSoftware.SitemapTools", "src\TurnerSoftware.SitemapTools\TurnerSoftware.SitemapTools.csproj", "{788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512}" 17 | EndProject 18 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{F738FA56-D159-4C5E-BD05-81B5C3290971}" 19 | ProjectSection(SolutionItems) = preProject 20 | src\Directory.Build.props = src\Directory.Build.props 21 | EndProjectSection 22 | EndProject 23 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{AF69BE3F-20E7-40CA-85D7-64BF2EA14C15}" 24 | EndProject 25 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TurnerSoftware.SitemapTools.Tests", "tests\TurnerSoftware.SitemapTools.Tests\TurnerSoftware.SitemapTools.Tests.csproj", "{DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33}" 26 | EndProject 27 | Global 28 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 29 | Debug|Any CPU = Debug|Any CPU 30 | Release|Any CPU = Release|Any CPU 31 | EndGlobalSection 32 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 33 | {788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 34 | {788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512}.Debug|Any CPU.Build.0 = Debug|Any CPU 35 | {788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512}.Release|Any CPU.ActiveCfg = Release|Any CPU 36 | {788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512}.Release|Any CPU.Build.0 = Release|Any CPU 37 | {DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 38 | {DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33}.Debug|Any CPU.Build.0 = Debug|Any CPU 39 | {DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33}.Release|Any CPU.ActiveCfg = Release|Any CPU 40 | {DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33}.Release|Any CPU.Build.0 = Release|Any CPU 41 | EndGlobalSection 42 | GlobalSection(SolutionProperties) = preSolution 43 | HideSolutionNode = FALSE 44 | EndGlobalSection 45 | GlobalSection(NestedProjects) = preSolution 46 | {788EF4B5-4FAC-4E83-BBF8-ADC89DEFD512} = 
{F738FA56-D159-4C5E-BD05-81B5C3290971} 47 | {DD0521BB-2CDF-4DA5-9E76-4B87E4AA9D33} = {AF69BE3F-20E7-40CA-85D7-64BF2EA14C15} 48 | EndGlobalSection 49 | GlobalSection(ExtensibilityGlobals) = postSolution 50 | SolutionGuid = {D8796E9A-2E75-4B71-A6BE-A2EB4C4D30F3} 51 | EndGlobalSection 52 | EndGlobal 53 | -------------------------------------------------------------------------------- /images/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TurnerSoftware/SitemapTools/4ac4cdbe4f09081fb6753e5c26f7a936600aa9cb/images/icon.png -------------------------------------------------------------------------------- /src/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | TurnerSoftware.SitemapTools 5 | 6 | Turner Software 7 | 8 | $(AssemblyName) 9 | true 10 | MIT 11 | icon.png 12 | https://github.com/TurnerSoftware/SitemapTools 13 | utilities;sitemap;sitemap.xml;sitemap.txt 14 | 15 | 16 | true 17 | true 18 | embedded 19 | 20 | Latest 21 | Enable 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/ChangeFrequency.cs: -------------------------------------------------------------------------------- 1 | namespace TurnerSoftware.SitemapTools; 2 | 3 | public enum ChangeFrequency 4 | { 5 | Always, 6 | Hourly, 7 | Daily, 8 | Weekly, 9 | Monthly, 10 | Yearly, 11 | Never 12 | } 13 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/Constants.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace TurnerSoftware.SitemapTools; 4 | 5 | public static class Constants 6 | { 7 | public const string DefaultSitemapFilename = "sitemap.xml"; 8 | 9 | private static bool CaseInsensitiveEquality(string x, string y) => 
x.Equals(y, StringComparison.OrdinalIgnoreCase); 10 | 11 | public static class ChangeFrequency 12 | { 13 | public const string Always = "always"; 14 | public const string Hourly = "hourly"; 15 | public const string Daily = "daily"; 16 | public const string Weekly = "weekly"; 17 | public const string Monthly = "monthly"; 18 | public const string Yearly = "yearly"; 19 | public const string Never = "never"; 20 | 21 | /// <summary> 22 | /// Converts a change frequency into a <see cref="SitemapTools.ChangeFrequency"/>. 23 | /// </summary> 24 | /// <param name="changeFrequency">The change frequency to parse.</param> 25 | /// <returns>A <see cref="SitemapTools.ChangeFrequency"/> if successful; otherwise <see langword="null"/>.</returns> 26 | public static SitemapTools.ChangeFrequency? ToEnum(string changeFrequency) 27 | { 28 | return changeFrequency switch 29 | { 30 | _ when CaseInsensitiveEquality(Always, changeFrequency) => SitemapTools.ChangeFrequency.Always, 31 | _ when CaseInsensitiveEquality(Hourly, changeFrequency) => SitemapTools.ChangeFrequency.Hourly, 32 | _ when CaseInsensitiveEquality(Daily, changeFrequency) => SitemapTools.ChangeFrequency.Daily, 33 | _ when CaseInsensitiveEquality(Weekly, changeFrequency) => SitemapTools.ChangeFrequency.Weekly, 34 | _ when CaseInsensitiveEquality(Monthly, changeFrequency) => SitemapTools.ChangeFrequency.Monthly, 35 | _ when CaseInsensitiveEquality(Yearly, changeFrequency) => SitemapTools.ChangeFrequency.Yearly, 36 | _ when CaseInsensitiveEquality(Never, changeFrequency) => SitemapTools.ChangeFrequency.Never, 37 | _ => null 38 | }; 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/IsExternalInit.cs: -------------------------------------------------------------------------------- 1 | namespace System.Runtime.CompilerServices; 2 | 3 | internal static class IsExternalInit { } -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/Parser/ISitemapParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using 
System.IO; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | 6 | namespace TurnerSoftware.SitemapTools.Parser; 7 | 8 | public interface ISitemapParser 9 | { 10 | Task<SitemapFile?> ParseSitemapAsync(Uri sitemapUrl, TextReader reader, CancellationToken cancellationToken = default); 11 | } 12 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/Parser/TextSitemapParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using System.Threading; 5 | using System.Threading.Tasks; 6 | 7 | namespace TurnerSoftware.SitemapTools.Parser; 8 | 9 | public class TextSitemapParser : ISitemapParser 10 | { 11 | public async Task<SitemapFile?> ParseSitemapAsync(Uri sitemapUrl, TextReader reader, CancellationToken cancellationToken = default) 12 | { 13 | var sitemapEntries = new List<SitemapEntry>(); 14 | 15 | string? line; // ReadLineAsync yields null at end of input, so the local must be nullable 16 | while ((line = await reader.ReadLineAsync()) != null) 17 | { 18 | cancellationToken.ThrowIfCancellationRequested(); 19 | if (Uri.TryCreate(line, UriKind.Absolute, out var tmpUri)) // lines that are not absolute URLs are skipped 20 | { 21 | sitemapEntries.Add(new SitemapEntry(tmpUri)); 22 | } 23 | } 24 | 25 | return new SitemapFile(sitemapUrl) 26 | { 27 | Urls = sitemapEntries 28 | }; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/Parser/XmlSitemapParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Globalization; 4 | using System.IO; 5 | using System.Threading; 6 | using System.Threading.Tasks; 7 | using System.Xml; 8 | using System.Xml.Linq; 9 | 10 | namespace TurnerSoftware.SitemapTools.Parser; 11 | 12 | /// <summary> 13 | /// Based on the Sitemap specification described here: http://www.sitemaps.org/protocol.html 14 | /// </summary> 15 | public class XmlSitemapParser : 
ISitemapParser 16 | { 17 | #pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously 18 | public async Task<SitemapFile?> ParseSitemapAsync(Uri sitemapUrl, TextReader reader, CancellationToken cancellationToken = default) 19 | #pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously 20 | { 21 | XDocument document; 22 | 23 | try 24 | { 25 | #if NETSTANDARD2_1 26 | document = await XDocument.LoadAsync(reader, LoadOptions.None, cancellationToken); 27 | #else 28 | document = XDocument.Load(reader, LoadOptions.None); 29 | cancellationToken.ThrowIfCancellationRequested(); 30 | #endif 31 | } 32 | catch (XmlException) 33 | { 34 | return null; // malformed XML yields no sitemap instead of an exception 35 | } 36 | 37 | var result = new SitemapFile(sitemapUrl); 38 | foreach (var topNode in document.Elements()) 39 | { 40 | var nodeName = topNode.Name.LocalName; 41 | 42 | if (nodeName.Equals("urlset", StringComparison.OrdinalIgnoreCase)) // element names are protocol identifiers, not linguistic text 43 | { 44 | var sitemapEntries = new List<SitemapEntry>(); 45 | 46 | foreach (var urlNode in topNode.Elements()) 47 | { 48 | cancellationToken.ThrowIfCancellationRequested(); 49 | if (TryParseSitemapEntry(urlNode, out var sitemapEntry)) 50 | { 51 | sitemapEntries.Add(sitemapEntry!); 52 | } 53 | } 54 | 55 | result = result with 56 | { 57 | Urls = sitemapEntries 58 | }; 59 | } 60 | else if (nodeName.Equals("sitemapindex", StringComparison.OrdinalIgnoreCase)) 61 | { 62 | var sitemapIndexEntries = new List<SitemapIndexEntry>(); 63 | 64 | foreach (var sitemapNode in topNode.Elements()) 65 | { 66 | cancellationToken.ThrowIfCancellationRequested(); 67 | if (TryParseSitemapIndex(sitemapNode, out var indexedSitemap)) 68 | { 69 | sitemapIndexEntries.Add(indexedSitemap!); 70 | } 71 | } 72 | 73 | result = result with 74 | { 75 | Sitemaps = sitemapIndexEntries 76 | }; 77 | } 78 | } 79 | 80 | return result; 81 | } 82 | 83 | private bool TryParseSitemapIndex(XElement sitemapNode, out SitemapIndexEntry? value) 84 | { 85 | Uri? location = null; 86 | DateTime? 
lastModified = null; 87 | foreach (var urlDetail in sitemapNode.Elements()) 88 | { 89 | var nodeName = urlDetail.Name.LocalName; 90 | var nodeValue = urlDetail.Value; 91 | 92 | if (nodeName.Equals("loc", StringComparison.InvariantCultureIgnoreCase)) 93 | { 94 | if (Uri.TryCreate(nodeValue, UriKind.Absolute, out var tmpUri)) 95 | { 96 | location = tmpUri; 97 | } 98 | } 99 | else if (nodeName.Equals("lastmod", StringComparison.InvariantCultureIgnoreCase)) 100 | { 101 | if (DateTime.TryParse(nodeValue, CultureInfo.InvariantCulture, DateTimeStyles.None, out var tmpLastModified)) 102 | { 103 | lastModified = tmpLastModified; 104 | } 105 | } 106 | } 107 | 108 | if (location is null) 109 | { 110 | value = default; 111 | return false; 112 | } 113 | 114 | value = new(location, lastModified); 115 | return true; 116 | } 117 | 118 | private bool TryParseSitemapEntry(XElement urlNode, out SitemapEntry? value) 119 | { 120 | Uri? location = null; 121 | DateTime? lastModified = null; 122 | ChangeFrequency? 
changeFrequency = null; 123 | var priority = SitemapEntry.DefaultPriority; 124 | 125 | foreach (var urlDetail in urlNode.Elements()) 126 | { 127 | var nodeName = urlDetail.Name.LocalName; 128 | var nodeValue = urlDetail.Value; 129 | 130 | if (nodeName.Equals("loc", StringComparison.InvariantCultureIgnoreCase)) 131 | { 132 | if (Uri.TryCreate(nodeValue, UriKind.Absolute, out var tmpUri)) 133 | { 134 | location = tmpUri; 135 | } 136 | } 137 | else if (nodeName.Equals("lastmod", StringComparison.InvariantCultureIgnoreCase)) 138 | { 139 | if (DateTime.TryParse(nodeValue, CultureInfo.InvariantCulture, DateTimeStyles.None, out var tmpLastModified)) 140 | { 141 | lastModified = tmpLastModified; 142 | } 143 | } 144 | else if (nodeName.Equals("changefreq", StringComparison.InvariantCultureIgnoreCase)) 145 | { 146 | changeFrequency = Constants.ChangeFrequency.ToEnum(nodeValue); 147 | } 148 | else if (nodeName.Equals("priority", StringComparison.InvariantCultureIgnoreCase)) 149 | { 150 | if (double.TryParse(nodeValue, NumberStyles.Float, CultureInfo.InvariantCulture, out var tmpPriority)) 151 | { 152 | priority = tmpPriority; 153 | } 154 | } 155 | } 156 | 157 | if (location is null) 158 | { 159 | value = default; 160 | return false; 161 | } 162 | 163 | value = new(location, lastModified, changeFrequency, priority); 164 | return true; 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/TurnerSoftware.SitemapTools/SitemapFile.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | 4 | namespace TurnerSoftware.SitemapTools; 5 | 6 | /// 7 | /// Represents a sitemap that can contain references to other sitemaps or the sitemap entries themselves. 8 | /// 9 | /// The sitemap location. 10 | /// List of additional sitemaps. 11 | /// List of sitemap entries. 
public record SitemapFile(Uri Location, IReadOnlyList<SitemapIndexEntry> Sitemaps, IReadOnlyList<SitemapEntry> Urls)
{
	/// <summary>
	/// Create a sitemap file with no references to other sitemaps or any sitemap entries.
	/// </summary>
	/// <param name="location">The sitemap location</param>
	public SitemapFile(Uri location) : this(location, Array.Empty<SitemapIndexEntry>(), Array.Empty<SitemapEntry>()) { }
}

/// <summary>
/// The individual entry in a sitemap file.
/// </summary>
/// <param name="Location">The location of the resource pointed towards by the sitemap file.</param>
/// <param name="LastModified">The last modified time of the resource.</param>
/// <param name="ChangeFrequency">The change frequency of the resource. This describes how often the resource is updated.</param>
/// <param name="Priority">The priority of this resource. Default value is 0.5.</param>
public record SitemapEntry(Uri Location, DateTime? LastModified, ChangeFrequency? ChangeFrequency, double Priority = SitemapEntry.DefaultPriority)
{
	/// <summary>
	/// The default priority for a <see cref="SitemapEntry"/>.
	/// </summary>
	public const double DefaultPriority = 0.5;

	/// <summary>
	/// Creates a <see cref="SitemapEntry"/> with the specified location.
	/// </summary>
	/// <param name="location">The location of the resource pointed towards by the sitemap file.</param>
	public SitemapEntry(Uri location) : this(location, default, default, DefaultPriority) { }
}

/// <summary>
/// A sitemap entry that points to another sitemap.
/// </summary>
/// <param name="Location">The location of the sitemap.</param>
/// <param name="LastModified">The last modified time of the sitemap.</param>
public record SitemapIndexEntry(Uri Location, DateTime? LastModified)
{
	/// <summary>
	/// Creates a <see cref="SitemapIndexEntry"/> with the specified location.
	/// </summary>
	/// <param name="location">The location of the sitemap.</param>
	public SitemapIndexEntry(Uri location) : this(location, default) { }
}
--------------------------------------------------------------------------------
/src/TurnerSoftware.SitemapTools/SitemapQuery.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using System.IO;
using TurnerSoftware.SitemapTools.Parser;
using System.Net.Http;
using TurnerSoftware.RobotsExclusionTools;
using System.Threading;

namespace TurnerSoftware.SitemapTools;

/// <summary>
/// Allows for the querying and discovery of sitemaps.
/// </summary>
public class SitemapQuery
{
	/// <summary>
	/// HTTP content type mapping against <see cref="SitemapType"/>.
	/// Keys are matched case-insensitively.
	/// </summary>
	public static Dictionary<string, SitemapType> SitemapTypeMapping { get; }
	/// <summary>
	/// <see cref="SitemapType"/> mapping against <see cref="ISitemapParser"/>.
	/// </summary>
	public static Dictionary<SitemapType, ISitemapParser> SitemapParsers { get; }

	static SitemapQuery()
	{
		// Media type names are case-insensitive, so the lookup must not depend on the server's casing.
		SitemapTypeMapping = new Dictionary<string, SitemapType>(StringComparer.OrdinalIgnoreCase)
		{
			{ "text/xml", SitemapType.Xml },
			{ "application/xml", SitemapType.Xml },
			{ "text/plain", SitemapType.Text }
		};
		SitemapParsers = new Dictionary<SitemapType, ISitemapParser>
		{
			{ SitemapType.Xml, new XmlSitemapParser() },
			{ SitemapType.Text, new TextSitemapParser() }
		};
	}

	private readonly HttpClient httpClient;

	/// <summary>
	/// Creates a <see cref="SitemapQuery"/> with a configured <see cref="HttpClient"/>
	/// for automatic decompression.
	/// </summary>
	public SitemapQuery()
	{
		var clientHandler = new HttpClientHandler
		{
			AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
		};

		httpClient = new HttpClient(clientHandler);
	}

	/// <summary>
	/// Creates a <see cref="SitemapQuery"/> with the provided <see cref="HttpClient"/>.
	/// </summary>
	/// <param name="client">The client used for every request made by this instance.</param>
	/// <exception cref="ArgumentNullException"><paramref name="client"/> is <c>null</c>.</exception>
	public SitemapQuery(HttpClient client)
	{
		httpClient = client ?? throw new ArgumentNullException(nameof(client));
	}

	/// <summary>
	/// Discovers available sitemaps for a given domain name, returning a list of sitemap URIs discovered.
	/// The sitemaps are discovered from a combination of the site root and looking through the robots.txt file.
	/// </summary>
	/// <param name="domainName">The domain name to search</param>
	/// <param name="cancellationToken">Token used to abandon the discovery.</param>
	/// <returns>List of found sitemap URIs</returns>
	public async Task<IEnumerable<Uri>> DiscoverSitemapsAsync(string domainName, CancellationToken cancellationToken = default)
	{
		var uriBuilder = new UriBuilder("http", domainName);
		var baseUri = uriBuilder.Uri;

		uriBuilder.Path = Constants.DefaultSitemapFilename;
		var defaultSitemapUri = uriBuilder.Uri;

		var sitemapUris = new List<Uri>
		{
			defaultSitemapUri
		};

		var robotsFile = await new RobotsFileParser(httpClient).FromUriAsync(baseUri, cancellationToken).ConfigureAwait(false);
		sitemapUris.AddRange(robotsFile.SitemapEntries.Select(s => s.Sitemap));

		var result = new HashSet<Uri>();
		foreach (var uri in sitemapUris.Distinct())
		{
			cancellationToken.ThrowIfCancellationRequested();

			try
			{
				if (await SitemapExistsAsync(uri, cancellationToken).ConfigureAwait(false))
				{
					result.Add(uri);
				}
			}
			catch (WebException ex) when (ex.Response != null)
			{
				// The server answered, just not usefully: treat the candidate as absent and move on.
			}
		}

		return result;
	}

	/// <summary>
	/// Probes <paramref name="uri"/> with HEAD, retrying with GET when the server rejects HEAD
	/// with a 4xx status other than 404.
	/// </summary>
	private async Task<bool> SitemapExistsAsync(Uri uri, CancellationToken cancellationToken)
	{
		using (var headRequest = new HttpRequestMessage(HttpMethod.Head, uri))
		using (var headResponse = await httpClient.SendAsync(headRequest, cancellationToken).ConfigureAwait(false))
		{
			if (headResponse.IsSuccessStatusCode)
			{
				return true;
			}

			var statusCode = (int)headResponse.StatusCode;
			var headMayBeUnsupported = statusCode >= 400 && statusCode < 500 && headResponse.StatusCode != HttpStatusCode.NotFound;
			if (!headMayBeUnsupported)
			{
				return false;
			}
		}

		// Only the status line matters here, so avoid buffering what may be a very large body.
		using var getRequest = new HttpRequestMessage(HttpMethod.Get, uri);
		using var getResponse = await httpClient.SendAsync(getRequest, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
		return getResponse.IsSuccessStatusCode;
	}

	/// <summary>
	/// Whether the content type indicates a payload that must be gunzipped by hand
	/// (as opposed to transport-level compression handled by the HTTP handler).
	/// </summary>
	private static bool IsCompressedStream(string contentType) =>
		contentType.Equals("application/gzip", StringComparison.OrdinalIgnoreCase) ||
		contentType.Equals("application/x-gzip", StringComparison.OrdinalIgnoreCase) ||
		contentType.Equals("application/octet-stream", StringComparison.OrdinalIgnoreCase);

	/// <summary>
	/// Retrieves a sitemap at the given URI, converting it to a <see cref="SitemapFile"/>.
	/// </summary>
	/// <param name="sitemapUrl">The URI where the sitemap exists.</param>
	/// <param name="cancellationToken">Token used to abandon the request or the parsing.</param>
	/// <returns>
	/// The found and converted <see cref="SitemapFile"/>, or <c>null</c> when the request is unsuccessful,
	/// the response carries no (or an unmapped) content type, or the parser rejects the content.
	/// </returns>
	/// <exception cref="InvalidOperationException">No parser is registered for the detected <see cref="SitemapType"/>.</exception>
	public async Task<SitemapFile?> GetSitemapAsync(Uri sitemapUrl, CancellationToken cancellationToken = default)
	{
		try
		{
			using var response = await httpClient.GetAsync(sitemapUrl, cancellationToken).ConfigureAwait(false);
			if (!response.IsSuccessStatusCode)
			{
				return null;
			}

			// Content-Type is optional; without it the sitemap format cannot be determined.
			var contentType = response.Content.Headers.ContentType?.MediaType;
			if (contentType is null)
			{
				return null;
			}

			var requiresManualDecompression = IsCompressedStream(contentType);
			if (requiresManualDecompression)
			{
				// "sitemap.xml.gz" -> "sitemap.xml": the inner extension tells us what the archive holds.
				var baseFileName = Path.GetFileNameWithoutExtension(sitemapUrl.AbsolutePath);
				contentType = MimeTypes.GetMimeType(baseFileName);
			}

			if (!SitemapTypeMapping.TryGetValue(contentType, out var sitemapType))
			{
				return null;
			}

			if (!SitemapParsers.TryGetValue(sitemapType, out var parser))
			{
				throw new InvalidOperationException($"No sitemap readers for {sitemapType}");
			}

			using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
			cancellationToken.ThrowIfCancellationRequested();

			Stream contentStream = stream;
			if (requiresManualDecompression)
			{
				contentStream = new GZipStream(contentStream, CompressionMode.Decompress);
			}

			// Disposing the reader also disposes the (possibly wrapping) content stream.
			using var streamReader = new StreamReader(contentStream);
			return await parser.ParseSitemapAsync(sitemapUrl, streamReader, cancellationToken).ConfigureAwait(false);
		}
		catch (WebException ex) when (ex.Response != null)
		{
			return null;
		}
	}

	/// <summary>
	/// Retrieves all sitemaps for a given domain.
	/// </summary>
	/// <remarks>
	/// This effectively combines <see cref="DiscoverSitemapsAsync(string, CancellationToken)"/> and
	/// <see cref="GetSitemapAsync(Uri, CancellationToken)"/> while additionally finding any other
	/// sitemaps described in sitemap index files. Every sitemap URI is requested at most once.
	/// </remarks>
	/// <param name="domainName">The domain name to search.</param>
	/// <param name="cancellationToken">Token used to abandon the crawl.</param>
	/// <returns>Every sitemap that could be retrieved and parsed.</returns>
	public async Task<IEnumerable<SitemapFile>> GetAllSitemapsForDomainAsync(string domainName, CancellationToken cancellationToken = default)
	{
		var sitemapFiles = new Dictionary<Uri, SitemapFile>();
		var discoveredUris = await DiscoverSitemapsAsync(domainName, cancellationToken).ConfigureAwait(false);
		var pendingUris = new Stack<Uri>(discoveredUris);

		// Tracks everything ever queued. Checking only the processed sitemaps would let a URI that is
		// referenced twice before being processed get queued twice and then collide on insertion.
		var seenUris = new HashSet<Uri>(pendingUris);

		while (pendingUris.Count > 0)
		{
			var sitemapUri = pendingUris.Pop();
			var sitemapFile = await GetSitemapAsync(sitemapUri, cancellationToken).ConfigureAwait(false);
			if (sitemapFile is null)
			{
				continue;
			}

			sitemapFiles[sitemapUri] = sitemapFile;

			foreach (var indexEntry in sitemapFile.Sitemaps)
			{
				if (seenUris.Add(indexEntry.Location))
				{
					pendingUris.Push(indexEntry.Location);
				}
			}
		}

		return sitemapFiles.Values;
	}
}
--------------------------------------------------------------------------------
/src/TurnerSoftware.SitemapTools/SitemapType.cs:
--------------------------------------------------------------------------------
namespace TurnerSoftware.SitemapTools;

/// <summary>
/// The sitemap formats a response can be mapped to.
/// </summary>
public enum SitemapType
{
	Unknown,
	Xml,
	Text
}
--------------------------------------------------------------------------------
/src/TurnerSoftware.SitemapTools/TurnerSoftware.SitemapTools.csproj:
--------------------------------------------------------------------------------
1 |  2 | 3 | 4 | netstandard2.0;netstandard2.1 5 | 6 | TurnerSoftware.SitemapTools 7 | A sitemap (sitemap.xml) parsing and querying library in C# 8 | $(PackageBaseTags) 9 | James Turner 10 | 11 | 12 | 13 | 14 | all 15 | runtime; build; native; contentfiles; analyzers 16 | 17 | 18 | 
19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Latest 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/another-indexed-sitemap.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | http://localhost/last-text-sitemap.txt 5 | 2005-01-01 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/another-text-sitemap.txt: -------------------------------------------------------------------------------- 1 | http://www.example.com/never-before-seen-url -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/basic-sitemap-WrongFormat.txt: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | http://www.example.com/ 5 | 2005-01-02 6 | monthly 7 | 0.8 8 | 9 | 10 | http://www.example.com/catalog?item=12&desc=vacation_hawaii 11 | weekly 12 | 13 | 14 | http://www.example.com/catalog?item=73&desc=vacation_new_zealand 15 | 2004-12-23 16 | weekly 17 | 18 | 19 | http://www.example.com/catalog?item=74&desc=vacation_newfoundland 20 | 2004-12-23T18:00:15+00:00 21 | 0.3 22 | 23 | 24 | http://www.example.com/catalog?item=83&desc=vacation_usa 25 | 2004-11-23 26 | 27 | 28 | http://www.example.com/frequency/always 29 | always 30 | 31 | 32 | http://www.example.com/frequency/hourly 33 | hourly 34 | 35 | 36 | http://www.example.com/frequency/daily 37 | daily 38 | 39 | 40 | http://www.example.com/frequency/weekly 41 | weekly 42 | 43 | 44 | http://www.example.com/frequency/monthly 45 | monthly 46 | 47 | 48 | http://www.example.com/frequency/yearly 49 | yearly 50 | 51 | 52 | 
http://www.example.com/frequency/never 53 | never 54 | 55 | -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/basic-sitemap-WrongFormat.xml: -------------------------------------------------------------------------------- 1 | http://www.example.com/ 2 | http://www.example.com/about 3 | http://www.example.com/contact-us -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/basic-sitemap.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | http://www.example.com/ 5 | 2005-01-02 6 | monthly 7 | 0.8 8 | 9 | 10 | http://www.example.com/catalog?item=12&desc=vacation_hawaii 11 | weekly 12 | 13 | 14 | http://www.example.com/catalog?item=73&desc=vacation_new_zealand 15 | 2004-12-23 16 | weekly 17 | 18 | 19 | http://www.example.com/catalog?item=74&desc=vacation_newfoundland 20 | 2004-12-23T18:00:15+00:00 21 | 0.3 22 | 23 | 24 | http://www.example.com/catalog?item=83&desc=vacation_usa 25 | 2004-11-23 26 | 27 | 28 | http://www.example.com/frequency/always 29 | always 30 | 31 | 32 | http://www.example.com/frequency/hourly 33 | hourly 34 | 35 | 36 | http://www.example.com/frequency/daily 37 | daily 38 | 39 | 40 | http://www.example.com/frequency/weekly 41 | weekly 42 | 43 | 44 | http://www.example.com/frequency/monthly 45 | monthly 46 | 47 | 48 | http://www.example.com/frequency/yearly 49 | yearly 50 | 51 | 52 | http://www.example.com/frequency/never 53 | never 54 | 55 | -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/gzipped-sitemap.xml.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TurnerSoftware/SitemapTools/4ac4cdbe4f09081fb6753e5c26f7a936600aa9cb/tests/TurnerSoftware.SitemapTools.Tests/Resources/gzipped-sitemap.xml.gz -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/last-text-sitemap.txt: -------------------------------------------------------------------------------- 1 | http://www.example.com/last-text-url -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/robots.txt: -------------------------------------------------------------------------------- 1 | Sitemap: http://localhost/basic-sitemap.xml 2 | Sitemap: http://localhost/not-a-real-sitemap.xml 3 | Sitemap: http://localhost/text-sitemap.txt -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/sitemap.xml: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | http://localhost/another-indexed-sitemap.xml 5 | 6 | 7 | http://localhost/gzipped-sitemap.xml.gz 8 | 2004-10-01T18:23:17+00:00 9 | 10 | 11 | http://localhost/basic-sitemap.xml 12 | 2004-01-01 13 | 14 | 15 | http://localhost/another-text-sitemap.txt 16 | 2004-01-01 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Resources/text-sitemap.txt: -------------------------------------------------------------------------------- 1 | http://www.example.com/ 2 | http://www.example.com/about 3 | http://www.example.com/contact-us -------------------------------------------------------------------------------- /tests/TurnerSoftware.SitemapTools.Tests/Server/Startup.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using Microsoft.AspNetCore.Builder; 3 | using 
Microsoft.Extensions.FileProviders;

namespace TurnerSoftware.SitemapTools.Tests.Server;

/// <summary>
/// Test web host that serves the files in the "Resources" directory as static content.
/// </summary>
public class Startup
{
	public void Configure(IApplicationBuilder app)
	{
		app.UseStaticFiles(new StaticFileOptions
		{
			FileProvider = new PhysicalFileProvider(
				Path.Combine(Directory.GetCurrentDirectory(), "Resources"))
		});
	}
}
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/SitemapQueryTests.cs:
--------------------------------------------------------------------------------
using System;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace TurnerSoftware.SitemapTools.Tests;

[TestClass]
public class SitemapQueryTests : TestBase
{
	[TestMethod]
	public async Task GetSitemapAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			var sitemapQuery = GetSitemapQuery();
			var uriBuilder = GetTestServerUriBuilder();

			uriBuilder.Path = "basic-sitemap.xml";
			var sitemap = await sitemapQuery.GetSitemapAsync(uriBuilder.Uri);

			Assert.IsNotNull(sitemap);
			Assert.AreEqual(0, sitemap.Sitemaps.Count);
			Assert.AreEqual(12, sitemap.Urls.Count);
		});
	}

	[TestMethod]
	public async Task GetSitemapAsync_NotFound()
	{
		var sitemapQuery = GetSitemapQuery();
		var uriBuilder = GetTestServerUriBuilder();

		uriBuilder.Path = "basic-sitemapNotFound.xml";
		var sitemap = await sitemapQuery.GetSitemapAsync(uriBuilder.Uri);

		Assert.IsNull(sitemap);
	}

	[TestMethod]
	public async Task GetSitemapAsync_WrongFormat()
	{
		var sitemapQuery = GetSitemapQuery();
		var uriBuilder = GetTestServerUriBuilder();

		// Plain-text content served with an .xml extension: the XML parser must reject it.
		uriBuilder.Path = "basic-sitemap-WrongFormat.xml";
		var sitemap = await sitemapQuery.GetSitemapAsync(uriBuilder.Uri);

		Assert.IsNull(sitemap);
	}

	[TestMethod]
	public async Task GetSitemapAsync_WrongFormatTxt()
	{
		var sitemapQuery = GetSitemapQuery();
		var uriBuilder = GetTestServerUriBuilder();

		// XML content served with a .txt extension: no line is an absolute URI, so nothing is found.
		uriBuilder.Path = "basic-sitemap-WrongFormat.txt";
		var sitemap = await sitemapQuery.GetSitemapAsync(uriBuilder.Uri);

		Assert.IsNotNull(sitemap);
		Assert.AreEqual(0, sitemap.Sitemaps.Count);
		Assert.AreEqual(0, sitemap.Urls.Count);
	}

	[TestMethod]
	public async Task DiscoverSitemapsAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			var sitemapQuery = GetSitemapQuery();
			var discoveredSitemaps = await sitemapQuery.DiscoverSitemapsAsync("localhost");

			Assert.AreEqual(3, discoveredSitemaps.Count());
		});
	}

	[TestMethod]
	public async Task GetAllSitemapsForDomainAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			var sitemapQuery = GetSitemapQuery();
			var sitemaps = await sitemapQuery.GetAllSitemapsForDomainAsync("localhost");

			Assert.AreEqual(7, sitemaps.Count());
		});
	}

	[TestMethod]
	public async Task SupportsGzippedSitemap()
	{
		await ForEachCultureAsync(async () =>
		{
			var sitemapQuery = GetSitemapQuery();
			var uriBuilder = GetTestServerUriBuilder();

			uriBuilder.Path = "gzipped-sitemap.xml.gz";
			var sitemap = await sitemapQuery.GetSitemapAsync(uriBuilder.Uri);

			Assert.IsNotNull(sitemap);
			var gzipSitemapReference = new Uri("http://www.example.com/gzipped/");
			Assert.IsTrue(sitemap.Urls.Any(u => u.Location == gzipSitemapReference));
		});
	}
}
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/TestBase.cs:
--------------------------------------------------------------------------------
using System;
using System.Globalization;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace TurnerSoftware.SitemapTools.Tests;

[TestClass]
public class TestBase
{
	[AssemblyInitialize]
	public static void AssemblyInitialize(TestContext context)
	{
		TestConfiguration.StartupServer();
	}

	[AssemblyCleanup]
	public static void AssemblyCleanup()
	{
		TestConfiguration.ShutdownServer();
	}

	protected SitemapQuery GetSitemapQuery()
	{
		var client = TestConfiguration.GetHttpClient();
		return new SitemapQuery(client);
	}

	protected UriBuilder GetTestServerUriBuilder()
	{
		var client = TestConfiguration.GetHttpClient();
		return new UriBuilder(client.BaseAddress);
	}

	/// <summary>
	/// Opens a file from the "Resources" directory for reading.
	/// </summary>
	/// <param name="name">File name relative to "Resources".</param>
	protected StreamReader LoadResource(string name)
	{
		// Read-only access with shared reads: the fixtures are never written to, and the default
		// read/write access fails on read-only files and when the file is already open elsewhere.
		var fileStream = new FileStream($"Resources/{name}", FileMode.Open, FileAccess.Read, FileShare.Read);
		return new StreamReader(fileStream);
	}

	/// <summary>
	/// Runs <paramref name="testBody"/> once under every available culture to prove the code under
	/// test is culture-independent, then restores the culture that was current beforehand.
	/// </summary>
	protected static async Task ForEachCultureAsync(Func<Task> testBody)
	{
		var originalCulture = Thread.CurrentThread.CurrentCulture;
		try
		{
			foreach (var culture in CultureInfo.GetCultures(CultureTypes.AllCultures))
			{
				Thread.CurrentThread.CurrentCulture = culture;
				await testBody();
			}
		}
		finally
		{
			Thread.CurrentThread.CurrentCulture = originalCulture;
		}
	}
}
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/TestConfiguration.cs:
--------------------------------------------------------------------------------
using System.Net.Http;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.TestHost;
using TurnerSoftware.SitemapTools.Tests.Server;

namespace TurnerSoftware.SitemapTools.Tests;

/// <summary>
/// Owns the in-memory test server and the single <see cref="HttpClient"/> bound to it.
/// </summary>
static class TestConfiguration
{
	private static TestServer Server { get; set; }

	private static HttpClient Client { get; set; }

	/// <summary>Returns the shared client, creating it from the server on first use.</summary>
	public static HttpClient GetHttpClient() => Client ??= Server.CreateClient();

	/// <summary>Starts the test server; does nothing when it is already running.</summary>
	public static void StartupServer()
	{
		if (Server != null)
		{
			return;
		}

		var builder = new WebHostBuilder()
			.UseStartup<Startup>();

		Server = new TestServer(builder);
	}

	/// <summary>Disposes the client and server; safe to call when neither was created.</summary>
	public static void ShutdownServer()
	{
		Client?.Dispose();
		Client = null;

		Server?.Dispose();
		Server = null;
	}
}
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/TextSitemapParserTests.cs:
--------------------------------------------------------------------------------
using System;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using TurnerSoftware.SitemapTools.Parser;

namespace TurnerSoftware.SitemapTools.Tests;

[TestClass]
public class TextSitemapParserTests : TestBase
{
	[TestMethod]
	public async Task ParseTextSitemapAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			using var reader = LoadResource("text-sitemap.txt");
			var parser = new TextSitemapParser();
			var sitemapFile = await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader);

			Assert.IsNotNull(sitemapFile);
			Assert.AreEqual(3, sitemapFile.Urls.Count);

			Assert.AreEqual(new Uri("http://www.example.com/"), sitemapFile.Urls[0].Location);
			Assert.AreEqual(new Uri("http://www.example.com/about"), sitemapFile.Urls[1].Location);
			Assert.AreEqual(new Uri("http://www.example.com/contact-us"), sitemapFile.Urls[2].Location);
		});
	}

	[TestMethod]
	public async Task ParseTextSitemapAsync_Cancellation()
	{
		using var reader = LoadResource("text-sitemap.txt");
		var parser = new TextSitemapParser();
		await Assert.ThrowsExceptionAsync<OperationCanceledException>(
			async () => await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader, new CancellationToken(true))
		);
	}
}
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/TurnerSoftware.SitemapTools.Tests.csproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | net461;net6.0;net8.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Always 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | all 27 | runtime; build; native; contentfiles; analyzers; buildtransitive 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 |
--------------------------------------------------------------------------------
/tests/TurnerSoftware.SitemapTools.Tests/XmlSitemapParserTests.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using TurnerSoftware.SitemapTools.Parser;

namespace TurnerSoftware.SitemapTools.Tests;

[TestClass]
public class XmlSitemapParserTests : TestBase
{
	[TestMethod]
	public async Task ChangeFrequenciesAreSetCorrectly()
	{
		// Path fragment of each "frequency/*" fixture entry and the value its changefreq must map to.
		var expectedFrequencies = new Dictionary<string, ChangeFrequency>
		{
			{ "always", ChangeFrequency.Always },
			{ "hourly", ChangeFrequency.Hourly },
			{ "daily", ChangeFrequency.Daily },
			{ "weekly", ChangeFrequency.Weekly },
			{ "monthly", ChangeFrequency.Monthly },
			{ "yearly", ChangeFrequency.Yearly },
			{ "never", ChangeFrequency.Never }
		};

		await ForEachCultureAsync(async () =>
		{
			using var reader = LoadResource("basic-sitemap.xml");
			var parser = new XmlSitemapParser();
			var sitemapFile = await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader);

			var entries = sitemapFile.Urls
				.Where(e => e.Location.AbsolutePath.Contains("frequency/"))
				.ToList();

			foreach (var expectation in expectedFrequencies)
			{
				var entry = entries.FirstOrDefault(e => e.Location.AbsolutePath.Contains(expectation.Key));
				Assert.IsNotNull(entry);
				Assert.AreEqual(expectation.Value, entry.ChangeFrequency);
			}
		});
	}

	[TestMethod]
	public async Task ParseIndexFileAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			using var reader = LoadResource("another-indexed-sitemap.xml");
			var parser = new XmlSitemapParser();
			var sitemapFile = await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader);

			Assert.AreEqual(1, sitemapFile.Sitemaps.Count);

			var indexEntry = sitemapFile.Sitemaps.FirstOrDefault();
			Assert.AreEqual(new Uri("http://localhost/last-text-sitemap.txt"), indexEntry.Location);
			Assert.AreEqual(new DateTime(2005, 1, 1), indexEntry.LastModified);
		});
	}

	[TestMethod]
	public async Task ParseSitemapFileAsync()
	{
		await ForEachCultureAsync(async () =>
		{
			using var reader = LoadResource("basic-sitemap.xml");
			var parser = new XmlSitemapParser();
			var sitemapFile = await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader);

			Assert.AreEqual(12, sitemapFile.Urls.Count);

			var sitemapEntry = sitemapFile.Urls.FirstOrDefault();
			Assert.AreEqual(new Uri("http://www.example.com/"), sitemapEntry.Location);
			Assert.AreEqual(new DateTime(2005, 1, 2), sitemapEntry.LastModified);
			Assert.AreEqual(0.8, sitemapEntry.Priority);

			// The second entry has no <priority>, so the default applies.
			sitemapEntry = sitemapFile.Urls[1];
			Assert.AreEqual(new Uri("http://www.example.com/catalog?item=12&desc=vacation_hawaii"), sitemapEntry.Location);
			Assert.AreEqual(0.5, sitemapEntry.Priority);
		});
	}

	[TestMethod]
	public async Task ParseSitemapFileAsync_Cancellation()
	{
		using var reader = LoadResource("basic-sitemap.xml");
		var parser = new XmlSitemapParser();
		try
		{
			await parser.ParseSitemapAsync(new Uri("http://localhost/"), reader, new CancellationToken(true));
		}
		catch (OperationCanceledException)
		{
			// Also covers TaskCanceledException, which derives from OperationCanceledException;
			// which of the two is thrown depends on the target framework's load path.
			return;
		}
		Assert.Fail("Expected exception not thrown");
	}
}
--------------------------------------------------------------------------------