├── .editorconfig ├── .github └── workflows │ ├── build-debug.yaml │ ├── build-release.yaml │ └── stale.yaml ├── .gitignore ├── Directory.Build.props ├── Icon.png ├── LICENSE ├── README.md ├── Utf8StreamReader.sln ├── opensource.snk ├── sandbox ├── Benchmark │ ├── Benchmark.csproj │ ├── BytesReadToEnd.cs │ ├── FromFile.cs │ ├── FromMemory.cs │ ├── Program.cs │ └── ReadToEndString.cs └── ConsoleApp1 │ ├── ConsoleApp1.csproj │ ├── Program.cs │ ├── ReadMeSample.cs │ ├── RespReader.cs │ └── file1.txt ├── src └── Utf8StreamReader │ ├── SegmentedArrayBufferWriter.cs │ ├── Utf8StreamReader.cs │ ├── Utf8StreamReader.csproj │ └── Utf8TextReader.cs └── tests └── Utf8StreamReader.Tests ├── FakeMemoryStream.cs ├── FileReadTest.cs ├── ReadBlockTest.cs ├── ReadTest.cs ├── ReadToEndTest.cs ├── SegmentedArrayBufferWriterTest.cs ├── Tests.cs ├── TextReaderTest.cs ├── Utf8StreamReader.Tests.csproj └── file1.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | indent_style = space 8 | indent_size = 2 9 | insert_final_newline = true 10 | trim_trailing_whitespace = true 11 | 12 | # Visual Studio Spell checker configs (https://learn.microsoft.com/en-us/visualstudio/ide/text-spell-checker?view=vs-2022#how-to-customize-the-spell-checker) 13 | spelling_exclusion_path = ./exclusion.dic 14 | 15 | [*.cs] 16 | indent_size = 4 17 | charset = utf-8-bom 18 | end_of_line = unset 19 | 20 | # Solution files 21 | [*.{sln,slnx}] 22 | end_of_line = unset 23 | 24 | # MSBuild project files 25 | [*.{csproj,props,targets}] 26 | end_of_line = unset 27 | 28 | # Xml config files 29 | [*.{ruleset,config,nuspec,resx,runsettings,DotSettings}] 30 | end_of_line = unset 31 | 32 | [*{_AssemblyInfo.cs,.notsupported.cs}] 33 | generated_code = true 34 | 35 | # C# code style settings 36 | [*.{cs}] 37 | dotnet_style_coalesce_expression = true:suggestion 38 | 
dotnet_style_null_propagation = true:suggestion 39 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion 40 | dotnet_style_prefer_auto_properties = true:suggestion 41 | dotnet_style_object_initializer = true:suggestion 42 | dotnet_style_prefer_collection_expression = true:suggestion 43 | dotnet_style_collection_initializer = true:suggestion 44 | dotnet_style_prefer_simplified_boolean_expressions = true:suggestion 45 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent 46 | dotnet_style_prefer_conditional_expression_over_return = true:silent 47 | dotnet_style_explicit_tuple_names = true:suggestion 48 | dotnet_style_prefer_inferred_tuple_names = true:suggestion 49 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion 50 | dotnet_style_prefer_compound_assignment = true:suggestion 51 | dotnet_style_prefer_simplified_interpolation = true:suggestion 52 | dotnet_style_namespace_match_folder = true:suggestion 53 | dotnet_style_readonly_field = true:suggestion 54 | dotnet_style_predefined_type_for_member_access = true:suggestion 55 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion 56 | dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent 57 | dotnet_style_allow_statement_immediately_after_block_experimental = true:silent 58 | dotnet_style_allow_multiple_blank_lines_experimental = true:silent 59 | dotnet_code_quality_unused_parameters = non_public:suggestion 60 | dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent 61 | dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent 62 | dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent 63 | dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent 64 | dotnet_style_qualification_for_method = false:none 65 | dotnet_style_qualification_for_property = false:none 66 | dotnet_style_qualification_for_field 
= false:none 67 | dotnet_style_qualification_for_event = false:none 68 | 69 | # New line preferences 70 | csharp_new_line_before_open_brace = all 71 | csharp_new_line_before_else = true 72 | csharp_new_line_before_catch = true 73 | csharp_new_line_before_finally = true 74 | csharp_new_line_before_members_in_object_initializers = true 75 | csharp_new_line_before_members_in_anonymous_types = true 76 | csharp_new_line_between_query_expression_clauses = true 77 | 78 | # Indentation preferences 79 | csharp_indent_block_contents = true 80 | csharp_indent_braces = false 81 | csharp_indent_case_contents = true 82 | csharp_indent_case_contents_when_block = true 83 | csharp_indent_switch_labels = true 84 | csharp_indent_labels = one_less_than_current 85 | 86 | # Modifier preferences 87 | csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion 88 | 89 | # avoid this. unless absolutely necessary 90 | dotnet_style_qualification_for_field = false:none 91 | dotnet_style_qualification_for_property = false:none 92 | dotnet_style_qualification_for_method = false:none 93 | dotnet_style_qualification_for_event = false:none 94 | 95 | # Types: use keywords instead of BCL types, and permit var only when the type is clear 96 | csharp_style_var_for_built_in_types = false:none 97 | csharp_style_var_when_type_is_apparent = false:none 98 | csharp_style_var_elsewhere = false:none 99 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion 100 | dotnet_style_predefined_type_for_member_access = true:suggestion 101 | 102 | # name all constant fields using PascalCase 103 | dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion 104 | dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields 105 | dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style 106 | 
dotnet_naming_symbols.constant_fields.applicable_kinds = field 107 | dotnet_naming_symbols.constant_fields.required_modifiers = const 108 | dotnet_naming_style.pascal_case_style.capitalization = pascal_case 109 | 110 | # static fields 111 | dotnet_naming_rule.static_fields_should_have_prefix.severity = none 112 | dotnet_naming_rule.static_fields_should_have_prefix.symbols = static_fields 113 | dotnet_naming_rule.static_fields_should_have_prefix.style = static_prefix_style 114 | dotnet_naming_symbols.static_fields.applicable_kinds = field 115 | dotnet_naming_symbols.static_fields.required_modifiers = static 116 | dotnet_naming_symbols.static_fields.applicable_accessibilities = private, internal, private_protected 117 | dotnet_naming_style.static_prefix_style.required_prefix = s_ 118 | dotnet_naming_style.static_prefix_style.capitalization = camel_case 119 | 120 | # internal and private fields 121 | dotnet_naming_rule.camel_case_for_private_internal_fields.severity = none 122 | dotnet_naming_rule.camel_case_for_private_internal_fields.symbols = private_internal_fields 123 | dotnet_naming_rule.camel_case_for_private_internal_fields.style = camel_case_underscore_style 124 | dotnet_naming_symbols.private_internal_fields.applicable_kinds = field 125 | dotnet_naming_symbols.private_internal_fields.applicable_accessibilities = private, internal 126 | dotnet_naming_style.camel_case_underscore_style.required_prefix = _ 127 | dotnet_naming_style.camel_case_underscore_style.capitalization = camel_case 128 | 129 | # Code style defaults 130 | csharp_using_directive_placement = outside_namespace:suggestion 131 | csharp_prefer_braces = true:silent 132 | csharp_preserve_single_line_blocks = true:none 133 | csharp_preserve_single_line_statements = false:none 134 | csharp_prefer_static_local_function = true:suggestion 135 | csharp_prefer_simple_using_statement = false:none 136 | csharp_style_prefer_switch_expression = true:suggestion 137 | 138 | # Code quality 139 | 
dotnet_style_readonly_field = true:suggestion 140 | dotnet_code_quality_unused_parameters = non_public:suggestion 141 | 142 | # Expression-level preferences 143 | dotnet_style_object_initializer = true:suggestion 144 | dotnet_style_collection_initializer = true:suggestion 145 | dotnet_style_explicit_tuple_names = true:suggestion 146 | dotnet_style_coalesce_expression = true:suggestion 147 | dotnet_style_null_propagation = true:suggestion 148 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion 149 | dotnet_style_prefer_inferred_tuple_names = true:suggestion 150 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion 151 | dotnet_style_prefer_auto_properties = true:suggestion 152 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent 153 | dotnet_style_prefer_conditional_expression_over_return = true:silent 154 | csharp_prefer_simple_default_expression = true:suggestion 155 | 156 | # Expression-bodied members 157 | csharp_style_expression_bodied_methods = true:silent 158 | csharp_style_expression_bodied_constructors = true:silent 159 | csharp_style_expression_bodied_operators = true:silent 160 | csharp_style_expression_bodied_properties = true:silent 161 | csharp_style_expression_bodied_indexers = true:silent 162 | csharp_style_expression_bodied_accessors = true:silent 163 | csharp_style_expression_bodied_lambdas = true:silent 164 | csharp_style_expression_bodied_local_functions = true:silent 165 | 166 | # Pattern matching 167 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 168 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion 169 | csharp_style_inlined_variable_declaration = true:suggestion 170 | 171 | # Null checking preferences 172 | csharp_style_throw_expression = true:suggestion 173 | csharp_style_conditional_delegate_call = true:suggestion 174 | 175 | # Other features 176 | csharp_style_prefer_index_operator = false:none 177 | 
csharp_style_prefer_range_operator = false:none 178 | csharp_style_pattern_local_over_anonymous_function = false:none 179 | 180 | # Space preferences 181 | csharp_space_after_cast = false 182 | csharp_space_after_colon_in_inheritance_clause = true 183 | csharp_space_after_comma = true 184 | csharp_space_after_dot = false 185 | csharp_space_after_keywords_in_control_flow_statements = true 186 | csharp_space_after_semicolon_in_for_statement = true 187 | csharp_space_around_binary_operators = before_and_after 188 | csharp_space_around_declaration_statements = do_not_ignore 189 | csharp_space_before_colon_in_inheritance_clause = true 190 | csharp_space_before_comma = false 191 | csharp_space_before_dot = false 192 | csharp_space_before_open_square_brackets = false 193 | csharp_space_before_semicolon_in_for_statement = false 194 | csharp_space_between_empty_square_brackets = false 195 | csharp_space_between_method_call_empty_parameter_list_parentheses = false 196 | csharp_space_between_method_call_name_and_opening_parenthesis = false 197 | csharp_space_between_method_call_parameter_list_parentheses = false 198 | csharp_space_between_method_declaration_empty_parameter_list_parentheses = false 199 | csharp_space_between_method_declaration_name_and_open_parenthesis = false 200 | csharp_space_between_method_declaration_parameter_list_parentheses = false 201 | csharp_space_between_parentheses = false 202 | csharp_space_between_square_brackets = false 203 | 204 | # Analyzers 205 | dotnet_code_quality.CA1052.api_surface = private, internal 206 | dotnet_code_quality.CA1802.api_surface = private, internal 207 | dotnet_code_quality.CA1822.api_surface = private, internal 208 | dotnet_code_quality.CA2208.api_surface = public 209 | 210 | # IDE0008: Use explicit type 211 | dotnet_diagnostic.IDE0008.severity = none 212 | 213 | # IDE0090: Use 'new(...)' 214 | dotnet_diagnostic.IDE0090.severity = none 215 | 216 | # IDE0040: Add accessibility modifiers 217 | 
dotnet_diagnostic.IDE0040.severity = none 218 | 219 | # Nullability in reference types of interface implemented by the base type doesn't match 220 | dotnet_diagnostic.CS8644.severity = none 221 | 222 | dotnet_diagnostic.CA1816.severity = none 223 | 224 | dotnet_diagnostic.IDE1006.severity = none 225 | 226 | #Remove unnecessary suppression 227 | dotnet_diagnostic.IDE0079.severity = none 228 | 229 | dotnet_diagnostic.IDE0130.severity = none 230 | 231 | dotnet_diagnostic.CA1822.severity = none 232 | 233 | csharp_style_prefer_switch_expression = false:suggestion 234 | 235 | csharp_style_pattern_matching_over_as_with_null_check = false:suggestion 236 | 237 | dotnet_naming_symbols.functional_symbols.applicable_kinds = property,method,event,delegate 238 | dotnet_naming_style.pascal_case_style.capitalization = pascal_case 239 | dotnet_naming_rule.functional_symbols_must_be_capitalized.symbols = functional_symbols 240 | dotnet_naming_rule.functional_symbols_must_be_capitalized.style = pascal_case_style 241 | dotnet_naming_rule.functional_symbols_must_be_capitalized.severity = warning 242 | 243 | dotnet_naming_symbols.public_symbols.applicable_kinds = property,method,field,event,delegate 244 | dotnet_naming_symbols.public_symbols.applicable_accessibilities = public 245 | dotnet_naming_symbols.public_symbols.required_modifiers = readonly 246 | dotnet_naming_style.first_word_upper_case_style.capitalization = first_word_upper 247 | dotnet_naming_rule.public_members_must_be_capitalized.symbols = public_symbols 248 | dotnet_naming_rule.public_members_must_be_capitalized.style = first_word_upper_case_style 249 | dotnet_naming_rule.public_members_must_be_capitalized.severity = warning 250 | 251 | csharp_style_expression_bodied_methods = false:silent 252 | csharp_style_expression_bodied_constructors = false:silent 253 | csharp_style_expression_bodied_operators = false:silent 254 | csharp_style_namespace_declarations = file_scoped:suggestion 255 | 
csharp_style_prefer_method_group_conversion = true:silent 256 | csharp_style_prefer_top_level_statements = true:silent 257 | csharp_style_prefer_primary_constructors = true:suggestion 258 | csharp_style_prefer_null_check_over_type_check = true:suggestion 259 | csharp_style_prefer_local_over_anonymous_function = true:suggestion 260 | csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion 261 | csharp_style_prefer_tuple_swap = true:suggestion 262 | csharp_style_prefer_utf8_string_literals = true:suggestion 263 | csharp_style_deconstructed_variable_declaration = true:suggestion 264 | csharp_style_unused_value_assignment_preference = discard_variable:suggestion 265 | csharp_style_unused_value_expression_statement_preference = discard_variable:silent 266 | csharp_style_prefer_readonly_struct_member = true:suggestion 267 | csharp_style_prefer_readonly_struct = true:suggestion 268 | csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent 269 | csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true:silent 270 | csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true:silent 271 | csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent 272 | csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent 273 | csharp_style_prefer_pattern_matching = true:silent 274 | csharp_style_prefer_extended_property_pattern = true:suggestion 275 | csharp_style_prefer_not_pattern = true:suggestion 276 | -------------------------------------------------------------------------------- /.github/workflows/build-debug.yaml: -------------------------------------------------------------------------------- 1 | name: Build-Debug 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | pull_request: 8 | branches: 9 | - "main" 10 | 11 | jobs: 12 | build-dotnet: 13 | permissions: 14 | contents: read 15 | runs-on: ubuntu-24.04 16 | 
timeout-minutes: 10 17 | steps: 18 | - uses: Cysharp/Actions/.github/actions/checkout@main 19 | - uses: Cysharp/Actions/.github/actions/setup-dotnet@main 20 | - run: dotnet build -c Debug 21 | - run: dotnet test -c Debug --no-build 22 | -------------------------------------------------------------------------------- /.github/workflows/build-release.yaml: -------------------------------------------------------------------------------- 1 | name: Build-Release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tag: 7 | description: "tag: git tag you want create. (sample 1.0.0)" 8 | required: true 9 | dry-run: 10 | description: "dry-run: true will never create relase/nuget." 11 | required: true 12 | default: false 13 | type: boolean 14 | 15 | jobs: 16 | build-dotnet: 17 | permissions: 18 | contents: read 19 | runs-on: ubuntu-24.04 20 | timeout-minutes: 10 21 | steps: 22 | - uses: Cysharp/Actions/.github/actions/checkout@main 23 | - uses: Cysharp/Actions/.github/actions/setup-dotnet@main 24 | - run: dotnet build -c Release -p:Version=${{ inputs.tag }} 25 | - run: dotnet test -c Release --no-build 26 | - run: dotnet pack -c Release --no-build -p:Version=${{ inputs.tag }} -o ./publish 27 | # Store artifacts. 
28 | - uses: Cysharp/Actions/.github/actions/upload-artifact@main 29 | with: 30 | name: nuget 31 | path: ./publish/ 32 | 33 | # release 34 | create-release: 35 | needs: [build-dotnet] 36 | permissions: 37 | contents: write 38 | uses: Cysharp/Actions/.github/workflows/create-release.yaml@main 39 | with: 40 | commit-id: '' 41 | dry-run: ${{ inputs.dry-run }} 42 | tag: ${{ inputs.tag }} 43 | nuget-push: true 44 | release-upload: false 45 | secrets: inherit 46 | -------------------------------------------------------------------------------- /.github/workflows/stale.yaml: -------------------------------------------------------------------------------- 1 | name: "Close stale issues" 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: "0 0 * * *" 7 | 8 | jobs: 9 | stale: 10 | permissions: 11 | contents: read 12 | pull-requests: write 13 | issues: write 14 | uses: Cysharp/Actions/.github/workflows/stale-issue.yaml@main 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs) 2 | [Bb]in/ 3 | [Oo]bj/ 4 | 5 | # mstest test results 6 | TestResults 7 | 8 | ## Ignore Visual Studio temporary files, build results, and 9 | ## files generated by popular Visual Studio add-ons. 
10 | 11 | # User-specific files 12 | *.suo 13 | *.user 14 | *.sln.docstates 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Rr]elease/ 19 | x64/ 20 | *_i.c 21 | *_p.c 22 | *.ilk 23 | *.obj 24 | *.pch 25 | *.pdb 26 | *.pgc 27 | *.pgd 28 | *.rsp 29 | *.sbr 30 | *.tlb 31 | *.tli 32 | *.tlh 33 | *.tmp 34 | *.log 35 | *.vspscc 36 | *.vssscc 37 | .builds 38 | 39 | # Visual C++ cache files 40 | ipch/ 41 | *.aps 42 | *.ncb 43 | *.opensdf 44 | *.sdf 45 | 46 | # Visual Studio profiler 47 | *.psess 48 | *.vsp 49 | *.vspx 50 | 51 | # Guidance Automation Toolkit 52 | *.gpState 53 | 54 | # ReSharper is a .NET coding add-in 55 | _ReSharper* 56 | 57 | # NCrunch 58 | *.ncrunch* 59 | .*crunch*.local.xml 60 | 61 | # Installshield output folder 62 | [Ee]xpress 63 | 64 | # DocProject is a documentation generator add-in 65 | DocProject/buildhelp/ 66 | DocProject/Help/*.HxT 67 | DocProject/Help/*.HxC 68 | DocProject/Help/*.hhc 69 | DocProject/Help/*.hhk 70 | DocProject/Help/*.hhp 71 | DocProject/Help/Html2 72 | DocProject/Help/html 73 | 74 | # Click-Once directory 75 | publish 76 | 77 | # Publish Web Output 78 | *.Publish.xml 79 | 80 | # NuGet Packages Directory 81 | packages 82 | 83 | # Windows Azure Build Output 84 | csx 85 | *.build.csdef 86 | 87 | # Windows Store app package directory 88 | AppPackages/ 89 | 90 | # Others 91 | [Bb]in 92 | [Oo]bj 93 | sql 94 | TestResults 95 | [Tt]est[Rr]esult* 96 | *.Cache 97 | ClientBin 98 | [Ss]tyle[Cc]op.* 99 | ~$* 100 | *.dbmdl 101 | Generated_Code #added for RIA/Silverlight projects 102 | 103 | # Backup & report files from converting an old project file to a newer 104 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 105 | _UpgradeReport_Files/ 106 | Backup*/ 107 | UpgradeLog*.XML 108 | .vs/config/applicationhost.config 109 | .vs/restore.dg 110 | 111 | # OTHER 112 | nuget/tools/* 113 | *.nupkg 114 | 115 | .vs 116 | **/.DS_Store 117 | .idea 118 | 119 | # publish directory 120 | out/ 121 | *.tsbuildinfo 122 | 123 | # BenchmarkDotNet Artifacts 124 | BenchmarkDotNet.Artifacts/ 125 | -------------------------------------------------------------------------------- /Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | $(Version) 6 | Cysharp 7 | Cysharp 8 | © Cysharp, Inc. 9 | https://github.com/Cysharp/Utf8StreamReader 10 | $(PackageProjectUrl) 11 | git 12 | MIT 13 | Icon.png 14 | true 15 | ../../opensource.snk 16 | 17 | ../../../../../../opensource.snk 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cysharp/Utf8StreamReader/a92ba5ef05e22234eb9ec7d02ac5b5f885b492bd/Icon.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Cysharp, Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Utf8StreamReader 2 | 3 | [![GitHub Actions](https://github.com/Cysharp/Utf8StreamReader/workflows/Build-Debug/badge.svg)](https://github.com/Cysharp/Utf8StreamReader/actions) [![Releases](https://img.shields.io/github/release/Cysharp/Utf8StreamReader.svg)](https://github.com/Cysharp/Utf8StreamReader/releases) 4 | [![NuGet package](https://img.shields.io/nuget/v/Utf8StreamReader.svg)](https://nuget.org/packages/Utf8StreamReader) 5 | 6 | Utf8 based StreamReader for high performance text processing. In addition to UTF-8 based binary processing, it can also be used as a high-performance replacement for StreamReader and as a helper for fast binary reading. 
7 | 8 | Avoiding unnecessary string allocation is a fundamental aspect of recent .NET performance improvements. Given that most file and network data is in UTF8, features like [JsonSerializer](https://learn.microsoft.com/en-us/dotnet/api/system.text.json.jsonserializer?view=net-8.0) and [IUtf8SpanParsable](https://learn.microsoft.com/en-us/dotnet/api/system.iutf8spanparsable-1?view=net-8.0), which operate on UTF8-based data, have been added. More recently, methods like [.NET8 MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0), which avoid allocations, have also been introduced. 9 | 10 | However, for the most common use case of parsing strings delimited by newlines, only the traditional [StreamReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader) is provided, which generates a new String for each line, resulting in a large number of allocations. 11 | 12 | ![image](https://github.com/Cysharp/Utf8StringInterpolation/assets/46207/ac8d2c7f-65fb-4dc1-b9f5-73219f036e58) 13 | > Read simple 1000000 lines text 14 | 15 | Incredibly, there is a **240,000 times** difference! 16 | 17 | While it is possible to process data in UTF8 format using standard classes like [PipeReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.pipelines.pipereader?view=dotnet-plat-ext-8.0) and [SequenceReader](https://learn.microsoft.com/en-us/dotnet/api/system.buffers.sequencereader-1?view=net-8.0), they are generic libraries, so properly handling newline processing requires considerable effort (handling the BOM and multiple types of newline characters). 18 | 19 | `Utf8StreamReader` provides a familiar API similar to StreamReader, making it easy to use, while its ReadLine-specific implementation maximizes performance. 
20 | 21 | By using optimized internal processing, higher performance can be achieved when reading Strings from Files compared to using the standard `StreamReader.ReadToEnd` or `File.ReadAllText` methods. 22 | 23 | ![image](https://github.com/Cysharp/Utf8StreamReader/assets/46207/f2dc965a-768a-4069-a3e3-387f5279421a) 24 | 25 | > Read from file(1000000 lines text) 26 | 27 | ```csharp 28 | [Benchmark] 29 | public async Task StreamReaderReadToEndAsync() 30 | { 31 | using var sr = new System.IO.StreamReader(filePath); 32 | return await sr.ReadToEndAsync(); 33 | } 34 | 35 | [Benchmark] 36 | public async Task Utf8TextReaderReadToEndAsync() 37 | { 38 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader(); 39 | return await sr.ReadToEndAsync(); 40 | } 41 | 42 | [Benchmark] 43 | public async Task FileReadAllTextAsync() 44 | { 45 | return await File.ReadAllTextAsync(filePath); 46 | } 47 | ``` 48 | 49 | For an explanation of the performance difference, please refer to the [ReadString Section](#readstring). 50 | 51 | ## Getting Started 52 | 53 | This library is distributed via NuGet, supporting `.NET Standard 2.1`, `.NET 6(.NET 7)` and `.NET 8` or above. For information on usage with Unity, please refer to the [Unity Section](#unity). 54 | 55 | PM> Install-Package [Utf8StreamReader](https://www.nuget.org/packages/Utf8StreamReader) 56 | 57 | The basic API involves `using var streamReader = new Utf8StreamReader(stream);` and then `ReadOnlyMemory line = await streamReader.ReadLineAsync();`. 
When enumerating all lines, you can choose from three styles: 58 | 59 | ```csharp 60 | using Cysharp.IO; // namespace of Utf8StreamReader 61 | 62 | public async Task Sample1(Stream stream) 63 | { 64 | using var reader = new Utf8StreamReader(stream); 65 | 66 | // Most performant style, similar to System.Threading.Channels 67 | while (await reader.LoadIntoBufferAsync()) 68 | { 69 | while (reader.TryReadLine(out var line)) 70 | { 71 | // line is ReadOnlyMemory<byte>, deserialize UTF8 directly. 72 | _ = JsonSerializer.Deserialize(line.Span); 73 | } 74 | } 75 | } 76 | 77 | public async Task Sample2(Stream stream) 78 | { 79 | using var reader = new Utf8StreamReader(stream); 80 | 81 | // Classical style, same as StreamReader 82 | ReadOnlyMemory? line = null; 83 | while ((line = await reader.ReadLineAsync()) != null) 84 | { 85 | _ = JsonSerializer.Deserialize(line.Value.Span); 86 | } 87 | } 88 | 89 | public async Task Sample3(Stream stream) 90 | { 91 | using var reader = new Utf8StreamReader(stream); 92 | 93 | // Easiest style, use async streams 94 | await foreach (var line in reader.ReadAllLinesAsync()) 95 | { 96 | _ = JsonSerializer.Deserialize(line.Span); 97 | } 98 | } 99 | ``` 100 | 101 | From a performance perspective, `Utf8StreamReader` only provides asynchronous APIs. 102 | 103 | Theoretically, the highest performance can be achieved by combining `LoadIntoBufferAsync` and `TryReadLine` in a double while loop. This is similar to the combination of `WaitToReadAsync` and `TryRead` in [Channels](https://learn.microsoft.com/en-us/dotnet/core/extensions/channels). 104 | 105 | `ReadLineAsync`, like StreamReader.ReadLine, returns null to indicate that the end has been reached. 106 | 107 | `ReadAllLinesAsync` returns an `IAsyncEnumerable<ReadOnlyMemory<byte>>`. Although there is a performance difference, it is minimal, so this API is ideal when ease of use is the priority. 108 | 109 | All asynchronous methods accept a `CancellationToken` and support cancellation. 
110 | 111 | For a real-world usage example, refer to [StreamMessageReader.cs](https://github.com/Cysharp/Claudia/blob/main/src/Claudia/StreamMessageReader.cs) in [Cysharp/Claudia](https://github.com/Cysharp/Claudia/), a C# SDK for Anthropic Claude, which parses [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events). 112 | 113 | ## Buffer Lifetimes 114 | 115 | The `ReadOnlyMemory<byte>` returned from `ReadLineAsync` or `TryReadLine` is only valid until the next call to `LoadIntoBufferAsync` or `TryReadLine` or `ReadLineAsync`. Since the data is shared with the internal buffer, it may be overwritten, moved, or returned on the next call, so the safety of the data cannot be guaranteed. The received data must be promptly parsed and converted into a separate object. If you want to keep the data as is, use `ToArray()` to convert it to a `byte[]`. 116 | 117 | This design is similar to [System.IO.Pipelines](https://learn.microsoft.com/en-us/dotnet/standard/io/pipelines). 118 | 119 | ## Read as `ReadOnlyMemory<char>` 120 | 121 | You can convert it to a `Utf8TextReader` that extracts `ReadOnlyMemory<char>` or `string`. Although there is a conversion cost, it is still fast and low allocation, so it can be used as an alternative to `StreamReader`. 122 | 123 | ![image](https://github.com/Cysharp/Utf8StreamReader/assets/46207/d77af0fd-76af-46ce-8261-0863e4ab7109) 124 | 125 | After converting with `AsTextReader()`, all the same methods (`TryReadLine`, `ReadLineAsync`, `LoadIntoBufferAsync`, `ReadAllLinesAsync`) can be used. 126 | 127 | ```csharp 128 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader(); 129 | while (await sr.LoadIntoBufferAsync()) 130 | { 131 | while (sr.TryReadLine(out var line)) 132 | { 133 | // line is ReadOnlyMemory<char>, you can add to StringBuilder or other parsing method. 
134 | 135 | // If you need a string, ReadOnlyMemory<char>.ToString() builds a string instance 136 | // string str = line.ToString(); 137 | } 138 | } 139 | ``` 140 | 141 | You can perform text processing without allocation, such as splitting `ReadOnlySpan<char>` using [MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0#system-memoryextensions-split(system-readonlyspan((system-char))-system-span((system-range))-system-char-system-stringsplitoptions)), and concatenate the results using StringBuilder's [`Append/AppendLine(ReadOnlySpan)`](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.append). This way, string-based processing can be done with much lower allocation compared to `StreamReader`. 142 | 143 | When a string is needed, you can convert `ReadOnlyMemory<char>` to a string using `ToString()`. Even with the added string conversion, the performance is higher than `StreamReader`, so it can be used as a better alternative. 144 | 145 | ## Optimizing FileStream 146 | 147 | Similar to `StreamReader`, `Utf8StreamReader` has the ability to open a `FileStream` by accepting a `string path`. 148 | 149 | ```csharp 150 | public Utf8StreamReader(string path, FileOpenMode fileOpenMode = FileOpenMode.Throughput) 151 | public Utf8StreamReader(string path, int bufferSize, FileOpenMode fileOpenMode = FileOpenMode.Throughput) 152 | public Utf8StreamReader(string path, FileStreamOptions options) 153 | public Utf8StreamReader(string path, FileStreamOptions options, int bufferSize) 154 | ``` 155 | 156 | Unfortunately, the `FileStream` used by `StreamReader` is not optimized for modern .NET. For example, when using `FileStream` with asynchronous methods, it should be opened with `useAsync: true` for optimal performance. However, since `StreamReader` has both synchronous and asynchronous methods in its API, `useAsync: false` is specified. 
Additionally, although `StreamReader` itself has a buffer and `FileStream` does not require a buffer, the buffer of `FileStream` is still being utilized. 157 | 158 | It is difficult to handle `FileStream` correctly with high performance. By specifying a `string path`, the stream is opened with options optimized for `Utf8StreamReader`, so it is recommended to use this overload rather than opening `FileStream` yourself. The following is a benchmark of `FileStream`. 159 | 160 | ![image](https://github.com/Cysharp/Utf8StreamReader/assets/46207/83936827-2380-414a-9778-f53252689eb7) 161 | 162 | `Utf8StreamReader` opens `FileStream` with the following settings: 163 | 164 | ```csharp 165 | var useAsync = (fileOpenMode == FileOpenMode.Scalability); 166 | new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, useAsync: useAsync) 167 | ``` 168 | 169 | Due to historical reasons, the options for `FileStream` are odd, but by setting `bufferSize` to 1, you can avoid the use of internal buffers. `FileStream` has been significantly revamped in .NET 6, and by controlling the setting of this option and the way `Utf8StreamReader` is called as a whole, it can function as a thin wrapper around the fast [RandomAccess.ReadAsync](https://learn.microsoft.com/en-us/dotnet/api/system.io.randomaccess.readasync), allowing you to avoid most of the overhead of FileStream. 170 | 171 | `FileOpenMode` is a proprietary option of `Utf8StreamReader`. 172 | 173 | 174 | ```csharp 175 | public enum FileOpenMode 176 | { 177 | Scalability, 178 | Throughput 179 | } 180 | ``` 181 | 182 | In a Windows environment, the table in the [IO section of the Performance Improvements in .NET 6 blog](https://devblogs.microsoft.com/dotnet/performance-improvements-in-net-6/#io) shows that throughput decreases when `useAsync: true` is used. 
183 | 184 | | Method | Runtime | IsAsync | BufferSize | Mean | 185 | | - | - | - | - | - | 186 | | ReadAsync | .NET 6.0 | True | 1 | 119.573 ms | 187 | | ReadAsync | .NET 6.0 | False | 1 | 36.018 ms | 188 | 189 | By setting `Utf8StreamReader` to `FileOpenMode.Scalability`, true async I/O is enabled and scalability is prioritized. If set to `FileOpenMode.Throughput`, it internally becomes sync-over-async and consumes the ThreadPool, but reduces the overhead of asynchronous I/O and improves throughput. 190 | 191 | If frequently executed within a server application, setting it to `Scalability`, and for batch applications, setting it to `Throughput` will likely yield the best performance characteristics. The default is `Throughput`. (In the current .NET implementation, both seem to be the same (similar to Throughput on Windows) in Linux environments.) 192 | 193 | In `Utf8StreamReader`, by carefully adjusting the buffer size on the `Utf8StreamReader` side, the performance difference is minimized. Please refer to the above benchmark results image for specific values. 194 | 195 | For overloads that accept `FileStreamOptions`, the above settings are not reflected, so please adjust them manually. 196 | 197 | ## ReadString 198 | 199 | By combining the above FileStream optimization with `.AsTextReader().ReadToEndAsync()`, you can achieve higher performance when reading out a `string` compared to `StreamReader.ReadToEnd` or `File.ReadAllText`. 200 | 201 | ![image](https://github.com/Cysharp/Utf8StreamReader/assets/46207/f2dc965a-768a-4069-a3e3-387f5279421a) 202 | 203 | The implementation of `File.ReadAllText` in dotnet/runtime uses `StreamReader.ReadToEnd`, so they are almost the same. However, in the case of `File.ReadAllText`, it uses `useAsync: true` when opening the `FileStream`. That accounts for the performance difference in the benchmark. 
204 | 205 | Another significant difference in the implementation is that `Utf8StreamReader` generates a `string` without using `StringBuilder`. `StreamReader.ReadToEnd` generates a string using the following flow: `byte[] buffer` -> `char[] decodeBuffer` -> `StringBuilder.Append(char[])` -> `StringBuilder.ToString()`, but there are removable inefficiencies. Both `char[]` and `StringBuilder` are `char[]` buffers, and copying occurs. By generating a `string` directly from `char[]`, the copy to the internal buffer of `StringBuilder` can be eliminated. 206 | 207 | In `Utf8StreamReader`'s `.AsTextReader().ReadToEndAsync()`, it receives streaming data in read buffer units from `Utf8StreamReader` (`ReadToEndChunksAsync`), converts it to `char[]` chunks using `Decoder`, and generates the string all at once using `string.Create`. 208 | 209 | ```csharp 210 | // Utf8TextReader is a helper class for ReadOnlyMemory and string generation that internally holds Utf8StreamReader 211 | public async ValueTask ReadToEndAsync(CancellationToken cancellationToken = default) 212 | { 213 | // Using a method similar to .NET 9 LINQ to Objects's ToArray improvement, returns a structure optimized for gap-free sequential expansion 214 | // StreamReader.ReadToEnd copies the buffer to a StringBuilder, but this implementation holds char[] chunks(char[][]) without copying. 
215 | using var writer = new SegmentedArrayBufferWriter(); 216 | var decoder = Encoding.UTF8.GetDecoder(); 217 | 218 | // Utf8StreamReader.ReadToEndChunksAsync returns the internal buffer ReadOnlyMemory as an asynchronous sequence upon each read completion 219 | await foreach (var chunk in reader.ReadToEndChunksAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait)) 220 | { 221 | var input = chunk; 222 | while (input.Length != 0) 223 | { 224 | // The Decoder directly writes from the read buffer to the char[] buffer 225 | decoder.Convert(input.Span, writer.GetMemory().Span, flush: false, out var bytesUsed, out var charsUsed, out var completed); 226 | input = input.Slice(bytesUsed); 227 | writer.Advance(charsUsed); 228 | } 229 | } 230 | 231 | decoder.Convert([], writer.GetMemory().Span, flush: true, out _, out var finalCharsUsed, out _); 232 | writer.Advance(finalCharsUsed); 233 | 234 | // Directly generate a string from the char[][] buffer using String.Create 235 | return string.Create(writer.WrittenCount, writer, static (stringSpan, writer) => 236 | { 237 | foreach (var item in writer.GetSegmentsAndDispose()) 238 | { 239 | item.Span.CopyTo(stringSpan); 240 | stringSpan = stringSpan.Slice(item.Length); 241 | } 242 | }); 243 | } 244 | ``` 245 | 246 | SegmentedArrayBufferWriter borrows the idea (which I proposed) from [the performance improvement of ToArray in LINQ in .NET 9](https://github.com/dotnet/runtime/pull/96570), and internally holds an InlineArray that expands by equal multipliers. 
247 | 248 | ```csharp 249 | [StructLayout(LayoutKind.Sequential)] 250 | struct InlineArray19 251 | { 252 | public const int InitialSize = 8192; 253 | 254 | T[] array00; // 8192 255 | T[] array01; // 16384 256 | T[] array02; // 32768 257 | T[] array03; // 65536 258 | T[] array04; // 131072 259 | T[] array05; // 262144 260 | T[] array06; // 524288 261 | T[] array07; // 1048576 262 | T[] array08; // 2097152 263 | T[] array09; // 4194304 264 | T[] array10; // 8388608 265 | T[] array11; // 16777216 266 | T[] array12; // 33554432 267 | T[] array13; // 67108864 268 | T[] array14; // 134217728 269 | T[] array15; // 268435456 270 | T[] array16; // 536870912 271 | T[] array17; // 1073741824 272 | T[] array18; // Array.MaxLength - total 273 | 274 | public T[] this[int i] 275 | { 276 | [MethodImpl(MethodImplOptions.AggressiveInlining)] 277 | get 278 | { 279 | if (i < 0 || i > 18) Throw(); 280 | return Unsafe.Add(ref array00, i); 281 | } 282 | [MethodImpl(MethodImplOptions.AggressiveInlining)] 283 | set 284 | { 285 | if (i < 0 || i > 18) Throw(); 286 | Unsafe.Add(ref array00, i) = value; 287 | } 288 | } 289 | void Throw() { throw new ArgumentOutOfRangeException(); } 290 | } 291 | ``` 292 | 293 | With these optimizations for both reading and writing, we achieved several times the speedup compared to the .NET standard library. 294 | 295 | ## Binary Read 296 | 297 | `TryPeek`, `PeekAsync`, `TryRead`, `ReadAsync`, `TryReadBlock`, and `ReadBlockAsync` enable reading as binary, irrespective of newline codes. For example, [Redis's protocol, RESP](https://redis.io/docs/latest/develop/reference/protocol-spec/), is a text protocol and typically newline-delimited, but after `$N`, it requires reading N bytes (BulkString). For instance, `$5\r\nhello\r\n` means reading 5 bytes. 
298 | 299 | Here's an example of how it can be parsed: 300 | 301 | ```csharp 302 | // $5\r\nhello\r\n 303 | var line = await reader.ReadLineAsync(); // $5(+ consumed \r\n) 304 | if (line.Value.Span[0] == (byte)'$') 305 | { 306 | Utf8Parser.TryParse(line.Value.Span.Slice(1), out int size, out _); // 5 307 | var block = await reader.ReadBlockAsync(size); // hello 308 | await reader.ReadLineAsync(); // consume \r\n 309 | Console.WriteLine(Encoding.UTF8.GetString(block.Span)); 310 | } 311 | ``` 312 | 313 | A sample that parses all RESP code is available in [RespReader.cs](https://github.com/Cysharp/Utf8StreamReader/blob/e400444/sandbox/ConsoleApp1/RespReader.cs). 314 | 315 | Additionally, when using `LoadIntoBufferAsync` and `LoadIntoBufferAtLeastAsync` to include data in the buffer, using `Try***` allows for more efficient execution. 316 | 317 | ```csharp 318 | while (await reader.LoadIntoBufferAsync()) 319 | { 320 | while (reader.TryReadLine(out var line)) 321 | { 322 | switch (line.Span[0]) 323 | { 324 | case (byte)'$': 325 | Utf8Parser.TryParse(line.Span.Slice(1), out int size, out _); 326 | if (!reader.TryReadBlock(size + 2, out var block)) // +2 is \r\n 327 | { 328 | // ReadBlockAsync is TryReadBlock + LoadIntoBufferAtLeastAsync 329 | block = await reader.ReadBlockAsync(size + 2); 330 | } 331 | yield return block.Slice(0, size); 332 | break; 333 | // and others('+', '-', ':', '*') 334 | default: 335 | break; 336 | } 337 | } 338 | } 339 | ``` 340 | 341 | When using `ReadToEndAsync`, you can obtain a `byte[]` using Utf8StreamReader's efficient binary reading/concatenation (`SegmentedArrayBufferWriter, InlineArray19`). 342 | 343 | ```csharp 344 | using var reader = new Utf8StreamReader(stream); 345 | byte[] bytes = await reader.ReadToEndAsync(); 346 | ``` 347 | 348 | `ReadToEndAsync()` has two optional overloads, `(bool disableBomCheck)` and `(long resultSizeHint)`. 
349 | 350 | If `disableBomCheck` is true, it disables the BOM check/trim and always performs a complete binary-matching read. The default for `ReadToEndAsync` is true, which always expects a binary-matching read. If false, it follows Utf8StreamReader.SkipBom. 351 | 352 | `resultSizeHint` allows for reducing the copy cost by directly generating `new byte[resultSizeHint]` when the final binary size is known and reading directly into that buffer. When reading a file, i.e., when the `Stream` is a `FileStream` and seekable, `FileStream.Length` is used as the resultSizeHint as an optimization. 353 | 354 | Here is the performance comparison between copying a normal `Stream` to a `MemoryStream` by `CopyToAsync` and using `ToArray`, and using `ReadToEndAsync` of `Utf8StreamReader` when converting to `byte[]`. The options are adjusted to intentionally prevent the optimization that would otherwise occur when passing a FileStream directly to Utf8StreamReader. 355 | 356 | ![image](https://github.com/Cysharp/Utf8StreamReader/assets/46207/5d8fc9a3-8455-43de-ab8a-80a0963f2638) 357 | 358 | ```csharp 359 | [Benchmark] 360 | public async Task MemoryStreamCopyToToArray() 361 | { 362 | using var fs = new FileStream(filePath, FileMode.Open); 363 | var ms = new MemoryStream(); 364 | await fs.CopyToAsync(ms); 365 | 366 | return ms.ToArray(); 367 | } 368 | 369 | [Benchmark] 370 | public async Task Utf8StreamReaderReadToEndAsync() 371 | { 372 | using var fs = new FileStream(filePath, FileMode.Open); 373 | using var sr = new Cysharp.IO.Utf8StreamReader(fs); 374 | return await sr.ReadToEndAsync(disableBomCheck: false); // hack for disable optimize(for benchmark fairness) 375 | } 376 | ``` 377 | 378 | ## Reset 379 | 380 | `Utf8StreamReader` is a class that supports reuse. By calling `Reset()`, the Stream and internal state are released. Using `Reset(Stream)`, it can be reused with a new `Stream`.
381 | 382 | ## Options 383 | 384 | The constructor accepts `int bufferSize` and `bool leaveOpen` as parameters. 385 | 386 | `int bufferSize` defaults to 65536 and the buffer is rented from `ArrayPool`. If the data per line is large, changing the buffer size may improve performance. When the buffer size and the size per line are close, frequent buffer copy operations occur, leading to performance degradation. 387 | 388 | `bool leaveOpen` determines whether the internal Stream is also disposed when the object is disposed. The default is `false`, which means the Stream is disposed. 389 | 390 | Additionally, there are init properties that allow changing the option values for `ConfigureAwait`, `SyncRead` and `SkipBom`. 391 | 392 | `bool ConfigureAwait { init; }` allows you to specify the value for `ConfigureAwait(bool continueOnCapturedContext)` when awaiting asynchronous methods internally. The default is `false`. 393 | 394 | `bool SyncRead { init; }` configures the Stream to use synchronous reading, meaning it will use `Read` instead of `ReadAsync`. This causes all Async operations to complete synchronously. There is potential for slight performance improvements when a `FileStream` is opened with `useAsync:false`. Normally, leaving it as false is fine. The default is `false`. 395 | 396 | `bool SkipBom { init; }` determines whether to identify and skip the BOM (Byte Order Mark) included at the beginning of the data during the first read. The default is `true`, which means the BOM is skipped. 397 | 398 | Currently, this is not an option, but `Utf8StreamReader` only recognizes `CRLF(\r\n)` or `LF(\n)` as newline characters. Since environments that use `CR(\r)` are now extremely rare, the CR check is omitted for performance reasons. If you need this functionality, please let us know by creating an Issue. We will consider adding it as an option. 399 | 400 | Unity 401 | --- 402 | Unity, which supports .NET Standard 2.1, can run this library.
Since the library is only provided through NuGet, it is recommended to use [NuGetForUnity](https://github.com/GlitchEnzo/NuGetForUnity) for installation. 403 | 404 | For detailed instructions on using NuGet libraries in Unity, please refer to the documentation of [Cysharp/R3](https://github.com/Cysharp/R3/) and other similar resources. 405 | 406 | License 407 | --- 408 | This library is under the MIT License. 409 | -------------------------------------------------------------------------------- /Utf8StreamReader.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.8.34330.188 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader", "src\Utf8StreamReader\Utf8StreamReader.csproj", "{983561F1-F180-4188-AE80-BFA95FD69656}" 9 | EndProject 10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{5A8808D6-63E0-48EE-A115-0380E0E57156}" 11 | EndProject 12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader.Tests", "tests\Utf8StreamReader.Tests\Utf8StreamReader.Tests.csproj", "{6C953584-A04B-42C7-9CF3-267AFB010C2B}" 13 | EndProject 14 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "sandbox", "sandbox", "{6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}" 15 | EndProject 16 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp1", "sandbox\ConsoleApp1\ConsoleApp1.csproj", "{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}" 17 | EndProject 18 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmark", "sandbox\Benchmark\Benchmark.csproj", "{48293CC8-A87C-4F59-A398-51CD37E6B62B}" 19 | EndProject 20 | Global 21 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 22 | Debug|Any CPU 
= Debug|Any CPU 23 | Release|Any CPU = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 26 | {983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 27 | {983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.Build.0 = Debug|Any CPU 28 | {983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.ActiveCfg = Release|Any CPU 29 | {983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.Build.0 = Release|Any CPU 30 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 31 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.Build.0 = Debug|Any CPU 32 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.ActiveCfg = Release|Any CPU 33 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.Build.0 = Release|Any CPU 34 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.ActiveCfg = Release|Any CPU 37 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.Build.0 = Release|Any CPU 38 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 39 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.Build.0 = Debug|Any CPU 40 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.ActiveCfg = Release|Any CPU 41 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.Build.0 = Release|Any CPU 42 | EndGlobalSection 43 | GlobalSection(SolutionProperties) = preSolution 44 | HideSolutionNode = FALSE 45 | EndGlobalSection 46 | GlobalSection(NestedProjects) = preSolution 47 | {983561F1-F180-4188-AE80-BFA95FD69656} = {BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC} 48 | {6C953584-A04B-42C7-9CF3-267AFB010C2B} = {5A8808D6-63E0-48EE-A115-0380E0E57156} 49 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961} = {6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A} 50 | {48293CC8-A87C-4F59-A398-51CD37E6B62B} = 
{6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A} 51 | EndGlobalSection 52 | GlobalSection(ExtensibilityGlobals) = postSolution 53 | SolutionGuid = {38C0CA37-B15E-4200-9F2C-AD08076E4013} 54 | EndGlobalSection 55 | EndGlobal 56 | -------------------------------------------------------------------------------- /opensource.snk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cysharp/Utf8StreamReader/a92ba5ef05e22234eb9ec7d02ac5b5f885b492bd/opensource.snk -------------------------------------------------------------------------------- /sandbox/Benchmark/Benchmark.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net8.0 6 | enable 7 | enable 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /sandbox/Benchmark/BytesReadToEnd.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Cysharp.IO; 3 | using System.Text.Encodings.Web; 4 | using System.Text.Json; 5 | using System.Text.Unicode; 6 | 7 | namespace Benchmark; 8 | 9 | [SimpleJob, MemoryDiagnoser] 10 | public class BytesReadToEnd 11 | { 12 | const int C = 1000000; 13 | 14 | string filePath = default!; 15 | 16 | [GlobalSetup] 17 | public void GlobalSetup() 18 | { 19 | var options = new JsonSerializerOptions 20 | { 21 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All) 22 | }; 23 | 24 | var path = Path.GetTempFileName(); 25 | var newline = OperatingSystem.IsWindows() ? 
"\r\n"u8 : "\n"u8; 26 | using var file = File.OpenWrite(path); 27 | for (var i = 0; i < C; i++) 28 | { 29 | var json = JsonSerializer.SerializeToUtf8Bytes( 30 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options); 31 | file.Write(json); 32 | file.Write(newline); 33 | } 34 | 35 | filePath = path; 36 | } 37 | 38 | [GlobalCleanup] 39 | public void GlobalCleanup() 40 | { 41 | File.Delete(filePath); 42 | } 43 | 44 | [Benchmark] 45 | public async Task FileReadAllBytesAsync() 46 | { 47 | // ReadAllBytes knows file-length so fastest. 48 | return await File.ReadAllBytesAsync(filePath); 49 | } 50 | 51 | [Benchmark] 52 | public async Task Utf8StreamReaderReadToEndAsync() 53 | { 54 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath); 55 | return await sr.ReadToEndAsync(); 56 | } 57 | } 58 | 59 | [SimpleJob, MemoryDiagnoser] 60 | public class BytesReadToEnd2 61 | { 62 | const int C = 1000000; 63 | 64 | string filePath = default!; 65 | 66 | [GlobalSetup] 67 | public void GlobalSetup() 68 | { 69 | var options = new JsonSerializerOptions 70 | { 71 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All) 72 | }; 73 | 74 | var path = Path.GetTempFileName(); 75 | var newline = OperatingSystem.IsWindows() ? 
"\r\n"u8 : "\n"u8; 76 | using var file = File.OpenWrite(path); 77 | for (var i = 0; i < C; i++) 78 | { 79 | var json = JsonSerializer.SerializeToUtf8Bytes( 80 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options); 81 | file.Write(json); 82 | file.Write(newline); 83 | } 84 | 85 | filePath = path; 86 | } 87 | 88 | [GlobalCleanup] 89 | public void GlobalCleanup() 90 | { 91 | File.Delete(filePath); 92 | } 93 | 94 | [Benchmark] 95 | public async Task MemoryStreamCopyToToArray() 96 | { 97 | using var fs = new FileStream(filePath, FileMode.Open); 98 | var ms = new MemoryStream(); 99 | await fs.CopyToAsync(ms); 100 | 101 | return ms.ToArray(); 102 | } 103 | 104 | [Benchmark] 105 | public async Task Utf8StreamReaderReadToEndAsync() 106 | { 107 | using var fs = new FileStream(filePath, FileMode.Open); 108 | using var sr = new Cysharp.IO.Utf8StreamReader(fs); 109 | return await sr.ReadToEndAsync(disableBomCheck: false); // hack for ignore optimize(for benchmark fairness) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /sandbox/Benchmark/FromFile.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using Cysharp.IO; 3 | using System.Text; 4 | using System.Text.Encodings.Web; 5 | using System.Text.Json; 6 | using System.Text.Unicode; 7 | 8 | namespace Benchmark; 9 | 10 | [SimpleJob, MemoryDiagnoser] 11 | public class FromFile 12 | { 13 | const int C = 1000000; 14 | 15 | string filePath = default!; 16 | 17 | [GlobalSetup] 18 | public void GlobalSetup() 19 | { 20 | var options = new JsonSerializerOptions 21 | { 22 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All) 23 | }; 24 | 25 | var path = Path.GetTempFileName(); 26 | var newline = OperatingSystem.IsWindows() ? 
"\r\n"u8 : "\n"u8; 27 | using var file = File.OpenWrite(path); 28 | for (var i = 0; i < C; i++) 29 | { 30 | var json = JsonSerializer.SerializeToUtf8Bytes( 31 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options); 32 | file.Write(json); 33 | file.Write(newline); 34 | } 35 | 36 | filePath = path; 37 | } 38 | 39 | [GlobalCleanup] 40 | public void GlobalCleanup() 41 | { 42 | File.Delete(filePath); 43 | } 44 | 45 | [Benchmark] 46 | public async Task StreamReaderFileStream() 47 | { 48 | using var sr = new System.IO.StreamReader(filePath); 49 | string? line; 50 | while ((line = await sr.ReadLineAsync()) != null) 51 | { 52 | // ... 53 | } 54 | } 55 | 56 | [Benchmark] 57 | public async Task FileReadLinesAsync() 58 | { 59 | await foreach (var line in File.ReadLinesAsync(filePath, Encoding.UTF8)) 60 | { 61 | } 62 | } 63 | 64 | [Benchmark] 65 | public async Task Utf8StreamReaderFileStreamScalability() 66 | { 67 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability); 68 | while (await sr.LoadIntoBufferAsync()) 69 | { 70 | while (sr.TryReadLine(out var line)) 71 | { 72 | // ... 73 | } 74 | } 75 | } 76 | 77 | [Benchmark] 78 | public async Task Utf8StreamReaderFileStreamThroughput() 79 | { 80 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput); 81 | while (await sr.LoadIntoBufferAsync()) 82 | { 83 | while (sr.TryReadLine(out var line)) 84 | { 85 | // ... 
86 | } 87 | } 88 | } 89 | 90 | [Benchmark] 91 | public async ValueTask Utf8StreamReaderFileStreamThroughputSyncRead() 92 | { 93 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true }; 94 | while (await sr.LoadIntoBufferAsync()) 95 | { 96 | while (sr.TryReadLine(out var line)) 97 | { 98 | } 99 | } 100 | } 101 | 102 | [Benchmark] 103 | public async Task Utf8TextReaderFileStreamScalability() 104 | { 105 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader(); 106 | while (await sr.LoadIntoBufferAsync()) 107 | { 108 | while (sr.TryReadLine(out var line)) 109 | { 110 | // ... 111 | } 112 | } 113 | } 114 | 115 | [Benchmark] 116 | public async Task Utf8TextReaderFileStreamThroughput() 117 | { 118 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader(); 119 | while (await sr.LoadIntoBufferAsync()) 120 | { 121 | while (sr.TryReadLine(out var line)) 122 | { 123 | // ... 124 | } 125 | } 126 | } 127 | 128 | [Benchmark] 129 | public async ValueTask Utf8TextReaderFileStreamThroughputSyncRead() 130 | { 131 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true }.AsTextReader(); 132 | while (await sr.LoadIntoBufferAsync()) 133 | { 134 | while (sr.TryReadLine(out var line)) 135 | { 136 | // ... 
137 | } 138 | } 139 | } 140 | 141 | [Benchmark] 142 | public async Task Utf8TextReaderToStringFileStreamScalability() 143 | { 144 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader(); 145 | while (await sr.LoadIntoBufferAsync()) 146 | { 147 | while (sr.TryReadLine(out var line)) 148 | { 149 | _ = line.ToString(); 150 | } 151 | } 152 | } 153 | 154 | [Benchmark] 155 | public async Task Utf8TextReaderToStringFileStreamThroughput() 156 | { 157 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader(); 158 | while (await sr.LoadIntoBufferAsync()) 159 | { 160 | while (sr.TryReadLine(out var line)) 161 | { 162 | _ = line.ToString(); 163 | } 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /sandbox/Benchmark/FromMemory.cs: -------------------------------------------------------------------------------- 1 | using System.Buffers; 2 | using System.IO.Pipelines; 3 | using System.Text; 4 | using System.Text.Encodings.Web; 5 | using System.Text.Json; 6 | using System.Text.Unicode; 7 | using BenchmarkDotNet.Attributes; 8 | using Cysharp.IO; 9 | 10 | namespace Benchmark; 11 | 12 | [SimpleJob, MemoryDiagnoser] 13 | public class FromMemory 14 | { 15 | const int C = 1000000; 16 | // const int C = 100; 17 | 18 | byte[] utf8Data = default!; 19 | MemoryStream ms = default!; 20 | 21 | [GlobalSetup] 22 | public void GlobalSetup() 23 | { 24 | var options = new JsonSerializerOptions 25 | { 26 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All) 27 | }; 28 | 29 | var jsonLines = Enumerable.Range(0, C) 30 | .Select(x => new MyClass { MyProperty = x, MyProperty2 = "あいうえおかきくけこ" }) 31 | .Select(x => JsonSerializer.Serialize(x, options)) 32 | .ToArray(); 33 | 34 | utf8Data = Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, jsonLines)); 35 | } 36 | 37 | [IterationSetup] 38 | public void Setup() 39 | { 40 | ms = 
new MemoryStream(utf8Data); 41 | } 42 | 43 | [Benchmark] 44 | public async Task StreamReader() 45 | { 46 | using var sr = new System.IO.StreamReader(ms); 47 | string? line; 48 | while ((line = await sr.ReadLineAsync()) != null) 49 | { 50 | // Console.WriteLine(line); 51 | } 52 | } 53 | 54 | [Benchmark] 55 | public async Task Utf8StreamReader() 56 | { 57 | using var sr = new Cysharp.IO.Utf8StreamReader(ms); 58 | while (await sr.LoadIntoBufferAsync()) 59 | { 60 | while (sr.TryReadLine(out var line)) 61 | { 62 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span)); 63 | } 64 | } 65 | } 66 | 67 | [Benchmark] 68 | public async Task Utf8TextReader() 69 | { 70 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader(); 71 | while (await sr.LoadIntoBufferAsync()) 72 | { 73 | while (sr.TryReadLine(out var line)) 74 | { 75 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span)); 76 | } 77 | } 78 | } 79 | 80 | [Benchmark] 81 | public async Task Utf8TextReaderToString() 82 | { 83 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader(); 84 | while (await sr.LoadIntoBufferAsync()) 85 | { 86 | while (sr.TryReadLine(out var line)) 87 | { 88 | _ = line.ToString(); 89 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span)); 90 | } 91 | } 92 | } 93 | 94 | //[Benchmark] 95 | //public async Task Utf8StreamReaderReadLine() 96 | //{ 97 | // using var sr = new Cysharp.IO.Utf8StreamReader(ms); 98 | // ReadOnlyMemory? 
line; 99 | // while ((line = await sr.ReadLineAsync()) != null) 100 | // { 101 | // // Console.WriteLine(Encoding.UTF8.GetString(line.Value.Span)); 102 | // } 103 | //} 104 | 105 | //[Benchmark] 106 | //public async Task Utf8StreamReaderReadAllLines() 107 | //{ 108 | // using var sr = new Cysharp.IO.Utf8StreamReader(ms); 109 | // await foreach (var line in sr.ReadAllLinesAsync()) 110 | // { 111 | // //Console.WriteLine(Encoding.UTF8.GetString(line.Span)); 112 | // } 113 | //} 114 | 115 | [Benchmark] 116 | public async Task PipeReaderSequenceReader() 117 | { 118 | using (ms) 119 | { 120 | var reader = PipeReader.Create(ms); 121 | 122 | READ_AGAIN: 123 | var readResult = await reader.ReadAsync(); 124 | 125 | if (!(readResult.IsCompleted | readResult.IsCanceled)) 126 | { 127 | var buffer = readResult.Buffer; 128 | 129 | while (TryReadData(ref buffer, out var line)) 130 | { 131 | //Console.WriteLine(Encoding.UTF8.GetString(line)); 132 | } 133 | 134 | reader.AdvanceTo(buffer.Start, buffer.End); 135 | goto READ_AGAIN; 136 | } 137 | 138 | } 139 | 140 | static bool TryReadData(ref ReadOnlySequence buffer, out ReadOnlySequence line) 141 | { 142 | var reader = new SequenceReader(buffer); 143 | if (reader.TryReadTo(out line, (byte)'\n', advancePastDelimiter: true)) 144 | { 145 | buffer = buffer.Slice(reader.Consumed); 146 | return true; 147 | } 148 | return false; 149 | } 150 | } 151 | 152 | //[Benchmark] 153 | //public async Task PipelineStreamReader2() 154 | //{ 155 | // using (ms) 156 | // { 157 | // var reader = PipeReader.Create(ms); 158 | 159 | // READ_AGAIN: 160 | // var readResult = await reader.ReadAsync(); 161 | 162 | // if (!(readResult.IsCompleted | readResult.IsCanceled)) 163 | // { 164 | // var buffer = readResult.Buffer; 165 | // ConsumeAllData(ref buffer); 166 | // reader.AdvanceTo(buffer.Start, buffer.End); 167 | // goto READ_AGAIN; 168 | // } 169 | // } 170 | 171 | // static void ConsumeAllData(ref ReadOnlySequence buffer) 172 | // { 173 | // var reader = 
/// <summary>
/// Benchmarks reading a whole UTF-8 file into a string with StreamReader,
/// Utf8StreamReader (through its TextReader adapter) and File.ReadAllTextAsync.
/// </summary>
[SimpleJob, MemoryDiagnoser]
public class ReadToEndString
{
    // number of JSON lines written to the temporary file
    const int C = 1000000;

    string filePath = default!;

    /// <summary>Writes C JSON lines (with non-ASCII content) to a temporary file.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var options = new JsonSerializerOptions
        {
            // keep the Japanese text as raw UTF-8 instead of \uXXXX escapes
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var path = Path.GetTempFileName();
        var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;
        using var file = File.OpenWrite(path);
        for (var i = 0; i < C; i++)
        {
            var json = JsonSerializer.SerializeToUtf8Bytes(
                new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
            file.Write(json);
            file.Write(newline);
        }

        filePath = path;
    }

    /// <summary>Deletes the temporary file created by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    [Benchmark]
    public async Task<string> StreamReaderReadToEndAsync()
    {
        using var sr = new System.IO.StreamReader(filePath);
        return await sr.ReadToEndAsync();
    }

    [Benchmark]
    public async Task<string> Utf8TextReaderReadToEndAsync()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader();
        return await sr.ReadToEndAsync();
    }

    [Benchmark]
    public async Task<string> FileReadAllTextAsync()
    {
        return await File.ReadAllTextAsync(filePath);
    }
}
// Scratch program: feed a RESP bulk string ("$5\r\nhello\r\n") through Utf8StreamReader
// and read it to the end with BOM skipping turned off.
byte[] payload = Encoding.UTF8.GetBytes("$5\r\nhello\r\n");
var source = new MemoryStream(payload);

using var reader = new Utf8StreamReader(source)
{
    SkipBom = false
};

byte[] bytes = await reader.ReadToEndAsync();
/// <summary>
/// README samples showing the three ways to read UTF-8 lines with Utf8StreamReader.
/// The samples return Task (not async void) so failures reach the caller.
/// </summary>
internal class ReadMeSample
{
    public async Task Sample1(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Most performant style, similar to System.Threading.Channels
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var line))
            {
                // line is ReadOnlyMemory<byte>, deserialize UTF8 directly.
                _ = JsonSerializer.Deserialize<Foo>(line.Span);
            }
        }
    }

    public async Task Sample2(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Classical style, same as StreamReader: null marks the end of the stream
        ReadOnlyMemory<byte>? line = null;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Value.Span);
        }
    }

    public async Task Sample3(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Easiest style, use async streams
        await foreach (var line in reader.ReadAllLinesAsync())
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Span);
        }
    }
}
/// <summary>Reads the one-byte RESP type prefix of the next message.</summary>
public async ValueTask<RespType> ReadRespTypeAsync(CancellationToken cancellationToken = default)
{
    return (RespType)await reader.ReadAsync(cancellationToken);
}

// All message readers below expect ReadRespTypeAsync to have been called first
// (the type prefix is already consumed).

/// <summary>Reads a simple string payload, e.g. <c>+OK\r\n</c>.</summary>
public async ValueTask<string> ReadSimpleStringAsync(CancellationToken cancellationToken = default) // +OK\r\n
{
    return Encoding.UTF8.GetString((await reader.ReadLineAsync(cancellationToken)).Value.Span);
}

/// <summary>Reads an error payload, e.g. <c>-Error message\r\n</c>.</summary>
public async ValueTask<string> ReadErrorMessageAsync(CancellationToken cancellationToken = default) // -Error message\r\n
{
    return Encoding.UTF8.GetString((await reader.ReadLineAsync(cancellationToken)).Value.Span);
}

/// <summary>Reads an integer payload, e.g. <c>:1000\r\n</c>. An unparsable line yields 0.</summary>
public async ValueTask<long> ReadIntegerAsync(CancellationToken cancellationToken = default) // :1000\r\n
{
    var line = await reader.ReadLineAsync(cancellationToken);
    Utf8Parser.TryParse(line.Value.Span, out long value, out _);
    return value;
}

/// <summary>
/// Reads a bulk string, e.g. <c>$5\r\nhello\r\n</c>. Returns null for the RESP null bulk string
/// (negative length). The returned memory points into the reader's buffer.
/// </summary>
public async ValueTask<ReadOnlyMemory<byte>?> ReadBulkStringAsync(CancellationToken cancellationToken = default) // "$5\r\nhello\r\n"
{
    var line = await reader.ReadLineAsync(cancellationToken);
    Utf8Parser.TryParse(line.Value.Span, out int count, out _);
    if (count < 0)
    {
        return null;
    }

    var dataWithNewLine = await reader.ReadBlockAsync(count + 2, cancellationToken); // +2 is \r\n
    return dataWithNewLine[..^2]; // without newline
}

// For better performance, dedicated ReadIntegerArray / ReadStringArray / bulk-string array readers are the better approach.
/// <summary>
/// Reads an array, e.g. <c>*2\r\n$5\r\nhello\r\n$5\r\nworld\r\n</c>. Elements are string, long, byte[]
/// or nested object[]; a null bulk string or an unknown element type leaves a null element.
/// A null or empty RESP array yields an empty array.
/// </summary>
public async ValueTask<object[]> ReadArrayAsync(CancellationToken cancellationToken = default) // "*2\r\n$5\r\nhello\r\n$5\r\nworld\r\n"
{
    var line = await reader.ReadLineAsync(cancellationToken);
    Utf8Parser.TryParse(line.Value.Span, out int count, out _);
    if (count <= 0)
    {
        // "*-1" (null array) must not reach new object[-1]
        return [];
    }

    var result = new object[count];
    for (int i = 0; i < count; i++)
    {
        var type = await ReadRespTypeAsync(cancellationToken);
        switch (type)
        {
            case RespType.SimpleStrings:
                result[i] = await ReadSimpleStringAsync(cancellationToken);
                break;
            case RespType.Errors:
                result[i] = await ReadErrorMessageAsync(cancellationToken);
                break;
            case RespType.Integers:
                result[i] = await ReadIntegerAsync(cancellationToken);
                break;
            case RespType.BulkStrings:
                {
                    // materialize immediately: the memory is only valid until the next read
                    var bulk = await ReadBulkStringAsync(cancellationToken);
                    result[i] = bulk.HasValue ? bulk.Value.ToArray() : null!;
                    break;
                }
            case RespType.Arrays:
                result[i] = await ReadArrayAsync(cancellationToken);
                break;
            default:
                break;
        }
    }

    return result;
}

public void Dispose()
{
    reader.Dispose();
}
// similar to .NET9 SegmentedArrayBuilder but for async operation and direct write
/// <summary>
/// Append-only buffer made of pooled arrays that double in size (8192, 16384, ...),
/// so growing never copies already written data.
/// </summary>
internal sealed class SegmentedArrayBufferWriter<T> : IDisposable
{
    // NetStandard2.1 does not have Array.MaxLength so use constant.
    const int ArrayMaxLength = 0X7FFFFFC7;

    InlineArray19<T> segments;
    int currentSegmentIndex;
    int countInFinishedSegments;

    T[] currentSegment;
    int currentWritten;

    bool isDisposed = false;

    /// <summary>Total number of items written so far.</summary>
    public int WrittenCount => countInFinishedSegments + currentWritten;

    public SegmentedArrayBufferWriter()
    {
        currentSegment = segments[0] = ArrayPool<T>.Shared.Rent(InlineArray19<T>.InitialSize);
    }

    /// <summary>Free space of the current segment; call <see cref="Advance"/> after writing into it.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Memory<T> GetMemory() // no sizeHint
    {
        return currentSegment.AsMemory(currentWritten);
    }

    /// <summary>Free space of the current segment; call <see cref="Advance"/> after writing into it.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<T> GetSpan()
    {
        return currentSegment.AsSpan(currentWritten);
    }

    /// <summary>Marks <paramref name="count"/> items of the current segment as written.</summary>
    /// <exception cref="ArgumentOutOfRangeException">count is negative or exceeds the free space.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Advance(int count)
    {
        // the unsigned compare rejects negative values as well
        if ((uint)count > (uint)(currentSegment.Length - currentWritten))
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }

        currentWritten += count;
        if (currentWritten == currentSegment.Length)
        {
            AllocateNextMemory();
        }
    }

    // Closes the (full) current segment and rents the next one, twice as large,
    // capped so that the total never exceeds ArrayMaxLength.
    void AllocateNextMemory()
    {
        countInFinishedSegments += currentSegment.Length;
        var nextSize = currentSegment.Length * 2L;
        if (nextSize + countInFinishedSegments > ArrayMaxLength)
        {
            nextSize = ArrayMaxLength - countInFinishedSegments;
        }

        currentSegmentIndex++;
        currentSegment = segments[currentSegmentIndex] = ArrayPool<T>.Shared.Rent((int)nextSize);
        currentWritten = 0;
    }

    /// <summary>Copies <paramref name="source"/> into the buffer, spilling into new segments as needed.</summary>
    public void Write(ReadOnlySpan<T> source)
    {
        while (source.Length != 0)
        {
            var destination = GetSpan();
            var copySize = Math.Min(source.Length, destination.Length);

            source.Slice(0, copySize).CopyTo(destination);

            Advance(copySize);
            source = source.Slice(copySize);
        }
    }

    /// <summary>Copies everything written into one new array and returns all segments to the pool.</summary>
    public T[] ToArrayAndDispose()
    {
        if (isDisposed) throw new ObjectDisposedException(nameof(SegmentedArrayBufferWriter<T>));
        isDisposed = true;

        var size = checked(countInFinishedSegments + currentWritten);
        if (size == 0)
        {
            // nothing written means only the first segment was ever rented
            ReturnToPool(currentSegment);
            currentSegment = null!;
            segments = default;
            return [];
        }

#if !NETSTANDARD
        var result = GC.AllocateUninitializedArray<T>(size);
#else
        var result = new T[size];
#endif
        var destination = result.AsSpan();

        // finished segments are completely filled
        for (int i = 0; i < currentSegmentIndex; i++)
        {
            var segment = segments[i];
            segment.AsSpan().CopyTo(destination);
            destination = destination.Slice(segment.Length);
            ReturnToPool(segment);
        }

        // the current segment is only partially filled
        currentSegment.AsSpan(0, currentWritten).CopyTo(destination);
        ReturnToPool(currentSegment);

        currentSegment = null!;
        segments = default;
        return result;
    }

    // NOTE: create struct enumerator?
    /// <summary>
    /// Enumerates the written data segment by segment. Each segment goes back to the pool when the
    /// enumeration moves past it, so a yielded memory is only valid until the next MoveNext.
    /// Segments not reached are returned to the pool when the enumerator is disposed.
    /// </summary>
    public IEnumerable<ReadOnlyMemory<T>> GetSegmentsAndDispose()
    {
        if (isDisposed) throw new ObjectDisposedException(nameof(SegmentedArrayBufferWriter<T>));
        isDisposed = true;

        var next = 0; // first segment that has not been returned to the pool yet
        try
        {
            // without current
            for (; next < currentSegmentIndex; next++)
            {
                yield return segments[next];
                ReturnToPool(segments[next]);
            }

            // current
            if (currentWritten != 0)
            {
                yield return currentSegment.AsMemory(0, currentWritten);
            }
        }
        finally
        {
            // also runs when the consumer stops early: release everything still rented
            for (int i = next; i <= currentSegmentIndex; i++)
            {
                ReturnToPool(segments[i]);
            }

            currentSegment = null!;
            segments = default;
        }
    }

    public void Dispose()
    {
        if (isDisposed) return;

        isDisposed = true;
        for (int i = 0; i <= currentSegmentIndex; i++)
        {
            ReturnToPool(segments[i]);
        }

        currentSegment = null!;
        segments = default;
    }

    // Arrays holding references are cleared so the pool does not keep objects alive.
    static void ReturnToPool(T[] array)
    {
        ArrayPool<T>.Shared.Return(array, clearArray: RuntimeHelpers.IsReferenceOrContainsReferences<T>());
    }
}

/// <summary>Fixed inline storage for the 19 segment slots, avoiding a separate T[][] allocation.</summary>
[StructLayout(LayoutKind.Sequential)]
struct InlineArray19<T>
{
    public const int InitialSize = 8192;

    T[] array00; // 8192
    T[] array01; // 16384
    T[] array02; // 32768
    T[] array03; // 65536
    T[] array04; // 131072
    T[] array05; // 262144
    T[] array06; // 524288
    T[] array07; // 1048576
    T[] array08; // 2097152
    T[] array09; // 4194304
    T[] array10; // 8388608
    T[] array11; // 16777216
    T[] array12; // 33554432
    T[] array13; // 67108864
    T[] array14; // 134217728
    T[] array15; // 268435456
    T[] array16; // 536870912
    T[] array17; // 1073741824
    T[] array18; // Array.MaxLength - total

    public T[] this[int i]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get
        {
            if (i < 0 || i > 18) Throw();
            // the fields are laid out sequentially, so slot i is i references after array00
            return Unsafe.Add(ref array00, i);
        }
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        set
        {
            if (i < 0 || i > 18) Throw();
            Unsafe.Add(ref array00, i) = value;
        }
    }

    static void Throw()
    {
        throw new ArgumentOutOfRangeException("i");
    }
}
192 | [MethodImpl(MethodImplOptions.AggressiveInlining)] 193 | set 194 | { 195 | if (i < 0 || i > 18) Throw(); 196 | Unsafe.Add(ref array00, i) = value; 197 | } 198 | } 199 | 200 | void Throw() 201 | { 202 | throw new ArgumentOutOfRangeException(); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/Utf8StreamReader/Utf8StreamReader.cs: -------------------------------------------------------------------------------- 1 | using System.Buffers; 2 | using System.Runtime.CompilerServices; 3 | using System.Text; 4 | 5 | namespace Cysharp.IO; 6 | 7 | public enum FileOpenMode 8 | { 9 | Scalability, 10 | Throughput 11 | } 12 | 13 | public sealed class Utf8StreamReader : IAsyncDisposable, IDisposable 14 | { 15 | // NetStandard2.1 does not have Array.MaxLength so use constant. 16 | const int ArrayMaxLength = 0X7FFFFFC7; 17 | 18 | const int DefaultBufferSize = 65536; 19 | const int MinBufferSize = 1024; 20 | 21 | Stream stream; 22 | readonly bool leaveOpen; 23 | readonly int bufferSize; 24 | bool endOfStream; 25 | bool checkPreamble = true; 26 | bool skipBom = true; 27 | bool isDisposed; 28 | 29 | byte[] inputBuffer; 30 | int positionBegin; 31 | int positionEnd; 32 | int lastNewLinePosition = -2; // -2 is not exists new line in buffer, -1 is not yet searched. 
int lastExaminedPosition; // absolute index of the '\n' that ends the pending line (meaningful while lastNewLinePosition >= 0)

/// <summary>
/// When true (default) a UTF-8 BOM at the start of the stream is skipped by the buffered read APIs.
/// Setting it to false also disables the preamble check on the first read.
/// </summary>
public bool SkipBom
{
    get => skipBom;
    init => skipBom = checkPreamble = value;
}

/// <summary>Value passed to <c>ConfigureAwait(...)</c> on the reader's internal awaits.</summary>
public bool ConfigureAwait { get; init; } = false;

/// <summary>When true, data is loaded with the synchronous <c>Stream.Read</c> instead of <c>ReadAsync</c>.</summary>
public bool SyncRead { get; init; } = false;

public Utf8StreamReader(Stream stream)
    : this(stream, DefaultBufferSize, false)
{
}

public Utf8StreamReader(Stream stream, int bufferSize)
    : this(stream, bufferSize, false)
{
}

public Utf8StreamReader(Stream stream, bool leaveOpen)
    : this(stream, DefaultBufferSize, leaveOpen)
{
}

/// <summary>Creates a reader over <paramref name="stream"/>.</summary>
/// <param name="stream">Source of UTF-8 bytes.</param>
/// <param name="bufferSize">Initial buffer size; at least MinBufferSize bytes are rented.</param>
/// <param name="leaveOpen">When true the stream is not disposed together with the reader.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public Utf8StreamReader(Stream stream, int bufferSize, bool leaveOpen)
{
    // validate before renting so a failed construction does not take a pooled array
    this.stream = stream ?? throw new ArgumentNullException(nameof(stream));
    this.bufferSize = bufferSize;
    this.leaveOpen = leaveOpen;
    this.inputBuffer = ArrayPool<byte>.Shared.Rent(Math.Max(bufferSize, MinBufferSize));
}

public Utf8StreamReader(string path, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
    : this(path, DefaultBufferSize, fileOpenMode)
{
}

public Utf8StreamReader(string path, int bufferSize, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
    : this(OpenPath(path, fileOpenMode), bufferSize, leaveOpen: false)
{
}

// Opens the file read-only; FileStream's own buffering is turned off (bufferSize: 1)
// because the reader keeps its own buffer.
static FileStream OpenPath(string path, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
{
#if NETSTANDARD
    var useSequentialScan = FileOptions.SequentialScan;
#else
    // SequentialScan is a perf hint that requires extra sys-call on non-Windows OSes.
    var useSequentialScan = OperatingSystem.IsWindows() ? FileOptions.SequentialScan : FileOptions.None;
#endif
    // Scalability opens the file for asynchronous I/O, Throughput for synchronous I/O.
    var fileOptions = (fileOpenMode == FileOpenMode.Scalability)
        ? (FileOptions.Asynchronous | useSequentialScan)
        : useSequentialScan;
    return new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, options: fileOptions);
}

#if !NETSTANDARD

public Utf8StreamReader(string path, FileStreamOptions options)
    : this(path, options, DefaultBufferSize)
{
}

public Utf8StreamReader(string path, FileStreamOptions options, int bufferSize)
    : this(OpenPath(path, options), bufferSize)
{
}

static FileStream OpenPath(string path, FileStreamOptions options)
{
    return new FileStream(path, options);
}

#endif

// Peek() and EndOfStream are synchronous APIs, so they are not provided.

/// <summary>The underlying stream.</summary>
public Stream BaseStream => stream;
/// <summary>
/// Tries to cut the next line (terminated by LF or CRLF) out of the already buffered data.
/// After the end of the stream the unterminated remainder is returned as the last line.
/// </summary>
/// <param name="line">The line without its terminator; it points into the internal buffer.</param>
/// <returns>false when the buffer holds no complete line yet.</returns>
public bool TryReadLine(out ReadOnlyMemory<byte> line)
{
    ThrowIfDisposed();

    if (lastNewLinePosition >= 0)
    {
        // LoadIntoBufferAsync already located the line end; reuse it instead of scanning again
        line = inputBuffer.AsMemory(positionBegin, lastNewLinePosition - positionBegin);
        positionBegin = lastExaminedPosition + 1;
        lastNewLinePosition = lastExaminedPosition = -1;
        return true;
    }

    // AsSpan(positionBegin..positionEnd) is more readable but don't use range notation, it is slower.
    var index = IndexOfNewline(inputBuffer.AsSpan(positionBegin, positionEnd - positionBegin), out var newLineIndex);
    if (index == -1)
    {
        if (endOfStream && positionBegin != positionEnd)
        {
            // return last line
            line = inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin);
            positionBegin = positionEnd;
            return true;
        }

        lastNewLinePosition = lastExaminedPosition = -2; // not exists new line in this buffer
        line = default;
        return false;
    }

    // index excludes a '\r' before the '\n', newLineIndex is the '\n' itself
    line = inputBuffer.AsMemory(positionBegin, index); // positionBegin..(positionBegin+index)
    positionBegin = (positionBegin + newLineIndex + 1);
    lastNewLinePosition = lastExaminedPosition = -1;
    return true;
}

/// <summary>
/// Loads data until the buffer contains at least one complete line, or the stream ends.
/// </summary>
/// <returns>
/// true when <see cref="TryReadLine"/> can return a line (a terminated line, or leftover bytes
/// at the end of the stream); false when the stream is finished and fully consumed.
/// </returns>
#if !NETSTANDARD
[AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))]
#endif
public async ValueTask<bool> LoadIntoBufferAsync(CancellationToken cancellationToken = default)
{
    ThrowIfDisposed();
    cancellationToken.ThrowIfCancellationRequested();

    // pre-check

    if (endOfStream)
    {
        if (positionBegin != positionEnd) // not yet fully consumed
        {
            return true;
        }
        else
        {
            return false;
        }
    }
    else
    {
        if (lastNewLinePosition >= 0) return true; // already filled line into buffer

        if (lastNewLinePosition == -1)
        {
            // not searched yet: IndexOfNewline results are relative to positionBegin
            var index = IndexOfNewline(inputBuffer.AsSpan(positionBegin, positionEnd - positionBegin), out var examinedIndex);
            if (index != -1)
            {
                // convert to absolute
                lastNewLinePosition = positionBegin + index;
                lastExaminedPosition = positionBegin + examinedIndex;
                return true;
            }
        }
        else
        {
            // change status to not searched
            lastNewLinePosition = -1;
        }
    }

    // requires load into buffer

    if (positionEnd != 0 && positionBegin == positionEnd)
    {
        // can reset buffer position
        positionBegin = positionEnd = 0;
    }

    var examined = positionEnd; // avoid to duplicate scan

LOAD_INTO_BUFFER:
    // buffer is not full yet: read repeatedly
    if (positionEnd != inputBuffer.Length)
    {
        var read = SyncRead
            ? stream.Read(inputBuffer.AsSpan(positionEnd))
            : await stream.ReadAsync(inputBuffer.AsMemory(positionEnd), cancellationToken).ConfigureAwait(ConfigureAwait);

        positionEnd += read;
        if (read == 0)
        {
            endOfStream = true;
            if (positionBegin != positionEnd) // has last line
            {
                return true;
            }
            else
            {
                return false;
            }
        }
        else
        {
            // first Read, require to check UTF8 BOM
            if (checkPreamble)
            {
                // fewer than 3 bytes cannot be compared against the BOM yet
                if (positionEnd < 3) goto LOAD_INTO_BUFFER;
                if (inputBuffer.AsSpan(0, 3).SequenceEqual(Encoding.UTF8.Preamble))
                {
                    positionBegin = 3;
                }
                checkPreamble = false;
            }

            // scan examined(already scanned) to End.
            // Back one index to check if CRLF fell on buffer boundary
            var scanFrom = examined > 0 ? examined - 1 : examined;
            var index = IndexOfNewline(inputBuffer.AsSpan(scanFrom, positionEnd - scanFrom), out var examinedIndex);
            if (index != -1)
            {
                lastNewLinePosition = scanFrom + index;
                lastExaminedPosition = scanFrom + examinedIndex;
                return true;
            }

            examined = positionEnd;
            goto LOAD_INTO_BUFFER;
        }
    }

    // buffer is full but its head is already consumed: slide the unread data to the front
    if (positionBegin != 0)
    {
        inputBuffer.AsSpan(positionBegin, positionEnd - positionBegin).CopyTo(inputBuffer);
        positionEnd -= positionBegin;
        positionBegin = 0;
        examined = positionEnd;
        goto LOAD_INTO_BUFFER;
    }

    // buffer is completely full of one unterminated line, needs resize
    // (positionBegin is 0, positionEnd and examined are at the end)
    {
        var newBuffer = ArrayPool<byte>.Shared.Rent(GetNewSize(inputBuffer.Length));
        inputBuffer.AsSpan().CopyTo(newBuffer);
        ArrayPool<byte>.Shared.Return(inputBuffer);
        inputBuffer = newBuffer;
        goto LOAD_INTO_BUFFER;
    }
}
ArrayPool.Shared.Return(inputBuffer); 304 | inputBuffer = newBuffer; 305 | } 306 | 307 | LOAD_INTO_BUFFER: 308 | var read = SyncRead 309 | ? stream.Read(inputBuffer.AsSpan(positionEnd)) 310 | : await stream.ReadAsync(inputBuffer.AsMemory(positionEnd), cancellationToken).ConfigureAwait(ConfigureAwait); 311 | positionEnd += read; 312 | if (read == 0) 313 | { 314 | throw new EndOfStreamException(); 315 | } 316 | else 317 | { 318 | // first Read, require to check UTF8 BOM 319 | if (checkPreamble) 320 | { 321 | if (positionEnd < 3) goto LOAD_INTO_BUFFER; 322 | if (inputBuffer.AsSpan(0, 3).SequenceEqual(Encoding.UTF8.Preamble)) 323 | { 324 | positionBegin = 3; 325 | remains += 3; // read 3 bytes should not contains 326 | } 327 | checkPreamble = false; 328 | } 329 | 330 | remains -= read; 331 | if (remains < 0) 332 | { 333 | return; 334 | } 335 | 336 | goto LOAD_INTO_BUFFER; 337 | } 338 | } 339 | 340 | public async IAsyncEnumerable> ReadToEndChunksAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) 341 | { 342 | if (endOfStream) 343 | { 344 | var result = inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 345 | positionBegin = positionEnd; 346 | if (result.Length != 0) 347 | { 348 | yield return result; 349 | } 350 | yield break; 351 | } 352 | 353 | if (positionEnd != 0 && positionBegin != positionEnd) 354 | { 355 | yield return inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 356 | } 357 | 358 | positionBegin = positionEnd = 0; 359 | lastNewLinePosition = -2; 360 | 361 | LOAD_INTO_BUFFER: 362 | var read = SyncRead 363 | ? 
stream.Read(inputBuffer.AsSpan(positionEnd)) 364 | : await stream.ReadAsync(inputBuffer.AsMemory(positionEnd), cancellationToken).ConfigureAwait(ConfigureAwait); 365 | 366 | positionEnd += read; 367 | if (read == 0) 368 | { 369 | endOfStream = true; 370 | var result = inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 371 | positionBegin = positionEnd; 372 | if (result.Length != 0) 373 | { 374 | yield return result; 375 | } 376 | yield break; 377 | } 378 | else 379 | { 380 | // first Read, require to check UTF8 BOM 381 | if (checkPreamble) 382 | { 383 | if (positionEnd < 3) goto LOAD_INTO_BUFFER; 384 | if (inputBuffer.AsSpan(0, 3).SequenceEqual(Encoding.UTF8.Preamble)) 385 | { 386 | positionBegin = 3; 387 | } 388 | checkPreamble = false; 389 | if (positionEnd - positionBegin == 0) 390 | { 391 | goto LOAD_INTO_BUFFER; 392 | } 393 | } 394 | 395 | yield return inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 396 | positionBegin = positionEnd = 0; 397 | goto LOAD_INTO_BUFFER; 398 | } 399 | } 400 | 401 | public ValueTask ReadToEndAsync(CancellationToken cancellationToken = default) 402 | { 403 | return ReadToEndAsync(true, cancellationToken); 404 | } 405 | 406 | public ValueTask ReadToEndAsync(bool disableBomCheck, CancellationToken cancellationToken = default) 407 | { 408 | if (disableBomCheck && BaseStream is FileStream fs && fs.CanSeek) 409 | { 410 | return ReadToEndAsyncCore(fs.Length, true, cancellationToken); 411 | } 412 | 413 | return ReadToEndAsyncCore(-1, disableBomCheck, cancellationToken); 414 | } 415 | 416 | public ValueTask ReadToEndAsync(long resultSizeHint, CancellationToken cancellationToken = default) 417 | { 418 | return ReadToEndAsyncCore(resultSizeHint, true, cancellationToken); 419 | } 420 | 421 | async ValueTask ReadToEndAsyncCore(long resultSizeHint, bool disableBomCheck = true, CancellationToken cancellationToken = default) 422 | { 423 | if (endOfStream) 424 | { 425 | var slice = inputBuffer.AsMemory(positionBegin, 
positionEnd - positionBegin); 426 | positionBegin = positionEnd = 0; 427 | lastNewLinePosition = -2; 428 | return (slice.Length != 0) 429 | ? slice.ToArray() 430 | : []; 431 | } 432 | 433 | if (resultSizeHint != -1) 434 | { 435 | if (resultSizeHint == 0) 436 | { 437 | return []; 438 | } 439 | 440 | var result = new byte[resultSizeHint]; 441 | var memory = result.AsMemory(); 442 | var totalRead = 0; 443 | 444 | if (positionEnd != 0 && positionBegin != positionEnd) 445 | { 446 | var slice = inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 447 | slice.CopyTo(memory); 448 | memory = memory.Slice(slice.Length); 449 | totalRead = slice.Length; 450 | } 451 | 452 | positionBegin = positionEnd = 0; 453 | lastNewLinePosition = -2; 454 | 455 | while (true) 456 | { 457 | var read = SyncRead 458 | ? stream.Read(memory.Span) 459 | : await stream.ReadAsync(memory, cancellationToken).ConfigureAwait(ConfigureAwait); 460 | totalRead += read; 461 | 462 | if (read == 0) 463 | { 464 | break; 465 | } 466 | else 467 | { 468 | memory = memory.Slice(read); 469 | if (memory.Length == 0) 470 | { 471 | // try to check stream is finished. 472 | var finalRead = SyncRead 473 | ? 
stream.Read(result.AsSpan(0, 1)) 474 | : await stream.ReadAsync(result.AsMemory(0, 1), cancellationToken).ConfigureAwait(ConfigureAwait); 475 | 476 | if (finalRead == 0) 477 | { 478 | break; 479 | } 480 | else 481 | { 482 | throw new InvalidOperationException("resultSizeHint is smaller than data size."); 483 | } 484 | } 485 | } 486 | } 487 | 488 | if (result.Length == totalRead) 489 | { 490 | return result; 491 | } 492 | else 493 | { 494 | return result.AsSpan(0, totalRead).ToArray(); 495 | } 496 | } 497 | else 498 | { 499 | using var writer = new SegmentedArrayBufferWriter(); 500 | if (positionEnd != 0 && positionBegin != positionEnd) 501 | { 502 | var slice = inputBuffer.AsMemory(positionBegin, positionEnd - positionBegin); 503 | writer.Write(slice.Span); 504 | } 505 | 506 | positionBegin = positionEnd = 0; 507 | lastNewLinePosition = -2; 508 | 509 | if (!disableBomCheck && checkPreamble && writer.WrittenCount == 0) 510 | { 511 | var memory = writer.GetMemory(); 512 | var readCount = 0; 513 | READ_FOR_BOM: 514 | var read = SyncRead 515 | ? stream.Read(memory.Span) 516 | : await stream.ReadAsync(memory, cancellationToken).ConfigureAwait(ConfigureAwait); 517 | readCount += read; 518 | 519 | if (readCount < 3) 520 | { 521 | memory = memory.Slice(read); 522 | goto READ_FOR_BOM; 523 | } 524 | 525 | memory = writer.GetMemory(); 526 | if (memory.Span.Slice(0, 3).SequenceEqual(Encoding.UTF8.Preamble)) 527 | { 528 | // copy 529 | memory.Span.Slice(3).CopyTo(memory.Span); 530 | writer.Advance(readCount - 3); 531 | } 532 | else 533 | { 534 | writer.Advance(readCount); 535 | } 536 | 537 | checkPreamble = false; 538 | } 539 | 540 | while (true) 541 | { 542 | var read = SyncRead 543 | ? 
stream.Read(writer.GetMemory().Span) 544 | : await stream.ReadAsync(writer.GetMemory(), cancellationToken).ConfigureAwait(ConfigureAwait); 545 | 546 | if (read == 0) 547 | { 548 | break; 549 | } 550 | else 551 | { 552 | writer.Advance(read); 553 | } 554 | } 555 | 556 | endOfStream = true; 557 | return writer.ToArrayAndDispose(); 558 | } 559 | } 560 | 561 | public ValueTask?> ReadLineAsync(CancellationToken cancellationToken = default) 562 | { 563 | if (TryReadLine(out var line)) 564 | { 565 | return new ValueTask?>(line); 566 | } 567 | 568 | return Core(cancellationToken); 569 | 570 | #if !NETSTANDARD 571 | [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))] 572 | #endif 573 | async ValueTask?> Core(CancellationToken cancellationToken) 574 | { 575 | if (await LoadIntoBufferAsync(cancellationToken).ConfigureAwait(ConfigureAwait)) 576 | { 577 | if (TryReadLine(out var line)) 578 | { 579 | return line; 580 | } 581 | } 582 | return null; 583 | } 584 | } 585 | 586 | public async IAsyncEnumerable> ReadAllLinesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) 587 | { 588 | while (await LoadIntoBufferAsync(cancellationToken).ConfigureAwait(ConfigureAwait)) 589 | { 590 | while (TryReadLine(out var line)) 591 | { 592 | yield return line; 593 | } 594 | } 595 | } 596 | 597 | [MethodImpl(MethodImplOptions.AggressiveInlining)] 598 | public bool TryPeek(out byte data) 599 | { 600 | ThrowIfDisposed(); 601 | 602 | if (positionEnd - positionBegin > 0) 603 | { 604 | data = inputBuffer[positionBegin]; 605 | return true; 606 | } 607 | 608 | data = default; 609 | return false; 610 | } 611 | 612 | public ValueTask PeekAsync(CancellationToken cancellationToken = default) 613 | { 614 | if (TryPeek(out var data)) 615 | { 616 | return new ValueTask(data); 617 | } 618 | 619 | return Core(cancellationToken); 620 | 621 | #if !NETSTANDARD 622 | [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))] 623 | #endif 624 | async ValueTask 
Core(CancellationToken cancellationToken)
        {
            // Nothing buffered yet: load at least one byte, then peek without consuming it.
            await LoadIntoBufferAtLeastAsync(1, cancellationToken).ConfigureAwait(ConfigureAwait);
            return inputBuffer[positionBegin];
        }
    }

    /// <summary>
    /// Consumes a single byte from the data that is already buffered.
    /// </summary>
    /// <param name="data">The consumed byte, or <c>default</c> when nothing is buffered.</param>
    /// <returns><c>true</c> when a byte was consumed; <c>false</c> when the buffer holds no unread data.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryRead(out byte data)
    {
        ThrowIfDisposed();

        if (TryPeek(out data))
        {
            positionBegin += 1;
            // The read position moved, so the cached newline scan results are no longer valid.
            lastNewLinePosition = lastExaminedPosition = -1;
            return true;
        }

        data = default;
        return false;
    }

    /// <summary>
    /// Consumes a single byte, loading more data from the stream when nothing is buffered.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the buffer load.</param>
    /// <returns>The consumed byte.</returns>
    public ValueTask<byte> ReadAsync(CancellationToken cancellationToken = default)
    {
        ThrowIfDisposed();

        // Fast path: serve from the buffer without allocating a state machine.
        if (TryRead(out var data))
        {
            return new ValueTask<byte>(data);
        }

        return Core(cancellationToken);

#if !NETSTANDARD
        [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))]
#endif
        async ValueTask<byte> Core(CancellationToken cancellationToken)
        {
            await LoadIntoBufferAtLeastAsync(1, cancellationToken).ConfigureAwait(ConfigureAwait);
            TryRead(out var data);
            return data;
        }
    }

    /// <summary>
    /// Consumes <paramref name="count"/> bytes from the data that is already buffered.
    /// </summary>
    /// <param name="count">Number of bytes to consume.</param>
    /// <param name="block">
    /// A slice over the internal buffer covering the consumed bytes, or <c>default</c> on failure.
    /// </param>
    /// <returns>
    /// <c>true</c> when at least <paramref name="count"/> unread bytes were buffered; otherwise <c>false</c>.
    /// </returns>
    public bool TryReadBlock(int count, out ReadOnlyMemory<byte> block)
    {
        ThrowIfDisposed();

        var loaded = positionEnd - positionBegin;

        // '<=' (not '<'): a request for exactly the buffered amount must succeed, otherwise
        // ReadBlockAsync returns an empty block after LoadIntoBufferAtLeastAsync(count)
        // buffered precisely 'count' bytes.
        if (count <= loaded)
        {
            block = inputBuffer.AsMemory(positionBegin, count);
            positionBegin += count;
            // The read position moved, so the cached newline scan results are no longer valid.
            lastNewLinePosition = lastExaminedPosition = -1;
            return true;
        }

        block = default;
        return false;
    }

    /// <summary>
    /// Consumes <paramref name="count"/> bytes, loading more data from the stream when the buffer is short.
    /// </summary>
    /// <param name="count">Number of bytes to consume.</param>
    /// <param name="cancellationToken">Token forwarded to the buffer load.</param>
    /// <returns>A slice over the internal buffer covering the consumed bytes.</returns>
    public ValueTask<ReadOnlyMemory<byte>> ReadBlockAsync(int count, CancellationToken cancellationToken = default)
    {
        // Fast path: the requested bytes are already buffered.
        if (TryReadBlock(count, out var block))
        {
            return new ValueTask<ReadOnlyMemory<byte>>(block);
        }

        return Core(count, cancellationToken);

#if !NETSTANDARD
        [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))]
#endif
        async ValueTask<ReadOnlyMemory<byte>> Core(int count, CancellationToken cancellationToken)
        {
            await LoadIntoBufferAtLeastAsync(count, cancellationToken).ConfigureAwait(ConfigureAwait);
            TryReadBlock(count, out var block);
            return block;
        }
    }

    /// <summary>
    /// Doubles <paramref name="capacity"/>, clamped to <c>ArrayMaxLength</c>.
    /// </summary>
    static int GetNewSize(int capacity)
    {
        // An overflowing multiplication wraps to a negative value; the uint cast turns that
        // into a huge number, so the same comparison also clamps the overflow case.
        int newCapacity = unchecked(capacity * 2);
        if ((uint)newCapacity > ArrayMaxLength) newCapacity = ArrayMaxLength;
        return newCapacity;
    }

    /// <summary>
    /// Finds the end of the first line in <paramref name="span"/>.
    /// </summary>
    /// <param name="span">Bytes to scan.</param>
    /// <param name="examined">
    /// Index of the '\n' that was found, or the last index of <paramref name="span"/> when none was found.
    /// </param>
    /// <returns>
    /// Index where the line content ends (the '\r' of a CRLF pair, otherwise the '\n'),
    /// or -1 when <paramref name="span"/> contains no '\n'.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static int IndexOfNewline(ReadOnlySpan<byte> span, out int examined)
    {
        // Only LF (\n) and CRLF (\r\n) are supported.
        var indexOfNewLine = span.IndexOf((byte)'\n');
        if (indexOfNewLine == -1)
        {
            examined = span.Length - 1;
            return -1;
        }
        examined = indexOfNewLine;

        if (indexOfNewLine >= 1 && span[indexOfNewLine - 1] == '\r')
        {
            indexOfNewLine--; // case of '\r\n'
        }

        return indexOfNewLine;
    }

    // Reset API like Utf8JsonWriter

    /// <summary>
    /// Clears the reader state. The pooled buffer is returned by <c>ClearState</c>, so
    /// <see cref="Reset(Stream)"/> must be called before reading again.
    /// </summary>
    public void Reset()
    {
        ThrowIfDisposed();
        ClearState();
    }

    /// <summary>
    /// Clears the reader state, rents a fresh buffer and switches to <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The stream to read from next.</param>
    public void Reset(Stream stream)
    {
        ThrowIfDisposed();
        ClearState();

        this.inputBuffer = ArrayPool<byte>.Shared.Rent(Math.Max(bufferSize, MinBufferSize));
        this.stream = stream;
    }

    /// <summary>
    /// Releases the reader's state via <c>ClearState</c>. Calling it more than once is a no-op.
    /// </summary>
    public void Dispose()
    {
        if (isDisposed) return;

        isDisposed = true;
        ClearState();
    }

    /// <summary>
    /// Asynchronously disposes the stream (unless <c>leaveOpen</c> is set) and then clears the
    /// remaining state. Calling it more than once is a no-op.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (isDisposed) return;

        isDisposed = true;
        if (!leaveOpen && stream != null)
        {
            await stream.DisposeAsync().ConfigureAwait(ConfigureAwait);
            // Null it out so ClearState does not dispose the stream a second time.
            stream = null!;
        }
        ClearState();
    }

    void ClearState()
    {
        if (inputBuffer != null)
        {
            ArrayPool<byte>.Shared.Return(inputBuffer);
            inputBuffer = null!;
        }

        if (!leaveOpen && stream != null)
        {
            stream.Dispose();
            stream = null!;
        }

        positionBegin = positionEnd = 0;
endOfStream = false; 787 | checkPreamble = skipBom; 788 | lastNewLinePosition = lastExaminedPosition = -2; 789 | } 790 | 791 | void ThrowIfDisposed() 792 | { 793 | if (isDisposed) throw new ObjectDisposedException(""); 794 | } 795 | } 796 | -------------------------------------------------------------------------------- /src/Utf8StreamReader/Utf8StreamReader.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netstandard2.1;net6.0;net8.0 5 | 12 6 | enable 7 | enable 8 | Cysharp.IO 9 | 10 | 11 | true 12 | 1701;1702;1591;1573 13 | 14 | 15 | string 16 | Utf8 based StreamReader for high performance text processing. 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | all 33 | runtime; build; native; contentfiles; analyzers; buildtransitive 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/Utf8StreamReader/Utf8TextReader.cs: -------------------------------------------------------------------------------- 1 | using System.Buffers; 2 | using System.Runtime.CompilerServices; 3 | using System.Text; 4 | 5 | namespace Cysharp.IO; 6 | 7 | public sealed class Utf8TextReader : IDisposable, IAsyncDisposable 8 | { 9 | const int DefaultCharBufferSize = 1024; // buffer per line. 
10 | const int MinBufferSize = 128; 11 | 12 | readonly Utf8StreamReader reader; 13 | readonly int bufferSize; 14 | char[] outputBuffer; 15 | bool isDisposed; 16 | 17 | public Utf8TextReader(Utf8StreamReader reader) 18 | : this(reader, DefaultCharBufferSize) 19 | { 20 | } 21 | 22 | public Utf8TextReader(Utf8StreamReader reader, int bufferSize) 23 | { 24 | this.reader = reader; 25 | this.outputBuffer = ArrayPool.Shared.Rent(Math.Max(bufferSize, MinBufferSize)); 26 | this.bufferSize = bufferSize; 27 | } 28 | 29 | public Stream BaseStream => reader.BaseStream; 30 | public Utf8StreamReader BaseReader => reader; 31 | 32 | public ValueTask LoadIntoBufferAsync(CancellationToken cancellationToken = default) 33 | { 34 | return reader.LoadIntoBufferAsync(cancellationToken); 35 | } 36 | 37 | public bool TryReadLine(out ReadOnlyMemory line) 38 | { 39 | if (!reader.TryReadLine(out var utf8Line)) 40 | { 41 | line = default; 42 | return false; 43 | } 44 | 45 | var maxCharCount = Encoding.UTF8.GetMaxCharCount(utf8Line.Length); 46 | if (outputBuffer.Length < maxCharCount) 47 | { 48 | // need new buffer 49 | ArrayPool.Shared.Return(outputBuffer); 50 | outputBuffer = ArrayPool.Shared.Rent(maxCharCount); 51 | } 52 | 53 | var size = Encoding.UTF8.GetChars(utf8Line.Span, outputBuffer); 54 | line = outputBuffer.AsMemory(0, size); 55 | return true; 56 | } 57 | 58 | public ValueTask?> ReadLineAsync(CancellationToken cancellationToken = default) 59 | { 60 | if (TryReadLine(out var line)) 61 | { 62 | return new ValueTask?>(line); 63 | } 64 | 65 | return Core(cancellationToken); 66 | 67 | #if !NETSTANDARD 68 | [AsyncMethodBuilder(typeof(PoolingAsyncValueTaskMethodBuilder<>))] 69 | #endif 70 | async ValueTask?> Core(CancellationToken cancellationToken) 71 | { 72 | if (await LoadIntoBufferAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait)) 73 | { 74 | if (TryReadLine(out var line)) 75 | { 76 | return line; 77 | } 78 | } 79 | return null; 80 | } 81 | } 82 | 83 | public async 
IAsyncEnumerable> ReadAllLinesAsync([EnumeratorCancellation] CancellationToken cancellationToken = default) 84 | { 85 | while (await LoadIntoBufferAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait)) 86 | { 87 | while (TryReadLine(out var line)) 88 | { 89 | yield return line; 90 | } 91 | } 92 | } 93 | 94 | // Utf8TextReader is a helper class for ReadOnlyMemory and string generation that internally holds Utf8StreamReader 95 | public async ValueTask ReadToEndAsync(CancellationToken cancellationToken = default) 96 | { 97 | // Using a method similar to .NET 9 LINQ to Objects's ToArray improvement, returns a structure optimized for gap-free sequential expansion 98 | // StreamReader.ReadToEnd copies the buffer to a StringBuilder, but this implementation holds char[] chunks(char[][]) without copying. 99 | using var writer = new SegmentedArrayBufferWriter(); 100 | var decoder = Encoding.UTF8.GetDecoder(); 101 | 102 | // Utf8StreamReader.ReadToEndChunksAsync returns the internal buffer ReadOnlyMemory as an asynchronous sequence upon each read completion 103 | await foreach (var chunk in reader.ReadToEndChunksAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait)) 104 | { 105 | var input = chunk; 106 | while (input.Length != 0) 107 | { 108 | // The Decoder directly writes from the read buffer to the char[] buffer 109 | decoder.Convert(input.Span, writer.GetMemory().Span, flush: false, out var bytesUsed, out var charsUsed, out var completed); 110 | input = input.Slice(bytesUsed); 111 | writer.Advance(charsUsed); 112 | } 113 | } 114 | 115 | decoder.Convert([], writer.GetMemory().Span, flush: true, out _, out var finalCharsUsed, out _); 116 | writer.Advance(finalCharsUsed); 117 | 118 | // Directly generate a string from the char[][] buffer using String.Create 119 | return string.Create(writer.WrittenCount, writer, static (stringSpan, writer) => 120 | { 121 | foreach (var item in writer.GetSegmentsAndDispose()) 122 | { 123 | item.Span.CopyTo(stringSpan); 124 | 
stringSpan = stringSpan.Slice(item.Length); 125 | } 126 | }); 127 | } 128 | 129 | public void Reset() 130 | { 131 | ThrowIfDisposed(); 132 | ClearState(); 133 | reader.Reset(); 134 | } 135 | 136 | public void Reset(Stream stream) 137 | { 138 | ThrowIfDisposed(); 139 | ClearState(); 140 | 141 | outputBuffer = ArrayPool.Shared.Rent(Math.Max(bufferSize, MinBufferSize)); 142 | reader.Reset(stream); 143 | } 144 | 145 | public void Dispose() 146 | { 147 | if (isDisposed) return; 148 | 149 | isDisposed = true; 150 | ClearState(); 151 | reader.Dispose(); 152 | } 153 | 154 | public ValueTask DisposeAsync() 155 | { 156 | if (isDisposed) return default; 157 | 158 | isDisposed = true; 159 | ClearState(); 160 | return reader.DisposeAsync(); 161 | } 162 | 163 | void ClearState() 164 | { 165 | if (outputBuffer != null) 166 | { 167 | ArrayPool.Shared.Return(outputBuffer); 168 | outputBuffer = null!; 169 | } 170 | } 171 | 172 | void ThrowIfDisposed() 173 | { 174 | if (isDisposed) throw new ObjectDisposedException(""); 175 | } 176 | } 177 | 178 | public static class Utf8StreamReaderExtensions 179 | { 180 | public static Utf8TextReader AsTextReader(this Utf8StreamReader reader) => new Utf8TextReader(reader); 181 | public static Utf8TextReader AsTextReader(this Utf8StreamReader reader, int bufferSize) => new Utf8TextReader(reader, bufferSize); 182 | } 183 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/FakeMemoryStream.cs: -------------------------------------------------------------------------------- 1 | #pragma warning disable CS1998 2 | 3 | using System.Runtime.CompilerServices; 4 | using System.Runtime.InteropServices; 5 | 6 | namespace Utf8StreamReaderTests; 7 | 8 | internal class FakeMemoryStream : Stream 9 | { 10 | #region NotImplemented 11 | 12 | public override bool CanRead => true; 13 | 14 | public override bool CanSeek => throw new NotImplementedException(); 15 | 16 | public override bool CanWrite => throw 
new NotImplementedException(); 17 | 18 | public override long Length => throw new NotImplementedException(); 19 | 20 | public override long Position { get => throw new NotImplementedException(); set => throw new NotImplementedException(); } 21 | 22 | public override void Flush() 23 | { 24 | throw new NotImplementedException(); 25 | } 26 | 27 | public override int Read(byte[] buffer, int offset, int count) 28 | { 29 | throw new NotImplementedException(); 30 | } 31 | 32 | public override long Seek(long offset, SeekOrigin origin) 33 | { 34 | throw new NotImplementedException(); 35 | } 36 | 37 | public override void SetLength(long value) 38 | { 39 | throw new NotImplementedException(); 40 | } 41 | 42 | public override void Write(byte[] buffer, int offset, int count) 43 | { 44 | throw new NotImplementedException(); 45 | } 46 | 47 | #endregion 48 | 49 | public bool IsDisposed { get; set; } 50 | 51 | protected override void Dispose(bool disposing) 52 | { 53 | IsDisposed = true; 54 | } 55 | 56 | Memory[] lastAddedData = default!; 57 | Queue>> data = new(); 58 | 59 | public void AddMemory(params Memory[] memories) 60 | { 61 | foreach (Memory mem in memories) 62 | { 63 | if (mem.Length == 0) throw new ArgumentException("Length 0 is not allowed."); 64 | data.Enqueue(new(mem)); 65 | } 66 | this.lastAddedData = memories; 67 | } 68 | 69 | public void Restart() 70 | { 71 | data.Clear(); 72 | AddMemory(lastAddedData); 73 | } 74 | 75 | public override async ValueTask ReadAsync(Memory buffer, CancellationToken cancellationToken = default) 76 | { 77 | if (data.Count == 0) 78 | { 79 | return 0; 80 | } 81 | 82 | var memory = data.Peek().Value; 83 | 84 | var copySize = Math.Min(memory.Length, buffer.Length); 85 | memory.Slice(0, copySize).CopyTo(buffer); 86 | var newMemory = memory.Slice(copySize); 87 | if (newMemory.Length == 0) 88 | { 89 | data.Dequeue(); 90 | } 91 | else 92 | { 93 | data.Peek().Value = newMemory; 94 | } 95 | 96 | return copySize; 97 | } 98 | } 99 | 
--------------------------------------------------------------------------------
/tests/Utf8StreamReader.Tests/FileReadTest.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace Utf8StreamReaderTests;

/// <summary>
/// Tests for the <c>Utf8StreamReader</c> constructor that takes a file path.
/// </summary>
public class FileReadTest(ITestOutputHelper Console)
{
    [Fact]
    public async Task ReadPath()
    {
        // Resolve file1.txt relative to the test binaries. Assembly.FullName is a display name
        // ("Name, Version=..."), not a path, so GetDirectoryName on it yields "" and the test
        // only worked when the current directory happened to be the output directory.
        var path1 = Path.Combine(AppContext.BaseDirectory, "file1.txt");
        var actual = await Utf8StreamReaderResultAsync(path1);

        actual.Should().Equal([
            "abcde",
            "fgh",
            "ijklmnopqrs"
        ]);
    }

    // Reads every line of the file at 'path' and decodes each one as a UTF-8 string.
    static async Task<string[]> Utf8StreamReaderResultAsync(string path)
    {
        using var reader = new Utf8StreamReader(path);
        var l = new List<string>();
        await foreach (var item in reader.ReadAllLinesAsync())
        {
            l.Add(Encoding.UTF8.GetString(item.Span));
        }
        return l.ToArray();
    }
}

--------------------------------------------------------------------------------
/tests/Utf8StreamReader.Tests/ReadBlockTest.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Utf8StreamReaderTests;

public class ReadBlockTest
{
    [Fact]
    public async Task LineAndBlock()
    {
        var ms = new FakeMemoryStream();

        ms.AddMemory(
            GetBytes("a"),
            GetBytes("bc\n"),
            GetBytes("def\r\n"),
            GetBytes("ghij\n"),
            GetBytes("zklmno\r\n\n"));

        //var sr = new StreamReader(ms);
        //var a = await sr.ReadLineAsync();
        //var buf = new char[1024];
        //await sr.ReadBlockAsync(buf.AsMemory(0, 10));


        var reader = new
Utf8StreamReader(ms); 30 | ToString((await reader.ReadLineAsync()).Value).Should().Be("abc"); 31 | 32 | ToString((await reader.ReadBlockAsync(2))).Should().Be("de"); 33 | 34 | ToString((await reader.ReadLineAsync()).Value).Should().Be("f"); 35 | 36 | ToString((await reader.ReadBlockAsync(8))).Should().Be("ghij\nzkl"); 37 | 38 | ToString((await reader.ReadLineAsync()).Value).Should().Be("mno"); 39 | 40 | 41 | } 42 | 43 | static byte[] GetBytes(string x) 44 | { 45 | return Encoding.UTF8.GetBytes(x); 46 | } 47 | 48 | static string ToString(ReadOnlyMemory buffer) 49 | { 50 | return Encoding.UTF8.GetString(buffer.Span); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/ReadTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace Utf8StreamReaderTests; 8 | 9 | public class ReadTest 10 | { 11 | [Fact] 12 | public async Task ReadToEndAsync() 13 | { 14 | // with bom 15 | { 16 | var bom = Encoding.UTF8.GetPreamble(); 17 | 18 | var ms = new FakeMemoryStream(); 19 | 20 | ms.AddMemory( 21 | new byte[] { bom[0] }, 22 | new byte[] { bom[1] }, 23 | new byte[] { bom[2], (byte)'Z' }, 24 | GetBytes("a"), 25 | GetBytes("bc\n"), 26 | GetBytes("def\r\n"), 27 | GetBytes("ghij\n"), 28 | GetBytes("zklmno\r\n\n")); 29 | 30 | var sr = new Utf8StreamReader(ms); 31 | var result = await sr.ReadToEndAsync(disableBomCheck: false); 32 | 33 | var expected = "Zabc\ndef\r\nghij\nzklmno\r\n\n"; 34 | var actual = ToString(result); 35 | 36 | actual.Should().Be(expected); 37 | } 38 | // no bom 39 | { 40 | var ms = new FakeMemoryStream(); 41 | 42 | ms.AddMemory( 43 | new byte[] { (byte)'Z' }, 44 | GetBytes("a"), 45 | GetBytes("bc\n"), 46 | GetBytes("def\r\n"), 47 | GetBytes("ghij\n"), 48 | GetBytes("zklmno\r\n\n")); 49 | 50 | var sr = new 
Utf8StreamReader(ms); 51 | var result = await sr.ReadToEndAsync(); 52 | 53 | var expected = "Zabc\ndef\r\nghij\nzklmno\r\n\n"; 54 | var actual = ToString(result); 55 | 56 | actual.Should().Be(expected); 57 | } 58 | } 59 | 60 | [Fact] 61 | public async Task ReadToEndChunks() 62 | { 63 | var bom = Encoding.UTF8.GetPreamble(); 64 | 65 | var ms = new FakeMemoryStream(); 66 | 67 | ms.AddMemory( 68 | new byte[] { bom[0] }, 69 | new byte[] { bom[1] }, 70 | new byte[] { bom[2], (byte)'Z' }, 71 | GetBytes("a"), 72 | GetBytes("bc\n"), 73 | GetBytes("def\r\n"), 74 | GetBytes("ghij\n"), 75 | GetBytes("zklmno\r\n\n")); 76 | 77 | var sr = new Utf8StreamReader(ms); 78 | 79 | var list = new List(); 80 | await foreach (var item in sr.ReadToEndChunksAsync()) 81 | { 82 | list.Add(item.ToArray()); 83 | } 84 | 85 | ToString(list[0]).Should().Be("Z"); 86 | ToString(list[1]).Should().Be("a"); 87 | ToString(list[2]).Should().Be("bc\n"); 88 | ToString(list[3]).Should().Be("def\r\n"); 89 | ToString(list[4]).Should().Be("ghij\n"); 90 | ToString(list[5]).Should().Be("zklmno\r\n\n"); 91 | } 92 | 93 | [Fact] 94 | public async Task TestPeek() 95 | { 96 | var ms = new FakeMemoryStream(); 97 | 98 | ms.AddMemory( 99 | GetBytes("a"), 100 | GetBytes("bc\n"), 101 | GetBytes("def\r\n"), 102 | GetBytes("ghij\n"), 103 | GetBytes("zklmno\r\n\n")); 104 | 105 | var sr = new Utf8StreamReader(ms); 106 | 107 | sr.TryPeek(out var data).Should().BeFalse(); 108 | (await sr.PeekAsync()).Should().Be((byte)'a'); 109 | sr.TryPeek(out data).Should().BeTrue(); 110 | data.Should().Be((byte)'a'); 111 | 112 | ToString(await sr.ReadLineAsync()).Should().Be("abc"); 113 | 114 | (await sr.PeekAsync()).Should().Be((byte)'d'); 115 | 116 | ToString(await sr.ReadLineAsync()).Should().Be("def"); 117 | } 118 | 119 | // LoadIntoBufferAtLeastAsync 120 | // TryRead 121 | // ReadAsync 122 | 123 | [Fact] 124 | public async Task TestRead() 125 | { 126 | var ms = new FakeMemoryStream(); 127 | 128 | ms.AddMemory( 129 | GetBytes("a"), 130 | 
GetBytes("bc\n"), 131 | GetBytes("def\r\n"), 132 | GetBytes("ghij\n"), 133 | GetBytes("zklmno\r\n\n")); 134 | 135 | var sr = new Utf8StreamReader(ms); 136 | 137 | await sr.LoadIntoBufferAtLeastAsync(2); 138 | 139 | sr.TryRead(out var a).Should().BeTrue(); 140 | a.Should().Be((byte)'a'); 141 | 142 | sr.TryRead(out var b).Should().BeTrue(); 143 | b.Should().Be((byte)'b'); 144 | 145 | sr.TryRead(out var c).Should().BeTrue(); 146 | c.Should().Be((byte)'c'); 147 | 148 | sr.TryRead(out var n).Should().BeTrue(); 149 | n.Should().Be((byte)'\n'); 150 | 151 | sr.TryRead(out _).Should().BeFalse(); 152 | 153 | (await sr.ReadAsync()).Should().Be((byte)'d'); 154 | } 155 | 156 | static byte[] GetBytes(string x) 157 | { 158 | return Encoding.UTF8.GetBytes(x); 159 | } 160 | 161 | static string ToString(ReadOnlyMemory? buffer) 162 | { 163 | if (buffer == null) return null!; 164 | return Encoding.UTF8.GetString(buffer.Value.Span); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/ReadToEndTest.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace Utf8StreamReaderTests; 4 | 5 | public class ReadToEndTest 6 | { 7 | 8 | [Fact] 9 | public async Task AfterRead() 10 | { 11 | var ms = new FakeMemoryStream(); 12 | 13 | ms.AddMemory( 14 | GetBytes("a"), 15 | GetBytes("bc\n"), 16 | GetBytes("def\r\n"), 17 | GetBytes("ghij\n"), 18 | GetBytes("zklmno\r\n\n")); 19 | 20 | var all = await new Utf8StreamReader(ms).ReadToEndAsync(); 21 | 22 | ms.Restart(); 23 | 24 | var reader = new Utf8StreamReader(ms); 25 | await reader.ReadLineAsync(); 26 | 27 | var expected = "def\r\nghij\nzklmno\r\n\n"; 28 | 29 | var actual = await reader.ReadToEndAsync(resultSizeHint: all.Length); 30 | 31 | Encoding.UTF8.GetString(actual).Should().Be(expected); 32 | } 33 | 34 | [Fact] 35 | public async Task SmallHint() 36 | { 37 | var ms = new FakeMemoryStream(); 38 | 39 | 
ms.AddMemory( 40 | GetBytes("a"), 41 | GetBytes("bc\n"), 42 | GetBytes("def\r\n"), 43 | GetBytes("ghij\n"), 44 | GetBytes("zklmno\r\n\n")); 45 | 46 | var reader = new Utf8StreamReader(ms); 47 | 48 | var expected = "abc\ndef\r\nghij\nzklmno\r\n\n"; 49 | 50 | await Assert.ThrowsAsync(async () => 51 | { 52 | var actual = await reader.ReadToEndAsync(resultSizeHint: Encoding.UTF8.GetByteCount(expected) - 2); 53 | }); 54 | } 55 | 56 | [Fact] 57 | public async Task Just() 58 | { 59 | var ms = new FakeMemoryStream(); 60 | 61 | ms.AddMemory( 62 | GetBytes("a"), 63 | GetBytes("bc\n"), 64 | GetBytes("def\r\n"), 65 | GetBytes("ghij\n"), 66 | GetBytes("zklmno\r\n\n")); 67 | 68 | var reader = new Utf8StreamReader(ms); 69 | 70 | var expected = "abc\ndef\r\nghij\nzklmno\r\n\n"; 71 | 72 | var actual = await reader.ReadToEndAsync(resultSizeHint: expected.Length); 73 | Encoding.UTF8.GetString(actual).Should().Be(expected); 74 | } 75 | 76 | static byte[] GetBytes(string x) 77 | { 78 | return Encoding.UTF8.GetBytes(x); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/SegmentedArrayBufferWriterTest.cs: -------------------------------------------------------------------------------- 1 | namespace Utf8StreamReaderTests; 2 | 3 | public class SegmentedArrayBufferWriterTest 4 | { 5 | [Fact(Skip = "Reduce memory usage in CI")] 6 | public void AllocateFull() 7 | { 8 | var writer = new SegmentedArrayBufferWriter(); 9 | 10 | var memCount = 8192; 11 | long total = 0; 12 | for (int i = 0; i < 18; i++) 13 | { 14 | var mem = writer.GetMemory(); 15 | mem.Length.Should().Be(memCount); 16 | total += mem.Length; 17 | memCount *= 2; 18 | writer.Advance(mem.Length); 19 | } 20 | 21 | Memory lastMemory = writer.GetMemory(); 22 | (total).Should().BeLessThan(Array.MaxLength); 23 | (total + lastMemory.Length).Should().BeGreaterThan(Array.MaxLength); 24 | } 25 | } 26 | 
-------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/Tests.cs: -------------------------------------------------------------------------------- 1 | using System.Buffers; 2 | using System.Text; 3 | 4 | namespace Utf8StreamReaderTests; 5 | 6 | public class Tests(ITestOutputHelper Console) 7 | { 8 | [Fact] 9 | public async Task Standard() 10 | { 11 | var originalStrings = """ 12 | foo 13 | bare 14 | 15 | baz boz too 16 | """; 17 | 18 | var stream = CreateStringStream(originalStrings); 19 | 20 | 21 | using var reader = new Utf8StreamReader(stream); 22 | 23 | var sb = new StringBuilder(); 24 | 25 | bool isFirst = true; 26 | ReadOnlyMemory? line; 27 | while ((line = await reader.ReadLineAsync()) != null) 28 | { 29 | if (isFirst) isFirst = false; 30 | else sb.AppendLine(); 31 | 32 | Console.WriteLine(Encoding.UTF8.GetString(line.Value.Span)); 33 | sb.Append(Encoding.UTF8.GetString(line.Value.Span)); 34 | } 35 | 36 | sb.ToString().Should().Be(originalStrings.ToString()); 37 | } 38 | 39 | [Fact] 40 | public async Task BOM() 41 | { 42 | var bytes = Encoding.UTF8.GetPreamble().Concat(""" 43 | foo 44 | bare 45 | 46 | baz boz too 47 | """u8.ToArray()).ToArray(); 48 | 49 | var bomStrings = Encoding.UTF8.GetString(bytes); 50 | 51 | var stream = CreateStringStream(bomStrings); 52 | 53 | var originalStrings = """ 54 | foo 55 | bare 56 | 57 | baz boz too 58 | """; 59 | 60 | using var reader = new Utf8StreamReader(stream); 61 | 62 | var sb = new StringBuilder(); 63 | 64 | bool isFirst = true; 65 | ReadOnlyMemory? 
line; 66 | while ((line = await reader.ReadLineAsync()) != null) 67 | { 68 | if (isFirst) isFirst = false; 69 | else sb.AppendLine(); 70 | 71 | Console.WriteLine(Encoding.UTF8.GetString(line.Value.Span)); 72 | sb.Append(Encoding.UTF8.GetString(line.Value.Span)); 73 | } 74 | 75 | sb.ToString().Should().Be(originalStrings.ToString()); 76 | } 77 | 78 | [Fact] 79 | public async Task NewLineCheck() 80 | { 81 | { 82 | var ms = new FakeMemoryStream(); 83 | 84 | ms.AddMemory( 85 | GetBytes("a"), 86 | GetBytes("bc\n"), 87 | GetBytes("def\r\n"), 88 | GetBytes("ghij\n"), 89 | GetBytes("jklmno")); 90 | 91 | var expected = await StreamReaderResultAsync(ms); 92 | 93 | ms.Restart(); 94 | 95 | var actual = await Utf8StreamReaderResultAsync(ms); 96 | 97 | actual.Should().Equal(expected); 98 | } 99 | { 100 | var ms = new FakeMemoryStream(); 101 | 102 | ms.AddMemory( 103 | GetBytes("a"), 104 | GetBytes("bc\n"), 105 | GetBytes("def\r\n"), 106 | GetBytes("ghij\n"), 107 | GetBytes("jklmno\r\n")); // + last new line 108 | 109 | var expected = await StreamReaderResultAsync(ms); 110 | 111 | ms.Restart(); 112 | 113 | var actual = await Utf8StreamReaderResultAsync(ms); 114 | 115 | actual.Should().Equal(expected); 116 | } 117 | { 118 | var ms = new FakeMemoryStream(); 119 | 120 | ms.AddMemory( 121 | GetBytes("a"), 122 | GetBytes("bc\n"), 123 | GetBytes("def\r\n"), 124 | GetBytes("ghij\n"), 125 | GetBytes("jklmno\r\n\n")); // + last new line x2 126 | 127 | var expected = await StreamReaderResultAsync(ms); 128 | 129 | ms.Restart(); 130 | 131 | var actual = await Utf8StreamReaderResultAsync(ms); 132 | 133 | actual.Should().Equal(expected); 134 | } 135 | } 136 | 137 | [Fact] 138 | public async Task BOM2() 139 | { 140 | { 141 | var ms = new FakeMemoryStream(); 142 | 143 | // small bom 144 | ms.AddMemory( 145 | Encoding.UTF8.GetPreamble(), 146 | GetBytes("a")); 147 | 148 | var expected = await StreamReaderResultAsync(ms); 149 | 150 | ms.Restart(); 151 | 152 | var actual = await 
Utf8StreamReaderResultAsync(ms); 153 | 154 | actual.Should().Equal(expected); 155 | } 156 | { 157 | var ms = new FakeMemoryStream(); 158 | 159 | // long bom 160 | ms.AddMemory( 161 | Encoding.UTF8.GetPreamble(), 162 | GetBytes("abcdefghijklmnopqrastu")); 163 | 164 | var expected = await StreamReaderResultAsync(ms); 165 | 166 | ms.Restart(); 167 | 168 | var actual = await Utf8StreamReaderResultAsync(ms); 169 | 170 | actual.Should().Equal(expected); 171 | } 172 | 173 | // yes bom 174 | { 175 | var ms = new FakeMemoryStream(); 176 | 177 | ms.AddMemory( 178 | Encoding.UTF8.GetPreamble(), 179 | GetBytes("あいうえお")); // japanese hiragana. 180 | 181 | var reader = new Utf8StreamReader(ms) { SkipBom = false }; 182 | var line = await reader.ReadLineAsync(); 183 | line.Value.Slice(0, 3).Span.SequenceEqual(Encoding.UTF8.Preamble).Should().BeTrue(); 184 | line.Value.Slice(3).Span.SequenceEqual(GetBytes("あいうえお")).Should().BeTrue(); 185 | } 186 | } 187 | 188 | [Fact] 189 | public async Task EmptyString() 190 | { 191 | { 192 | var ms = new MemoryStream(); 193 | 194 | var expected = await StreamReaderResultAsync(ms); 195 | 196 | ms = new MemoryStream(); 197 | 198 | var actual = await Utf8StreamReaderResultAsync(ms); 199 | 200 | actual.Should().Equal(expected); 201 | } 202 | // bom only 203 | { 204 | var ms = new FakeMemoryStream(); 205 | 206 | // small bom 207 | ms.AddMemory(Encoding.UTF8.GetPreamble()); 208 | 209 | var expected = await StreamReaderResultAsync(ms); 210 | 211 | ms.Restart(); 212 | 213 | var actual = await Utf8StreamReaderResultAsync(ms); 214 | 215 | actual.Should().Equal(expected); 216 | } 217 | // newline only 218 | { 219 | var ms = new FakeMemoryStream(); 220 | 221 | // small bom 222 | ms.AddMemory(GetBytes("\r\n")); 223 | 224 | var expected = await StreamReaderResultAsync(ms); 225 | 226 | ms.Restart(); 227 | 228 | var actual = await Utf8StreamReaderResultAsync(ms); 229 | 230 | actual.Should().Equal(expected); 231 | } 232 | // newline only 2 233 | { 234 | var ms = 
new FakeMemoryStream(); 235 | 236 | // small bom 237 | ms.AddMemory(GetBytes("\n\r\n")); 238 | 239 | var expected = await StreamReaderResultAsync(ms); 240 | 241 | ms.Restart(); 242 | 243 | var actual = await Utf8StreamReaderResultAsync(ms); 244 | 245 | actual.Should().Equal(expected); 246 | } 247 | } 248 | 249 | [Fact] 250 | public async Task SmallString() 251 | { 252 | var ms = new FakeMemoryStream(); 253 | 254 | ms.AddMemory(GetBytes("z")); 255 | 256 | var expected = await StreamReaderResultAsync(ms); 257 | 258 | ms.Restart(); 259 | 260 | var actual = await Utf8StreamReaderResultAsync(ms); 261 | 262 | actual.Should().Equal(expected); 263 | } 264 | 265 | // minbuffer = 1024 266 | 267 | [Fact] 268 | public async Task Resize() 269 | { 270 | var bufferSize = 1024; 271 | 272 | { 273 | var ms = new FakeMemoryStream(); 274 | 275 | ms.AddMemory( 276 | GetBytes("!!!\r\n"), // first line consume 277 | GetBytes(new string('a', 1018)), 278 | GetBytes("bcdefghijklmnopqrstuvwxyz\r\n"), 279 | GetBytes("あいうえおかきくけこ\n"), 280 | GetBytes("ABCDEFGHIJKLMN") 281 | ); 282 | 283 | var expected = await StreamReaderResultAsync(ms); 284 | 285 | ms.Restart(); 286 | 287 | var actual = await Utf8StreamReaderResultAsync(ms, bufferSize); 288 | 289 | actual[1].Should().Be(expected[1]); 290 | actual.Should().Equal(expected); 291 | } 292 | { 293 | var ms = new FakeMemoryStream(); 294 | 295 | ms.AddMemory( 296 | GetBytes("!!!\r\n"), // first line consume 297 | GetBytes(new string('a', 1018)), 298 | GetBytes("bcdefghijklmnopqrstuvwxyz\r\n"), 299 | GetBytes("あいうえおかきくけこ\n"), 300 | GetBytes("ABCDEFGHIJKLMN") 301 | ); 302 | 303 | var expected = await StreamReaderResultAsync(ms); 304 | 305 | ms.Restart(); 306 | 307 | var actual = await Utf8StreamReaderResultAsync(ms, bufferSize); 308 | 309 | actual[1].Should().Be(expected[1]); 310 | actual.Should().Equal(expected); 311 | } 312 | } 313 | 314 | [Fact] 315 | public async Task OnlySlice() 316 | { 317 | var bufferSize = 1024; 318 | 319 | { 320 | var ms = new 
FakeMemoryStream(); 321 | 322 | ms.AddMemory( 323 | GetBytes(new string('a', 1018) + "\r\nbcdefgh"), 324 | GetBytes("あいうえおかきくけこ\n"), 325 | GetBytes("ABCDEFGHIJKLMN") 326 | ); 327 | 328 | var expected = await StreamReaderResultAsync(ms); 329 | 330 | ms.Restart(); 331 | 332 | var actual = await Utf8StreamReaderResultAsync(ms, bufferSize); 333 | 334 | actual[1].Should().Be(expected[1]); 335 | actual.Should().Equal(expected); 336 | } 337 | } 338 | /* HugeBuffer: a 30000-character first line, far longer than the 1024 size argument, followed by shorter lines; every line must match StreamReader. */ 339 | [Fact] 340 | public async Task HugeBuffer() 341 | { 342 | var bufferSize = 1024; 343 | 344 | { 345 | var ms = new FakeMemoryStream(); 346 | 347 | ms.AddMemory( 348 | GetBytes(new string('a', 30000) + "\r\nb"), 349 | GetBytes("あいうえおかきくけこ\n"), 350 | GetBytes("ABCDEFGHIJKLMN") 351 | ); 352 | 353 | var expected = await StreamReaderResultAsync(ms); 354 | 355 | ms.Restart(); 356 | 357 | var actual = await Utf8StreamReaderResultAsync(ms, bufferSize); 358 | 359 | actual[1].Should().Be(expected[1]); 360 | actual.Should().Equal(expected); 361 | } 362 | } 363 | /* NewLineTrimmedAtBufferBoundary: input sized so that the second CRLF pair is intended to be split across two reads (see the Buffer 1 / Buffer 2 layout below); expects exactly three lines with no stray terminator characters. */ 364 | [Fact] 365 | public async Task NewLineTrimmedAtBufferBoundary() 366 | { 367 | // Buffer 1: aaa....\r\nasdf\r 368 | // Buffer 2: \nasdf 369 | var ms2 = CreateStringStream( 370 | new string('a', 1017) + 371 | "\r\nasdf" + 372 | "\r\nasdf"); 373 | 374 | var actual = await Utf8StreamReaderResultAsync(ms2, 1024); 375 | 376 | string[] expected = 377 | [ 378 | new string('a', 1017), 379 | "asdf", 380 | "asdf", 381 | ]; 382 | 383 | actual[1].Should().Be(expected[1]); 384 | actual.Should().Equal(expected); 385 | } 386 | /* Reads every line from ms through Utf8StreamReader (passing the optional size argument when supplied) and returns the lines decoded as UTF-8 strings. */ 387 | static async Task<string[]> Utf8StreamReaderResultAsync(Stream ms, int? size = null) 388 | { 389 | var reader = (size == null) ?
new Utf8StreamReader(ms) : new Utf8StreamReader(ms, size.Value); 390 | var l = new List<string>(); 391 | await foreach (var item in reader.ReadAllLinesAsync()) 392 | { 393 | l.Add(GetString(item)); 394 | } 395 | return l.ToArray(); 396 | } 397 | /* Reference result: collects every line returned by StreamReader.ReadLineAsync until it yields null. */ 398 | static async Task<string[]> StreamReaderResultAsync(Stream ms) 399 | { 400 | var reader = new StreamReader(ms); 401 | var l = new List<string>(); 402 | string? s; 403 | while ((s = (await reader.ReadLineAsync())) != null) 404 | { 405 | l.Add(s); 406 | } 407 | return l.ToArray(); 408 | } 409 | /* Decodes a UTF-8 byte buffer into a string. */ 410 | static string GetString(ReadOnlyMemory<byte> x) 411 | { 412 | return Encoding.UTF8.GetString(x.Span); 413 | } 414 | /* Encodes a string as UTF-8 bytes. */ 415 | static byte[] GetBytes(string x) 416 | { 417 | return Encoding.UTF8.GetBytes(x); 418 | } 419 | /* Wraps the UTF-8 encoding of input in a MemoryStream. */ 420 | static MemoryStream CreateStringStream(string input) => new(Encoding.UTF8.GetBytes(input)); 421 | } 422 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/TextReaderTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace Utf8StreamReaderTests; 8 | 9 | public class TextReaderTest 10 | { /* ReadLine: lines read through AsTextReader() must equal StreamReader's for input that begins with the UTF-8 preamble. */ 11 | [Fact] 12 | public async Task ReadLine() 13 | { 14 | var ms = new FakeMemoryStream(); 15 | ms.AddMemory( 16 | Encoding.UTF8.GetPreamble(), 17 | GetBytes(new string('a', 30000) + "\r\nb"), 18 | GetBytes("あいうえおかきくけこ\n"), 19 | GetBytes("ABCDEFGHIJKLMN") 20 | ); 21 | 22 | var expected = await StreamReaderResultAsync(ms); 23 | 24 | ms.Restart(); 25 | 26 | var actual = await Utf8TextReaderResultAsync(ms); 27 | 28 | actual.Should().Equal(expected); 29 | } 30 | /* ReadToEnd: ReadToEndAsync through AsTextReader() must return the same text as StreamReader.ReadToEndAsync. */ 31 | [Fact] 32 | public async Task ReadToEnd() 33 | { 34 | var ms = new FakeMemoryStream(); 35 | ms.AddMemory( 36 | Encoding.UTF8.GetPreamble(), 37 | GetBytes(new string('a', 30000) + "\r\nb"), 38 | GetBytes("あいうえおかきくけこ\n"), 39 | GetBytes("ABCDEFGHIJKLMN")
40 | ); 41 | 42 | using var sr = new StreamReader(ms, leaveOpen: true); 43 | var expected = await sr.ReadToEndAsync(); 44 | 45 | ms.Restart(); 46 | 47 | using var usr = new Utf8StreamReader(ms).AsTextReader(); 48 | var actual = await usr.ReadToEndAsync(); 49 | 50 | actual.Should().Be(expected); 51 | } 52 | /* ReadToEndLeftOver: delivers the three UTF-8 bytes of one character in three separate chunks and checks that ReadToEndAsync output still matches StreamReader. */ 53 | [Fact] 54 | public async Task ReadToEndLeftOver() 55 | { 56 | var ms = new FakeMemoryStream(); 57 | 58 | var hiragana = Encoding.UTF8.GetBytes("あ"); // 3 byte 59 | 60 | ms.AddMemory( 61 | Encoding.UTF8.GetPreamble(), 62 | new byte[] { hiragana[0] }, 63 | new byte[] { hiragana[1] }, 64 | new byte[] { hiragana[2] }, 65 | GetBytes("あいうえおかきくけこ\n"), 66 | GetBytes("ABCDEFGHIJKLMN") 67 | ); 68 | 69 | using var sr = new StreamReader(ms, leaveOpen: true); 70 | var expected = await sr.ReadToEndAsync(); 71 | 72 | ms.Restart(); 73 | 74 | using var usr = new Utf8StreamReader(ms).AsTextReader(); 75 | var actual = await usr.ReadToEndAsync(); 76 | 77 | actual.Should().Be(expected); 78 | } 79 | /* Reads every line via Utf8StreamReader.AsTextReader() and returns each item's ToString() value. */ 80 | static async Task<string[]> Utf8TextReaderResultAsync(Stream ms) 81 | { 82 | using var reader = new Utf8StreamReader(ms).AsTextReader(); 83 | var l = new List<string>(); 84 | await foreach (var item in reader.ReadAllLinesAsync()) 85 | { 86 | l.Add(item.ToString()); 87 | } 88 | return l.ToArray(); 89 | } 90 | /* Reference result: collects every line returned by StreamReader.ReadLineAsync until it yields null. */ 91 | static async Task<string[]> StreamReaderResultAsync(Stream ms) 92 | { 93 | var reader = new StreamReader(ms); 94 | var l = new List<string>(); 95 | string?
s; 96 | while ((s = (await reader.ReadLineAsync())) != null) 97 | { 98 | l.Add(s); 99 | } 100 | return l.ToArray(); 101 | } /* Decodes a UTF-8 byte buffer into a string. */ 102 | static string GetString(ReadOnlyMemory<byte> x) 103 | { 104 | return Encoding.UTF8.GetString(x.Span); 105 | } 106 | /* Encodes a string as UTF-8 bytes. */ 107 | static byte[] GetBytes(string x) 108 | { 109 | return Encoding.UTF8.GetBytes(x); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/Utf8StreamReader.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | net8.0 5 | enable 6 | enable 7 | Utf8StreamReaderTests 8 | false 9 | true 10 | 9113 11 | 12 | 13 | 14 | 15 | 16 | all 17 | runtime; build; native; contentfiles; analyzers; buildtransitive 18 | 19 | 20 | 21 | 22 | runtime; build; native; contentfiles; analyzers; buildtransitive 23 | all 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | Always 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /tests/Utf8StreamReader.Tests/file1.txt: -------------------------------------------------------------------------------- 1 | abcde 2 | fgh 3 | ijklmnopqrs 4 | --------------------------------------------------------------------------------