├── .editorconfig
├── .github
└── workflows
│ ├── build-debug.yaml
│ ├── build-release.yaml
│ └── stale.yaml
├── .gitignore
├── Directory.Build.props
├── Icon.png
├── LICENSE
├── README.md
├── Utf8StreamReader.sln
├── opensource.snk
├── sandbox
├── Benchmark
│ ├── Benchmark.csproj
│ ├── BytesReadToEnd.cs
│ ├── FromFile.cs
│ ├── FromMemory.cs
│ ├── Program.cs
│ └── ReadToEndString.cs
└── ConsoleApp1
│ ├── ConsoleApp1.csproj
│ ├── Program.cs
│ ├── ReadMeSample.cs
│ ├── RespReader.cs
│ └── file1.txt
├── src
└── Utf8StreamReader
│ ├── SegmentedArrayBufferWriter.cs
│ ├── Utf8StreamReader.cs
│ ├── Utf8StreamReader.csproj
│ └── Utf8TextReader.cs
└── tests
└── Utf8StreamReader.Tests
├── FakeMemoryStream.cs
├── FileReadTest.cs
├── ReadBlockTest.cs
├── ReadTest.cs
├── ReadToEndTest.cs
├── SegmentedArrayBufferWriterTest.cs
├── Tests.cs
├── TextReaderTest.cs
├── Utf8StreamReader.Tests.csproj
└── file1.txt
/.editorconfig:
--------------------------------------------------------------------------------
1 | # top-most EditorConfig file
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | indent_style = space
8 | indent_size = 2
9 | insert_final_newline = true
10 | trim_trailing_whitespace = true
11 |
12 | # Visual Studio Spell checker configs (https://learn.microsoft.com/en-us/visualstudio/ide/text-spell-checker?view=vs-2022#how-to-customize-the-spell-checker)
13 | spelling_exclusion_path = ./exclusion.dic
14 |
15 | [*.cs]
16 | indent_size = 4
17 | charset = utf-8-bom
18 | end_of_line = unset
19 |
20 | # Solution files
21 | [*.{sln,slnx}]
22 | end_of_line = unset
23 |
24 | # MSBuild project files
25 | [*.{csproj,props,targets}]
26 | end_of_line = unset
27 |
28 | # Xml config files
29 | [*.{ruleset,config,nuspec,resx,runsettings,DotSettings}]
30 | end_of_line = unset
31 |
32 | [*{_AssemblyInfo.cs,.notsupported.cs}]
33 | generated_code = true
34 |
35 | # C# code style settings
36 | [*.{cs}]
37 | dotnet_style_coalesce_expression = true:suggestion
38 | dotnet_style_null_propagation = true:suggestion
39 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
40 | dotnet_style_prefer_auto_properties = true:suggestion
41 | dotnet_style_object_initializer = true:suggestion
42 | dotnet_style_prefer_collection_expression = true:suggestion
43 | dotnet_style_collection_initializer = true:suggestion
44 | dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
45 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent
46 | dotnet_style_prefer_conditional_expression_over_return = true:silent
47 | dotnet_style_explicit_tuple_names = true:suggestion
48 | dotnet_style_prefer_inferred_tuple_names = true:suggestion
49 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
50 | dotnet_style_prefer_compound_assignment = true:suggestion
51 | dotnet_style_prefer_simplified_interpolation = true:suggestion
52 | dotnet_style_namespace_match_folder = true:suggestion
53 | dotnet_style_readonly_field = true:suggestion
54 | dotnet_style_predefined_type_for_member_access = true:suggestion
55 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
56 | dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
57 | dotnet_style_allow_statement_immediately_after_block_experimental = true:silent
58 | dotnet_style_allow_multiple_blank_lines_experimental = true:silent
59 | dotnet_code_quality_unused_parameters = non_public:suggestion
60 | dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
61 | dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
62 | dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
63 | dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
64 | dotnet_style_qualification_for_method = false:none
65 | dotnet_style_qualification_for_property = false:none
66 | dotnet_style_qualification_for_field = false:none
67 | dotnet_style_qualification_for_event = false:none
68 |
69 | # New line preferences
70 | csharp_new_line_before_open_brace = all
71 | csharp_new_line_before_else = true
72 | csharp_new_line_before_catch = true
73 | csharp_new_line_before_finally = true
74 | csharp_new_line_before_members_in_object_initializers = true
75 | csharp_new_line_before_members_in_anonymous_types = true
76 | csharp_new_line_between_query_expression_clauses = true
77 |
78 | # Indentation preferences
79 | csharp_indent_block_contents = true
80 | csharp_indent_braces = false
81 | csharp_indent_case_contents = true
82 | csharp_indent_case_contents_when_block = true
83 | csharp_indent_switch_labels = true
84 | csharp_indent_labels = one_less_than_current
85 |
86 | # Modifier preferences
87 | csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion
88 |
89 | # avoid this. unless absolutely necessary
90 | dotnet_style_qualification_for_field = false:none
91 | dotnet_style_qualification_for_property = false:none
92 | dotnet_style_qualification_for_method = false:none
93 | dotnet_style_qualification_for_event = false:none
94 |
95 | # Types: use keywords instead of BCL types, and permit var only when the type is clear
96 | csharp_style_var_for_built_in_types = false:none
97 | csharp_style_var_when_type_is_apparent = false:none
98 | csharp_style_var_elsewhere = false:none
99 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
100 | dotnet_style_predefined_type_for_member_access = true:suggestion
101 |
102 | # name all constant fields using PascalCase
103 | dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion
104 | dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields
105 | dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style
106 | dotnet_naming_symbols.constant_fields.applicable_kinds = field
107 | dotnet_naming_symbols.constant_fields.required_modifiers = const
108 | dotnet_naming_style.pascal_case_style.capitalization = pascal_case
109 |
110 | # static fields
111 | dotnet_naming_rule.static_fields_should_have_prefix.severity = none
112 | dotnet_naming_rule.static_fields_should_have_prefix.symbols = static_fields
113 | dotnet_naming_rule.static_fields_should_have_prefix.style = static_prefix_style
114 | dotnet_naming_symbols.static_fields.applicable_kinds = field
115 | dotnet_naming_symbols.static_fields.required_modifiers = static
116 | dotnet_naming_symbols.static_fields.applicable_accessibilities = private, internal, private_protected
117 | dotnet_naming_style.static_prefix_style.required_prefix = s_
118 | dotnet_naming_style.static_prefix_style.capitalization = camel_case
119 |
120 | # internal and private fields
121 | dotnet_naming_rule.camel_case_for_private_internal_fields.severity = none
122 | dotnet_naming_rule.camel_case_for_private_internal_fields.symbols = private_internal_fields
123 | dotnet_naming_rule.camel_case_for_private_internal_fields.style = camel_case_underscore_style
124 | dotnet_naming_symbols.private_internal_fields.applicable_kinds = field
125 | dotnet_naming_symbols.private_internal_fields.applicable_accessibilities = private, internal
126 | dotnet_naming_style.camel_case_underscore_style.required_prefix = _
127 | dotnet_naming_style.camel_case_underscore_style.capitalization = camel_case
128 |
129 | # Code style defaults
130 | csharp_using_directive_placement = outside_namespace:suggestion
131 | csharp_prefer_braces = true:silent
132 | csharp_preserve_single_line_blocks = true:none
133 | csharp_preserve_single_line_statements = false:none
134 | csharp_prefer_static_local_function = true:suggestion
135 | csharp_prefer_simple_using_statement = false:none
136 | csharp_style_prefer_switch_expression = true:suggestion
137 |
138 | # Code quality
139 | dotnet_style_readonly_field = true:suggestion
140 | dotnet_code_quality_unused_parameters = non_public:suggestion
141 |
142 | # Expression-level preferences
143 | dotnet_style_object_initializer = true:suggestion
144 | dotnet_style_collection_initializer = true:suggestion
145 | dotnet_style_explicit_tuple_names = true:suggestion
146 | dotnet_style_coalesce_expression = true:suggestion
147 | dotnet_style_null_propagation = true:suggestion
148 | dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
149 | dotnet_style_prefer_inferred_tuple_names = true:suggestion
150 | dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
151 | dotnet_style_prefer_auto_properties = true:suggestion
152 | dotnet_style_prefer_conditional_expression_over_assignment = true:silent
153 | dotnet_style_prefer_conditional_expression_over_return = true:silent
154 | csharp_prefer_simple_default_expression = true:suggestion
155 |
156 | # Expression-bodied members
157 | csharp_style_expression_bodied_methods = true:silent
158 | csharp_style_expression_bodied_constructors = true:silent
159 | csharp_style_expression_bodied_operators = true:silent
160 | csharp_style_expression_bodied_properties = true:silent
161 | csharp_style_expression_bodied_indexers = true:silent
162 | csharp_style_expression_bodied_accessors = true:silent
163 | csharp_style_expression_bodied_lambdas = true:silent
164 | csharp_style_expression_bodied_local_functions = true:silent
165 |
166 | # Pattern matching
167 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
168 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
169 | csharp_style_inlined_variable_declaration = true:suggestion
170 |
171 | # Null checking preferences
172 | csharp_style_throw_expression = true:suggestion
173 | csharp_style_conditional_delegate_call = true:suggestion
174 |
175 | # Other features
176 | csharp_style_prefer_index_operator = false:none
177 | csharp_style_prefer_range_operator = false:none
178 | csharp_style_pattern_local_over_anonymous_function = false:none
179 |
180 | # Space preferences
181 | csharp_space_after_cast = false
182 | csharp_space_after_colon_in_inheritance_clause = true
183 | csharp_space_after_comma = true
184 | csharp_space_after_dot = false
185 | csharp_space_after_keywords_in_control_flow_statements = true
186 | csharp_space_after_semicolon_in_for_statement = true
187 | csharp_space_around_binary_operators = before_and_after
188 | csharp_space_around_declaration_statements = do_not_ignore
189 | csharp_space_before_colon_in_inheritance_clause = true
190 | csharp_space_before_comma = false
191 | csharp_space_before_dot = false
192 | csharp_space_before_open_square_brackets = false
193 | csharp_space_before_semicolon_in_for_statement = false
194 | csharp_space_between_empty_square_brackets = false
195 | csharp_space_between_method_call_empty_parameter_list_parentheses = false
196 | csharp_space_between_method_call_name_and_opening_parenthesis = false
197 | csharp_space_between_method_call_parameter_list_parentheses = false
198 | csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
199 | csharp_space_between_method_declaration_name_and_open_parenthesis = false
200 | csharp_space_between_method_declaration_parameter_list_parentheses = false
201 | csharp_space_between_parentheses = false
202 | csharp_space_between_square_brackets = false
203 |
204 | # Analyzers
205 | dotnet_code_quality.CA1052.api_surface = private, internal
206 | dotnet_code_quality.CA1802.api_surface = private, internal
207 | dotnet_code_quality.CA1822.api_surface = private, internal
208 | dotnet_code_quality.CA2208.api_surface = public
209 |
210 | # IDE0008: Use explicit type
211 | dotnet_diagnostic.IDE0008.severity = none
212 |
213 | # IDE0090: Use 'new(...)'
214 | dotnet_diagnostic.IDE0090.severity = none
215 |
216 | # IDE0040: Add accessibility modifiers
217 | dotnet_diagnostic.IDE0040.severity = none
218 |
219 | # Nullability in reference types of interface implemented by the base type doesn't match
220 | dotnet_diagnostic.CS8644.severity = none
221 |
222 | dotnet_diagnostic.CA1816.severity = none
223 |
224 | dotnet_diagnostic.IDE1006.severity = none
225 |
226 | #Remove unnecessary suppression
227 | dotnet_diagnostic.IDE0079.severity = none
228 |
229 | dotnet_diagnostic.IDE0130.severity = none
230 |
231 | dotnet_diagnostic.CA1822.severity = none
232 |
233 | csharp_style_prefer_switch_expression = false:suggestion
234 |
235 | csharp_style_pattern_matching_over_as_with_null_check = false:suggestion
236 |
237 | dotnet_naming_symbols.functional_symbols.applicable_kinds = property,method,event,delegate
238 | dotnet_naming_style.pascal_case_style.capitalization = pascal_case
239 | dotnet_naming_rule.functional_symbols_must_be_capitalized.symbols = functional_symbols
240 | dotnet_naming_rule.functional_symbols_must_be_capitalized.style = pascal_case_style
241 | dotnet_naming_rule.functional_symbols_must_be_capitalized.severity = warning
242 |
243 | dotnet_naming_symbols.public_symbols.applicable_kinds = property,method,field,event,delegate
244 | dotnet_naming_symbols.public_symbols.applicable_accessibilities = public
245 | dotnet_naming_symbols.public_symbols.required_modifiers = readonly
246 | dotnet_naming_style.first_word_upper_case_style.capitalization = first_word_upper
247 | dotnet_naming_rule.public_members_must_be_capitalized.symbols = public_symbols
248 | dotnet_naming_rule.public_members_must_be_capitalized.style = first_word_upper_case_style
249 | dotnet_naming_rule.public_members_must_be_capitalized.severity = warning
250 |
251 | csharp_style_expression_bodied_methods = false:silent
252 | csharp_style_expression_bodied_constructors = false:silent
253 | csharp_style_expression_bodied_operators = false:silent
254 | csharp_style_namespace_declarations = file_scoped:suggestion
255 | csharp_style_prefer_method_group_conversion = true:silent
256 | csharp_style_prefer_top_level_statements = true:silent
257 | csharp_style_prefer_primary_constructors = true:suggestion
258 | csharp_style_prefer_null_check_over_type_check = true:suggestion
259 | csharp_style_prefer_local_over_anonymous_function = true:suggestion
260 | csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion
261 | csharp_style_prefer_tuple_swap = true:suggestion
262 | csharp_style_prefer_utf8_string_literals = true:suggestion
263 | csharp_style_deconstructed_variable_declaration = true:suggestion
264 | csharp_style_unused_value_assignment_preference = discard_variable:suggestion
265 | csharp_style_unused_value_expression_statement_preference = discard_variable:silent
266 | csharp_style_prefer_readonly_struct_member = true:suggestion
267 | csharp_style_prefer_readonly_struct = true:suggestion
268 | csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent
269 | csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true:silent
270 | csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true:silent
271 | csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent
272 | csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent
273 | csharp_style_prefer_pattern_matching = true:silent
274 | csharp_style_prefer_extended_property_pattern = true:suggestion
275 | csharp_style_prefer_not_pattern = true:suggestion
276 |
--------------------------------------------------------------------------------
/.github/workflows/build-debug.yaml:
--------------------------------------------------------------------------------
1 | name: Build-Debug
2 |
3 | on:
4 | push:
5 | branches:
6 | - "main"
7 | pull_request:
8 | branches:
9 | - "main"
10 |
11 | jobs:
12 | build-dotnet:
13 | permissions:
14 | contents: read
15 | runs-on: ubuntu-24.04
16 | timeout-minutes: 10
17 | steps:
18 | - uses: Cysharp/Actions/.github/actions/checkout@main
19 | - uses: Cysharp/Actions/.github/actions/setup-dotnet@main
20 | - run: dotnet build -c Debug
21 | - run: dotnet test -c Debug --no-build
22 |
--------------------------------------------------------------------------------
/.github/workflows/build-release.yaml:
--------------------------------------------------------------------------------
1 | name: Build-Release
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | tag:
7 | description: "tag: git tag you want to create. (sample 1.0.0)"
8 | required: true
9 | dry-run:
10 | description: "dry-run: true will never create release/nuget."
11 | required: true
12 | default: false
13 | type: boolean
14 |
15 | jobs:
16 | build-dotnet:
17 | permissions:
18 | contents: read
19 | runs-on: ubuntu-24.04
20 | timeout-minutes: 10
21 | steps:
22 | - uses: Cysharp/Actions/.github/actions/checkout@main
23 | - uses: Cysharp/Actions/.github/actions/setup-dotnet@main
24 | - run: dotnet build -c Release -p:Version=${{ inputs.tag }}
25 | - run: dotnet test -c Release --no-build
26 | - run: dotnet pack -c Release --no-build -p:Version=${{ inputs.tag }} -o ./publish
27 | # Store artifacts.
28 | - uses: Cysharp/Actions/.github/actions/upload-artifact@main
29 | with:
30 | name: nuget
31 | path: ./publish/
32 |
33 | # release
34 | create-release:
35 | needs: [build-dotnet]
36 | permissions:
37 | contents: write
38 | uses: Cysharp/Actions/.github/workflows/create-release.yaml@main
39 | with:
40 | commit-id: ''
41 | dry-run: ${{ inputs.dry-run }}
42 | tag: ${{ inputs.tag }}
43 | nuget-push: true
44 | release-upload: false
45 | secrets: inherit
46 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yaml:
--------------------------------------------------------------------------------
1 | name: "Close stale issues"
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: "0 0 * * *"
7 |
8 | jobs:
9 | stale:
10 | permissions:
11 | contents: read
12 | pull-requests: write
13 | issues: write
14 | uses: Cysharp/Actions/.github/workflows/stale-issue.yaml@main
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs)
2 | [Bb]in/
3 | [Oo]bj/
4 |
5 | # mstest test results
6 | TestResults
7 |
8 | ## Ignore Visual Studio temporary files, build results, and
9 | ## files generated by popular Visual Studio add-ons.
10 |
11 | # User-specific files
12 | *.suo
13 | *.user
14 | *.sln.docstates
15 |
16 | # Build results
17 | [Dd]ebug/
18 | [Rr]elease/
19 | x64/
20 | *_i.c
21 | *_p.c
22 | *.ilk
23 | *.obj
24 | *.pch
25 | *.pdb
26 | *.pgc
27 | *.pgd
28 | *.rsp
29 | *.sbr
30 | *.tlb
31 | *.tli
32 | *.tlh
33 | *.tmp
34 | *.log
35 | *.vspscc
36 | *.vssscc
37 | .builds
38 |
39 | # Visual C++ cache files
40 | ipch/
41 | *.aps
42 | *.ncb
43 | *.opensdf
44 | *.sdf
45 |
46 | # Visual Studio profiler
47 | *.psess
48 | *.vsp
49 | *.vspx
50 |
51 | # Guidance Automation Toolkit
52 | *.gpState
53 |
54 | # ReSharper is a .NET coding add-in
55 | _ReSharper*
56 |
57 | # NCrunch
58 | *.ncrunch*
59 | .*crunch*.local.xml
60 |
61 | # Installshield output folder
62 | [Ee]xpress
63 |
64 | # DocProject is a documentation generator add-in
65 | DocProject/buildhelp/
66 | DocProject/Help/*.HxT
67 | DocProject/Help/*.HxC
68 | DocProject/Help/*.hhc
69 | DocProject/Help/*.hhk
70 | DocProject/Help/*.hhp
71 | DocProject/Help/Html2
72 | DocProject/Help/html
73 |
74 | # Click-Once directory
75 | publish
76 |
77 | # Publish Web Output
78 | *.Publish.xml
79 |
80 | # NuGet Packages Directory
81 | packages
82 |
83 | # Windows Azure Build Output
84 | csx
85 | *.build.csdef
86 |
87 | # Windows Store app package directory
88 | AppPackages/
89 |
90 | # Others
91 | [Bb]in
92 | [Oo]bj
93 | sql
94 | TestResults
95 | [Tt]est[Rr]esult*
96 | *.Cache
97 | ClientBin
98 | [Ss]tyle[Cc]op.*
99 | ~$*
100 | *.dbmdl
101 | Generated_Code #added for RIA/Silverlight projects
102 |
103 | # Backup & report files from converting an old project file to a newer
104 | # Visual Studio version. Backup files are not needed, because we have git ;-)
105 | _UpgradeReport_Files/
106 | Backup*/
107 | UpgradeLog*.XML
108 | .vs/config/applicationhost.config
109 | .vs/restore.dg
110 |
111 | # OTHER
112 | nuget/tools/*
113 | *.nupkg
114 |
115 | .vs
116 | **/.DS_Store
117 | .idea
118 |
119 | # publish directory
120 | out/
121 | *.tsbuildinfo
122 |
123 | # BenchmarkDotNet Artifacts
124 | BenchmarkDotNet.Artifacts/
125 |
--------------------------------------------------------------------------------
/Directory.Build.props:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | $(Version)
6 | Cysharp
7 | Cysharp
8 | © Cysharp, Inc.
9 | https://github.com/Cysharp/Utf8StreamReader
10 | $(PackageProjectUrl)
11 | git
12 | MIT
13 | Icon.png
14 | true
15 | ../../opensource.snk
16 |
17 | ../../../../../../opensource.snk
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cysharp/Utf8StreamReader/a92ba5ef05e22234eb9ec7d02ac5b5f885b492bd/Icon.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Cysharp, Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Utf8StreamReader
2 |
3 | [](https://github.com/Cysharp/Utf8StreamReader/actions) [](https://github.com/Cysharp/Utf8StreamReader/releases)
4 | [](https://nuget.org/packages/Utf8StreamReader)
5 |
6 | Utf8 based StreamReader for high performance text processing. In addition to UTF-8 based binary processing, it can also be used as a high-performance replacement for StreamReader and as a helper for fast binary reading.
7 |
8 | Avoiding unnecessary string allocation is a fundamental aspect of recent .NET performance improvements. Given that most file and network data is in UTF8, features like [JsonSerializer](https://learn.microsoft.com/en-us/dotnet/api/system.text.json.jsonserializer?view=net-8.0) and [IUtf8SpanParsable](https://learn.microsoft.com/en-us/dotnet/api/system.iutf8spanparsable-1?view=net-8.0), which operate on UTF8-based data, have been added. More recently, methods like [.NET8 MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0), which avoids allocations, have also been introduced.
9 |
10 | However, for the most common use case of parsing strings delimited by newlines, only the traditional [StreamReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.streamreader) is provided, which generates a new String for each line, resulting in a large amount of allocations.
11 |
12 | 
13 | > Read simple 1000000 lines text
14 |
15 | Incredibly, there is a **240,000 times** difference!
16 |
17 | While it is possible to process data in UTF8 format using standard classes like [PipeReader](https://learn.microsoft.com/en-us/dotnet/api/system.io.pipelines.pipereader?view=dotnet-plat-ext-8.0) and [SequenceReader](https://learn.microsoft.com/en-us/dotnet/api/system.buffers.sequencereader-1?view=net-8.0), they are generic libraries, so properly handling newline processing requires considerable effort (handling BOM and multiple types of newline characters).
18 |
19 | `Utf8StreamReader` provides a familiar API similar to StreamReader, making it easy to use, while its ReadLine-specific implementation maximizes performance.
20 |
21 | By using optimized internal processing, higher performance can be achieved when reading Strings from Files compared to using the standard `StreamReader.ReadToEnd` or `File.ReadAllText` methods.
22 |
23 | 
24 |
25 | > Read from file(1000000 lines text)
26 |
27 | ```csharp
28 | [Benchmark]
29 | public async Task<string> StreamReaderReadToEndAsync()
30 | {
31 | using var sr = new System.IO.StreamReader(filePath);
32 | return await sr.ReadToEndAsync();
33 | }
34 |
35 | [Benchmark]
36 | public async Task<string> Utf8TextReaderReadToEndAsync()
37 | {
38 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader();
39 | return await sr.ReadToEndAsync();
40 | }
41 |
42 | [Benchmark]
43 | public async Task<string> FileReadAllTextAsync()
44 | {
45 | return await File.ReadAllTextAsync(filePath);
46 | }
47 | ```
48 |
49 | For an explanation of the performance difference, please refer to the [ReadString Section](#readstring).
50 |
51 | ## Getting Started
52 |
53 | This library is distributed via NuGet, supporting `.NET Standard 2.1`, `.NET 6(.NET 7)` and `.NET 8` or above. For information on usage with Unity, please refer to the [Unity Section](#unity).
54 |
55 | PM> Install-Package [Utf8StreamReader](https://www.nuget.org/packages/Utf8StreamReader)
56 |
57 | The basic API involves `using var streamReader = new Utf8StreamReader(stream);` and then `ReadOnlyMemory<byte> line = await streamReader.ReadLineAsync();`. When enumerating all lines, you can choose from three styles:
58 |
59 | ```csharp
60 | using Cysharp.IO; // namespace of Utf8StreamReader
61 |
62 | public async Task Sample1(Stream stream)
63 | {
64 | using var reader = new Utf8StreamReader(stream);
65 |
66 | // Most performant style, similar to System.Threading.Channels
67 | while (await reader.LoadIntoBufferAsync())
68 | {
69 | while (reader.TryReadLine(out var line))
70 | {
71 | // line is ReadOnlyMemory<byte>, deserialize UTF8 directly.
72 | _ = JsonSerializer.Deserialize(line.Span);
73 | }
74 | }
75 | }
76 |
77 | public async Task Sample2(Stream stream)
78 | {
79 | using var reader = new Utf8StreamReader(stream);
80 |
81 | // Classical style, same as StreamReader
82 | ReadOnlyMemory<byte>? line = null;
83 | while ((line = await reader.ReadLineAsync()) != null)
84 | {
85 | _ = JsonSerializer.Deserialize(line.Value.Span);
86 | }
87 | }
88 |
89 | public async Task Sample3(Stream stream)
90 | {
91 | using var reader = new Utf8StreamReader(stream);
92 |
93 | // Easiest style, use async streams
94 | await foreach (var line in reader.ReadAllLinesAsync())
95 | {
96 | _ = JsonSerializer.Deserialize(line.Span);
97 | }
98 | }
99 | ```
100 |
101 | From a performance perspective, `Utf8StreamReader` only provides asynchronous APIs.
102 |
103 | Theoretically, the highest performance can be achieved by combining `LoadIntoBufferAsync` and `TryReadLine` in a double while loop. This is similar to the combination of `WaitToReadAsync` and `TryRead` in [Channels](https://learn.microsoft.com/en-us/dotnet/core/extensions/channels).
104 |
105 | `ReadLineAsync`, like StreamReader.ReadLine, returns null to indicate that the end has been reached.
106 |
107 | `ReadAllLinesAsync` returns an `IAsyncEnumerable<ReadOnlyMemory<byte>>`. Although there is a performance difference, it is minimal, so this API is ideal when you want to use it easily.
108 |
109 | All asynchronous methods accept a `CancellationToken` and support cancellation.
110 |
111 | For a real-world usage example, refer to [StreamMessageReader.cs](https://github.com/Cysharp/Claudia/blob/main/src/Claudia/StreamMessageReader.cs) in [Cysharp/Claudia](https://github.com/Cysharp/Claudia/), a C# SDK for Anthropic Claude, which parses [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events).
112 |
113 | ## Buffer Lifetimes
114 |
115 | The `ReadOnlyMemory<byte>` returned from `ReadLineAsync` or `TryReadLine` is only valid until the next call to `LoadIntoBufferAsync` or `TryReadLine` or `ReadLineAsync`. Since the data is shared with the internal buffer, it may be overwritten, moved, or returned on the next call, so the safety of the data cannot be guaranteed. The received data must be promptly parsed and converted into a separate object. If you want to keep the data as is, use `ToArray()` to convert it to a `byte[]`.
116 |
117 | This design is similar to [System.IO.Pipelines](https://learn.microsoft.com/en-us/dotnet/standard/io/pipelines).
118 |
119 | ## Read as `ReadOnlyMemory<char>`
120 |
121 | You can convert it to a `Utf8TextReader` that extracts `ReadOnlyMemory<char>` or `string`. Although there is a conversion cost, it is still fast and low allocation, so it can be used as an alternative to `StreamReader`.
122 |
123 | 
124 |
125 | After converting with `AsTextReader()`, all the same methods (`TryReadLine`, `ReadLineAsync`, `LoadIntoBufferAsync`, `ReadAllLinesAsync`) can be used.
126 |
127 | ```csharp
128 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
129 | while (await sr.LoadIntoBufferAsync())
130 | {
131 | while (sr.TryReadLine(out var line))
132 | {
133 | // line is ReadOnlyMemory<char>, you can add to StringBuilder or other parsing method.
134 |
135 | // If you need a string, ReadOnlyMemory<char>.ToString() builds a string instance
136 | // string str = line.ToString();
137 | }
138 | }
139 | ```
140 |
141 | You can perform text processing without allocation, such as splitting `ReadOnlySpan<char>` using [MemoryExtensions.Split](https://learn.microsoft.com/en-us/dotnet/api/system.memoryextensions.split?view=net-8.0#system-memoryextensions-split(system-readonlyspan((system-char))-system-span((system-range))-system-char-system-stringsplitoptions)), and concatenate the results using StringBuilder's [`Append/AppendLine(ReadOnlySpan<char>)`](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.append). This way, string-based processing can be done with much lower allocation compared to `StreamReader`.
142 |
143 | When a string is needed, you can convert `ReadOnlyMemory<char>` to a string using `ToString()`. Even with the added string conversion, the performance is higher than `StreamReader`, so it can be used as a better alternative.
144 |
145 | ## Optimizing FileStream
146 |
147 | Similar to `StreamReader`, `Utf8StreamReader` has the ability to open a `FileStream` by accepting a `string path`.
148 |
149 | ```csharp
150 | public Utf8StreamReader(string path, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
151 | public Utf8StreamReader(string path, int bufferSize, FileOpenMode fileOpenMode = FileOpenMode.Throughput)
152 | public Utf8StreamReader(string path, FileStreamOptions options)
153 | public Utf8StreamReader(string path, FileStreamOptions options, int bufferSize)
154 | ```
155 |
156 | Unfortunately, the `FileStream` used by `StreamReader` is not optimized for modern .NET. For example, when using `FileStream` with asynchronous methods, it should be opened with `useAsync: true` for optimal performance. However, since `StreamReader` has both synchronous and asynchronous methods in its API, false is specified. Additionally, although `StreamReader` itself has a buffer and `FileStream` does not require a buffer, the buffer of `FileStream` is still being utilized.
157 |
158 | It is difficult to handle `FileStream` correctly with high performance. By specifying a `string path`, the stream is opened with options optimized for `Utf8StreamReader`, so it is recommended to use this overload rather than opening `FileStream` yourself. The following is a benchmark of `FileStream`.
159 |
160 | 
161 |
162 | `Utf8StreamReader` opens `FileStream` with the following settings:
163 |
164 | ```csharp
165 | var useAsync = (fileOpenMode == FileOpenMode.Scalability);
166 | new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: 1, useAsync: useAsync)
167 | ```
168 |
169 | Due to historical reasons, the options for `FileStream` are odd, but by setting `bufferSize` to 1, you can avoid the use of internal buffers. `FileStream` has been significantly revamped in .NET 6, and by controlling the setting of this option and the way `Utf8StreamReader` is called as a whole, it can function as a thin wrapper around the fast [RandomAccess.ReadAsync](https://learn.microsoft.com/en-us/dotnet/api/system.io.randomaccess.readasync), allowing you to avoid most of the overhead of FileStream.
170 |
171 | `FileOpenMode` is a proprietary option of `Utf8StreamReader`.
172 |
173 |
174 | ```csharp
175 | public enum FileOpenMode // selects how Utf8StreamReader opens the FileStream when given a path
176 | {
177 | Scalability, // the FileStream is opened with useAsync: true
178 | Throughput // the FileStream is opened with useAsync: false; default of the path-based constructors
179 | }
180 | ```
181 |
182 | In a Windows environment, the table in the [IO section of the Performance Improvements in .NET 6 blog](https://devblogs.microsoft.com/dotnet/performance-improvements-in-net-6/#io) shows that throughput decreases when `useAsync: true` is used.
183 |
184 | | Method | Runtime | IsAsync | BufferSize | Mean |
185 | | - | - | - | - | - |
186 | | ReadAsync | .NET 6.0 | True | 1 | 119.573 ms |
187 | | ReadAsync | .NET 6.0 | False | 1 | 36.018 ms |
188 |
189 | By setting `Utf8StreamReader` to `FileOpenMode.Scalability`, true async I/O is enabled and scalability is prioritized. If set to `FileOpenMode.Throughput`, it internally becomes sync-over-async and consumes the ThreadPool, but reduces the overhead of asynchronous I/O and improves throughput.
190 |
191 | For code that is executed frequently within a server application, `Scalability` will likely yield the best performance characteristics; for batch applications, `Throughput` will. The default is `Throughput`. (In the current .NET implementation, both seem to behave the same (similar to `Throughput` on Windows) in Linux environments.)
192 |
193 | In `Utf8StreamReader`, by carefully adjusting the buffer size on the `Utf8StreamReader` side, the performance difference is minimized. Please refer to the above benchmark results image for specific values.
194 |
195 | For overloads that accept `FileStreamOptions`, the above settings are not reflected, so please adjust them manually.
196 |
197 | ## ReadString
198 |
199 | By combining the above FileStream optimization with `.AsTextReader().ReadToEndAsync()`, you can achieve higher performance when reading out a `string` compared to `StreamReader.ReadToEnd` or `File.ReadAllText`.
200 |
201 | 
202 |
203 | The implementation of `File.ReadAllText` in dotnet/runtime uses `StreamReader.ReadToEnd`, so they are almost the same. However, in the case of `File.ReadAllText`, it uses `useAsync: true` when opening the `FileStream`. That accounts for the performance difference in the benchmark.
204 |
205 | Another significant difference in the implementation is that `Utf8StreamReader` generates a `string` without using `StringBuilder`. `StreamReader.ReadToEnd` generates a string using the following flow: `byte[] buffer` -> `char[] decodeBuffer` -> `StringBuilder.Append(char[])` -> `StringBuilder.ToString()`, but there are removable inefficiencies. Both `char[]` and `StringBuilder` are `char[]` buffers, and copying occurs. By generating a `string` directly from `char[]`, the copy to the internal buffer of `StringBuilder` can be eliminated.
206 |
207 | In `Utf8StreamReader`'s `.AsTextReader().ReadToEndAsync()`, it receives streaming data in read buffer units from `Utf8StreamReader` (`ReadToEndChunksAsync`), converts it to `char[]` chunks using `Decoder`, and generates the string all at once using `string.Create`.
208 |
209 | ```csharp
210 | // Utf8TextReader is a helper class for ReadOnlyMemory and string generation that internally holds Utf8StreamReader
211 | public async ValueTask ReadToEndAsync(CancellationToken cancellationToken = default)
212 | {
213 | // Using a method similar to .NET 9 LINQ to Objects's ToArray improvement, returns a structure optimized for gap-free sequential expansion
214 | // StreamReader.ReadToEnd copies the buffer to a StringBuilder, but this implementation holds char[] chunks(char[][]) without copying.
215 | using var writer = new SegmentedArrayBufferWriter();
216 | var decoder = Encoding.UTF8.GetDecoder();
217 |
218 | // Utf8StreamReader.ReadToEndChunksAsync returns the internal buffer ReadOnlyMemory as an asynchronous sequence upon each read completion
219 | await foreach (var chunk in reader.ReadToEndChunksAsync(cancellationToken).ConfigureAwait(reader.ConfigureAwait))
220 | {
221 | var input = chunk;
222 | while (input.Length != 0)
223 | {
224 | // The Decoder directly writes from the read buffer to the char[] buffer
225 | decoder.Convert(input.Span, writer.GetMemory().Span, flush: false, out var bytesUsed, out var charsUsed, out var completed);
226 | input = input.Slice(bytesUsed);
227 | writer.Advance(charsUsed);
228 | }
229 | }
230 |
231 | decoder.Convert([], writer.GetMemory().Span, flush: true, out _, out var finalCharsUsed, out _);
232 | writer.Advance(finalCharsUsed);
233 |
234 | // Directly generate a string from the char[][] buffer using String.Create
235 | return string.Create(writer.WrittenCount, writer, static (stringSpan, writer) =>
236 | {
237 | foreach (var item in writer.GetSegmentsAndDispose())
238 | {
239 | item.Span.CopyTo(stringSpan);
240 | stringSpan = stringSpan.Slice(item.Length);
241 | }
242 | });
243 | }
244 | ```
245 |
246 | `SegmentedArrayBufferWriter` borrows the idea (which I proposed) from [the performance improvement of ToArray in LINQ in .NET 9](https://github.com/dotnet/runtime/pull/96570), and internally holds an InlineArray of segments, each twice the size of the previous one.
247 |
248 | ```csharp
249 | [StructLayout(LayoutKind.Sequential)]
250 | struct InlineArray19
251 | {
252 | public const int InitialSize = 8192;
253 |
254 | T[] array00; // 8192
255 | T[] array01; // 16384
256 | T[] array02; // 32768
257 | T[] array03; // 65536
258 | T[] array04; // 131072
259 | T[] array05; // 262144
260 | T[] array06; // 524288
261 | T[] array07; // 1048576
262 | T[] array08; // 2097152
263 | T[] array09; // 4194304
264 | T[] array10; // 8388608
265 | T[] array11; // 16777216
266 | T[] array12; // 33554432
267 | T[] array13; // 67108864
268 | T[] array14; // 134217728
269 | T[] array15; // 268435456
270 | T[] array16; // 536870912
271 | T[] array17; // 1073741824
272 | T[] array18; // Array.MaxLength - total
273 |
274 | public T[] this[int i]
275 | {
276 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
277 | get
278 | {
279 | if (i < 0 || i > 18) Throw();
280 | return Unsafe.Add(ref array00, i);
281 | }
282 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
283 | set
284 | {
285 | if (i < 0 || i > 18) Throw();
286 | Unsafe.Add(ref array00, i) = value;
287 | }
288 | }
289 | void Throw() { throw new ArgumentOutOfRangeException(); }
290 | }
291 | ```
292 |
293 | With these optimizations for both reading and writing, we achieved several times the speedup compared to the .NET standard library.
294 |
295 | ## Binary Read
296 |
297 | `TryPeek`, `PeekAsync`, `TryRead`, `ReadAsync`, `TryReadBlock`, and `ReadBlockAsync` enable reading as binary, irrespective of newline codes. For example, [Redis's protocol, RESP](https://redis.io/docs/latest/develop/reference/protocol-spec/), is a text protocol and typically newline-delimited, but after `$N`, it requires reading N bytes (BulkString). For instance, `$5\r\nhello\r\n` means reading 5 bytes.
298 |
299 | Here's an example of how it can be parsed:
300 |
301 | ```csharp
302 | // $5\r\nhello\r\n
303 | var line = await reader.ReadLineAsync(); // $5(+ consumed \r\n)
304 | if (line.Value.Span[0] == (byte)'$')
305 | {
306 | Utf8Parser.TryParse(line.Value.Span.Slice(1), out int size, out _); // 5
307 | var block = await reader.ReadBlockAsync(size); // hello
308 | await reader.ReadLineAsync(); // consume \r\n
309 | Console.WriteLine(Encoding.UTF8.GetString(block.Span));
310 | }
311 | ```
312 |
313 | A sample that parses all RESP code is available in [RespReader.cs](https://github.com/Cysharp/Utf8StreamReader/blob/e400444/sandbox/ConsoleApp1/RespReader.cs).
314 |
315 | Additionally, when `LoadIntoBufferAsync` or `LoadIntoBufferAtLeastAsync` is used to load data into the buffer first, the synchronous `Try***` methods allow for more efficient execution.
316 |
317 | ```csharp
318 | while (await reader.LoadIntoBufferAsync())
319 | {
320 | while (reader.TryReadLine(out var line))
321 | {
322 | switch (line.Span[0])
323 | {
324 | case (byte)'$':
325 | Utf8Parser.TryParse(line.Span.Slice(1), out int size, out _);
326 | if (!reader.TryReadBlock(size + 2, out var block)) // +2 is \r\n
327 | {
328 | // ReadBlockAsync is TryReadBlock + LoadIntoBufferAtLeastAsync
329 | block = await reader.ReadBlockAsync(size + 2);
330 | }
331 | yield return block.Slice(0, size);
332 | break;
333 | // and others('+', '-', ':', '*')
334 | default:
335 | break;
336 | }
337 | }
338 | }
339 | ```
340 |
341 | When using `ReadToEndAsync`, you can obtain a `byte[]` using Utf8StreamReader's efficient binary reading/concatenation (`SegmentedArrayBufferWriter, InlineArray19`).
342 |
343 | ```csharp
344 | using var reader = new Utf8StreamReader(stream);
345 | byte[] bytes = await reader.ReadToEndAsync();
346 | ```
347 |
348 | `ReadToEndAsync()` has two optional overloads, `(bool disableBomCheck)` and `(long resultSizeHint)`.
349 |
350 | If `disableBomCheck` is `true`, the BOM check/trim is disabled and the data is always read byte-for-byte as-is. The default for `ReadToEndAsync` is `true`, so the result always matches the source bytes exactly. If `false`, it follows `Utf8StreamReader.SkipBom`.
351 |
352 | `resultSizeHint` allows for reducing the copy cost by directly generating `new byte[resultSizeHint]` when the final binary size is known and reading directly into that buffer. When reading a file, i.e., when the `Stream` is a `FileStream` and seekable, `FileStream.Length` is used as the resultSizeHint as an optimization.
353 |
354 | Here is the performance comparison between copying a normal `Stream` to a `MemoryStream` with `CopyToAsync` and calling `ToArray`, versus using `Utf8StreamReader`'s `ReadToEndAsync` to obtain a `byte[]`. The options are adjusted so that the optimization is intentionally not applied when a `FileStream` is passed directly to `Utf8StreamReader`.
355 |
356 | 
357 |
358 | ```csharp
359 | [Benchmark]
360 | public async Task MemoryStreamCopyToToArray()
361 | {
362 | using var fs = new FileStream(filePath, FileMode.Open);
363 | var ms = new MemoryStream();
364 | await fs.CopyToAsync(ms);
365 |
366 | return ms.ToArray();
367 | }
368 |
369 | [Benchmark]
370 | public async Task Utf8StreamReaderReadToEndAsync()
371 | {
372 | using var fs = new FileStream(filePath, FileMode.Open);
373 | using var sr = new Cysharp.IO.Utf8StreamReader(fs);
374 | return await sr.ReadToEndAsync(disableBomCheck: false); // hack for disable optimize(for benchmark fairness)
375 | }
376 | ```
377 |
378 | ## Reset
379 |
380 | `Utf8StreamReader` is a class that supports reuse. By calling `Reset()`, the Stream and internal state are released. Using `Reset(Stream)`, it can be reused with a new `Stream`.
381 |
382 | ## Options
383 |
384 | The constructor accepts `int bufferSize` and `bool leaveOpen` as parameters.
385 |
386 | `int bufferSize` defaults to 65536 and the buffer is rented from `ArrayPool`. If the data per line is large, changing the buffer size may improve performance. When the buffer size and the size per line are close, frequent buffer copy operations occur, leading to performance degradation.
387 |
388 | `bool leaveOpen` determines whether the internal Stream is also disposed when the object is disposed. The default is `false`, which means the Stream is disposed.
389 |
390 | Additionally, there are init properties that allow changing the option values for `ConfigureAwait`, `SyncRead` and `SkipBom`.
391 |
392 | `bool ConfigureAwait { init; }` allows you to specify the value for `ConfigureAwait(bool continueOnCapturedContext)` when awaiting asynchronous methods internally. The default is `false`.
393 |
394 | `bool SyncRead { init; }` configures the reader to read from the Stream synchronously, meaning it will use `Read` instead of `ReadAsync`. This causes all Async operations to complete synchronously. There is potential for slight performance improvements when a `FileStream` is opened with `useAsync:false`. Normally, leaving it as `false` is fine. The default is `false`.
395 |
396 | `bool SkipBom { init; }` determines whether to identify and skip the BOM (Byte Order Mark) included at the beginning of the data during the first read. The default is `true`, which means the BOM is skipped.
397 |
398 | Although it is not currently configurable, `Utf8StreamReader` only recognizes `CRLF(\r\n)` or `LF(\n)` as newline characters. Since environments that use `CR(\r)` are now extremely rare, the CR check is omitted for performance reasons. If you need this functionality, please let us know by creating an Issue. We will consider adding it as an option.
399 |
400 | Unity
401 | ---
402 | Unity, which supports .NET Standard 2.1, can run this library. Since the library is only provided through NuGet, it is recommended to use [NuGetForUnity](https://github.com/GlitchEnzo/NuGetForUnity) for installation.
403 |
404 | For detailed instructions on using NuGet libraries in Unity, please refer to the documentation of [Cysharp/R3](https://github.com/Cysharp/R3/) and other similar resources.
405 |
406 | License
407 | ---
408 | This library is under the MIT License.
409 |
--------------------------------------------------------------------------------
/Utf8StreamReader.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.8.34330.188
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader", "src\Utf8StreamReader\Utf8StreamReader.csproj", "{983561F1-F180-4188-AE80-BFA95FD69656}"
9 | EndProject
10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{5A8808D6-63E0-48EE-A115-0380E0E57156}"
11 | EndProject
12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Utf8StreamReader.Tests", "tests\Utf8StreamReader.Tests\Utf8StreamReader.Tests.csproj", "{6C953584-A04B-42C7-9CF3-267AFB010C2B}"
13 | EndProject
14 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "sandbox", "sandbox", "{6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}"
15 | EndProject
16 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleApp1", "sandbox\ConsoleApp1\ConsoleApp1.csproj", "{27B89B32-EC1A-48B0-BFC9-6172FCCE2961}"
17 | EndProject
18 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmark", "sandbox\Benchmark\Benchmark.csproj", "{48293CC8-A87C-4F59-A398-51CD37E6B62B}"
19 | EndProject
20 | Global
21 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
22 | Debug|Any CPU = Debug|Any CPU
23 | Release|Any CPU = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
26 | {983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
27 | {983561F1-F180-4188-AE80-BFA95FD69656}.Debug|Any CPU.Build.0 = Debug|Any CPU
28 | {983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.ActiveCfg = Release|Any CPU
29 | {983561F1-F180-4188-AE80-BFA95FD69656}.Release|Any CPU.Build.0 = Release|Any CPU
30 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
31 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Debug|Any CPU.Build.0 = Debug|Any CPU
32 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.ActiveCfg = Release|Any CPU
33 | {6C953584-A04B-42C7-9CF3-267AFB010C2B}.Release|Any CPU.Build.0 = Release|Any CPU
34 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
35 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Debug|Any CPU.Build.0 = Debug|Any CPU
36 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.ActiveCfg = Release|Any CPU
37 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961}.Release|Any CPU.Build.0 = Release|Any CPU
38 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
39 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Debug|Any CPU.Build.0 = Debug|Any CPU
40 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.ActiveCfg = Release|Any CPU
41 | {48293CC8-A87C-4F59-A398-51CD37E6B62B}.Release|Any CPU.Build.0 = Release|Any CPU
42 | EndGlobalSection
43 | GlobalSection(SolutionProperties) = preSolution
44 | HideSolutionNode = FALSE
45 | EndGlobalSection
46 | GlobalSection(NestedProjects) = preSolution
47 | {983561F1-F180-4188-AE80-BFA95FD69656} = {BD07BD08-1CB4-41AE-B2BD-3975BE13B8EC}
48 | {6C953584-A04B-42C7-9CF3-267AFB010C2B} = {5A8808D6-63E0-48EE-A115-0380E0E57156}
49 | {27B89B32-EC1A-48B0-BFC9-6172FCCE2961} = {6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}
50 | {48293CC8-A87C-4F59-A398-51CD37E6B62B} = {6BA94544-B2DF-4DD2-9390-DAA8AF5CA90A}
51 | EndGlobalSection
52 | GlobalSection(ExtensibilityGlobals) = postSolution
53 | SolutionGuid = {38C0CA37-B15E-4200-9F2C-AD08076E4013}
54 | EndGlobalSection
55 | EndGlobal
56 |
--------------------------------------------------------------------------------
/opensource.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Cysharp/Utf8StreamReader/a92ba5ef05e22234eb9ec7d02ac5b5f885b492bd/opensource.snk
--------------------------------------------------------------------------------
/sandbox/Benchmark/Benchmark.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net8.0
6 | enable
7 | enable
8 | false
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/sandbox/Benchmark/BytesReadToEnd.cs:
--------------------------------------------------------------------------------
1 | using BenchmarkDotNet.Attributes;
2 | using Cysharp.IO;
3 | using System.Text.Encodings.Web;
4 | using System.Text.Json;
5 | using System.Text.Unicode;
6 |
7 | namespace Benchmark;
8 |
9 | [SimpleJob, MemoryDiagnoser]
10 | public class BytesReadToEnd
11 | {
12 | const int C = 1000000;
13 |
14 | string filePath = default!;
15 |
16 | [GlobalSetup]
17 | public void GlobalSetup()
18 | {
19 | var options = new JsonSerializerOptions
20 | {
21 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
22 | };
23 |
24 | var path = Path.GetTempFileName();
25 | var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;
26 | using var file = File.OpenWrite(path);
27 | for (var i = 0; i < C; i++)
28 | {
29 | var json = JsonSerializer.SerializeToUtf8Bytes(
30 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
31 | file.Write(json);
32 | file.Write(newline);
33 | }
34 |
35 | filePath = path;
36 | }
37 |
38 | [GlobalCleanup]
39 | public void GlobalCleanup()
40 | {
41 | File.Delete(filePath);
42 | }
43 |
44 | [Benchmark]
45 | public async Task FileReadAllBytesAsync()
46 | {
47 | // ReadAllBytes knows file-length so fastest.
48 | return await File.ReadAllBytesAsync(filePath);
49 | }
50 |
51 | [Benchmark]
52 | public async Task Utf8StreamReaderReadToEndAsync()
53 | {
54 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath);
55 | return await sr.ReadToEndAsync();
56 | }
57 | }
58 |
59 | [SimpleJob, MemoryDiagnoser]
60 | public class BytesReadToEnd2
61 | {
62 | const int C = 1000000;
63 |
64 | string filePath = default!;
65 |
66 | [GlobalSetup]
67 | public void GlobalSetup()
68 | {
69 | var options = new JsonSerializerOptions
70 | {
71 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
72 | };
73 |
74 | var path = Path.GetTempFileName();
75 | var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;
76 | using var file = File.OpenWrite(path);
77 | for (var i = 0; i < C; i++)
78 | {
79 | var json = JsonSerializer.SerializeToUtf8Bytes(
80 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
81 | file.Write(json);
82 | file.Write(newline);
83 | }
84 |
85 | filePath = path;
86 | }
87 |
88 | [GlobalCleanup]
89 | public void GlobalCleanup()
90 | {
91 | File.Delete(filePath);
92 | }
93 |
94 | [Benchmark]
95 | public async Task MemoryStreamCopyToToArray()
96 | {
97 | using var fs = new FileStream(filePath, FileMode.Open);
98 | var ms = new MemoryStream();
99 | await fs.CopyToAsync(ms);
100 |
101 | return ms.ToArray();
102 | }
103 |
104 | [Benchmark]
105 | public async Task Utf8StreamReaderReadToEndAsync()
106 | {
107 | using var fs = new FileStream(filePath, FileMode.Open);
108 | using var sr = new Cysharp.IO.Utf8StreamReader(fs);
109 | return await sr.ReadToEndAsync(disableBomCheck: false); // hack for ignore optimize(for benchmark fairness)
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
/sandbox/Benchmark/FromFile.cs:
--------------------------------------------------------------------------------
1 | using BenchmarkDotNet.Attributes;
2 | using Cysharp.IO;
3 | using System.Text;
4 | using System.Text.Encodings.Web;
5 | using System.Text.Json;
6 | using System.Text.Unicode;
7 |
8 | namespace Benchmark;
9 |
10 | [SimpleJob, MemoryDiagnoser]
11 | public class FromFile
12 | {
13 | const int C = 1000000;
14 |
15 | string filePath = default!;
16 |
17 | [GlobalSetup]
18 | public void GlobalSetup()
19 | {
20 | var options = new JsonSerializerOptions
21 | {
22 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
23 | };
24 |
25 | var path = Path.GetTempFileName();
26 | var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;
27 | using var file = File.OpenWrite(path);
28 | for (var i = 0; i < C; i++)
29 | {
30 | var json = JsonSerializer.SerializeToUtf8Bytes(
31 | new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
32 | file.Write(json);
33 | file.Write(newline);
34 | }
35 |
36 | filePath = path;
37 | }
38 |
39 | [GlobalCleanup]
40 | public void GlobalCleanup()
41 | {
42 | File.Delete(filePath);
43 | }
44 |
45 | [Benchmark]
46 | public async Task StreamReaderFileStream()
47 | {
48 | using var sr = new System.IO.StreamReader(filePath);
49 | string? line;
50 | while ((line = await sr.ReadLineAsync()) != null)
51 | {
52 | // ...
53 | }
54 | }
55 |
56 | [Benchmark]
57 | public async Task FileReadLinesAsync()
58 | {
59 | await foreach (var line in File.ReadLinesAsync(filePath, Encoding.UTF8))
60 | {
61 | }
62 | }
63 |
64 | [Benchmark]
65 | public async Task Utf8StreamReaderFileStreamScalability()
66 | {
67 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability);
68 | while (await sr.LoadIntoBufferAsync())
69 | {
70 | while (sr.TryReadLine(out var line))
71 | {
72 | // ...
73 | }
74 | }
75 | }
76 |
77 | [Benchmark]
78 | public async Task Utf8StreamReaderFileStreamThroughput()
79 | {
80 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput);
81 | while (await sr.LoadIntoBufferAsync())
82 | {
83 | while (sr.TryReadLine(out var line))
84 | {
85 | // ...
86 | }
87 | }
88 | }
89 |
90 | [Benchmark]
91 | public async ValueTask Utf8StreamReaderFileStreamThroughputSyncRead()
92 | {
93 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true };
94 | while (await sr.LoadIntoBufferAsync())
95 | {
96 | while (sr.TryReadLine(out var line))
97 | {
98 | }
99 | }
100 | }
101 |
102 | [Benchmark]
103 | public async Task Utf8TextReaderFileStreamScalability()
104 | {
105 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader();
106 | while (await sr.LoadIntoBufferAsync())
107 | {
108 | while (sr.TryReadLine(out var line))
109 | {
110 | // ...
111 | }
112 | }
113 | }
114 |
115 | [Benchmark]
116 | public async Task Utf8TextReaderFileStreamThroughput()
117 | {
118 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader();
119 | while (await sr.LoadIntoBufferAsync())
120 | {
121 | while (sr.TryReadLine(out var line))
122 | {
123 | // ...
124 | }
125 | }
126 | }
127 |
128 | [Benchmark]
129 | public async ValueTask Utf8TextReaderFileStreamThroughputSyncRead()
130 | {
131 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput) { SyncRead = true }.AsTextReader();
132 | while (await sr.LoadIntoBufferAsync())
133 | {
134 | while (sr.TryReadLine(out var line))
135 | {
136 | // ...
137 | }
138 | }
139 | }
140 |
141 | [Benchmark]
142 | public async Task Utf8TextReaderToStringFileStreamScalability()
143 | {
144 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Scalability).AsTextReader();
145 | while (await sr.LoadIntoBufferAsync())
146 | {
147 | while (sr.TryReadLine(out var line))
148 | {
149 | _ = line.ToString();
150 | }
151 | }
152 | }
153 |
154 | [Benchmark]
155 | public async Task Utf8TextReaderToStringFileStreamThroughput()
156 | {
157 | using var sr = new Cysharp.IO.Utf8StreamReader(filePath, fileOpenMode: FileOpenMode.Throughput).AsTextReader();
158 | while (await sr.LoadIntoBufferAsync())
159 | {
160 | while (sr.TryReadLine(out var line))
161 | {
162 | _ = line.ToString();
163 | }
164 | }
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/sandbox/Benchmark/FromMemory.cs:
--------------------------------------------------------------------------------
1 | using System.Buffers;
2 | using System.IO.Pipelines;
3 | using System.Text;
4 | using System.Text.Encodings.Web;
5 | using System.Text.Json;
6 | using System.Text.Unicode;
7 | using BenchmarkDotNet.Attributes;
8 | using Cysharp.IO;
9 |
10 | namespace Benchmark;
11 |
12 | [SimpleJob, MemoryDiagnoser]
13 | public class FromMemory
14 | {
15 | const int C = 1000000;
16 | // const int C = 100;
17 |
18 | byte[] utf8Data = default!;
19 | MemoryStream ms = default!;
20 |
21 | [GlobalSetup]
22 | public void GlobalSetup()
23 | {
24 | var options = new JsonSerializerOptions
25 | {
26 | Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
27 | };
28 |
29 | var jsonLines = Enumerable.Range(0, C)
30 | .Select(x => new MyClass { MyProperty = x, MyProperty2 = "あいうえおかきくけこ" })
31 | .Select(x => JsonSerializer.Serialize(x, options))
32 | .ToArray();
33 |
34 | utf8Data = Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, jsonLines));
35 | }
36 |
37 | [IterationSetup]
38 | public void Setup()
39 | {
40 | ms = new MemoryStream(utf8Data);
41 | }
42 |
43 | [Benchmark]
44 | public async Task StreamReader()
45 | {
46 | using var sr = new System.IO.StreamReader(ms);
47 | string? line;
48 | while ((line = await sr.ReadLineAsync()) != null)
49 | {
50 | // Console.WriteLine(line);
51 | }
52 | }
53 |
54 | [Benchmark]
55 | public async Task Utf8StreamReader()
56 | {
57 | using var sr = new Cysharp.IO.Utf8StreamReader(ms);
58 | while (await sr.LoadIntoBufferAsync())
59 | {
60 | while (sr.TryReadLine(out var line))
61 | {
62 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
63 | }
64 | }
65 | }
66 |
67 | [Benchmark]
68 | public async Task Utf8TextReader()
69 | {
70 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
71 | while (await sr.LoadIntoBufferAsync())
72 | {
73 | while (sr.TryReadLine(out var line))
74 | {
75 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
76 | }
77 | }
78 | }
79 |
80 | [Benchmark]
81 | public async Task Utf8TextReaderToString()
82 | {
83 | using var sr = new Cysharp.IO.Utf8StreamReader(ms).AsTextReader();
84 | while (await sr.LoadIntoBufferAsync())
85 | {
86 | while (sr.TryReadLine(out var line))
87 | {
88 | _ = line.ToString();
89 | // Console.WriteLine(Encoding.UTF8.GetString( line.Span));
90 | }
91 | }
92 | }
93 |
94 | //[Benchmark]
95 | //public async Task Utf8StreamReaderReadLine()
96 | //{
97 | // using var sr = new Cysharp.IO.Utf8StreamReader(ms);
98 | // ReadOnlyMemory? line;
99 | // while ((line = await sr.ReadLineAsync()) != null)
100 | // {
101 | // // Console.WriteLine(Encoding.UTF8.GetString(line.Value.Span));
102 | // }
103 | //}
104 |
105 | //[Benchmark]
106 | //public async Task Utf8StreamReaderReadAllLines()
107 | //{
108 | // using var sr = new Cysharp.IO.Utf8StreamReader(ms);
109 | // await foreach (var line in sr.ReadAllLinesAsync())
110 | // {
111 | // //Console.WriteLine(Encoding.UTF8.GetString(line.Span));
112 | // }
113 | //}
114 |
115 | [Benchmark]
116 | public async Task PipeReaderSequenceReader()
117 | {
118 | using (ms)
119 | {
120 | var reader = PipeReader.Create(ms);
121 |
122 | READ_AGAIN:
123 | var readResult = await reader.ReadAsync();
124 |
125 | if (!(readResult.IsCompleted | readResult.IsCanceled))
126 | {
127 | var buffer = readResult.Buffer;
128 |
129 | while (TryReadData(ref buffer, out var line))
130 | {
131 | //Console.WriteLine(Encoding.UTF8.GetString(line));
132 | }
133 |
134 | reader.AdvanceTo(buffer.Start, buffer.End);
135 | goto READ_AGAIN;
136 | }
137 |
138 | }
139 |
140 | static bool TryReadData(ref ReadOnlySequence buffer, out ReadOnlySequence line)
141 | {
142 | var reader = new SequenceReader(buffer);
143 | if (reader.TryReadTo(out line, (byte)'\n', advancePastDelimiter: true))
144 | {
145 | buffer = buffer.Slice(reader.Consumed);
146 | return true;
147 | }
148 | return false;
149 | }
150 | }
151 |
152 | //[Benchmark]
153 | //public async Task PipelineStreamReader2()
154 | //{
155 | // using (ms)
156 | // {
157 | // var reader = PipeReader.Create(ms);
158 |
159 | // READ_AGAIN:
160 | // var readResult = await reader.ReadAsync();
161 |
162 | // if (!(readResult.IsCompleted | readResult.IsCanceled))
163 | // {
164 | // var buffer = readResult.Buffer;
165 | // ConsumeAllData(ref buffer);
166 | // reader.AdvanceTo(buffer.Start, buffer.End);
167 | // goto READ_AGAIN;
168 | // }
169 | // }
170 |
171 | // static void ConsumeAllData(ref ReadOnlySequence buffer)
172 | // {
173 | // var reader = new SequenceReader(buffer);
174 | // while (reader.TryReadTo(out ReadOnlySequence line, (byte)'\n', advancePastDelimiter: true))
175 | // {
176 | // //Console.WriteLine(Encoding.UTF8.GetString(line));
177 | // }
178 | // buffer = buffer.Slice(reader.Consumed);
179 | // }
180 | //}
181 | }
182 |
183 |
184 | public class MyClass // sample payload that the benchmarks' GlobalSetup serializes to JSON lines
185 | {
186 | public int MyProperty { get; set; } // set to the line index by the setup code
187 | public string? MyProperty2 { get; set; } // set to a fixed non-ASCII string by the setup code
188 | }
189 |
--------------------------------------------------------------------------------
/sandbox/Benchmark/Program.cs:
--------------------------------------------------------------------------------
1 | #if DEBUG
2 |
3 | using Benchmark;
4 | using System.Runtime.CompilerServices;
5 |
6 | global::System.Console.WriteLine("DEBUG");
7 |
8 | //var benchmark = new BytesReadToEnd();
9 | var benchmark = new ReadToEndString();
10 | benchmark.GlobalSetup();
11 |
12 | //var s1 = await benchmark.FileReadAllBytesAsync();
13 | var s2 = await benchmark.Utf8TextReaderReadToEndAsync();
14 |
15 | //Console.WriteLine(s1.SequenceEqual(s2));
16 |
17 | benchmark.GlobalCleanup();
18 |
19 | #else
20 | using BenchmarkDotNet.Running;
21 |
22 | BenchmarkSwitcher
23 | .FromAssembly(typeof(Program).Assembly)
24 | .Run(args);
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/sandbox/Benchmark/ReadToEndString.cs:
--------------------------------------------------------------------------------
1 | using BenchmarkDotNet.Attributes;
2 | using Cysharp.IO;
3 | using System.Text.Encodings.Web;
4 | using System.Text.Json;
5 | using System.Text.Unicode;
6 |
7 | namespace Benchmark;
8 |
/// <summary>
/// Benchmarks that read a whole UTF-8 file and return its content, using
/// <see cref="System.IO.StreamReader"/>, the Utf8StreamReader text-reader adapter, and
/// <see cref="File.ReadAllTextAsync(string, System.Threading.CancellationToken)"/>.
/// </summary>
// NOTE(review): the extracted source declared the benchmark methods as plain `Task` although each one
// returns the awaited result (which does not compile). The generic argument was evidently lost during
// extraction; `Task<string>` is restored below. For Utf8TextReaderReadToEndAsync the result type of the
// project's ReadToEndAsync is assumed to be string — confirm against the repository.
[SimpleJob, MemoryDiagnoser]
public class ReadToEndString
{
    // Number of JSON lines written into the input file.
    const int C = 1000000;

    // Path of the temporary input file; assigned in GlobalSetup and deleted in GlobalCleanup.
    string filePath = default!;

    /// <summary>
    /// Creates a temporary file and fills it with <c>C</c> JSON-serialized <c>MyClass</c> lines.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var options = new JsonSerializerOptions
        {
            // Allow every Unicode range so the Japanese text is written as-is instead of \u-escaped.
            Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
        };

        var path = Path.GetTempFileName();

        // Use the platform's line terminator, already encoded as UTF-8 bytes.
        var newline = OperatingSystem.IsWindows() ? "\r\n"u8 : "\n"u8;

        using var file = File.OpenWrite(path);
        for (var i = 0; i < C; i++)
        {
            var json = JsonSerializer.SerializeToUtf8Bytes(
                new MyClass { MyProperty = i, MyProperty2 = "あいうえおかきくけこ" }, options);
            file.Write(json);
            file.Write(newline);
        }

        filePath = path;
    }

    /// <summary>Deletes the temporary input file created by <see cref="GlobalSetup"/>.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        File.Delete(filePath);
    }

    /// <summary>Reads the file to the end with <see cref="System.IO.StreamReader"/>.</summary>
    /// <returns>The full file content.</returns>
    [Benchmark]
    public async Task<string> StreamReaderReadToEndAsync()
    {
        using var sr = new System.IO.StreamReader(filePath);
        return await sr.ReadToEndAsync();
    }

    /// <summary>Reads the file to the end through <c>Utf8StreamReader.AsTextReader()</c>.</summary>
    /// <returns>The full file content.</returns>
    [Benchmark]
    public async Task<string> Utf8TextReaderReadToEndAsync()
    {
        using var sr = new Cysharp.IO.Utf8StreamReader(filePath).AsTextReader();
        return await sr.ReadToEndAsync();
    }

    /// <summary>Reads the file with <c>File.ReadAllTextAsync</c>.</summary>
    /// <returns>The full file content.</returns>
    [Benchmark]
    public async Task<string> FileReadAllTextAsync()
    {
        return await File.ReadAllTextAsync(filePath);
    }
}
65 |
--------------------------------------------------------------------------------
/sandbox/ConsoleApp1/ConsoleApp1.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net8.0
6 | enable
7 | enable
8 | false
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | Always
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/sandbox/ConsoleApp1/Program.cs:
--------------------------------------------------------------------------------
1 | using Cysharp.IO;
2 | using Microsoft.Win32.SafeHandles;
3 | using System.Buffers;
4 | using System.Buffers.Text;
5 | using System.IO;
6 | using System.IO.Pipelines;
7 | using System.Runtime.InteropServices;
8 | using System.Runtime.InteropServices.Marshalling;
9 | using System.Text;
10 | using System.Text.Encodings.Web;
11 | using System.Text.Json;
12 | using System.Text.Unicode;
13 |
14 |
15 |
16 |
// Sample input: the bytes of "$5\r\nhello\r\n", wrapped in an in-memory stream.
var stream = new MemoryStream(Encoding.UTF8.GetBytes("$5\r\nhello\r\n"));

// SkipBom is explicitly switched off for this run.
using var reader = new Utf8StreamReader(stream)
{
    SkipBom = false
};

// Read everything the reader has left into a byte array.
byte[] bytes = await reader.ReadToEndAsync();
22 |
23 |
24 | //while (await reader.LoadIntoBufferAsync())
25 | //{
26 | // while (reader.TryReadLine(out var line))
27 | // {
28 | // switch (line.Span[0])
29 | // {
30 | // case (byte)'$':
31 | // Utf8Parser.TryParse(line.Span.Slice(1), out int size, out _);
32 | // if (!reader.TryReadBlock(size + 2, out var block)) // +2 is \r\n
33 | // {
34 | // // ReadBlockAsync is TryReadBlock + LoadIntoBufferAtLeastAsync
35 | // block = await reader.ReadBlockAsync(size + 2);
36 | // }
37 | // yield return block.Slice(0, size);
38 | // break;
39 | // // and others('+', '-', ':', '*')
40 | // default:
41 | // break;
42 | // }
43 | // }
44 | //}
45 |
46 |
47 | //var path = "file1.txt";
48 |
49 |
50 | //var fs = new FileStream(path, FileMode.Open,FileAccess.Read, FileShare.Read, 0, false);
51 | //var buf = new byte[1024];
52 | //await fs.ReadAsync(buf);
53 |
54 | //using var reader = new Utf8StreamReader(path).AsTextReader();
55 |
56 |
57 |
58 | //var str = await reader.ReadToEndAsync();
59 | //Console.WriteLine(str.ToString());
60 |
61 | // new StreamReader().ReadBlock(
62 |
63 |
64 | //var options = new JsonSerializerOptions();
65 | //options.Encoder = JavaScriptEncoder.Create(UnicodeRanges.All);
66 |
67 | //var jsonLines = Enumerable.Range(0, 100000)
68 | // .Select(x => new MyClass { MyProperty = x, MyProperty2 = "あいうえおかきくけこ" })
69 | // .Select(x => JsonSerializer.Serialize(x, options))
70 | // .ToArray();
71 |
72 | //var utf8Data = Encoding.UTF8.GetBytes(string.Join(Environment.NewLine, jsonLines));
73 |
74 | //var ms = new MemoryStream(utf8Data);
75 |
76 |
77 | ////using var sr = new System.IO.StreamReader(ms);
78 | ////string? line;
79 | ////while ((line = await sr.ReadLineAsync()) != null)
80 | ////{
81 | //// // JsonSerializer.Deserialize(line);
82 | ////}
83 |
84 | //using var sr = new Cysharp.IO.Utf8StreamReader(ms);
85 | //ReadOnlyMemory? line;
86 | //while ((line = await sr.ReadLineAsync()) != null)
87 | //{
88 | //}
89 |
90 |
91 |
92 | //public class MyClass
93 | //{
94 | // public int MyProperty { get; set; }
95 | // public string? MyProperty2 { get; set; }
96 | //}
97 |
98 |
--------------------------------------------------------------------------------
/sandbox/ConsoleApp1/ReadMeSample.cs:
--------------------------------------------------------------------------------
1 | using Cysharp.IO;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Text.Json;
7 | using System.Threading.Tasks;
8 |
9 | namespace ConsoleApp1;
10 |
/// <summary>
/// README samples showing three ways to consume lines from a <see cref="Utf8StreamReader"/>.
/// </summary>
// NOTE(review): the extracted source had lost its generic arguments (`ReadOnlyMemory?`,
// `JsonSerializer.Deserialize(...)`). `ReadOnlyMemory<byte>` and `Deserialize<Foo>` are reconstructed here;
// Foo is assumed to be the target type because it is the only candidate declared in this file — confirm.
internal class ReadMeSample
{
    /// <summary>
    /// Buffer-oriented loop: load data into the reader's buffer, then drain every complete line from it.
    /// </summary>
    /// <param name="stream">Source stream wrapped by the reader.</param>
    /// <returns>A task that completes when the stream has been fully consumed.</returns>
    // Returns Task rather than void so callers can await completion and observe exceptions.
    public async Task Sample1(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Most performant style, similar to System.Threading.Channels
        while (await reader.LoadIntoBufferAsync())
        {
            while (reader.TryReadLine(out var line))
            {
                // line is ReadOnlyMemory<byte>; deserialize the UTF-8 bytes directly.
                _ = JsonSerializer.Deserialize<Foo>(line.Span);
            }
        }
    }

    /// <summary>
    /// Line-by-line loop in the same shape as a <c>StreamReader.ReadLineAsync</c> loop.
    /// </summary>
    /// <param name="stream">Source stream wrapped by the reader.</param>
    /// <returns>A task that completes when <c>ReadLineAsync</c> returns <c>null</c>.</returns>
    public async Task Sample2(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Classical style, same as StreamReader
        ReadOnlyMemory<byte>? line = null;
        while ((line = await reader.ReadLineAsync()) != null)
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Value.Span);
        }
    }

    /// <summary>
    /// Async-stream loop over <c>ReadAllLinesAsync</c>.
    /// </summary>
    /// <param name="stream">Source stream wrapped by the reader.</param>
    /// <returns>A task that completes when the async enumeration ends.</returns>
    public async Task Sample3(Stream stream)
    {
        using var reader = new Utf8StreamReader(stream);

        // Easiest style, using async streams
        await foreach (var line in reader.ReadAllLinesAsync())
        {
            _ = JsonSerializer.Deserialize<Foo>(line.Span);
        }
    }
}
53 |
54 |
/// <summary>
/// Empty placeholder type declared alongside the README samples.
/// </summary>
public class Foo { }
59 |
--------------------------------------------------------------------------------
/sandbox/ConsoleApp1/RespReader.cs:
--------------------------------------------------------------------------------
1 | using Cysharp.IO;
2 | using System.Buffers.Text;
3 | using System.Text;
4 |
5 | namespace ConsoleApp1;
6 |
/// <summary>
/// Message kinds handled by <c>RespReader</c>. Each member's numeric value is the ASCII code of the
/// character shown in its initializer.
/// </summary>
public enum RespType : byte
{
    /// <summary>Marked by '+'.</summary>
    SimpleStrings = (byte)'+',

    /// <summary>Marked by '-'.</summary>
    Errors = (byte)'-',

    /// <summary>Marked by ':'.</summary>
    Integers = (byte)':',

    /// <summary>Marked by '$'.</summary>
    BulkStrings = (byte)'$',

    /// <summary>Marked by '*'.</summary>
    Arrays = (byte)'*',
}
15 |
16 | public class RespReader : IDisposable
17 | {
18 | Utf8StreamReader reader;
19 |
/// <summary>
/// Wraps <paramref name="stream"/> in a new <see cref="Utf8StreamReader"/> that all read methods use.
/// </summary>
/// <param name="stream">Source stream passed to the underlying reader.</param>
public RespReader(Stream stream) => reader = new Utf8StreamReader(stream);
24 |
25 | // NOTE: for faster processing, use the reader's TryRead methods instead.
26 |
/// <summary>
/// Reads the next value from the underlying reader and returns it as a <see cref="RespType"/>.
/// </summary>
/// <param name="cancellationToken">Token forwarded to the underlying read.</param>
/// <returns>The value that was read, cast to <see cref="RespType"/>; the cast does not validate it.</returns>
// NOTE(review): the extracted source declared a bare `ValueTask` although the body returns a value.
// The generic argument was evidently lost in extraction; ValueTask<RespType> is restored — confirm.
public async ValueTask<RespType> ReadRespTypeAsync(CancellationToken cancellationToken = default)
{
    var marker = await reader.ReadAsync(cancellationToken);
    return (RespType)marker;
}
31 |
32 | // All read-message APIs below expect ReadRespTypeAsync to have been called first (the type prefix is already trimmed).
33 |
/// <summary>
/// Reads one line and decodes it as a UTF-8 string (e.g. the <c>OK</c> of <c>+OK\r\n</c>).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the underlying line read.</param>
/// <returns>The decoded line.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Nullable&lt;T&gt;.Value</c> when the reader returns no line.
/// </exception>
// NOTE(review): the extracted source declared a bare `ValueTask` although the body returns a string.
// The generic argument was evidently lost in extraction; ValueTask<string> is restored — confirm.
public async ValueTask<string> ReadSimpleStringAsync(CancellationToken cancellationToken = default) // +OK\r\n
{
    var line = await reader.ReadLineAsync(cancellationToken);
    return Encoding.UTF8.GetString(line.Value.Span);
}
38 |
/// <summary>
/// Reads one line and decodes it as a UTF-8 error message (e.g. <c>-Error message\r\n</c>).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the underlying line read.</param>
/// <returns>The decoded line.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Nullable&lt;T&gt;.Value</c> when the reader returns no line.
/// </exception>
// NOTE(review): the extracted source declared a bare `ValueTask` although the body returns a string.
// The generic argument was evidently lost in extraction; ValueTask<string> is restored — confirm.
public async ValueTask<string> ReadErrorMessageAsync(CancellationToken cancellationToken = default) // -Error message\r\n
{
    var line = await reader.ReadLineAsync(cancellationToken);
    return Encoding.UTF8.GetString(line.Value.Span);
}
43 |
/// <summary>
/// Reads one line and parses it as a 64-bit integer (e.g. <c>:1000\r\n</c>).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the underlying line read.</param>
/// <returns>The parsed integer.</returns>
/// <exception cref="FormatException">The line does not start with a valid integer.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Nullable&lt;T&gt;.Value</c> when the reader returns no line.
/// </exception>
// NOTE(review): the extracted source declared a bare `ValueTask` although the body returns a long.
// The generic argument was evidently lost in extraction; ValueTask<long> is restored — confirm.
public async ValueTask<long> ReadIntegerAsync(CancellationToken cancellationToken = default) // :1000\r\n
{
    var line = await reader.ReadLineAsync(cancellationToken);

    // Previously the TryParse result was ignored, so malformed input was silently reported as 0.
    if (!Utf8Parser.TryParse(line.Value.Span, out long value, out _))
    {
        throw new FormatException("RESP integer line does not contain a valid 64-bit integer.");
    }

    return value;
}
50 |
/// <summary>
/// Reads a bulk string: a length line followed by that many bytes and a trailing <c>\r\n</c>
/// (e.g. <c>$5\r\nhello\r\n</c>).
/// </summary>
/// <param name="cancellationToken">Token forwarded to the underlying reads.</param>
/// <returns>
/// The payload without its trailing newline, or <c>null</c> when the length line is <c>-1</c>.
/// </returns>
/// <exception cref="FormatException">The length line does not start with a valid integer.</exception>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>Nullable&lt;T&gt;.Value</c> when the reader returns no line.
/// </exception>
// NOTE(review): the return type was garbled to `ValueTask?>` in the extracted source. It is reconstructed
// as ValueTask<ReadOnlyMemory<byte>?> from the two return statements (null, and a slice of the block
// returned by ReadBlockAsync, presumed to be ReadOnlyMemory<byte>) — confirm against the repository.
public async ValueTask<ReadOnlyMemory<byte>?> ReadBulkStringAsync(CancellationToken cancellationToken = default) // "$5\r\nhello\r\n"
{
    var header = await reader.ReadLineAsync(cancellationToken);

    // Previously the TryParse result was ignored, so a malformed header was treated as length 0.
    if (!Utf8Parser.TryParse(header.Value.Span, out int count, out _))
    {
        throw new FormatException("RESP bulk string header does not contain a valid length.");
    }

    if (count == -1)
    {
        return null;
    }

    // +2 covers the \r\n that terminates the payload.
    var dataWithNewLine = await reader.ReadBlockAsync(count + 2, cancellationToken);
    return dataWithNewLine[..^2]; // without newline
}
65 |
66 | // For better performance, dedicated ReadIntegerArray, ReadStringArray, and ReadArray (for bulk strings) methods would be a better approach.
67 | public async ValueTask