├── LICENSE-Apache2 ├── LICENSE-Boost ├── README.md ├── RyuCsharp.sln ├── RyuCsharp ├── AssertException.cs ├── RyuCsharp.csproj ├── Status.cs ├── common.cs ├── d2fixed.cs ├── d2fixed_full_table.cs ├── d2s.cs ├── d2s_full_table.cs ├── d2s_intrinsics.cs ├── d2s_small_table.cs ├── digit_table.cs ├── f2s.cs ├── f2s_full_table.cs ├── floating_decimal_32.cs ├── floating_decimal_64.cs └── s2d.cs └── RyuCsharpTest ├── Program.cs └── RyuCsharpTest.csproj /LICENSE-Apache2: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-Boost: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RyuCsharp 2 | 3 | 使用 C# 实现的 [Ryu](https://github.com/ulfjack/ryu) ,一个高性能浮点数转换为字符串的算法。 4 | 5 | 6 | A C# implementation of [Ryu](https://github.com/ulfjack/ryu), a high-performance algorithm for converting floating-point numbers to strings. 7 | 8 | 9 | 10 | ### 2023-09-21 RyuCsharp update notes: 11 | 12 | - To avoid managed memory fragmentation, the implementation was changed from pointers to references. (The difference between a pointer and a reference is that the former is an unmanaged pointer and the latter is a managed pointer.) 13 | - Inherited the open-source licenses of the parent project to make the library easier for everyone to use. 14 | - Fixed bugs. 15 | - Published a [NuGet package](https://www.nuget.org/packages/RyuCsharp/). 16 | 17 | ### Usage example: 18 | 19 | ```C# 20 | 21 | { 22 | // Array 23 | char[] charArray = new char[32]; // The buffer must be large enough to hold the output. 
24 | var writtenLength = Ryu.d2s_buffered_n(3.1415926D, ref charArray[0]); 25 | Console.WriteLine(new string(charArray, 0, writtenLength)); // Output: 3.1415926E0 26 | } 27 | 28 | { 29 | // Span 30 | Span<char> charSpan = (new char[32]).AsSpan(); 31 | var writtenLength = Ryu.d2s_buffered_n(3.1415926D, ref charSpan[0]); 32 | Console.WriteLine(charSpan.Slice(0, writtenLength).ToString()); // Output: 3.1415926E0 33 | } 34 | 35 | { 36 | // Pointer (requires an unsafe context) 37 | char* pChars = stackalloc char[32]; 38 | Ryu.d2s_buffered(3.1415926D, ref *pChars); 39 | Console.WriteLine(new string(pChars)); // Output: 3.1415926E0 40 | } 41 | 42 | ``` 43 | -------------------------------------------------------------------------------- /RyuCsharp.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29418.71 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RyuCsharp", "RyuCsharp\RyuCsharp.csproj", "{BF9D8C01-1F34-49F8-B0BE-0A88B0193266}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "RyuCsharpTest", "RyuCsharpTest\RyuCsharpTest.csproj", "{56A4475F-7C25-441A-A18F-52B44D7F7201}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {BF9D8C01-1F34-49F8-B0BE-0A88B0193266}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 17 | {BF9D8C01-1F34-49F8-B0BE-0A88B0193266}.Debug|Any CPU.Build.0 = Debug|Any CPU 18 | {BF9D8C01-1F34-49F8-B0BE-0A88B0193266}.Release|Any CPU.ActiveCfg = Release|Any CPU 19 | {BF9D8C01-1F34-49F8-B0BE-0A88B0193266}.Release|Any CPU.Build.0 = Release|Any CPU 20 | {56A4475F-7C25-441A-A18F-52B44D7F7201}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 21 | 
{56A4475F-7C25-441A-A18F-52B44D7F7201}.Debug|Any CPU.Build.0 = Debug|Any CPU 22 | {56A4475F-7C25-441A-A18F-52B44D7F7201}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {56A4475F-7C25-441A-A18F-52B44D7F7201}.Release|Any CPU.Build.0 = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {C07085A4-EB67-4FB6-9C87-93FBC7604744} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /RyuCsharp/AssertException.cs: -------------------------------------------------------------------------------- 1 | namespace RyuCsharp; 2 | 3 |
// Exception type thrown by the library's internal assert helper when a
// checked condition is false. It carries no message or payload; the implicit
// public parameterless constructor is the only way to create it.
sealed class AssertException : Exception
{
}
-------------------------------------------------------------------------------- /RyuCsharp/RyuCsharp.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netstandard2.0 5 | TRACE;NDEBUG; 6 | 11 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /RyuCsharp/Status.cs: -------------------------------------------------------------------------------- 1 | namespace RyuCsharp; 2 | 3 |
// Result codes exposed by the library. The numeric values are spelled out and
// match the implicit numbering of the original declaration order.
public enum Status
{
    SUCCESS = 0,
    INPUT_TOO_SHORT = 1,
    INPUT_TOO_LONG = 2,
    MALFORMED_INPUT = 3
}
-------------------------------------------------------------------------------- /RyuCsharp/common.cs: -------------------------------------------------------------------------------- 1 | global using System; 2 | global using System.Runtime.CompilerServices; 3 | global using int32_t = System.Int32; 4 | global using uint32_t = System.UInt32; 5 | global using uint64_t = System.UInt64; 6 | global using uint8_t = System.Byte; 7 | global using uint16_t = System.UInt16; 8 | global using int64_t = System.Int64; 9 | 10 | 11 | 12 | 13 
| 14 | namespace RyuCsharp; 15 | 16 |
// Shared constants and low-level helpers (logarithm approximations, raw bit
// access, and C-runtime look-alikes such as memcpy/memset/strlen) used by the
// other parts of this partial class.
public static partial class Ryu
{
    // Bit-layout constants used when decoding a double into sign, exponent and mantissa.
    const int DOUBLE_MANTISSA_BITS = 52;
    const int DOUBLE_EXPONENT_BITS = 11;
    const int DOUBLE_BIAS = 1023;

    // Returns the number of decimal digits in v, which must not contain more than 9 digits.
    static uint32_t decimalLength9(uint32_t v)
    {
        // Function precondition: v is not a 10-digit number.
        // (f2s: 9 digits are sufficient for round-tripping.)
        // (d2fixed: We print 9-digit blocks.)
        assert(v < 1000000000);
        if (v >= 100000000) { return 9; }
        if (v >= 10000000) { return 8; }
        if (v >= 1000000) { return 7; }
        if (v >= 100000) { return 6; }
        if (v >= 10000) { return 5; }
        if (v >= 1000) { return 4; }
        if (v >= 100) { return 3; }
        if (v >= 10) { return 2; }
        return 1;
    }

    // Returns floor(log_2(5^e)) (which is 0 for e == 0); requires 0 <= e <= 3528.
    static int32_t log2pow5(int32_t e)
    {
        // This approximation works up to the point that the multiplication overflows at e = 3529.
        // If the multiplication were done in 64 bits, it would fail at 5^4004 which is just greater
        // than 2^9297.
        assert(e >= 0);
        assert(e <= 3528);
        return (int32_t)((((uint32_t)e) * 1217359) >> 19);
    }

    // Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
    static int32_t pow5bits(int32_t e)
    {
        // Same fixed-point approximation (and the same overflow limit) as log2pow5, plus one.
        assert(e >= 0);
        assert(e <= 3528);
        return (int32_t)(((((uint32_t)e) * 1217359) >> 19) + 1);
    }

    // Returns e == 0 ? 1 : ceil(log_2(5^e)); requires 0 <= e <= 3528.
    // Produces the same value as pow5bits.
    static int32_t ceil_log2pow5(int32_t e)
    {
        return log2pow5(e) + 1;
    }

    // Returns floor(log_10(2^e)); requires 0 <= e <= 1650.
    static uint32_t log10Pow2(int32_t e)
    {
        // The first value this approximation fails for is 2^1651 which is just greater than 10^297.
        assert(e >= 0);
        assert(e <= 1650);
        return (((uint32_t)e) * 78913) >> 18;
    }

    // Returns floor(log_10(5^e)); requires 0 <= e <= 2620.
    static uint32_t log10Pow5(int32_t e)
    {
        // The first value this approximation fails for is 5^2621 which is just greater than 10^1832.
        assert(e >= 0);
        assert(e <= 2620);
        return (((uint32_t)e) * 732923) >> 20;
    }

    // Writes the textual form of a special value to result and returns the
    // number of chars written (no terminator is appended):
    //   mantissa == true              -> "NaN" (the sign is ignored)
    //   exponent == true              -> "Infinity", preceded by '-' when sign is set
    //   otherwise                     -> "0E0", preceded by '-' when sign is set
    // The caller must provide room for up to 9 chars.
    static int copy_special_str(ref char result, bool sign, bool exponent, bool mantissa)
    {
        if (mantissa)
        {
            memcpy(ref result, "NaN", 3);
            return 3;
        }

        int offset = 0;
        if (sign)
        {
            result = '-';
            offset = 1;
        }

        if (exponent)
        {
            memcpy(ref Unsafe.Add(ref result, offset), "Infinity", 8);
            return offset + 8;
        }

        memcpy(ref Unsafe.Add(ref result, offset), "0E0", 3);
        return offset + 3;
    }

    // Reinterprets the bits of a float as an unsigned 32-bit integer.
    static uint32_t float_to_bits(float f)
    {
        return Unsafe.As<float, uint32_t>(ref f);
    }

    // Reinterprets the bits of a double as an unsigned 64-bit integer.
    static uint64_t double_to_bits(double d)
    {
        return Unsafe.As<double, uint64_t>(ref d);
    }

    // Copies _Size chars from _Src to _Dst. The byte count is computed in a
    // checked context, so an overflowing size throws instead of wrapping.
    static void memcpy(ref char _Dst, ref char _Src, uint32_t _Size)
    {
        // char references are only guaranteed to be 2-byte aligned, so use the
        // variant that makes no alignment assumption about either address.
        Unsafe.CopyBlockUnaligned(
            ref Unsafe.As<char, byte>(ref _Dst),
            ref Unsafe.As<char, byte>(ref _Src),
            checked(_Size * (uint32_t)sizeof(char))
        );
    }

    // Copies the first _Size chars of _Src to _Dst.
    // Uses the string indexer rather than the object's raw layout
    // (RuntimeHelpers.OffsetToStringData is obsolete); asking for more chars
    // than the string holds throws IndexOutOfRangeException.
    static void memcpy(ref char _Dst, string _Src, uint32_t _Size)
    {
        for (int i = 0; i < _Size; i++)
        {
            Unsafe.Add(ref _Dst, i) = _Src[i];
        }
    }

    // Stores _Val into the _Size consecutive chars starting at _Dst.
    static void memset(ref char _Dst, char _Val, uint32_t _Size)
    {
        for (int i = 0; i < _Size; i++)
        {
            Unsafe.Add(ref _Dst, i) = _Val;
        }
    }

    // Returns the number of chars before the first '\0' starting at str.
    // The scan is unbounded: the caller must guarantee a terminator exists.
    static int32_t strlen(ref char str)
    {
        int32_t len = 0;

        while (Unsafe.Add(ref str, len) != '\0')
        {
            ++len;
        }

        return len;
    }

    // Counts the leading zero bits of value with a branching binary search.
    // Note: for value == 0 this implementation returns 63 (every step adds its
    // width, 32 + 16 + 8 + 4 + 2 + 1), not 64.
    static uint32_t __builtin_clzll(uint64_t value)
    {
        uint32_t r = 0;

        if ((value & 0xffffffff00000000UL) == 0)
        {
            r += 32;
            value <<= 32;
        }

        if ((value & 0xffff000000000000UL) == 0)
        {
            r += 16;
            value <<= 16;
        }

        if ((value & 0xff00000000000000UL) == 0)
        {
            r += 8;
            value <<= 8;
        }

        if ((value & 0xf000000000000000UL) == 0)
        {
            r += 4;
            value <<= 4;
        }

        if ((value & 0xC000000000000000UL) == 0)
        {
            r += 2;
            value <<= 2;
        }

        if ((value & 0x8000000000000000UL) == 0)
        {
            r += 1;
        }

        return r;
    }

#if NDEBUG
    // Assertions compile to a no-op when NDEBUG is defined.
    static void assert(bool expression) { }
#else
    // Throws AssertException when expression is false.
    static void assert(bool expression)
    {
        if (!expression)
        {
            throw new AssertException();
        }
    }
#endif
}
-------------------------------------------------------------------------------- /RyuCsharp/d2fixed.cs: -------------------------------------------------------------------------------- 1 | namespace RyuCsharp; 2 | 3 | partial class Ryu 4 | { 5 | const int POW10_ADDITIONAL_BITS = 120; 6 | 7 | // Returns the low 64 bits of the high 128 bits of the 256-bit product of a and b.
static uint64_t umul256_hi128_lo64(uint64_t aHi, uint64_t aLo, uint64_t bHi, uint64_t bLo)
{
    // Partial products, named bXY = (a's half X) * (b's half Y) with 0 = low and 1 = high.
    // Only the halves that can reach the requested 64-bit window are kept.
    umul128(aLo, bLo, out uint64_t b00Hi);
    uint64_t b01Lo = umul128(aLo, bHi, out uint64_t b01Hi);
    uint64_t b10Lo = umul128(aHi, bLo, out uint64_t b10Hi);
    uint64_t b11Lo = umul128(aHi, bHi, out _);

    // Sum the middle column and propagate its carries into the next one.
    uint64_t temp1Lo = b10Lo + b00Hi;
    uint64_t temp1Hi = b10Hi;
    if (temp1Lo < b10Lo)
    {
        ++temp1Hi;
    }

    uint64_t temp2Lo = b01Lo + temp1Lo;
    uint64_t temp2Hi = b01Hi;
    if (temp2Lo < b01Lo)
    {
        ++temp2Hi;
    }

    return b11Lo + temp1Hi + temp2Hi;
}

// Reduces the 128-bit value (vHi, vLo) modulo 10^9 using a multiply-and-shift
// by a fixed 128-bit constant instead of a division.
static uint32_t uint128_mod1e9(uint64_t vHi, uint64_t vLo)
{
    // After multiplying, we're going to shift right by 29, then truncate to uint32_t.
    // This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
    uint64_t multiplied = umul256_hi128_lo64(vHi, vLo, 0x89705F4136B4A597u, 0x31680A88F8953031u);

    // For uint32_t truncation, see the mod1e9() comment in d2s_intrinsics.h.
    uint32_t shifted = (uint32_t)(multiplied >> 29);

    return ((uint32_t)vLo) - 1000000000 * shifted;
}

// Multiplies m by the three consecutive 64-bit words starting at mul (word 0 is
// the least significant), shifts the product right by j bits and returns the
// result modulo 10^9. Requires 128 <= j <= 180.
static uint32_t mulShift_mod1e9(uint64_t m, ref uint64_t mul, int32_t j)
{
    // The trailing numbers are the bit offset of each value within the full product.
    umul128(m, mul, out uint64_t high0);                                    // 64 (low half at 0 is not needed)
    uint64_t low1 = umul128(m, Unsafe.Add(ref mul, 1), out uint64_t high1); // 64, 128
    uint64_t low2 = umul128(m, Unsafe.Add(ref mul, 2), out uint64_t high2); // 128, 192

    uint64_t s0high = low1 + high0;                                         // 64
    uint32_t c1 = s0high < low1 ? 1U : 0;
    uint64_t s1low = low2 + high1 + c1;                                     // 128
    uint32_t c2 = s1low < low2 ? 1U : 0; // high1 + c1 can't overflow, so compare against low2
    uint64_t s1high = high2 + c2;                                           // 192

    assert(j >= 128);
    assert(j <= 180);
    uint32_t dist = (uint32_t)(j - 128); // dist: [0, 52]
    uint64_t shiftedhigh = s1high >> (int)dist;
    uint64_t shiftedlow = shiftright128(s1low, s1high, dist);
    return uint128_mod1e9(shiftedhigh, shiftedlow);
}

// Writes the olength decimal digits of digits to result[0 .. olength - 1],
// filling from the least significant end two chars at a time via DIGIT_TABLE.
// olength must be the exact digit count of digits.
static void append_n_digits(uint32_t olength, uint32_t digits, ref char result)
{
    uint32_t i = 0;
    while (digits >= 10000)
    {
        uint32_t c = digits % 10000;
        digits /= 10000;
        uint32_t c0 = (c % 100) << 1;
        uint32_t c1 = (c / 100) << 1;
        memcpy(ref Unsafe.Add(ref result, (int)(olength - i - 2)), ref DIGIT_TABLE[c0], 2);
        memcpy(ref Unsafe.Add(ref result, (int)(olength - i - 4)), ref DIGIT_TABLE[c1], 2);
        i += 4;
    }
    if (digits >= 100)
    {
        uint32_t c = (digits % 100) << 1;
        digits /= 100;
        memcpy(ref Unsafe.Add(ref result, (int)(olength - i - 2)), ref DIGIT_TABLE[c], 2);
        i += 2;
    }
    if (digits >= 10)
    {
        uint32_t c = digits << 1;
        memcpy(ref Unsafe.Add(ref result, (int)(olength - i - 2)), ref DIGIT_TABLE[c], 2);
    }
    else
    {
        result = (char)('0' + digits);
    }
}

// Same as append_n_digits, but places a '.' after the first digit, so
// olength + 1 chars are written: result[0] is the leading digit, result[1] is
// '.', and the remaining digits follow.
static void append_d_digits(uint32_t olength, uint32_t digits, ref char result)
{
    uint32_t i = 0;
    while (digits >= 10000)
    {
        uint32_t c = digits % 10000;
        digits /= 10000;
        uint32_t c0 = (c % 100) << 1;
        uint32_t c1 = (c / 100) << 1;
        memcpy(ref Unsafe.Add(ref result, (int)(olength + 1 - i - 2)), ref DIGIT_TABLE[c0], 2);
        memcpy(ref Unsafe.Add(ref result, (int)(olength + 1 - i - 4)), ref DIGIT_TABLE[c1], 2);
        i += 4;
    }
    if (digits >= 100)
    {
        uint32_t c = (digits % 100) << 1;
        digits /= 100;
        memcpy(ref Unsafe.Add(ref result, (int)(olength + 1 - i - 2)), ref DIGIT_TABLE[c], 2);
        i += 2;
    }
    if (digits >= 10)
    {
        // Two digits left: they go on either side of the decimal point.
        uint32_t c = digits << 1;
        Unsafe.Add(ref result, 2) = DIGIT_TABLE[c + 1];
        Unsafe.Add(ref result, 1) = '.';
        result = DIGIT_TABLE[c];
    }
    else
    {
        Unsafe.Add(ref result, 1) = '.';
        result = (char)('0' + digits);
    }
}

// Writes the count least significant decimal digits of digits to
// result[0 .. count - 1], zero-padded on the left. Writes nothing when count is 0.
static void append_c_digits(uint32_t count, uint32_t digits, ref char result)
{
    uint32_t i = 0;
    // "i + 1 < count" rather than "i < count - 1": count is unsigned, so the
    // subtraction would wrap around for count == 0 and run far out of bounds.
    for (; i + 1 < count; i += 2)
    {
        uint32_t c = (digits % 100) << 1;
        digits /= 100;
        memcpy(ref Unsafe.Add(ref result, (int)(count - i - 2)), ref DIGIT_TABLE[c], 2);
    }
    if (i < count)
    {
        // Odd count: one single digit remains for the front slot.
        Unsafe.Add(ref result, (int)(count - i - 1)) = (char)('0' + (digits % 10));
    }
}

// Writes digits as exactly nine chars, zero-padded on the left, to result[0 .. 8].
static void append_nine_digits(uint32_t digits, ref char result)
{
    if (digits == 0)
    {
        memset(ref result, '0', 9);
        return;
    }

    // Two rounds of four digits each fill result[1 .. 8] from the right.
    for (uint32_t i = 0; i < 5; i += 4)
    {
        uint32_t c = digits % 10000;
        digits /= 10000;
        uint32_t c0 = (c % 100) << 1;
        uint32_t c1 = (c / 100) << 1;
        memcpy(ref Unsafe.Add(ref result, (int)(7 - i)), ref DIGIT_TABLE[c0], 2);
        memcpy(ref Unsafe.Add(ref result, (int)(5 - i)), ref DIGIT_TABLE[c1], 2);
    }
    // The single remaining digit is the most significant one.
    result = (char)('0' + digits);
}

// Maps a binary exponent to its table index: ceil(e / 16).
static uint32_t indexForExponent(uint32_t e)
{
    return (e + 15) / 16;
}

// Returns 16 * idx + POW10_ADDITIONAL_BITS for the given table index.
static uint32_t pow10BitsForIndex(uint32_t idx)
{
    return 16 * idx + POW10_ADDITIONAL_BITS;
}

// Returns the number of 9-digit blocks to process for the given table index.
static uint32_t lengthForIndex(uint32_t idx)
{
    // +1 for ceil, +16 for mantissa, +8 to round up when dividing by 9
    return (log10Pow2(16 * (int32_t)idx) + 1 + 16 + 8) / 9;
}

// Writes the printf-style text for a NaN or infinity to result and returns the
// number of chars written. mantissa != 0 selects NaN, otherwise infinity.
// Without _MSC_VER: NaN is written as "nan" with no sign; infinity is written
// as "Infinity", preceded by '-' when sign is set.
// With _MSC_VER: the '-' is also written for NaN, and a NaN whose mantissa is
// below 2^(DOUBLE_MANTISSA_BITS - 1) is written as "nan(snan)".
static int copy_special_str_printf(ref char result, bool sign, uint64_t mantissa)
{
    int signLength = sign ? 1 : 0;
#if _MSC_VER
    // TODO: Check that -nan is expected output on Windows.
    if (sign)
    {
        result = '-';
    }
    if (mantissa != 0)
    {
        if (mantissa < (1ul << (DOUBLE_MANTISSA_BITS - 1)))
        {
            memcpy(ref Unsafe.Add(ref result, signLength), "nan(snan)", 9);
            return signLength + 9;
        }
        memcpy(ref Unsafe.Add(ref result, signLength), "nan", 3);
        return signLength + 3;
    }
#else
    if (mantissa != 0)
    {
        memcpy(ref result, "nan", 3);
        return 3;
    }
    if (sign)
    {
        result = '-';
    }
#endif
    memcpy(ref Unsafe.Add(ref result, signLength), "Infinity", 8);
    return signLength + 8;
}

222 | public static int d2fixed_buffered_n(double d, uint32_t precision, ref char result) 223 | { 224 | uint64_t bits = double_to_bits(d); 225 | 226 | 227 | // Decode bits into sign, mantissa, and exponent. 
228 | bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; 229 | uint64_t ieeeMantissa = bits & ((1ul << DOUBLE_MANTISSA_BITS) - 1); 230 | uint32_t ieeeExponent = (uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1)); 231 | 232 | // Case distinction; exit early for the easy cases. 233 | if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) 234 | { 235 | return copy_special_str_printf(ref result, ieeeSign, ieeeMantissa); 236 | } 237 | if (ieeeExponent == 0 && ieeeMantissa == 0) 238 | { 239 | int index2 = 0; 240 | if (ieeeSign) 241 | { 242 | Unsafe.Add(ref result, index2++) = '-'; 243 | } 244 | Unsafe.Add(ref result, index2++) = '0'; 245 | if (precision > 0) 246 | { 247 | Unsafe.Add(ref result, index2++) = '.'; 248 | memset(ref Unsafe.Add(ref result, index2), '0', precision); 249 | index2 += (int)precision; 250 | } 251 | return index2; 252 | } 253 | 254 | int32_t e2; 255 | uint64_t m2; 256 | if (ieeeExponent == 0) 257 | { 258 | e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; 259 | m2 = ieeeMantissa; 260 | } 261 | else 262 | { 263 | e2 = (int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; 264 | m2 = (1ul << DOUBLE_MANTISSA_BITS) | ieeeMantissa; 265 | } 266 | 267 | 268 | int index = 0; 269 | bool nonzero = false; 270 | if (ieeeSign) 271 | { 272 | Unsafe.Add(ref result, index++) = '-'; 273 | } 274 | if (e2 >= -52) 275 | { 276 | uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t)e2); 277 | uint32_t p10bits = pow10BitsForIndex(idx); 278 | int32_t len = (int32_t)lengthForIndex(idx); 279 | 280 | for (int32_t i = len - 1; i >= 0; --i) 281 | { 282 | uint32_t j = (uint32_t)(p10bits - e2); 283 | // Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is 284 | // a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers. 
285 | uint32_t digits = mulShift_mod1e9(m2 << 8, ref POW10_SPLIT[(uint)(POW10_OFFSET[(uint)idx] + i), 0], (int32_t)(j + 8)); 286 | if (nonzero) 287 | { 288 | append_nine_digits(digits, ref Unsafe.Add(ref result, index)); 289 | index += 9; 290 | } 291 | else if (digits != 0) 292 | { 293 | uint32_t olength = decimalLength9(digits); 294 | append_n_digits(olength, digits, ref Unsafe.Add(ref result, index)); 295 | index += (int)olength; 296 | nonzero = true; 297 | } 298 | } 299 | } 300 | if (!nonzero) 301 | { 302 | Unsafe.Add(ref result, index++) = '0'; 303 | } 304 | if (precision > 0) 305 | { 306 | Unsafe.Add(ref result, index++) = '.'; 307 | } 308 | if (e2 < 0) 309 | { 310 | int32_t idx = -e2 / 16; 311 | 312 | uint32_t blocks = precision / 9 + 1; 313 | // 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd. 314 | int roundUp = 0; 315 | uint32_t i = 0; 316 | if (blocks <= MIN_BLOCK_2[(uint)idx]) 317 | { 318 | i = blocks; 319 | memset(ref Unsafe.Add(ref result, index), '0', precision); 320 | index += (int)precision; 321 | } 322 | else if (i < MIN_BLOCK_2[(uint)idx]) 323 | { 324 | i = MIN_BLOCK_2[(uint)idx]; 325 | memset(ref Unsafe.Add(ref result, index), '0', 9 * i); 326 | index += (int)(9 * i); 327 | } 328 | for (; i < blocks; ++i) 329 | { 330 | int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx); 331 | uint32_t p = POW10_OFFSET_2[(uint)idx] + i - MIN_BLOCK_2[(uint)idx]; 332 | if (p >= POW10_OFFSET_2[(uint)idx + 1]) 333 | { 334 | // If the remaining digits are all 0, then we might as well use memset. 335 | // No rounding required in this case. 336 | uint32_t fill = precision - 9 * i; 337 | memset(ref Unsafe.Add(ref result, index), '0', fill); 338 | index += (int)fill; 339 | break; 340 | } 341 | // Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is 342 | // a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers. 
343 | uint32_t digits = mulShift_mod1e9(m2 << 8, ref POW10_SPLIT_2[p, 0], j + 8); 344 | 345 | if (i < blocks - 1) 346 | { 347 | append_nine_digits(digits, ref Unsafe.Add(ref result, index)); 348 | index += 9; 349 | } 350 | else 351 | { 352 | uint32_t maximum = precision - 9 * i; 353 | uint32_t lastDigit = 0; 354 | for (uint32_t k = 0; k < 9 - maximum; ++k) 355 | { 356 | lastDigit = digits % 10; 357 | digits /= 10; 358 | } 359 | 360 | if (lastDigit != 5) 361 | { 362 | roundUp = lastDigit > 5 ? 1 : 0; 363 | } 364 | else 365 | { 366 | // Is m * 10^(additionalDigits + 1) / 2^(-e2) integer? 367 | int32_t requiredTwos = -e2 - (int32_t)precision - 1; 368 | bool trailingZeros = requiredTwos <= 0 369 | || (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t)requiredTwos)); 370 | roundUp = trailingZeros ? 2 : 1; 371 | 372 | } 373 | if (maximum > 0) 374 | { 375 | append_c_digits(maximum, digits, ref Unsafe.Add(ref result, index)); 376 | index += (int)maximum; 377 | } 378 | break; 379 | } 380 | } 381 | 382 | if (roundUp != 0) 383 | { 384 | int roundIndex = index; 385 | int dotIndex = 0; // '.' 
can't be located at index 0 386 | while (true) 387 | { 388 | --roundIndex; 389 | char c; 390 | if (roundIndex == -1 || (c = Unsafe.Add(ref result, roundIndex)) == '-') 391 | { 392 | Unsafe.Add(ref result, roundIndex + 1) = '1'; 393 | if (dotIndex > 0) 394 | { 395 | Unsafe.Add(ref result, dotIndex) = '0'; 396 | Unsafe.Add(ref result, dotIndex + 1) = '.'; 397 | } 398 | Unsafe.Add(ref result, index++) = '0'; 399 | break; 400 | } 401 | if (c == '.') 402 | { 403 | dotIndex = roundIndex; 404 | continue; 405 | } 406 | else if (c == '9') 407 | { 408 | Unsafe.Add(ref result, roundIndex) = '0'; 409 | roundUp = 1; 410 | continue; 411 | } 412 | else 413 | { 414 | if (roundUp == 2 && c % 2 == 0) 415 | { 416 | break; 417 | } 418 | Unsafe.Add(ref result, roundIndex) = (char)(c + 1); 419 | break; 420 | } 421 | } 422 | } 423 | } 424 | else 425 | { 426 | memset(ref Unsafe.Add(ref result, index), '0', precision); 427 | index += (int)precision; 428 | } 429 | return index; 430 | } 431 | 432 | static void d2fixed_buffered(double d, uint32_t precision, ref char result) 433 | { 434 | int len = d2fixed_buffered_n(d, precision, ref result); 435 | Unsafe.Add(ref result, len) = '\0'; 436 | } 437 | 438 | public static int d2exp_buffered_n(double d, uint32_t precision, ref char result) 439 | { 440 | uint64_t bits = double_to_bits(d); 441 | 442 | 443 | // Decode bits into sign, mantissa, and exponent. 444 | bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0; 445 | uint64_t ieeeMantissa = bits & ((1ul << DOUBLE_MANTISSA_BITS) - 1); 446 | uint32_t ieeeExponent = (uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1)); 447 | 448 | // Case distinction; exit early for the easy cases. 
449 | if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) 450 | { 451 | return copy_special_str_printf(ref result, ieeeSign, ieeeMantissa); 452 | } 453 | if (ieeeExponent == 0 && ieeeMantissa == 0) 454 | { 455 | int index2 = 0; 456 | if (ieeeSign) 457 | { 458 | Unsafe.Add(ref result, index2++) = '-'; 459 | } 460 | Unsafe.Add(ref result, index2++) = '0'; 461 | if (precision > 0) 462 | { 463 | Unsafe.Add(ref result, index2++) = '.'; 464 | memset(ref Unsafe.Add(ref result, index2), '0', precision); 465 | index2 += (int)precision; 466 | } 467 | memcpy(ref Unsafe.Add(ref result, index2), "e+00", 4); 468 | index2 += 4; 469 | return index2; 470 | } 471 | 472 | int32_t e2; 473 | uint64_t m2; 474 | if (ieeeExponent == 0) 475 | { 476 | e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; 477 | m2 = ieeeMantissa; 478 | } 479 | else 480 | { 481 | e2 = (int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS; 482 | m2 = (1ul << DOUBLE_MANTISSA_BITS) | ieeeMantissa; 483 | } 484 | 485 | 486 | bool printDecimalPoint = precision > 0; 487 | ++precision; 488 | int index = 0; 489 | if (ieeeSign) 490 | { 491 | Unsafe.Add(ref result, index++) = '-'; 492 | } 493 | uint32_t digits = 0; 494 | uint32_t printedDigits = 0; 495 | uint32_t availableDigits = 0; 496 | int32_t exp = 0; 497 | if (e2 >= -52) 498 | { 499 | uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t)e2); 500 | uint32_t p10bits = pow10BitsForIndex(idx); 501 | int32_t len = (int32_t)lengthForIndex(idx); 502 | 503 | for (int32_t i = len - 1; i >= 0; --i) 504 | { 505 | uint32_t j = (uint32_t)(p10bits - e2); 506 | // Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is 507 | // a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers. 
508 | digits = mulShift_mod1e9(m2 << 8, ref POW10_SPLIT[(uint)(POW10_OFFSET[(uint)idx] + i), 0], (int32_t)(j + 8)); 509 | if (printedDigits != 0) 510 | { 511 | if (printedDigits + 9 > precision) 512 | { 513 | availableDigits = 9; 514 | break; 515 | } 516 | append_nine_digits(digits, ref Unsafe.Add(ref result, index)); 517 | index += 9; 518 | printedDigits += 9; 519 | } 520 | else if (digits != 0) 521 | { 522 | availableDigits = decimalLength9(digits); 523 | exp = i * 9 + (int32_t)availableDigits - 1; 524 | if (availableDigits > precision) 525 | { 526 | break; 527 | } 528 | if (printDecimalPoint) 529 | { 530 | append_d_digits(availableDigits, digits, ref Unsafe.Add(ref result, index)); 531 | index += (int)(availableDigits + 1); // +1 for decimal point 532 | } 533 | else 534 | { 535 | Unsafe.Add(ref result, index++) = (char)('0' + digits); 536 | } 537 | printedDigits = availableDigits; 538 | availableDigits = 0; 539 | } 540 | } 541 | } 542 | 543 | if (e2 < 0 && availableDigits == 0) 544 | { 545 | int32_t idx = -e2 / 16; 546 | 547 | for (int32_t i = MIN_BLOCK_2[(uint)idx]; i < 200; ++i) 548 | { 549 | int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx); 550 | uint32_t p = POW10_OFFSET_2[(uint)idx] + (uint32_t)i - MIN_BLOCK_2[(uint)idx]; 551 | // Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is 552 | // a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers. 553 | digits = (p >= POW10_OFFSET_2[(uint)(idx + 1)]) ? 
0 : mulShift_mod1e9(m2 << 8, ref POW10_SPLIT_2[p, 0], j + 8); 554 | 555 | if (printedDigits != 0) 556 | { 557 | if (printedDigits + 9 > precision) 558 | { 559 | availableDigits = 9; 560 | break; 561 | } 562 | append_nine_digits(digits, ref Unsafe.Add(ref result, index)); 563 | index += 9; 564 | printedDigits += 9; 565 | } 566 | else if (digits != 0) 567 | { 568 | availableDigits = decimalLength9(digits); 569 | exp = -(i + 1) * 9 + (int32_t)availableDigits - 1; 570 | if (availableDigits > precision) 571 | { 572 | break; 573 | } 574 | if (printDecimalPoint) 575 | { 576 | append_d_digits(availableDigits, digits, ref Unsafe.Add(ref result, index)); 577 | index += (int)(availableDigits + 1); // +1 for decimal point 578 | } 579 | else 580 | { 581 | Unsafe.Add(ref result, index++) = (char)('0' + digits); 582 | } 583 | printedDigits = availableDigits; 584 | availableDigits = 0; 585 | } 586 | } 587 | } 588 | 589 | uint32_t maximum = precision - printedDigits; 590 | 591 | if (availableDigits == 0) 592 | { 593 | digits = 0; 594 | } 595 | uint32_t lastDigit = 0; 596 | if (availableDigits > maximum) 597 | { 598 | for (uint32_t k = 0; k < availableDigits - maximum; ++k) 599 | { 600 | lastDigit = digits % 10; 601 | digits /= 10; 602 | } 603 | } 604 | 605 | // 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd. 606 | int roundUp = 0; 607 | if (lastDigit != 5) 608 | { 609 | roundUp = lastDigit > 5 ? 1 : 0; 610 | } 611 | else 612 | { 613 | // Is m * 2^e2 * 10^(precision + 1 - exp) integer? 614 | // precision was already increased by 1, so we don't need to write + 1 here. 
615 | int32_t rexp = (int32_t)precision - exp; 616 | int32_t requiredTwos = -e2 - rexp; 617 | bool trailingZeros = requiredTwos <= 0 618 | || (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t)requiredTwos)); 619 | if (rexp < 0) 620 | { 621 | int32_t requiredFives = -rexp; 622 | trailingZeros = trailingZeros && multipleOfPowerOf5(m2, (uint32_t)requiredFives); 623 | } 624 | roundUp = trailingZeros ? 2 : 1; 625 | 626 | } 627 | if (printedDigits != 0) 628 | { 629 | if (digits == 0) 630 | { 631 | memset(ref Unsafe.Add(ref result, index), '0', maximum); 632 | } 633 | else 634 | { 635 | append_c_digits(maximum, digits, ref Unsafe.Add(ref result, index)); 636 | } 637 | index += (int)maximum; 638 | } 639 | else 640 | { 641 | if (printDecimalPoint) 642 | { 643 | append_d_digits(maximum, digits, ref Unsafe.Add(ref result, index)); 644 | index += (int)(maximum + 1); // +1 for decimal point 645 | } 646 | else 647 | { 648 | Unsafe.Add(ref result, index++) = (char)('0' + digits); 649 | } 650 | } 651 | 652 | if (roundUp != 0) 653 | { 654 | int roundIndex = index; 655 | while (true) 656 | { 657 | --roundIndex; 658 | char c; 659 | if (roundIndex == -1 || ((c = Unsafe.Add(ref result, roundIndex)) == '-')) 660 | { 661 | Unsafe.Add(ref result, roundIndex + 1) = '1'; 662 | ++exp; 663 | break; 664 | } 665 | if (c == '.') 666 | { 667 | continue; 668 | } 669 | else if (c == '9') 670 | { 671 | Unsafe.Add(ref result, roundIndex) = '0'; 672 | roundUp = 1; 673 | continue; 674 | } 675 | else 676 | { 677 | if (roundUp == 2 && c % 2 == 0) 678 | { 679 | break; 680 | } 681 | Unsafe.Add(ref result, roundIndex) = (char)(c + 1); 682 | break; 683 | } 684 | } 685 | } 686 | Unsafe.Add(ref result, index++) = 'e'; 687 | if (exp < 0) 688 | { 689 | Unsafe.Add(ref result, index++) = '-'; 690 | exp = -exp; 691 | } 692 | else 693 | { 694 | Unsafe.Add(ref result, index++) = '+'; 695 | } 696 | 697 | if (exp >= 100) 698 | { 699 | int32_t c = exp % 10; 700 | memcpy(ref Unsafe.Add(ref result, index), ref 
DIGIT_TABLE[2 * (exp / 10)], 2); 701 | Unsafe.Add(ref result, index + 2) = (char)('0' + c); 702 | index += 3; 703 | } 704 | else 705 | { 706 | memcpy(ref Unsafe.Add(ref result, index), ref DIGIT_TABLE[2 * exp], 2); 707 | index += 2; 708 | } 709 | 710 | return index; 711 | } 712 | 713 | static void d2exp_buffered(double d, uint32_t precision, ref char result) 714 | { 715 | int len = d2exp_buffered_n(d, precision, ref result); 716 | Unsafe.Add(ref result, len) = '\0'; 717 | } 718 | 719 | } -------------------------------------------------------------------------------- /RyuCsharp/d2s.cs: -------------------------------------------------------------------------------- 1 |  2 | namespace RyuCsharp; 3 | 4 | partial class Ryu 5 | { 6 | static uint32_t decimalLength17(uint64_t v) 7 | { 8 | // This is slightly faster than a loop. 9 | // The average output length is 16.38 digits, so we check high-to-low. 10 | // Function precondition: v is not an 18, 19, or 20-digit number. 11 | // (17 digits are sufficient for round-tripping.) 12 | assert(v < 100000000000000000L); 13 | if (v >= 10000000000000000L) { return 17; } 14 | if (v >= 1000000000000000L) { return 16; } 15 | if (v >= 100000000000000L) { return 15; } 16 | if (v >= 10000000000000L) { return 14; } 17 | if (v >= 1000000000000L) { return 13; } 18 | if (v >= 100000000000L) { return 12; } 19 | if (v >= 10000000000L) { return 11; } 20 | if (v >= 1000000000L) { return 10; } 21 | if (v >= 100000000L) { return 9; } 22 | if (v >= 10000000L) { return 8; } 23 | if (v >= 1000000L) { return 7; } 24 | if (v >= 100000L) { return 6; } 25 | if (v >= 10000L) { return 5; } 26 | if (v >= 1000L) { return 4; } 27 | if (v >= 100L) { return 3; } 28 | if (v >= 10L) { return 2; } 29 | return 1; 30 | } 31 | 32 | static floating_decimal_64 d2d(uint64_t ieeeMantissa, uint32_t ieeeExponent) 33 | { 34 | int32_t e2; 35 | uint64_t m2; 36 | if (ieeeExponent == 0) 37 | { 38 | // We subtract 2 so that the bounds computation has 2 additional bits. 
e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
            m2 = ieeeMantissa;
        }
        else
        {
            e2 = (int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS - 2;
            m2 = (1ul << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
        }
        bool even = (m2 & 1) == 0;
        bool acceptBounds = even;

        // Step 2: Determine the interval of valid decimal representations.
        uint64_t mv = 4 * m2;
        // Implicit bool -> int conversion. True is 1, false is 0.
        uint32_t mmShift = (ieeeMantissa != 0 || ieeeExponent <= 1) ? 1U : 0;
        // We would compute mp and mm like this:
        // uint64_t mp = 4 * m2 + 2;
        // uint64_t mm = mv - 1 - mmShift;

        // Step 3: Convert to a decimal power base using 128-bit arithmetic.
        uint64_t vr, vp, vm;
        int32_t e10;
        bool vmIsTrailingZeros = false;
        bool vrIsTrailingZeros = false;
        if (e2 >= 0)
        {
            // I tried special-casing q == 0, but there was no effect on performance.
            // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
            uint32_t q = log10Pow2(e2);
            if (e2 > 3) --q;
            e10 = (int32_t)q;
            int32_t k = DOUBLE_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;
            int32_t i = -e2 + (int32_t)q + k;

            vr = mulShiftAll64(m2, ref DOUBLE_POW5_INV_SPLIT[q, 0], i, out vp, out vm, mmShift);

            if (q <= 21)
            {
                // This should use q <= 22, but I think 21 is also safe. Smaller values
                // may still be safe, but it's more difficult to reason about them.
                // Only one of mp, mv, and mm can be a multiple of 5, if any.
                uint32_t mvMod5 = ((uint32_t)mv) - 5 * ((uint32_t)div5(mv));
                if (mvMod5 == 0)
                {
                    vrIsTrailingZeros = multipleOfPowerOf5(mv, q);
                }
                else if (acceptBounds)
                {
                    // Same as min(e2 + (~mm & 1), pow5Factor(mm)) >= q
                    // <=> e2 + (~mm & 1) >= q && pow5Factor(mm) >= q
                    // <=> true && pow5Factor(mm) >= q, since e2 >= q.
                    vmIsTrailingZeros = multipleOfPowerOf5(mv - 1 - mmShift, q);
                }
                else
                {
                    // Same as min(e2 + 1, pow5Factor(mp)) >= q.
                    if (multipleOfPowerOf5(mv + 2, q)) --vp;
                }
            }
        }
        else
        {
            // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
            uint32_t q = log10Pow5(-e2);
            if (-e2 > 1) --q;
            e10 = (int32_t)q + e2;
            int32_t i = -e2 - (int32_t)q;
            int32_t k = pow5bits(i) - DOUBLE_POW5_BITCOUNT;
            int32_t j = (int32_t)q - k;
            vr = mulShiftAll64(m2, ref DOUBLE_POW5_SPLIT[(uint)i, 0], j, out vp, out vm, mmShift);
            if (q <= 1)
            {
                // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
                // mv = 4 * m2, so it always has at least two trailing 0 bits.
                vrIsTrailingZeros = true;
                if (acceptBounds)
                {
                    // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
                    vmIsTrailingZeros = mmShift == 1;
                }
                else
                {
                    // mp = mv + 2, so it always has at least one trailing 0 bit.
                    --vp;
                }
            }
            else if (q < 63)
            { // TODO(ulfjack): Use a tighter bound here.
                // We want to know if the full product has at least q trailing zeros.
                // We need to compute min(p2(mv), p5(mv) - e2) >= q
                // <=> p2(mv) >= q && p5(mv) - e2 >= q
                // <=> p2(mv) >= q (because -e2 >= q)
                vrIsTrailingZeros = multipleOfPowerOf2(mv, q);

            }
        }

        // Step 4: Find the shortest decimal representation in the interval of valid representations.
        int32_t removed = 0;
        uint8_t lastRemovedDigit = 0;
        uint64_t output;
        // On average, we remove ~2 digits.
        if (vmIsTrailingZeros || vrIsTrailingZeros)
        {
            // General case, which happens rarely (~0.7%).
            for (; ; )
            {
                uint64_t vpDiv10 = div10(vp);
                uint64_t vmDiv10 = div10(vm);
                if (vpDiv10 <= vmDiv10)
                {
                    break;
                }
                uint32_t vmMod10 = ((uint32_t)vm) - 10 * ((uint32_t)vmDiv10);
                uint64_t vrDiv10 = div10(vr);
                uint32_t vrMod10 = ((uint32_t)vr) - 10 * ((uint32_t)vrDiv10);
                vmIsTrailingZeros &= vmMod10 == 0;
                vrIsTrailingZeros &= lastRemovedDigit == 0;
                lastRemovedDigit = (uint8_t)vrMod10;
                vr = vrDiv10;
                vp = vpDiv10;
                vm = vmDiv10;
                ++removed;
            }
            if (vmIsTrailingZeros)
            {
                // Keep stripping digits for as long as vm ends in a zero.
                for (; ; )
                {
                    uint64_t vmDiv10 = div10(vm);
                    uint32_t vmMod10 = ((uint32_t)vm) - 10 * ((uint32_t)vmDiv10);
                    if (vmMod10 != 0)
                    {
                        break;
                    }
                    uint64_t vpDiv10 = div10(vp);
                    uint64_t vrDiv10 = div10(vr);
                    uint32_t vrMod10 = ((uint32_t)vr) - 10 * ((uint32_t)vrDiv10);
                    vrIsTrailingZeros &= lastRemovedDigit == 0;
                    lastRemovedDigit = (uint8_t)vrMod10;
                    vr = vrDiv10;
                    vp = vpDiv10;
                    vm = vmDiv10;
                    ++removed;
                }
            }
            if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
            {
                // Round even if the exact number is .....50..0.
                lastRemovedDigit = 4;
            }
            // We need to take vr + 1 if vr is outside bounds or we need to round up.
            output = vr;
            if ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5) ++output;
        }
        else
        {
            // Specialized for the common case (~99.3%). Percentages below are relative to this.
            bool roundUp = false;
            uint64_t vpDiv100 = div100(vp);
            uint64_t vmDiv100 = div100(vm);
            if (vpDiv100 > vmDiv100)
            { // Optimization: remove two digits at a time (~86.2%).
                uint64_t vrDiv100 = div100(vr);
                uint32_t vrMod100 = ((uint32_t)vr) - 100 * ((uint32_t)vrDiv100);
                roundUp = vrMod100 >= 50;
                vr = vrDiv100;
                vp = vpDiv100;
                vm = vmDiv100;
                removed += 2;
            }
            // Loop iterations below (approximately), without optimization above:
            // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
            // Loop iterations below (approximately), with optimization above:
            // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
            for (; ; )
            {
                uint64_t vpDiv10 = div10(vp);
                uint64_t vmDiv10 = div10(vm);
                if (vpDiv10 <= vmDiv10)
                {
                    break;
                }
                uint64_t vrDiv10 = div10(vr);
                uint32_t vrMod10 = ((uint32_t)vr) - 10 * ((uint32_t)vrDiv10);
                roundUp = vrMod10 >= 5;
                vr = vrDiv10;
                vp = vpDiv10;
                vm = vmDiv10;
                ++removed;
            }

            // We need to take vr + 1 if vr is outside bounds or we need to round up.
            output = vr;
            if (vr == vm || roundUp) ++output;
        }
        // Every removed digit moves one power of ten into the exponent.
        int32_t exp = e10 + removed;

        floating_decimal_64 fd = default;
        fd.exponent = exp;
        fd.mantissa = output;
        return fd;
    }

    // Writes v as "[-]d[.ddd]E[-]exp" starting at `result` and returns the number of
    // chars written. The exponent gets a '-' when negative and no sign otherwise, and is
    // printed with one, two or three digits as needed.
    static int to_chars(floating_decimal_64 v, bool sign, ref char result)
    {
        // Step 5: Print the decimal representation.
        int index = 0;
        if (sign)
        {
            Unsafe.Add(ref result, index++) = '-';
        }

        uint64_t output = v.mantissa;
        uint32_t olength = decimalLength17(output);

        // Print the decimal digits.
        // The following code is equivalent to:
        // for (uint32_t i = 0; i < olength - 1; ++i) {
        //   const uint32_t c = output % 10; output /= 10;
        //   result[index + olength - i] = (char) ('0' + c);
        // }
        // result[index] = '0' + output % 10;

        // i counts the digits already emitted, working from the least significant end.
        uint32_t i = 0;
        // We prefer 32-bit operations, even on 64-bit platforms.
        // We have at most 17 digits, and uint32_t can store 9 digits.
        // If output doesn't fit into uint32_t, we cut off 8 digits,
        // so the rest will fit into uint32_t.
        if ((output >> 32) != 0)
        {
            // Expensive 64-bit division.
            uint64_t q = div1e8(output);
            uint32_t output3 = ((uint32_t)output) - 100000000 * ((uint32_t)q);
            output = q;

            uint32_t c = output3 % 10000;
            output3 /= 10000;
            uint32_t d = output3 % 10000;
            // Each value below is an offset into DIGIT_TABLE, which is read two chars at a time.
            uint32_t c0 = (c % 100) << 1;
            uint32_t c1 = (c / 100) << 1;
            uint32_t d0 = (d % 100) << 1;
            uint32_t d1 = (d / 100) << 1;
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 1) * sizeof(char)), ref DIGIT_TABLE[c0], 2);
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 3) * sizeof(char)), ref DIGIT_TABLE[c1], 2);
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 5) * sizeof(char)), ref DIGIT_TABLE[d0], 2);
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 7) * sizeof(char)), ref DIGIT_TABLE[d1], 2);
            i += 8;
        }
        uint32_t output2 = (uint32_t)output;
        while (output2 >= 10000)
        {
#if __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
            uint32_t c = output2 - 10000 * (output2 / 10000);
#else
            uint32_t c = output2 % 10000;
#endif
            output2 /= 10000;
            uint32_t c0 = (c % 100) << 1;
            uint32_t c1 = (c / 100) << 1;
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 1) * sizeof(char)), ref DIGIT_TABLE[c0], 2);
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 3) * sizeof(char)), ref DIGIT_TABLE[c1], 2);
            i += 4;
        }
        if (output2 >= 100)
        {
            uint32_t c = (output2 % 100) << 1;
            output2 /= 100;
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - i - 1) * sizeof(char)), ref DIGIT_TABLE[c], 2);
            i += 2;
        }
        if (output2 >= 10)
        {
            uint32_t c = output2 << 1;
            // We can't use memcpy here: the decimal dot goes between these two digits.
            Unsafe.AddByteOffset(ref result, (nint)(index + olength - i) * sizeof(char)) = DIGIT_TABLE[c + 1];
            Unsafe.Add(ref result, index) = DIGIT_TABLE[c];
        }
        else
        {
            Unsafe.Add(ref result, index) = (char)('0' + output2);
        }

        // Print decimal point if needed.
        if (olength > 1)
        {
            Unsafe.Add(ref result, index + 1) = '.';
            index += (int)olength + 1;
        }
        else
        {
            ++index;
        }

        // Print the exponent.
        Unsafe.Add(ref result, index++) = 'E';
        int32_t exp = v.exponent + (int32_t)olength - 1;
        if (exp < 0)
        {
            Unsafe.Add(ref result, index++) = '-';
            exp = -exp;
        }

        if (exp >= 100)
        {
            int32_t c = exp % 10;
            memcpy(ref Unsafe.Add(ref result, index), ref DIGIT_TABLE[2 * (uint)(exp / 10)], 2);
            Unsafe.Add(ref result, index + 2) = (char)('0' + c);
            index += 3;
        }
        else if (exp >= 10)
        {
            memcpy(ref Unsafe.Add(ref result, index), ref DIGIT_TABLE[2 * (uint)exp], 2);
            index += 2;
        }
        else
        {
            Unsafe.Add(ref result, index++) = (char)('0' + exp);
        }

        return index;
    }

    // Returns true and fills v (mantissa = the integer, exponent = 0) when the value
    // m2 * 2^e2 is an integer in [1, 2^53); otherwise returns false with v zeroed.
    static bool d2d_small_int(uint64_t ieeeMantissa, uint32_t ieeeExponent, out floating_decimal_64 v)
    {
        uint64_t m2 = (1ul << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
        int32_t e2 = (int32_t)ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;

        v.exponent = 0;
        v.mantissa = 0;

        // e2 > 0:   f = m2 * 2^e2 >= 2^53 is an integer. Ignore this case for now.
        // e2 < -52: f < 1.
        if (e2 > 0 || e2 < -52)
        {
            return false;
        }

        // Since 2^52 <= m2 < 2^53 and 0 <= -e2 <= 52: 1 <= f = m2 / 2^-e2 < 2^53.
        // Test if the lower -e2 bits of the significand are 0, i.e. whether the fraction is 0.
        if ((m2 & ((1ul << -e2) - 1)) != 0)
        {
            return false;
        }

        // f is an integer in the range [1, 2^53).
        // Note: mantissa might contain trailing (decimal) 0's.
        // Note: since 2^53 < 10^16, there is no need to adjust decimalLength17().
        v.mantissa = m2 >> -e2;
        v.exponent = 0;
        return true;
    }

    // Writes f in the format produced by to_chars (special values go through
    // copy_special_str) starting at `result`; returns the number of chars written.
    public static int d2s_buffered_n(double f, ref char result)
    {
        // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
        uint64_t bits = double_to_bits(f);

        // Decode bits into sign, mantissa, and exponent.
        bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
        uint64_t ieeeMantissa = bits & ((1ul << DOUBLE_MANTISSA_BITS) - 1);
        uint32_t ieeeExponent = (uint32_t)((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
        // Case distinction; exit early for the easy cases.
        if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0))
        {
            return copy_special_str(ref result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);
        }

        if (!d2d_small_int(ieeeMantissa, ieeeExponent, out floating_decimal_64 v))
        {
            v = d2d(ieeeMantissa, ieeeExponent);
        }
        else
        {
            // For small integers in the range [1, 2^53), v.mantissa might contain trailing (decimal) zeros.
            // For scientific notation we need to move these zeros into the exponent.
            // (This is not needed for fixed-point notation, so it might be beneficial to trim
            // trailing zeros in to_chars only if needed - once fixed-point notation output is implemented.)
            while (true)
            {
                uint64_t quotient = div10(v.mantissa);
                uint32_t remainder = ((uint32_t)v.mantissa) - 10 * ((uint32_t)quotient);
                if (remainder != 0)
                {
                    break;
                }
                v.mantissa = quotient;
                ++v.exponent;
            }
        }

        return to_chars(v, ieeeSign, ref result);
    }

    // Same as d2s_buffered_n, then stores a '\0' right after the written chars.
    public static void d2s_buffered(double f, ref char result)
    {
        int length = d2s_buffered_n(f, ref result);

        // Terminate the string.
        Unsafe.Add(ref result, length) = '\0';
    }

}
--------------------------------------------------------------------------------
/RyuCsharp/d2s_full_table.cs:
--------------------------------------------------------------------------------


namespace RyuCsharp;

partial class Ryu
{

    // These tables are generated by PrintDoubleLookupTable.
    const int DOUBLE_POW5_INV_BITCOUNT = 125;
    const int DOUBLE_POW5_BITCOUNT = 125;

    const int DOUBLE_POW5_INV_TABLE_SIZE = 342;
    const int DOUBLE_POW5_TABLE_SIZE = 326;

    static readonly uint64_t[,] DOUBLE_POW5_INV_SPLIT = new uint64_t[DOUBLE_POW5_INV_TABLE_SIZE, 2] {
        { 1u, 2305843009213693952u }, { 11068046444225730970u, 1844674407370955161u },
        { 5165088340638674453u, 1475739525896764129u }, { 7821419487252849886u, 1180591620717411303u },
        { 8824922364862649494u, 1888946593147858085u }, { 7059937891890119595u, 1511157274518286468u },
        { 13026647942995916322u, 1208925819614629174u }, { 9774590264567735146u, 1934281311383406679u },
        { 11509021026396098440u, 1547425049106725343u }, { 16585914450600699399u, 1237940039285380274u },
        { 15469416676735388068u, 1980704062856608439u }, { 16064882156130220778u, 1584563250285286751u },
        { 9162556910162266299u, 1267650600228229401u }, { 7281393426775805432u, 2028240960365167042u },
        { 16893161185646375315u, 1622592768292133633u }, { 2446482504291369283u,
1298074214633706907u }, 24 | { 7603720821608101175u, 2076918743413931051u }, { 2393627842544570617u, 1661534994731144841u }, 25 | { 16672297533003297786u, 1329227995784915872u }, { 11918280793837635165u, 2126764793255865396u }, 26 | { 5845275820328197809u, 1701411834604692317u }, { 15744267100488289217u, 1361129467683753853u }, 27 | { 3054734472329800808u, 2177807148294006166u }, { 17201182836831481939u, 1742245718635204932u }, 28 | { 6382248639981364905u, 1393796574908163946u }, { 2832900194486363201u, 2230074519853062314u }, 29 | { 5955668970331000884u, 1784059615882449851u }, { 1075186361522890384u, 1427247692705959881u }, 30 | { 12788344622662355584u, 2283596308329535809u }, { 13920024512871794791u, 1826877046663628647u }, 31 | { 3757321980813615186u, 1461501637330902918u }, { 10384555214134712795u, 1169201309864722334u }, 32 | { 5547241898389809503u, 1870722095783555735u }, { 4437793518711847602u, 1496577676626844588u }, 33 | { 10928932444453298728u, 1197262141301475670u }, { 17486291911125277965u, 1915619426082361072u }, 34 | { 6610335899416401726u, 1532495540865888858u }, { 12666966349016942027u, 1225996432692711086u }, 35 | { 12888448528943286597u, 1961594292308337738u }, { 17689456452638449924u, 1569275433846670190u }, 36 | { 14151565162110759939u, 1255420347077336152u }, { 7885109000409574610u, 2008672555323737844u }, 37 | { 9997436015069570011u, 1606938044258990275u }, { 7997948812055656009u, 1285550435407192220u }, 38 | { 12796718099289049614u, 2056880696651507552u }, { 2858676849947419045u, 1645504557321206042u }, 39 | { 13354987924183666206u, 1316403645856964833u }, { 17678631863951955605u, 2106245833371143733u }, 40 | { 3074859046935833515u, 1684996666696914987u }, { 13527933681774397782u, 1347997333357531989u }, 41 | { 10576647446613305481u, 2156795733372051183u }, { 15840015586774465031u, 1725436586697640946u }, 42 | { 8982663654677661702u, 1380349269358112757u }, { 18061610662226169046u, 2208558830972980411u }, 43 | { 10759939715039024913u, 
1766847064778384329u }, { 12297300586773130254u, 1413477651822707463u }, 44 | { 15986332124095098083u, 2261564242916331941u }, { 9099716884534168143u, 1809251394333065553u }, 45 | { 14658471137111155161u, 1447401115466452442u }, { 4348079280205103483u, 1157920892373161954u }, 46 | { 14335624477811986218u, 1852673427797059126u }, { 7779150767507678651u, 1482138742237647301u }, 47 | { 2533971799264232598u, 1185710993790117841u }, { 15122401323048503126u, 1897137590064188545u }, 48 | { 12097921058438802501u, 1517710072051350836u }, { 5988988032009131678u, 1214168057641080669u }, 49 | { 16961078480698431330u, 1942668892225729070u }, { 13568862784558745064u, 1554135113780583256u }, 50 | { 7165741412905085728u, 1243308091024466605u }, { 11465186260648137165u, 1989292945639146568u }, 51 | { 16550846638002330379u, 1591434356511317254u }, { 16930026125143774626u, 1273147485209053803u }, 52 | { 4951948911778577463u, 2037035976334486086u }, { 272210314680951647u, 1629628781067588869u }, 53 | { 3907117066486671641u, 1303703024854071095u }, { 6251387306378674625u, 2085924839766513752u }, 54 | { 16069156289328670670u, 1668739871813211001u }, { 9165976216721026213u, 1334991897450568801u }, 55 | { 7286864317269821294u, 2135987035920910082u }, { 16897537898041588005u, 1708789628736728065u }, 56 | { 13518030318433270404u, 1367031702989382452u }, { 6871453250525591353u, 2187250724783011924u }, 57 | { 9186511415162383406u, 1749800579826409539u }, { 11038557946871817048u, 1399840463861127631u }, 58 | { 10282995085511086630u, 2239744742177804210u }, { 8226396068408869304u, 1791795793742243368u }, 59 | { 13959814484210916090u, 1433436634993794694u }, { 11267656730511734774u, 2293498615990071511u }, 60 | { 5324776569667477496u, 1834798892792057209u }, { 7949170070475892320u, 1467839114233645767u }, 61 | { 17427382500606444826u, 1174271291386916613u }, { 5747719112518849781u, 1878834066219066582u }, 62 | { 15666221734240810795u, 1503067252975253265u }, { 12532977387392648636u, 
1202453802380202612u }, 63 | { 5295368560860596524u, 1923926083808324180u }, { 4236294848688477220u, 1539140867046659344u }, 64 | { 7078384693692692099u, 1231312693637327475u }, { 11325415509908307358u, 1970100309819723960u }, 65 | { 9060332407926645887u, 1576080247855779168u }, { 14626963555825137356u, 1260864198284623334u }, 66 | { 12335095245094488799u, 2017382717255397335u }, { 9868076196075591040u, 1613906173804317868u }, 67 | { 15273158586344293478u, 1291124939043454294u }, { 13369007293925138595u, 2065799902469526871u }, 68 | { 7005857020398200553u, 1652639921975621497u }, { 16672732060544291412u, 1322111937580497197u }, 69 | { 11918976037903224966u, 2115379100128795516u }, { 5845832015580669650u, 1692303280103036413u }, 70 | { 12055363241948356366u, 1353842624082429130u }, { 841837113407818570u, 2166148198531886609u }, 71 | { 4362818505468165179u, 1732918558825509287u }, { 14558301248600263113u, 1386334847060407429u }, 72 | { 12225235553534690011u, 2218135755296651887u }, { 2401490813343931363u, 1774508604237321510u }, 73 | { 1921192650675145090u, 1419606883389857208u }, { 17831303500047873437u, 2271371013423771532u }, 74 | { 6886345170554478103u, 1817096810739017226u }, { 1819727321701672159u, 1453677448591213781u }, 75 | { 16213177116328979020u, 1162941958872971024u }, { 14873036941900635463u, 1860707134196753639u }, 76 | { 15587778368262418694u, 1488565707357402911u }, { 8780873879868024632u, 1190852565885922329u }, 77 | { 2981351763563108441u, 1905364105417475727u }, { 13453127855076217722u, 1524291284333980581u }, 78 | { 7073153469319063855u, 1219433027467184465u }, { 11317045550910502167u, 1951092843947495144u }, 79 | { 12742985255470312057u, 1560874275157996115u }, { 10194388204376249646u, 1248699420126396892u }, 80 | { 1553625868034358140u, 1997919072202235028u }, { 8621598323911307159u, 1598335257761788022u }, 81 | { 17965325103354776697u, 1278668206209430417u }, { 13987124906400001422u, 2045869129935088668u }, 82 | { 121653480894270168u, 
1636695303948070935u }, { 97322784715416134u, 1309356243158456748u }, 83 | { 14913111714512307107u, 2094969989053530796u }, { 8241140556867935363u, 1675975991242824637u }, 84 | { 17660958889720079260u, 1340780792994259709u }, { 17189487779326395846u, 2145249268790815535u }, 85 | { 13751590223461116677u, 1716199415032652428u }, { 18379969808252713988u, 1372959532026121942u }, 86 | { 14650556434236701088u, 2196735251241795108u }, { 652398703163629901u, 1757388200993436087u }, 87 | { 11589965406756634890u, 1405910560794748869u }, { 7475898206584884855u, 2249456897271598191u }, 88 | { 2291369750525997561u, 1799565517817278553u }, { 9211793429904618695u, 1439652414253822842u }, 89 | { 18428218302589300235u, 2303443862806116547u }, { 7363877012587619542u, 1842755090244893238u }, 90 | { 13269799239553916280u, 1474204072195914590u }, { 10615839391643133024u, 1179363257756731672u }, 91 | { 2227947767661371545u, 1886981212410770676u }, { 16539753473096738529u, 1509584969928616540u }, 92 | { 13231802778477390823u, 1207667975942893232u }, { 6413489186596184024u, 1932268761508629172u }, 93 | { 16198837793502678189u, 1545815009206903337u }, { 5580372605318321905u, 1236652007365522670u }, 94 | { 8928596168509315048u, 1978643211784836272u }, { 18210923379033183008u, 1582914569427869017u }, 95 | { 7190041073742725760u, 1266331655542295214u }, { 436019273762630246u, 2026130648867672343u }, 96 | { 7727513048493924843u, 1620904519094137874u }, { 9871359253537050198u, 1296723615275310299u }, 97 | { 4726128361433549347u, 2074757784440496479u }, { 7470251503888749801u, 1659806227552397183u }, 98 | { 13354898832594820487u, 1327844982041917746u }, { 13989140502667892133u, 2124551971267068394u }, 99 | { 14880661216876224029u, 1699641577013654715u }, { 11904528973500979224u, 1359713261610923772u }, 100 | { 4289851098633925465u, 2175541218577478036u }, { 18189276137874781665u, 1740432974861982428u }, 101 | { 3483374466074094362u, 1392346379889585943u }, { 1884050330976640656u, 
2227754207823337509u }, 102 | { 5196589079523222848u, 1782203366258670007u }, { 15225317707844309248u, 1425762693006936005u }, 103 | { 5913764258841343181u, 2281220308811097609u }, { 8420360221814984868u, 1824976247048878087u }, 104 | { 17804334621677718864u, 1459980997639102469u }, { 17932816512084085415u, 1167984798111281975u }, 105 | { 10245762345624985047u, 1868775676978051161u }, { 4507261061758077715u, 1495020541582440929u }, 106 | { 7295157664148372495u, 1196016433265952743u }, { 7982903447895485668u, 1913626293225524389u }, 107 | { 10075671573058298858u, 1530901034580419511u }, { 4371188443704728763u, 1224720827664335609u }, 108 | { 14372599139411386667u, 1959553324262936974u }, { 15187428126271019657u, 1567642659410349579u }, 109 | { 15839291315758726049u, 1254114127528279663u }, { 3206773216762499739u, 2006582604045247462u }, 110 | { 13633465017635730761u, 1605266083236197969u }, { 14596120828850494932u, 1284212866588958375u }, 111 | { 4907049252451240275u, 2054740586542333401u }, { 236290587219081897u, 1643792469233866721u }, 112 | { 14946427728742906810u, 1315033975387093376u }, { 16535586736504830250u, 2104054360619349402u }, 113 | { 5849771759720043554u, 1683243488495479522u }, { 15747863852001765813u, 1346594790796383617u }, 114 | { 10439186904235184007u, 2154551665274213788u }, { 15730047152871967852u, 1723641332219371030u }, 115 | { 12584037722297574282u, 1378913065775496824u }, { 9066413911450387881u, 2206260905240794919u }, 116 | { 10942479943902220628u, 1765008724192635935u }, { 8753983955121776503u, 1412006979354108748u }, 117 | { 10317025513452932081u, 2259211166966573997u }, { 874922781278525018u, 1807368933573259198u }, 118 | { 8078635854506640661u, 1445895146858607358u }, { 13841606313089133175u, 1156716117486885886u }, 119 | { 14767872471458792434u, 1850745787979017418u }, { 746251532941302978u, 1480596630383213935u }, 120 | { 597001226353042382u, 1184477304306571148u }, { 15712597221132509104u, 1895163686890513836u }, 121 | { 
8880728962164096960u, 1516130949512411069u }, { 10793931984473187891u, 1212904759609928855u }, 122 | { 17270291175157100626u, 1940647615375886168u }, { 2748186495899949531u, 1552518092300708935u }, 123 | { 2198549196719959625u, 1242014473840567148u }, { 18275073973719576693u, 1987223158144907436u }, 124 | { 10930710364233751031u, 1589778526515925949u }, { 12433917106128911148u, 1271822821212740759u }, 125 | { 8826220925580526867u, 2034916513940385215u }, { 7060976740464421494u, 1627933211152308172u }, 126 | { 16716827836597268165u, 1302346568921846537u }, { 11989529279587987770u, 2083754510274954460u }, 127 | { 9591623423670390216u, 1667003608219963568u }, { 15051996368420132820u, 1333602886575970854u }, 128 | { 13015147745246481542u, 2133764618521553367u }, { 3033420566713364587u, 1707011694817242694u }, 129 | { 6116085268112601993u, 1365609355853794155u }, { 9785736428980163188u, 2184974969366070648u }, 130 | { 15207286772667951197u, 1747979975492856518u }, { 1097782973908629988u, 1398383980394285215u }, 131 | { 1756452758253807981u, 2237414368630856344u }, { 5094511021344956708u, 1789931494904685075u }, 132 | { 4075608817075965366u, 1431945195923748060u }, { 6520974107321544586u, 2291112313477996896u }, 133 | { 1527430471115325346u, 1832889850782397517u }, { 12289990821117991246u, 1466311880625918013u }, 134 | { 17210690286378213644u, 1173049504500734410u }, { 9090360384495590213u, 1876879207201175057u }, 135 | { 18340334751822203140u, 1501503365760940045u }, { 14672267801457762512u, 1201202692608752036u }, 136 | { 16096930852848599373u, 1921924308174003258u }, { 1809498238053148529u, 1537539446539202607u }, 137 | { 12515645034668249793u, 1230031557231362085u }, { 1578287981759648052u, 1968050491570179337u }, 138 | { 12330676829633449412u, 1574440393256143469u }, { 13553890278448669853u, 1259552314604914775u }, 139 | { 3239480371808320148u, 2015283703367863641u }, { 17348979556414297411u, 1612226962694290912u }, 140 | { 6500486015647617283u, 1289781570155432730u 
}, { 10400777625036187652u, 2063650512248692368u }, 141 | { 15699319729512770768u, 1650920409798953894u }, { 16248804598352126938u, 1320736327839163115u }, 142 | { 7551343283653851484u, 2113178124542660985u }, { 6041074626923081187u, 1690542499634128788u }, 143 | { 12211557331022285596u, 1352433999707303030u }, { 1091747655926105338u, 2163894399531684849u }, 144 | { 4562746939482794594u, 1731115519625347879u }, { 7339546366328145998u, 1384892415700278303u }, 145 | { 8053925371383123274u, 2215827865120445285u }, { 6443140297106498619u, 1772662292096356228u }, 146 | { 12533209867169019542u, 1418129833677084982u }, { 5295740528502789974u, 2269007733883335972u }, 147 | { 15304638867027962949u, 1815206187106668777u }, { 4865013464138549713u, 1452164949685335022u }, 148 | { 14960057215536570740u, 1161731959748268017u }, { 9178696285890871890u, 1858771135597228828u }, 149 | { 14721654658196518159u, 1487016908477783062u }, { 4398626097073393881u, 1189613526782226450u }, 150 | { 7037801755317430209u, 1903381642851562320u }, { 5630241404253944167u, 1522705314281249856u }, 151 | { 814844308661245011u, 1218164251424999885u }, { 1303750893857992017u, 1949062802279999816u }, 152 | { 15800395974054034906u, 1559250241823999852u }, { 5261619149759407279u, 1247400193459199882u }, 153 | { 12107939454356961969u, 1995840309534719811u }, { 5997002748743659252u, 1596672247627775849u }, 154 | { 8486951013736837725u, 1277337798102220679u }, { 2511075177753209390u, 2043740476963553087u }, 155 | { 13076906586428298482u, 1634992381570842469u }, { 14150874083884549109u, 1307993905256673975u }, 156 | { 4194654460505726958u, 2092790248410678361u }, { 18113118827372222859u, 1674232198728542688u }, 157 | { 3422448617672047318u, 1339385758982834151u }, { 16543964232501006678u, 2143017214372534641u }, 158 | { 9545822571258895019u, 1714413771498027713u }, { 15015355686490936662u, 1371531017198422170u }, 159 | { 5577825024675947042u, 2194449627517475473u }, { 11840957649224578280u, 
1755559702013980378u }, 160 | { 16851463748863483271u, 1404447761611184302u }, { 12204946739213931940u, 2247116418577894884u }, 161 | { 13453306206113055875u, 1797693134862315907u }, { 3383947335406624054u, 1438154507889852726u }, 162 | { 16482362180876329456u, 2301047212623764361u }, { 9496540929959153242u, 1840837770099011489u }, 163 | { 11286581558709232917u, 1472670216079209191u }, { 5339916432225476010u, 1178136172863367353u }, 164 | { 4854517476818851293u, 1885017876581387765u }, { 3883613981455081034u, 1508014301265110212u }, 165 | { 14174937629389795797u, 1206411441012088169u }, { 11611853762797942306u, 1930258305619341071u }, 166 | { 5600134195496443521u, 1544206644495472857u }, { 15548153800622885787u, 1235365315596378285u }, 167 | { 6430302007287065643u, 1976584504954205257u }, { 16212288050055383484u, 1581267603963364205u }, 168 | { 12969830440044306787u, 1265014083170691364u }, { 9683682259845159889u, 2024022533073106183u }, 169 | { 15125643437359948558u, 1619218026458484946u }, { 8411165935146048523u, 1295374421166787957u }, 170 | { 17147214310975587960u, 2072599073866860731u }, { 10028422634038560045u, 1658079259093488585u }, 171 | { 8022738107230848036u, 1326463407274790868u }, { 9147032156827446534u, 2122341451639665389u }, 172 | { 11006974540203867551u, 1697873161311732311u }, { 5116230817421183718u, 1358298529049385849u }, 173 | { 15564666937357714594u, 2173277646479017358u }, { 1383687105660440706u, 1738622117183213887u }, 174 | { 12174996128754083534u, 1390897693746571109u }, { 8411947361780802685u, 2225436309994513775u }, 175 | { 6729557889424642148u, 1780349047995611020u }, { 5383646311539713719u, 1424279238396488816u }, 176 | { 1235136468979721303u, 2278846781434382106u }, { 15745504434151418335u, 1823077425147505684u }, 177 | { 16285752362063044992u, 1458461940118004547u }, { 5649904260166615347u, 1166769552094403638u }, 178 | { 5350498001524674232u, 1866831283351045821u }, { 591049586477829062u, 1493465026680836657u }, 179 | { 
11540886113407994219u, 1194772021344669325u }, { 18673707743239135u, 1911635234151470921u }, 180 | { 14772334225162232601u, 1529308187321176736u }, { 8128518565387875758u, 1223446549856941389u }, 181 | { 1937583260394870242u, 1957514479771106223u }, { 8928764237799716840u, 1566011583816884978u }, 182 | { 14521709019723594119u, 1252809267053507982u }, { 8477339172590109297u, 2004494827285612772u }, 183 | { 17849917782297818407u, 1603595861828490217u }, { 6901236596354434079u, 1282876689462792174u }, 184 | { 18420676183650915173u, 2052602703140467478u }, { 3668494502695001169u, 1642082162512373983u }, 185 | { 10313493231639821582u, 1313665730009899186u }, { 9122891541139893884u, 2101865168015838698u }, 186 | { 14677010862395735754u, 1681492134412670958u }, { 673562245690857633u, 1345193707530136767u } 187 | }; 188 | 189 | static readonly uint64_t[,] DOUBLE_POW5_SPLIT = new uint64_t[DOUBLE_POW5_TABLE_SIZE, 2] { 190 | { 0u, 1152921504606846976u }, { 0u, 1441151880758558720u }, 191 | { 0u, 1801439850948198400u }, { 0u, 2251799813685248000u }, 192 | { 0u, 1407374883553280000u }, { 0u, 1759218604441600000u }, 193 | { 0u, 2199023255552000000u }, { 0u, 1374389534720000000u }, 194 | { 0u, 1717986918400000000u }, { 0u, 2147483648000000000u }, 195 | { 0u, 1342177280000000000u }, { 0u, 1677721600000000000u }, 196 | { 0u, 2097152000000000000u }, { 0u, 1310720000000000000u }, 197 | { 0u, 1638400000000000000u }, { 0u, 2048000000000000000u }, 198 | { 0u, 1280000000000000000u }, { 0u, 1600000000000000000u }, 199 | { 0u, 2000000000000000000u }, { 0u, 1250000000000000000u }, 200 | { 0u, 1562500000000000000u }, { 0u, 1953125000000000000u }, 201 | { 0u, 1220703125000000000u }, { 0u, 1525878906250000000u }, 202 | { 0u, 1907348632812500000u }, { 0u, 1192092895507812500u }, 203 | { 0u, 1490116119384765625u }, { 4611686018427387904u, 1862645149230957031u }, 204 | { 9799832789158199296u, 1164153218269348144u }, { 12249790986447749120u, 1455191522836685180u }, 205 | { 15312238733059686400u, 
1818989403545856475u }, { 14528612397897220096u, 2273736754432320594u }, 206 | { 13692068767113150464u, 1421085471520200371u }, { 12503399940464050176u, 1776356839400250464u }, 207 | { 15629249925580062720u, 2220446049250313080u }, { 9768281203487539200u, 1387778780781445675u }, 208 | { 7598665485932036096u, 1734723475976807094u }, { 274959820560269312u, 2168404344971008868u }, 209 | { 9395221924704944128u, 1355252715606880542u }, { 2520655369026404352u, 1694065894508600678u }, 210 | { 12374191248137781248u, 2117582368135750847u }, { 14651398557727195136u, 1323488980084844279u }, 211 | { 13702562178731606016u, 1654361225106055349u }, { 3293144668132343808u, 2067951531382569187u }, 212 | { 18199116482078572544u, 1292469707114105741u }, { 8913837547316051968u, 1615587133892632177u }, 213 | { 15753982952572452864u, 2019483917365790221u }, { 12152082354571476992u, 1262177448353618888u }, 214 | { 15190102943214346240u, 1577721810442023610u }, { 9764256642163156992u, 1972152263052529513u }, 215 | { 17631875447420442880u, 1232595164407830945u }, { 8204786253993389888u, 1540743955509788682u }, 216 | { 1032610780636961552u, 1925929944387235853u }, { 2951224747111794922u, 1203706215242022408u }, 217 | { 3689030933889743652u, 1504632769052528010u }, { 13834660704216955373u, 1880790961315660012u }, 218 | { 17870034976990372916u, 1175494350822287507u }, { 17725857702810578241u, 1469367938527859384u }, 219 | { 3710578054803671186u, 1836709923159824231u }, { 26536550077201078u, 2295887403949780289u }, 220 | { 11545800389866720434u, 1434929627468612680u }, { 14432250487333400542u, 1793662034335765850u }, 221 | { 8816941072311974870u, 2242077542919707313u }, { 17039803216263454053u, 1401298464324817070u }, 222 | { 12076381983474541759u, 1751623080406021338u }, { 5872105442488401391u, 2189528850507526673u }, 223 | { 15199280947623720629u, 1368455531567204170u }, { 9775729147674874978u, 1710569414459005213u }, 224 | { 16831347453020981627u, 2138211768073756516u }, { 
1296220121283337709u, 1336382355046097823u }, 225 | { 15455333206886335848u, 1670477943807622278u }, { 10095794471753144002u, 2088097429759527848u }, 226 | { 6309871544845715001u, 1305060893599704905u }, { 12499025449484531656u, 1631326116999631131u }, 227 | { 11012095793428276666u, 2039157646249538914u }, { 11494245889320060820u, 1274473528905961821u }, 228 | { 532749306367912313u, 1593091911132452277u }, { 5277622651387278295u, 1991364888915565346u }, 229 | { 7910200175544436838u, 1244603055572228341u }, { 14499436237857933952u, 1555753819465285426u }, 230 | { 8900923260467641632u, 1944692274331606783u }, { 12480606065433357876u, 1215432671457254239u }, 231 | { 10989071563364309441u, 1519290839321567799u }, { 9124653435777998898u, 1899113549151959749u }, 232 | { 8008751406574943263u, 1186945968219974843u }, { 5399253239791291175u, 1483682460274968554u }, 233 | { 15972438586593889776u, 1854603075343710692u }, { 759402079766405302u, 1159126922089819183u }, 234 | { 14784310654990170340u, 1448908652612273978u }, { 9257016281882937117u, 1811135815765342473u }, 235 | { 16182956370781059300u, 2263919769706678091u }, { 7808504722524468110u, 1414949856066673807u }, 236 | { 5148944884728197234u, 1768687320083342259u }, { 1824495087482858639u, 2210859150104177824u }, 237 | { 1140309429676786649u, 1381786968815111140u }, { 1425386787095983311u, 1727233711018888925u }, 238 | { 6393419502297367043u, 2159042138773611156u }, { 13219259225790630210u, 1349401336733506972u }, 239 | { 16524074032238287762u, 1686751670916883715u }, { 16043406521870471799u, 2108439588646104644u }, 240 | { 803757039314269066u, 1317774742903815403u }, { 14839754354425000045u, 1647218428629769253u }, 241 | { 4714634887749086344u, 2059023035787211567u }, { 9864175832484260821u, 1286889397367007229u }, 242 | { 16941905809032713930u, 1608611746708759036u }, { 2730638187581340797u, 2010764683385948796u }, 243 | { 10930020904093113806u, 1256727927116217997u }, { 18274212148543780162u, 1570909908895272496u }, 
244 | { 4396021111970173586u, 1963637386119090621u }, { 5053356204195052443u, 1227273366324431638u }, 245 | { 15540067292098591362u, 1534091707905539547u }, { 14813398096695851299u, 1917614634881924434u }, 246 | { 13870059828862294966u, 1198509146801202771u }, { 12725888767650480803u, 1498136433501503464u }, 247 | { 15907360959563101004u, 1872670541876879330u }, { 14553786618154326031u, 1170419088673049581u }, 248 | { 4357175217410743827u, 1463023860841311977u }, { 10058155040190817688u, 1828779826051639971u }, 249 | { 7961007781811134206u, 2285974782564549964u }, { 14199001900486734687u, 1428734239102843727u }, 250 | { 13137066357181030455u, 1785917798878554659u }, { 11809646928048900164u, 2232397248598193324u }, 251 | { 16604401366885338411u, 1395248280373870827u }, { 16143815690179285109u, 1744060350467338534u }, 252 | { 10956397575869330579u, 2180075438084173168u }, { 6847748484918331612u, 1362547148802608230u }, 253 | { 17783057643002690323u, 1703183936003260287u }, { 17617136035325974999u, 2128979920004075359u }, 254 | { 17928239049719816230u, 1330612450002547099u }, { 17798612793722382384u, 1663265562503183874u }, 255 | { 13024893955298202172u, 2079081953128979843u }, { 5834715712847682405u, 1299426220705612402u }, 256 | { 16516766677914378815u, 1624282775882015502u }, { 11422586310538197711u, 2030353469852519378u }, 257 | { 11750802462513761473u, 1268970918657824611u }, { 10076817059714813937u, 1586213648322280764u }, 258 | { 12596021324643517422u, 1982767060402850955u }, { 5566670318688504437u, 1239229412751781847u }, 259 | { 2346651879933242642u, 1549036765939727309u }, { 7545000868343941206u, 1936295957424659136u }, 260 | { 4715625542714963254u, 1210184973390411960u }, { 5894531928393704067u, 1512731216738014950u }, 261 | { 16591536947346905892u, 1890914020922518687u }, { 17287239619732898039u, 1181821263076574179u }, 262 | { 16997363506238734644u, 1477276578845717724u }, { 2799960309088866689u, 1846595723557147156u }, 263 | { 10973347230035317489u, 
1154122327223216972u }, { 13716684037544146861u, 1442652909029021215u }, 264 | { 12534169028502795672u, 1803316136286276519u }, { 11056025267201106687u, 2254145170357845649u }, 265 | { 18439230838069161439u, 1408840731473653530u }, { 13825666510731675991u, 1761050914342066913u }, 266 | { 3447025083132431277u, 2201313642927583642u }, { 6766076695385157452u, 1375821026829739776u }, 267 | { 8457595869231446815u, 1719776283537174720u }, { 10571994836539308519u, 2149720354421468400u }, 268 | { 6607496772837067824u, 1343575221513417750u }, { 17482743002901110588u, 1679469026891772187u }, 269 | { 17241742735199000331u, 2099336283614715234u }, { 15387775227926763111u, 1312085177259197021u }, 270 | { 5399660979626290177u, 1640106471573996277u }, { 11361262242960250625u, 2050133089467495346u }, 271 | { 11712474920277544544u, 1281333180917184591u }, { 10028907631919542777u, 1601666476146480739u }, 272 | { 7924448521472040567u, 2002083095183100924u }, { 14176152362774801162u, 1251301934489438077u }, 273 | { 3885132398186337741u, 1564127418111797597u }, { 9468101516160310080u, 1955159272639746996u }, 274 | { 15140935484454969608u, 1221974545399841872u }, { 479425281859160394u, 1527468181749802341u }, 275 | { 5210967620751338397u, 1909335227187252926u }, { 17091912818251750210u, 1193334516992033078u }, 276 | { 12141518985959911954u, 1491668146240041348u }, { 15176898732449889943u, 1864585182800051685u }, 277 | { 11791404716994875166u, 1165365739250032303u }, { 10127569877816206054u, 1456707174062540379u }, 278 | { 8047776328842869663u, 1820883967578175474u }, { 836348374198811271u, 2276104959472719343u }, 279 | { 7440246761515338900u, 1422565599670449589u }, { 13911994470321561530u, 1778206999588061986u }, 280 | { 8166621051047176104u, 2222758749485077483u }, { 2798295147690791113u, 1389224218428173427u }, 281 | { 17332926989895652603u, 1736530273035216783u }, { 17054472718942177850u, 2170662841294020979u }, 282 | { 8353202440125167204u, 1356664275808763112u }, { 
10441503050156459005u, 1695830344760953890u }, 283 | { 3828506775840797949u, 2119787930951192363u }, { 86973725686804766u, 1324867456844495227u }, 284 | { 13943775212390669669u, 1656084321055619033u }, { 3594660960206173375u, 2070105401319523792u }, 285 | { 2246663100128858359u, 1293815875824702370u }, { 12031700912015848757u, 1617269844780877962u }, 286 | { 5816254103165035138u, 2021587305976097453u }, { 5941001823691840913u, 1263492066235060908u }, 287 | { 7426252279614801142u, 1579365082793826135u }, { 4671129331091113523u, 1974206353492282669u }, 288 | { 5225298841145639904u, 1233878970932676668u }, { 6531623551432049880u, 1542348713665845835u }, 289 | { 3552843420862674446u, 1927935892082307294u }, { 16055585193321335241u, 1204959932551442058u }, 290 | { 10846109454796893243u, 1506199915689302573u }, { 18169322836923504458u, 1882749894611628216u }, 291 | { 11355826773077190286u, 1176718684132267635u }, { 9583097447919099954u, 1470898355165334544u }, 292 | { 11978871809898874942u, 1838622943956668180u }, { 14973589762373593678u, 2298278679945835225u }, 293 | { 2440964573842414192u, 1436424174966147016u }, { 3051205717303017741u, 1795530218707683770u }, 294 | { 13037379183483547984u, 2244412773384604712u }, { 8148361989677217490u, 1402757983365377945u }, 295 | { 14797138505523909766u, 1753447479206722431u }, { 13884737113477499304u, 2191809349008403039u }, 296 | { 15595489723564518921u, 1369880843130251899u }, { 14882676136028260747u, 1712351053912814874u }, 297 | { 9379973133180550126u, 2140438817391018593u }, { 17391698254306313589u, 1337774260869386620u }, 298 | { 3292878744173340370u, 1672217826086733276u }, { 4116098430216675462u, 2090272282608416595u }, 299 | { 266718509671728212u, 1306420176630260372u }, { 333398137089660265u, 1633025220787825465u }, 300 | { 5028433689789463235u, 2041281525984781831u }, { 10060300083759496378u, 1275800953740488644u }, 301 | { 12575375104699370472u, 1594751192175610805u }, { 1884160825592049379u, 1993438990219513507u }, 
302 | { 17318501580490888525u, 1245899368887195941u }, { 7813068920331446945u, 1557374211108994927u }, 303 | { 5154650131986920777u, 1946717763886243659u }, { 915813323278131534u, 1216698602428902287u }, 304 | { 14979824709379828129u, 1520873253036127858u }, { 9501408849870009354u, 1901091566295159823u }, 305 | { 12855909558809837702u, 1188182228934474889u }, { 2234828893230133415u, 1485227786168093612u }, 306 | { 2793536116537666769u, 1856534732710117015u }, { 8663489100477123587u, 1160334207943823134u }, 307 | { 1605989338741628675u, 1450417759929778918u }, { 11230858710281811652u, 1813022199912223647u }, 308 | { 9426887369424876662u, 2266277749890279559u }, { 12809333633531629769u, 1416423593681424724u }, 309 | { 16011667041914537212u, 1770529492101780905u }, { 6179525747111007803u, 2213161865127226132u }, 310 | { 13085575628799155685u, 1383226165704516332u }, { 16356969535998944606u, 1729032707130645415u }, 311 | { 15834525901571292854u, 2161290883913306769u }, { 2979049660840976177u, 1350806802445816731u }, 312 | { 17558870131333383934u, 1688508503057270913u }, { 8113529608884566205u, 2110635628821588642u }, 313 | { 9682642023980241782u, 1319147268013492901u }, { 16714988548402690132u, 1648934085016866126u }, 314 | { 11670363648648586857u, 2061167606271082658u }, { 11905663298832754689u, 1288229753919426661u }, 315 | { 1047021068258779650u, 1610287192399283327u }, { 15143834390605638274u, 2012858990499104158u }, 316 | { 4853210475701136017u, 1258036869061940099u }, { 1454827076199032118u, 1572546086327425124u }, 317 | { 1818533845248790147u, 1965682607909281405u }, { 3442426662494187794u, 1228551629943300878u }, 318 | { 13526405364972510550u, 1535689537429126097u }, { 3072948650933474476u, 1919611921786407622u }, 319 | { 15755650962115585259u, 1199757451116504763u }, { 15082877684217093670u, 1499696813895630954u }, 320 | { 9630225068416591280u, 1874621017369538693u }, { 8324733676974063502u, 1171638135855961683u }, 321 | { 5794231077790191473u, 
1464547669819952104u }, { 7242788847237739342u, 1830684587274940130u }, 322 | { 18276858095901949986u, 2288355734093675162u }, { 16034722328366106645u, 1430222333808546976u }, 323 | { 1596658836748081690u, 1787777917260683721u }, { 6607509564362490017u, 2234722396575854651u }, 324 | { 1823850468512862308u, 1396701497859909157u }, { 6891499104068465790u, 1745876872324886446u }, 325 | { 17837745916940358045u, 2182346090406108057u }, { 4231062170446641922u, 1363966306503817536u }, 326 | { 5288827713058302403u, 1704957883129771920u }, { 6611034641322878003u, 2131197353912214900u }, 327 | { 13355268687681574560u, 1331998346195134312u }, { 16694085859601968200u, 1664997932743917890u }, 328 | { 11644235287647684442u, 2081247415929897363u }, { 4971804045566108824u, 1300779634956185852u }, 329 | { 6214755056957636030u, 1625974543695232315u }, { 3156757802769657134u, 2032468179619040394u }, 330 | { 6584659645158423613u, 1270292612261900246u }, { 17454196593302805324u, 1587865765327375307u }, 331 | { 17206059723201118751u, 1984832206659219134u }, { 6142101308573311315u, 1240520129162011959u }, 332 | { 3065940617289251240u, 1550650161452514949u }, { 8444111790038951954u, 1938312701815643686u }, 333 | { 665883850346957067u, 1211445438634777304u }, { 832354812933696334u, 1514306798293471630u }, 334 | { 10263815553021896226u, 1892883497866839537u }, { 17944099766707154901u, 1183052186166774710u }, 335 | { 13206752671529167818u, 1478815232708468388u }, { 16508440839411459773u, 1848519040885585485u }, 336 | { 12623618533845856310u, 1155324400553490928u }, { 15779523167307320387u, 1444155500691863660u }, 337 | { 1277659885424598868u, 1805194375864829576u }, { 1597074856780748586u, 2256492969831036970u }, 338 | { 5609857803915355770u, 1410308106144398106u }, { 16235694291748970521u, 1762885132680497632u }, 339 | { 1847873790976661535u, 2203606415850622041u }, { 12684136165428883219u, 1377254009906638775u }, 340 | { 11243484188358716120u, 1721567512383298469u }, { 219297180166231438u, 
2151959390479123087u }, 341 | { 7054589765244976505u, 1344974619049451929u }, { 13429923224983608535u, 1681218273811814911u }, 342 | { 12175718012802122765u, 2101522842264768639u }, { 14527352785642408584u, 1313451776415480399u }, 343 | { 13547504963625622826u, 1641814720519350499u }, { 12322695186104640628u, 2052268400649188124u }, 344 | { 16925056528170176201u, 1282667750405742577u }, { 7321262604930556539u, 1603334688007178222u }, 345 | { 18374950293017971482u, 2004168360008972777u }, { 4566814905495150320u, 1252605225005607986u }, 346 | { 14931890668723713708u, 1565756531257009982u }, { 9441491299049866327u, 1957195664071262478u }, 347 | { 1289246043478778550u, 1223247290044539049u }, { 6223243572775861092u, 1529059112555673811u }, 348 | { 3167368447542438461u, 1911323890694592264u }, { 1979605279714024038u, 1194577431684120165u }, 349 | { 7086192618069917952u, 1493221789605150206u }, { 18081112809442173248u, 1866527237006437757u }, 350 | { 13606538515115052232u, 1166579523129023598u }, { 7784801107039039482u, 1458224403911279498u }, 351 | { 507629346944023544u, 1822780504889099373u }, { 5246222702107417334u, 2278475631111374216u }, 352 | { 3278889188817135834u, 1424047269444608885u }, { 8710297504448807696u, 1780059086805761106u } 353 | }; 354 | 355 | } -------------------------------------------------------------------------------- /RyuCsharp/d2s_intrinsics.cs: -------------------------------------------------------------------------------- 1 | namespace RyuCsharp; 2 | 3 | partial class Ryu 4 | { 5 | static uint64_t umul128(uint64_t a, uint64_t b, out uint64_t productHi) 6 | { 7 | // The casts here help MSVC to avoid calls to the __allmul library function. 
8 | uint32_t aLo = (uint32_t)a; 9 | uint32_t aHi = (uint32_t)(a >> 32); 10 | uint32_t bLo = (uint32_t)b; 11 | uint32_t bHi = (uint32_t)(b >> 32); 12 | 13 | uint64_t b00 = (uint64_t)aLo * bLo; 14 | uint64_t b01 = (uint64_t)aLo * bHi; 15 | uint64_t b10 = (uint64_t)aHi * bLo; 16 | uint64_t b11 = (uint64_t)aHi * bHi; 17 | 18 | uint32_t b00Lo = (uint32_t)b00; 19 | uint32_t b00Hi = (uint32_t)(b00 >> 32); 20 | 21 | uint64_t mid1 = b10 + b00Hi; 22 | uint32_t mid1Lo = (uint32_t)(mid1); 23 | uint32_t mid1Hi = (uint32_t)(mid1 >> 32); 24 | 25 | uint64_t mid2 = b01 + mid1Lo; 26 | uint32_t mid2Lo = (uint32_t)(mid2); 27 | uint32_t mid2Hi = (uint32_t)(mid2 >> 32); 28 | 29 | uint64_t pHi = b11 + mid1Hi + mid2Hi; 30 | uint64_t pLo = ((uint64_t)mid2Lo << 32) | b00Lo; 31 | 32 | productHi = pHi; 33 | return pLo; 34 | } 35 | static uint64_t shiftright128(uint64_t lo, uint64_t hi, uint32_t dist) 36 | { 37 | // We don't need to handle the case dist >= 64 here (see above). 38 | assert(dist < 64); 39 | assert(dist >= 0); 40 | return (hi << (int)(64 - dist)) | (lo >> (int)dist); 41 | } 42 | 43 | static uint64_t div5(uint64_t x) 44 | { 45 | return x / 5; 46 | } 47 | 48 | static uint64_t div10(uint64_t x) 49 | { 50 | return x / 10; 51 | } 52 | 53 | static uint64_t div100(uint64_t x) 54 | { 55 | return x / 100; 56 | } 57 | 58 | static uint64_t div1e8(uint64_t x) 59 | { 60 | return x / 100000000; 61 | } 62 | 63 | static uint64_t div1e9(uint64_t x) 64 | { 65 | return x / 1000000000; 66 | } 67 | 68 | static uint32_t mod1e9(uint64_t x) 69 | { 70 | return (uint32_t)(x - 1000000000 * div1e9(x)); 71 | } 72 | 73 | static uint32_t pow5Factor(uint64_t value) 74 | { 75 | uint32_t count = 0; 76 | for (; ; ) 77 | { 78 | assert(value != 0); 79 | uint64_t q = div5(value); 80 | uint32_t r = ((uint32_t)value) - 5 * ((uint32_t)q); 81 | if (r != 0) 82 | { 83 | break; 84 | } 85 | value = q; 86 | ++count; 87 | } 88 | return count; 89 | } 90 | 91 | // Returns true if value is divisible by 5^p. 
static bool multipleOfPowerOf5(uint64_t value, uint32_t p)
{
    // value is divisible by 5^p exactly when it contains at least p factors of 5,
    // which is what pow5Factor counts.
    // I tried a case distinction on p, but there was no performance difference.
    return pow5Factor(value) >= p;
}

// Returns true if value is divisible by 2^p.
// Preconditions: value != 0 and p < 64.
static bool multipleOfPowerOf2(uint64_t value, uint32_t p)
{
    assert(value != 0);
    // C# masks the shift count of a 64-bit operand to its low 6 bits, so without
    // this check p >= 64 would silently test divisibility by 2^(p & 63) (p == 64
    // would always report true) instead of failing loudly.
    assert(p < 64);
    // __builtin_ctzll doesn't appear to be faster here.
    // The mask selects the p lowest bits; they must all be zero.
    return (value & ((1ul << (int)p) - 1)) == 0;
}

// We need a 64x128-bit multiplication and a subsequent 128-bit shift.
// Multiplication:
//   The 64-bit factor is variable and passed in, the 128-bit factor comes
//   from a lookup table. We know that the 64-bit factor only has 55
//   significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
//   factor only has 124 significant bits (i.e., the 4 topmost bits are
//   zeros).
// Shift:
//   In principle, the multiplication result requires 55 + 124 = 179 bits to
//   represent. However, we then shift this value to the right by j, which is
//   at least j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
//   bits. This means that we only need the topmost 64 significant bits of
//   the 64x128-bit multiplication.
//
// There are several ways to do this:
// 1. Best case: the compiler exposes a 128-bit type.
//    We perform two 64x64-bit multiplications, add the higher 64 bits of the
//    lower result to the higher result, and shift by j - 64 bits.
//
//    We explicitly cast from 64-bit to 128-bit, so the compiler can tell
//    that these are only 64-bit inputs, and can map these to the best
//    possible sequence of assembly instructions.
//    x64 machines happen to have matching assembly instructions for
//    64x64-bit multiplications and 128-bit shifts.
//
// 2. Second best case: the compiler exposes intrinsics for the x64 assembly
//    instructions mentioned in 1.
//
// 3. We only have 64x64 bit instructions that return the lower 64 bits of
//    the result, i.e., we have to use plain C.
//    Our inputs are less than the full width, so we have three options:
//    a. Ignore this fact and just implement the intrinsics manually.
//    b. Split both into 31-bit pieces, which guarantees no internal overflow,
//       but requires extra work upfront (unless we change the lookup table).
//    c. Split only the first factor into 31-bit pieces, which also guarantees
//       no internal overflow, but requires extra work since the intermediate
//       results are not perfectly aligned.
//
// mulShift64 below follows option 3a: it is built on the hand-written
// umul128 and shiftright128 helpers of this class.
//
// Returns the low 64 bits of (m * M) >> j, where M is the 128-bit factor whose
// low word is `mul` and whose high word is the uint64_t stored directly after it.
//   m   - 64-bit factor (at most 55 significant bits, see above).
//   mul - reference to the low word of the 128-bit factor; the word at offset 1
//         is read as well (presumably one row of a DOUBLE_POW5 table - confirm
//         against the callers).
//   j   - total right shift; j - 64 is passed to shiftright128, which asserts
//         that it is below 64.
static uint64_t mulShift64(uint64_t m, ref uint64_t mul, int32_t j)
{
    // m is maximum 55 bits
    // m * high word: contributes to bits 64..191 of the full product.
    uint64_t low1 = umul128(m, Unsafe.Add(ref mul, 1), out uint64_t high1);
    // m * low word: only its upper half (bits 64..127) can reach the result,
    // so the returned lower half is discarded.
    umul128(m, Unsafe.Add(ref mul, 0), out uint64_t high0);

    // Bits 64..127 of the product; a wrap-around here is a carry into bit 128.
    uint64_t sum = high0 + low1;
    if (sum < high0)
    {
        ++high1; // overflow into high1
    }

    // (high1:sum) is the product with its lowest 64 bits already dropped,
    // hence the remaining shift is j - 64.
    return shiftright128(sum, high1, (uint)j - 64);
}

// This is faster if we don't have a 64x64->128-bit multiplication.
// Computes three shifted products from one shared 64x128-bit multiplication.
// With M the 128-bit number whose low word is mul and whose high word follows it,
// and m the value passed in:
//   return value = (2m * M)       >> (j - 1)
//   vp           = ((2m + 1) * M) >> (j - 1)
//   vm           = ((2m - 1) * M) >> (j - 1)   when mmShift == 1
//   vm           = ((4m - 1) * M) >> j         otherwise
// Each result keeps only 64 bits. The shift distances handed to shiftright128 are
// j - 65 and j - 64.
static uint64_t mulShiftAll64(uint64_t m, ref uint64_t mul, int32_t j, out uint64_t vp, out uint64_t vm, uint32_t mmShift)
{
    m <<= 1;
    // m is maximum 55 bits
    uint64_t mulLo = Unsafe.Add(ref mul, 0);
    uint64_t mulHi = Unsafe.Add(ref mul, 1);

    // 192-bit product m * M, held as hi : mid : lo.
    uint64_t lo = umul128(m, mulLo, out uint64_t loUpper);
    uint64_t mid = loUpper + umul128(m, mulHi, out uint64_t hi);
    if (mid < loUpper)
    {
        ++hi; // overflow into hi
    }

    // Upper bound: product + M, with carries rippling upwards.
    uint64_t plusLo = lo + mulLo;
    uint64_t plusMid = mid + mulHi;
    if (plusLo < lo)
    {
        ++plusMid;
    }
    uint64_t plusHi = hi;
    if (plusMid < mid)
    {
        ++plusHi;
    }
    vp = shiftright128(plusMid, plusHi, (uint32_t)(j - 64 - 1));

    if (mmShift == 1)
    {
        // Lower bound: product - M, with borrows rippling upwards.
        uint64_t minusLo = lo - mulLo;
        uint64_t minusMid = mid - mulHi;
        if (minusLo > lo)
        {
            --minusMid;
        }
        uint64_t minusHi = hi;
        if (minusMid > mid)
        {
            --minusHi;
        }
        vm = shiftright128(minusMid, minusHi, (uint32_t)(j - 64 - 1));
    }
    else
    {
        // Lower bound: 2 * product - M, shifted by one extra bit to compensate for the doubling.
        uint64_t twiceLo = lo + lo;
        uint64_t twiceMid = mid + mid;
        if (twiceLo < lo)
        {
            ++twiceMid;
        }
        uint64_t twiceHi = hi + hi;
        if (twiceMid < mid)
        {
            ++twiceHi;
        }
        uint64_t minusLo = twiceLo - mulLo;
        uint64_t minusMid = twiceMid - mulHi;
        if (minusLo > twiceLo)
        {
            --minusMid;
        }
        uint64_t minusHi = twiceHi;
        if (minusMid > twiceMid)
        {
            --minusHi;
        }
        vm = shiftright128(minusMid, minusHi, (uint32_t)(j - 64));
    }

    return shiftright128(mid, hi, (uint32_t)(j - 64 - 1));
}
}
--------------------------------------------------------------------------------
/RyuCsharp/d2s_small_table.cs:
--------------------------------------------------------------------------------

namespace RyuCsharp;

partial class Ryu
{
    static readonly uint64_t[,] DOUBLE_POW5_INV_SPLIT2 = new uint64_t[13, 2] {
        { 1u, 2305843009213693952u },
        { 5955668970331000884u, 1784059615882449851u },
        { 8982663654677661702u, 1380349269358112757u },
        { 7286864317269821294u, 2135987035920910082u },
        {
7005857020398200553u, 1652639921975621497u }, 12 | { 17965325103354776697u, 1278668206209430417u }, 13 | { 8928596168509315048u, 1978643211784836272u }, 14 | { 10075671573058298858u, 1530901034580419511u }, 15 | { 597001226353042382u, 1184477304306571148u }, 16 | { 1527430471115325346u, 1832889850782397517u }, 17 | { 12533209867169019542u, 1418129833677084982u }, 18 | { 5577825024675947042u, 2194449627517475473u }, 19 | { 11006974540203867551u, 1697873161311732311u } 20 | }; 21 | static readonly uint32_t[] POW5_INV_OFFSETS = new uint32_t[19]{ 22 | 0x54544554, 0x04055545, 0x10041000, 0x00400414, 0x40010000, 0x41155555, 23 | 0x00000454, 0x00010044, 0x40000000, 0x44000041, 0x50454450, 0x55550054, 24 | 0x51655554, 0x40004000, 0x01000001, 0x00010500, 0x51515411, 0x05555554, 25 | 0x00000000 26 | }; 27 | 28 | static readonly uint64_t[,] DOUBLE_POW5_SPLIT2 = new uint64_t[13, 2] { 29 | { 0u, 1152921504606846976u }, 30 | { 0u, 1490116119384765625u }, 31 | { 1032610780636961552u, 1925929944387235853u }, 32 | { 7910200175544436838u, 1244603055572228341u }, 33 | { 16941905809032713930u, 1608611746708759036u }, 34 | { 13024893955298202172u, 2079081953128979843u }, 35 | { 6607496772837067824u, 1343575221513417750u }, 36 | { 17332926989895652603u, 1736530273035216783u }, 37 | { 13037379183483547984u, 2244412773384604712u }, 38 | { 1605989338741628675u, 1450417759929778918u }, 39 | { 9630225068416591280u, 1874621017369538693u }, 40 | { 665883850346957067u, 1211445438634777304u }, 41 | { 14931890668723713708u, 1565756531257009982u } 42 | }; 43 | static readonly uint32_t[] POW5_OFFSETS = new uint32_t[21] { 44 | 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x59695995, 45 | 0x55545555, 0x56555515, 0x41150504, 0x40555410, 0x44555145, 0x44504540, 46 | 0x45555550, 0x40004000, 0x96440440, 0x55565565, 0x54454045, 0x40154151, 47 | 0x55559155, 0x51405555, 0x00000105 48 | }; 49 | 50 | const int POW5_TABLE_SIZE = 26; 51 | static readonly uint64_t[] DOUBLE_POW5_TABLE = new 
uint64_t[POW5_TABLE_SIZE] {
        1ul, 5ul, 25ul, 125ul, 625ul, 3125ul, 15625ul, 78125ul, 390625ul,
        1953125ul, 9765625ul, 48828125ul, 244140625ul, 1220703125ul, 6103515625ul,
        30517578125ul, 152587890625ul, 762939453125ul, 3814697265625ul,
        19073486328125ul, 95367431640625ul, 476837158203125ul,
        2384185791015625ul, 11920928955078125ul, 59604644775390625ul,
        298023223876953125ul //, 1490116119384765625ul
    };

    // Computes 5^i in the form required by Ryu and writes it as two 64-bit words:
    // the low word to result, the high word to the element after it.
    //
    // The value is rebuilt from the small tables: row i / POW5_TABLE_SIZE of
    // DOUBLE_POW5_SPLIT2 is multiplied by DOUBLE_POW5_TABLE[i mod POW5_TABLE_SIZE],
    // shifted right by pow5bits(i) - pow5bits(row * POW5_TABLE_SIZE), and the low word
    // is then adjusted by the 2-bit entry for i packed in POW5_OFFSETS.
    static void double_computePow5(uint32_t i, ref uint64_t result)
    {
        uint32_t row = i / POW5_TABLE_SIZE;
        uint32_t anchor = row * POW5_TABLE_SIZE;
        uint32_t step = i - anchor;

        ref uint64_t anchorLo = ref DOUBLE_POW5_SPLIT2[row, 0];
        ref uint64_t anchorHi = ref Unsafe.Add(ref anchorLo, 1);
        ref uint64_t resultHi = ref Unsafe.Add(ref result, 1);

        if (step == 0)
        {
            // i sits exactly on a table row: copy it through.
            result = anchorLo;
            resultHi = anchorHi;
            return;
        }

        uint64_t factor = DOUBLE_POW5_TABLE[step];
        uint64_t upperProductLo = umul128(factor, anchorHi, out uint64_t top);
        uint64_t bottom = umul128(factor, anchorLo, out uint64_t lowerProductHi);
        uint64_t middle = lowerProductHi + upperProductLo;
        if (middle < lowerProductHi)
        {
            ++top; // overflow into the top word
        }

        // top | middle | bottom
        uint32_t delta = (uint32_t)(pow5bits((int32_t)i) - pow5bits((int32_t)anchor));
        uint64_t shiftedLow = shiftright128(bottom, middle, delta);
        uint32_t correction = (POW5_OFFSETS[i / 16] >> (int)((i % 16) << 1)) & 3;
        result = shiftedLow + correction;
        resultHi = shiftright128(middle, top, delta);
    }

    // Computes 5^-i in the form required by Ryu, and stores it in the given pointer.
// The low word goes to result, the high word to the element after it.
//
// The value is rebuilt from the small tables: row ceil(i / POW5_TABLE_SIZE) of
// DOUBLE_POW5_INV_SPLIT2 is multiplied by DOUBLE_POW5_TABLE[row * POW5_TABLE_SIZE - i],
// shifted right by pow5bits(row * POW5_TABLE_SIZE) - pow5bits(i), and the low word is
// then increased by 1 plus the 2-bit entry for i packed in POW5_INV_OFFSETS.
static void double_computeInvPow5(uint32_t i, ref uint64_t result)
{
    uint32_t row = (i + POW5_TABLE_SIZE - 1) / POW5_TABLE_SIZE;
    uint32_t anchor = row * POW5_TABLE_SIZE;
    uint32_t step = anchor - i;

    ref uint64_t anchorLo = ref DOUBLE_POW5_INV_SPLIT2[row, 0]; // 1/5^anchor
    ref uint64_t anchorHi = ref Unsafe.Add(ref anchorLo, 1);
    ref uint64_t resultHi = ref Unsafe.Add(ref result, 1);

    if (step == 0)
    {
        // i sits exactly on a table row: copy it through.
        result = anchorLo;
        resultHi = anchorHi;
        return;
    }

    uint64_t factor = DOUBLE_POW5_TABLE[step];
    uint64_t upperProductLo = umul128(factor, anchorHi, out uint64_t top);
    uint64_t bottom = umul128(factor, anchorLo, out uint64_t lowerProductHi);
    uint64_t middle = lowerProductHi + upperProductLo;
    if (middle < lowerProductHi)
    {
        ++top; // overflow into the top word
    }

    // top | middle | bottom
    uint32_t delta = (uint32_t)(pow5bits((int32_t)anchor) - pow5bits((int32_t)i));
    uint64_t shiftedLow = shiftright128(bottom, middle, delta);
    uint32_t correction = (POW5_INV_OFFSETS[i / 16] >> (int)((i % 16) << 1)) & 3;
    result = shiftedLow + 1 + correction;
    resultHi = shiftright128(middle, top, delta);
}
}
--------------------------------------------------------------------------------
/RyuCsharp/digit_table.cs:
--------------------------------------------------------------------------------
namespace RyuCsharp;

partial class Ryu
{
    static readonly char[] DIGIT_TABLE = new char[200] {
        '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
        '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
        '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
        '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
        '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
        '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
'6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
        '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
        '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
        '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
    };
}
--------------------------------------------------------------------------------
/RyuCsharp/f2s.cs:
--------------------------------------------------------------------------------


namespace RyuCsharp;

partial class Ryu
{
    const int FLOAT_MANTISSA_BITS = 23;
    const int FLOAT_EXPONENT_BITS = 8;
    const int FLOAT_BIAS = 127;

    // Returns how many times 5 divides value. value must be non-zero (asserted every iteration).
    static uint32_t pow5factor_32(uint32_t value)
    {
        uint32_t exponent = 0;
        while (true)
        {
            assert(value != 0);
            if (value % 5 != 0)
            {
                return exponent;
            }
            value /= 5;
            ++exponent;
        }
    }

    // Returns true if value is divisible by 5^p.
    static bool multipleOfPowerOf5_32(uint32_t value, uint32_t p)
    {
        uint32_t factorsOfFive = pow5factor_32(value);
        return factorsOfFive >= p;
    }

    // Returns true if value is divisible by 2^p.
    static bool multipleOfPowerOf2_32(uint32_t value, uint32_t p)
    {
        // __builtin_ctz doesn't appear to be faster here.
        uint32_t lowBitsMask = (1u << (int)p) - 1;
        return (value & lowBitsMask) == 0;
    }

    // Returns (m * factor) >> shift, which must fit in 32 bits (asserted). shift must exceed 32.
    // It seems to be slightly faster to avoid uint128_t here, although the
    // generated code for uint128_t looks slightly nicer.
    static uint32_t mulShift32(uint32_t m, uint64_t factor, int32_t shift)
    {
        assert(shift > 32);

        // The casts here help MSVC to avoid calls to the __allmul library
        // function.
        uint32_t lowHalf = (uint32_t)(factor);
        uint32_t highHalf = (uint32_t)(factor >> 32);
        uint64_t lowProduct = (uint64_t)m * lowHalf;
        uint64_t highProduct = (uint64_t)m * highHalf;

        // Drop the lowest 32 bits of the full product, then apply the rest of the shift.
        uint64_t upperBits = (lowProduct >> 32) + highProduct;
        uint64_t shifted = upperBits >> (shift - 32);
        assert(shifted <= /*UINT32_MAX*/uint32_t.MaxValue);
        return (uint32_t)shifted;
    }

    // mulShift32 against entry q of FLOAT_POW5_INV_SPLIT.
    static uint32_t mulPow5InvDivPow2(uint32_t m, uint32_t q, int32_t j)
    {
        uint64_t factor = FLOAT_POW5_INV_SPLIT[q];
        return mulShift32(m, factor, j);
    }

    // mulShift32 against entry i of FLOAT_POW5_SPLIT.
    static uint32_t mulPow5divPow2(uint32_t m, uint32_t i, int32_t j)
    {
        uint64_t factor = FLOAT_POW5_SPLIT[i];
        return mulShift32(m, factor, j);
    }

    // Converts the decoded mantissa/exponent fields of a float into a decimal
    // mantissa/exponent pair (steps 2-4 of the conversion; step 1 is done by the caller).
    static floating_decimal_32 f2d(uint32_t ieeeMantissa, uint32_t ieeeExponent)
    {
        int32_t e2;
        uint32_t m2;
        if (ieeeExponent == 0)
        {
            // We subtract 2 so that the bounds computation has 2 additional bits.
            e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
            m2 = ieeeMantissa;
        }
        else
        {
            e2 = (int32_t)ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2;
            m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa;
        }
        // The interval bounds are acceptable outputs only for an even binary mantissa.
        bool acceptBounds = (m2 & 1) == 0;

        // Step 2: Determine the interval of valid decimal representations.
        uint32_t mv = 4 * m2;
        uint32_t mp = 4 * m2 + 2;
        // 1 when the condition holds, 0 otherwise.
        uint32_t mmShift = (ieeeMantissa != 0 || ieeeExponent <= 1) ? 1U : 0;
        uint32_t mm = 4 * m2 - 1 - mmShift;

        // Step 3: Convert to a decimal power base using 64-bit arithmetic.
        uint32_t vr, vp, vm;
        int32_t e10;
        bool vmIsTrailingZeros = false;
        bool vrIsTrailingZeros = false;
        uint8_t lastRemovedDigit = 0;
        if (e2 >= 0)
        {
            uint32_t q = log10Pow2(e2);
            e10 = (int32_t)q;
            int32_t k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t)q) - 1;
            int32_t i = -e2 + (int32_t)q + k;
            vr = mulPow5InvDivPow2(mv, q, i);
            vp = mulPow5InvDivPow2(mp, q, i);
            vm = mulPow5InvDivPow2(mm, q, i);

            if (q != 0 && (vp - 1) / 10 <= vm / 10)
            {
                // We need to know one removed digit even if we are not going to loop below. We could use
                // q = X - 1 above, except that would require 33 bits for the result, and we've found that
                // 32-bit arithmetic is faster even on 64-bit machines.
                int32_t l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t)(q - 1)) - 1;
                lastRemovedDigit = (uint8_t)(mulPow5InvDivPow2(mv, q - 1, -e2 + (int32_t)q - 1 + l) % 10);
            }
            if (q <= 9)
            {
                // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well.
                // Only one of mp, mv, and mm can be a multiple of 5, if any.
                if (mv % 5 == 0)
                {
                    vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q);
                }
                else if (acceptBounds)
                {
                    vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q);
                }
                else if (multipleOfPowerOf5_32(mp, q))
                {
                    --vp;
                }
            }
        }
        else
        {
            uint32_t q = log10Pow5(-e2);
            e10 = (int32_t)q + e2;
            int32_t i = -e2 - (int32_t)q;
            int32_t k = pow5bits(i) - FLOAT_POW5_BITCOUNT;
            int32_t j = (int32_t)q - k;
            vr = mulPow5divPow2(mv, (uint32_t)i, j);
            vp = mulPow5divPow2(mp, (uint32_t)i, j);
            vm = mulPow5divPow2(mm, (uint32_t)i, j);

            if (q != 0 && (vp - 1) / 10 <= vm / 10)
            {
                j = (int32_t)q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT);
                lastRemovedDigit = (uint8_t)(mulPow5divPow2(mv, (uint32_t)(i + 1), j) % 10);
            }
            if (q <= 1)
            {
                // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
                // mv = 4 * m2, so it always has at least two trailing 0 bits.
                vrIsTrailingZeros = true;
                if (acceptBounds)
                {
                    // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
                    vmIsTrailingZeros = mmShift == 1;
                }
                else
                {
                    // mp = mv + 2, so it always has at least one trailing 0 bit.
                    --vp;
                }
            }
            else if (q < 31)
            {
                // TODO(ulfjack): Use a tighter bound here.
                vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1);
            }
        }

        // Step 4: Find the shortest decimal representation in the interval of valid representations.
        int32_t removed = 0;
        uint32_t output;
        if (vmIsTrailingZeros || vrIsTrailingZeros)
        {
            // General case, which happens rarely (~4.0%).
            while (vp / 10 > vm / 10)
            {
                vmIsTrailingZeros &= vm % 10 == 0;
                vrIsTrailingZeros &= lastRemovedDigit == 0;
                lastRemovedDigit = (uint8_t)(vr % 10);
                vr /= 10;
                vp /= 10;
                vm /= 10;
                ++removed;
            }

            if (vmIsTrailingZeros)
            {
                while (vm % 10 == 0)
                {
                    vrIsTrailingZeros &= lastRemovedDigit == 0;
                    lastRemovedDigit = (uint8_t)(vr % 10);
                    vr /= 10;
                    vp /= 10;
                    vm /= 10;
                    ++removed;
                }
            }

            if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0)
            {
                // Round even if the exact number is .....50..0.
                lastRemovedDigit = 4;
            }
            // We need to take vr + 1 if vr is outside bounds or we need to round up.
            bool outsideBounds = vr == vm && (!acceptBounds || !vmIsTrailingZeros);
            output = vr;
            if (outsideBounds || lastRemovedDigit >= 5)
            {
                ++output;
            }
        }
        else
        {
            // Specialized for the common case (~96.0%). Percentages below are relative to this.
            // Loop iterations below (approximately):
            // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01%
            while (vp / 10 > vm / 10)
            {
                lastRemovedDigit = (uint8_t)(vr % 10);
                vr /= 10;
                vp /= 10;
                vm /= 10;
                ++removed;
            }

            // We need to take vr + 1 if vr is outside bounds or we need to round up.
            output = vr;
            if (vr == vm || lastRemovedDigit >= 5)
            {
                ++output;
            }
        }

        floating_decimal_32 fd = default;
        fd.exponent = e10 + removed;
        fd.mantissa = output;
        return fd;
    }

    // Writes v in scientific notation ("d.dddE[-]xx", with a leading '-' when sign is set)
    // starting at result and returns the number of characters written. No terminator is added.
    static int to_chars(floating_decimal_32 v, bool sign, ref char result)
    {
        // Step 5: Print the decimal representation.
        int index = 0;
        if (sign)
        {
            Unsafe.Add(ref result, index++) = '-';
        }

        uint32_t output = v.mantissa;
        uint32_t olength = decimalLength9(output);

        // Print the decimal digits.
        // The following code is equivalent to:
        // for (uint32_t i = 0; i < olength - 1; ++i) {
        //   const uint32_t c = output % 10; output /= 10;
        //   result[index + olength - i] = (char) ('0' + c);
        // }
        // result[index] = '0' + output % 10;
        uint32_t consumed = 0;
        while (output >= 10000)
        {
            uint32_t lowFour = output % 10000;
            output /= 10000;
            uint32_t lowPair = (lowFour % 100) << 1;
            uint32_t highPair = (lowFour / 100) << 1;
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - consumed - 1) * sizeof(char)), ref DIGIT_TABLE[lowPair], 2);
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - consumed - 3) * sizeof(char)), ref DIGIT_TABLE[highPair], 2);
            consumed += 4;
        }
        if (output >= 100)
        {
            uint32_t pair = (output % 100) << 1;
            output /= 100;
            memcpy(ref Unsafe.AddByteOffset(ref result, (nint)(index + olength - consumed - 1) * sizeof(char)), ref DIGIT_TABLE[pair], 2);
            consumed += 2;
        }
        if (output >= 10)
        {
            uint32_t pair = output << 1;
            // We can't use memcpy here: the decimal dot goes between these two digits.
            Unsafe.AddByteOffset(ref result, (nint)(index + olength - consumed) * sizeof(char)) = DIGIT_TABLE[pair + 1];
            Unsafe.Add(ref result, index) = DIGIT_TABLE[pair];
        }
        else
        {
            Unsafe.Add(ref result, index) = (char)('0' + output);
        }

        // Print decimal point if needed.
        if (olength > 1)
        {
            Unsafe.Add(ref result, index + 1) = '.';
            index += (int)olength + 1;
        }
        else
        {
            ++index;
        }

        // Print the exponent.
        Unsafe.Add(ref result, index++) = 'E';
        int32_t exp = v.exponent + (int32_t)olength - 1;
        if (exp < 0)
        {
            Unsafe.Add(ref result, index++) = '-';
            exp = -exp;
        }

        if (exp >= 10)
        {
            memcpy(ref Unsafe.Add(ref result, index), ref DIGIT_TABLE[2 * exp], 2);
            index += 2;
        }
        else
        {
            Unsafe.Add(ref result, index++) = (char)('0' + exp);
        }

        return index;
    }

    // Writes the shortest decimal representation of f to result and returns the number of
    // characters written. Infinities, NaNs and zeros are delegated to copy_special_str.
    public static int f2s_buffered_n(float f, ref char result)
    {
        // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
        uint32_t bits = float_to_bits(f);

        // Decode bits into sign, mantissa, and exponent.
        const uint32_t mantissaMask = (1u << FLOAT_MANTISSA_BITS) - 1;
        const uint32_t exponentMask = (1u << FLOAT_EXPONENT_BITS) - 1;
        bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0;
        uint32_t ieeeMantissa = bits & mantissaMask;
        uint32_t ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & exponentMask;

        // Case distinction; exit early for the easy cases.
        bool allOnesExponent = ieeeExponent == exponentMask;
        bool isZero = ieeeExponent == 0 && ieeeMantissa == 0;
        if (allOnesExponent || isZero)
        {
            return copy_special_str(ref result, ieeeSign, ieeeExponent != 0, ieeeMantissa != 0);
        }

        floating_decimal_32 v = f2d(ieeeMantissa, ieeeExponent);
        return to_chars(v, ieeeSign, ref result);
    }

    // Same as f2s_buffered_n, but also writes a terminating '\0' after the last character.
    public static void f2s_buffered(float f, ref char result)
    {
        int length = f2s_buffered_n(f, ref result);

        // Terminate the string.
        Unsafe.Add(ref result, length) = '\0';
    }

}
--------------------------------------------------------------------------------
/RyuCsharp/f2s_full_table.cs:
--------------------------------------------------------------------------------


namespace RyuCsharp;

partial class Ryu
{
    // This table is generated by PrintFloatLookupTable.
8 | const int FLOAT_POW5_INV_BITCOUNT = 59; 9 | const int FLOAT_POW5_BITCOUNT = 61; 10 | 11 | static readonly uint64_t[] FLOAT_POW5_INV_SPLIT = new uint64_t[31] { 12 | 576460752303423489u, 461168601842738791u, 368934881474191033u, 295147905179352826u, 13 | 472236648286964522u, 377789318629571618u, 302231454903657294u, 483570327845851670u, 14 | 386856262276681336u, 309485009821345069u, 495176015714152110u, 396140812571321688u, 15 | 316912650057057351u, 507060240091291761u, 405648192073033409u, 324518553658426727u, 16 | 519229685853482763u, 415383748682786211u, 332306998946228969u, 531691198313966350u, 17 | 425352958651173080u, 340282366920938464u, 544451787073501542u, 435561429658801234u, 18 | 348449143727040987u, 557518629963265579u, 446014903970612463u, 356811923176489971u, 19 | 570899077082383953u, 456719261665907162u, 365375409332725730u 20 | }; 21 | static readonly uint64_t[] FLOAT_POW5_SPLIT = new uint64_t[47] { 22 | 1152921504606846976u, 1441151880758558720u, 1801439850948198400u, 2251799813685248000u, 23 | 1407374883553280000u, 1759218604441600000u, 2199023255552000000u, 1374389534720000000u, 24 | 1717986918400000000u, 2147483648000000000u, 1342177280000000000u, 1677721600000000000u, 25 | 2097152000000000000u, 1310720000000000000u, 1638400000000000000u, 2048000000000000000u, 26 | 1280000000000000000u, 1600000000000000000u, 2000000000000000000u, 1250000000000000000u, 27 | 1562500000000000000u, 1953125000000000000u, 1220703125000000000u, 1525878906250000000u, 28 | 1907348632812500000u, 1192092895507812500u, 1490116119384765625u, 1862645149230957031u, 29 | 1164153218269348144u, 1455191522836685180u, 1818989403545856475u, 2273736754432320594u, 30 | 1421085471520200371u, 1776356839400250464u, 2220446049250313080u, 1387778780781445675u, 31 | 1734723475976807094u, 2168404344971008868u, 1355252715606880542u, 1694065894508600678u, 32 | 2117582368135750847u, 1323488980084844279u, 1654361225106055349u, 2067951531382569187u, 33 | 1292469707114105741u, 
1615587133892632177u, 2019483917365790221u
    };
}
--------------------------------------------------------------------------------
/RyuCsharp/floating_decimal_32.cs:
--------------------------------------------------------------------------------


namespace RyuCsharp;

// A floating decimal representing m * 10^e.
struct floating_decimal_32
{
    public uint32_t mantissa;
    // Decimal exponent's range is -45 to 38
    // inclusive, and can fit in a short if needed.
    public int32_t exponent;
}
--------------------------------------------------------------------------------
/RyuCsharp/floating_decimal_64.cs:
--------------------------------------------------------------------------------


namespace RyuCsharp;

// A floating decimal representing m * 10^e.
struct floating_decimal_64
{
    public uint64_t mantissa;
    // Decimal exponent's range is -324 to 308
    // inclusive, and can fit in a short if needed.
    public int32_t exponent;
}
--------------------------------------------------------------------------------
/RyuCsharp/s2d.cs:
--------------------------------------------------------------------------------


using static RyuCsharp.Status;


namespace RyuCsharp;

partial class Ryu
{
    const int DOUBLE_EXPONENT_BIAS = 1023;
#if _MSC_VER
    static uint32_t floor_log2(uint64_t value)
    {
        long index;
        return _BitScanReverse64(&index, value) ? index : 64;
    }

#else

    // Returns 63 minus the leading-zero count reported by __builtin_clzll.
    static uint32_t floor_log2(uint64_t value)
    {
        return 63 - __builtin_clzll(value);
    }

#endif

    // Returns the larger of a and b.
    // The max function is already defined on Windows.
    static int32_t max32(int32_t a, int32_t b)
    {
        return a < b ? b : a;
    }

    // Reinterprets the 64 bits of 'bits' as a double without any numeric conversion.
    static double int64Bits2Double(uint64_t bits)
    {
        return Unsafe.As<uint64_t, double>(ref bits);
    }

    // Parses the first len characters at buffer as a decimal floating-point number
    // ([-]digits[.digits][(e|E)[+|-]digits]) and stores the nearest double in result.
    //
    // Returns:
    //   SUCCESS          the text was parsed; result holds the value (possibly +/-0 or +/-Infinity).
    //   INPUT_TOO_SHORT  len is 0.
    //   INPUT_TOO_LONG   more than 17 significant mantissa digits, or too many exponent digits.
    //   MALFORMED_INPUT  a second '.', a non-digit in the exponent, or trailing characters.
    // On any non-SUCCESS status result is left at 0.
    public static Status s2d_n(ref char buffer, int len, out double result)
    {
        result = 0;

        if (len == 0)
        {
            return INPUT_TOO_SHORT;
        }
        int m10digits = 0;
        int e10digits = 0;
        int dotIndex = len;
        int eIndex = len;
        uint64_t m10 = 0;
        int32_t e10 = 0;
        bool signedM = false;
        bool signedE = false;
        int i = 0;
        if (Unsafe.Add(ref buffer, i) == '-')
        {
            signedM = true;
            i++;
        }
        // Mantissa: digits with at most one '.'; leading zeros do not count as significant digits.
        for (; i < len; i++)
        {
            char c = Unsafe.Add(ref buffer, i);
            if (c == '.')
            {
                if (dotIndex != len)
                {
                    return MALFORMED_INPUT;
                }
                dotIndex = i;
                continue;
            }
            if ((c < '0') || (c > '9'))
            {
                break;
            }
            if (m10digits >= 17)
            {
                return INPUT_TOO_LONG;
            }
            m10 = 10 * m10 + (char)(c - '0');
            if (m10 != 0)
            {
                m10digits++;
            }
        }
        // Optional exponent.
        if (i < len && ((Unsafe.Add(ref buffer, i) == 'e') || (Unsafe.Add(ref buffer, i) == 'E')))
        {
            eIndex = i;
            i++;
            if (i < len && ((Unsafe.Add(ref buffer, i) == '-') || (Unsafe.Add(ref buffer, i) == '+')))
            {
                signedE = Unsafe.Add(ref buffer, i) == '-';
                i++;
            }
            for (; i < len; i++)
            {
                char c = Unsafe.Add(ref buffer, i);
                if ((c < '0') || (c > '9'))
                {
                    return MALFORMED_INPUT;
                }
                if (e10digits > 3)
                {
                    // TODO: Be more lenient. Return +/-Infinity or +/-0 instead.
                    return INPUT_TOO_LONG;
                }
                e10 = 10 * e10 + (c - '0');
                if (e10 != 0)
                {
                    e10digits++;
                }
            }
        }
        if (i < len)
        {
            return MALFORMED_INPUT;
        }
        if (signedE)
        {
            e10 = -e10;
        }
        // Fold the digits after the decimal point into the exponent.
        e10 -= dotIndex < eIndex ? eIndex - dotIndex - 1 : 0;
        if (m10 == 0)
        {
            result = signedM ? -0.0 : 0.0;
            return SUCCESS;
        }

        // m10 is non-zero from here on.
        if (m10digits + e10 <= -324)
        {
            // Number is less than 1e-324, which should be rounded down to 0; return +/-0.0.
            uint64_t ieee = ((uint64_t)(signedM ? 1 : 0)) << (DOUBLE_EXPONENT_BITS + DOUBLE_MANTISSA_BITS);
            result = int64Bits2Double(ieee);
            return SUCCESS;
        }
        if (m10digits + e10 >= 310)
        {
            // Number is larger than 1e+309, which is beyond the largest finite double; return +/-Infinity.
            uint64_t ieee = (((uint64_t)(signedM ? 1 : 0)) << (DOUBLE_EXPONENT_BITS + DOUBLE_MANTISSA_BITS)) | (0x7fful << DOUBLE_MANTISSA_BITS);
            result = int64Bits2Double(ieee);
            return SUCCESS;
        }

        // Convert to binary float m2 * 2^e2, while retaining information about whether the conversion
        // was exact (trailingZeros).
        int32_t e2;
        uint64_t m2;
        bool trailingZeros;
        if (e10 >= 0)
        {
            // The length of m * 10^e in bits is:
            //   log2(m10 * 10^e10) = log2(m10) + e10 log2(10) = log2(m10) + e10 + e10 * log2(5)
            //
            // We want to compute the DOUBLE_MANTISSA_BITS + 1 top-most bits (+1 for the implicit leading
            // one in IEEE format). We therefore choose a binary output exponent of
            //   log2(m10 * 10^e10) - (DOUBLE_MANTISSA_BITS + 1).
            //
            // We use floor(log2(5^e10)) so that we get at least this many bits; better to
            // have an additional bit than to not have enough bits.
            e2 = (int32_t)(floor_log2(m10) + e10 + log2pow5(e10) - (DOUBLE_MANTISSA_BITS + 1));

            // We now compute [m10 * 10^e10 / 2^e2] = [m10 * 5^e10 / 2^(e2-e10)].
            // To that end, we use the DOUBLE_POW5_SPLIT table.
            int j = e2 - e10 - ceil_log2pow5(e10) + DOUBLE_POW5_BITCOUNT;
            assert(j >= 0);
            assert(e10 < DOUBLE_POW5_TABLE_SIZE);
            m2 = mulShift64(m10, ref DOUBLE_POW5_SPLIT[(uint)e10, 0], j);

            // We also compute if the result is exact, i.e.,
            //   [m10 * 10^e10 / 2^e2] == m10 * 10^e10 / 2^e2.
            // This can only be the case if 2^e2 divides m10 * 10^e10, which in turn requires that the
            // largest power of 2 that divides m10 + e10 is greater than e2. If e2 is less than e10, then
            // the result must be exact. Otherwise we use the existing multipleOfPowerOf2 function.
            trailingZeros = e2 < e10 || multipleOfPowerOf2(m10, (uint32_t)(e2 - e10));
        }
        else
        {
            e2 = (int32_t)(floor_log2(m10) + e10 - ceil_log2pow5(-e10) - (DOUBLE_MANTISSA_BITS + 1));
            int j = e2 - e10 + ceil_log2pow5(-e10) - 1 + DOUBLE_POW5_INV_BITCOUNT;
            assert(-e10 < DOUBLE_POW5_INV_TABLE_SIZE);
            m2 = mulShift64(m10, ref DOUBLE_POW5_INV_SPLIT[(uint)(-e10), 0], j);
            trailingZeros = multipleOfPowerOf5(m10, (uint)(-e10));
        }

        // Compute the final IEEE exponent.
        uint32_t ieee_e2 = (uint32_t)max32(0, (int32_t)(e2 + DOUBLE_EXPONENT_BIAS + floor_log2(m2)));

        if (ieee_e2 > 0x7fe)
        {
            // Final IEEE exponent is larger than the maximum representable; return +/-Infinity.
            uint64_t ieee = (((uint64_t)(signedM ? 1 : 0)) << (DOUBLE_EXPONENT_BITS + DOUBLE_MANTISSA_BITS)) | (0x7fful << DOUBLE_MANTISSA_BITS);
            result = int64Bits2Double(ieee);
            return SUCCESS;
        }

        // We need to figure out how much we need to shift m2. The tricky part is that we need to take
        // the final IEEE exponent into account, so we need to reverse the bias and also special-case
        // the value 0.
        int32_t shift = (int32_t)((ieee_e2 == 0 ? 1 : ieee_e2) - e2 - DOUBLE_EXPONENT_BIAS - DOUBLE_MANTISSA_BITS);
        assert(shift >= 0);

        // We need to round up if the exact value is more than 0.5 above the value we computed. That's
        // equivalent to checking if the last removed bit was 1 and either the value was not just
        // trailing zeros or the result would otherwise be odd.
        //
        // We need to update trailingZeros given that we have the exact output exponent ieee_e2 now.
        trailingZeros &= (m2 & ((1ul << (shift - 1)) - 1)) == 0;
        uint64_t lastRemovedBit = (m2 >> (shift - 1)) & 1;
        bool roundUp = (lastRemovedBit != 0) && (!trailingZeros || (((m2 >> shift) & 1) != 0));

        uint64_t ieee_m2 = (m2 >> shift) + (roundUp ? 1U : 0);
        if (ieee_m2 == (1ul << (DOUBLE_MANTISSA_BITS + 1)))
        {
            // Due to how the IEEE represents +/-Infinity, we don't need to check for overflow here.
            ieee_e2++;
        }
        ieee_m2 &= (1ul << DOUBLE_MANTISSA_BITS) - 1;
        uint64_t ieee2 = (((((uint64_t)(signedM ? 1 : 0)) << DOUBLE_EXPONENT_BITS) | (uint64_t)ieee_e2) << DOUBLE_MANTISSA_BITS) | ieee_m2;
        result = int64Bits2Double(ieee2);
        return SUCCESS;
    }

    // Parses the text at buffer, taking its length from strlen, and stores the value in result.
    static Status s2d(ref char buffer, out double result)
    {
        return s2d_n(ref buffer, strlen(ref buffer), out result);
    }
}
--------------------------------------------------------------------------------
/RyuCsharpTest/Program.cs:
--------------------------------------------------------------------------------
using RyuCsharp;
using System;
using System.Runtime.CompilerServices;

namespace RyuCsharpTest
{
    unsafe class Program
    {
        // Prints round-trip results for a fixed set of sample values.
        static unsafe void Main(string[] args)
        {
            Test(3);
            Test(3.14);
            Test(3.1415926);
            Test(998);
            Test(1218);
            Test(19971218);
            Test(ulong.MaxValue);
            Test(long.MinValue);
            Test(0.1);
            Test(0.00314);
            Test(0.0000000998);
            Test(-0.0000000998);

            Console.WriteLine();

            Test(3.1415926e100);
            Test(double.MaxValue);
            Test(double.MinValue);
            Test(5737722933969577e-231);
        }

        // Formats val with d2s, d2exp(10) and d2fixed(10), parses each string back with s2d_n,
        // and prints the formatted text next to the parsed value.
        public static void Test(double val)
        {
            const int buffer_length = 2000;

            var buffer = new char[buffer_length];

            var str1 = new string(buffer, 0, Ryu.d2s_buffered_n(val, ref buffer[0]));
            double val1;
            var eq1 = Ryu.s2d_n(ref buffer[0], str1.Length, out val1);
            Empty(ref buffer[0], buffer_length);

            var str2 = new string(buffer, 0, Ryu.d2exp_buffered_n(val, 10, ref buffer[0]));
            double val2;
            var eq2 = Ryu.s2d_n(ref buffer[0], str2.Length, out val2);
            Empty(ref buffer[0], buffer_length);

            var str3 = new string(buffer, 0, Ryu.d2fixed_buffered_n(val, 10, ref buffer[0]));
            double val3;
            var eq3 = Ryu.s2d_n(ref buffer[0], str3.Length, out val3);
            Empty(ref buffer[0], buffer_length);

            Console.WriteLine($"Value: {val}, d2s: [{str1} -- s2d: {val1}], d2exp(10): [{str2} -- s2d: {val2}], d2fixed(10): [{str3} -- s2d: {val3}]");
        }

        // Zero-fills the first 'length' characters starting at buffer.
        public static void Empty(ref char buffer, int length)
        {
            Unsafe.InitBlock(
                ref Unsafe.As<char, byte>(ref buffer),
                0,
                checked((uint)length * sizeof(char))
                );
        }
    }
}
--------------------------------------------------------------------------------
/RyuCsharpTest/RyuCsharpTest.csproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | Exe 5 | netcoreapp3.0 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
--------------------------------------------------------------------------------