├── .gitignore ├── README.md ├── dub.sdl ├── mono-d ├── docs.dproj ├── fast.dproj ├── fast.sln └── generate unicode tables.dproj ├── source ├── fast │ ├── buffer.d │ ├── cstring.d │ ├── format.d │ ├── internal │ │ ├── benchmarks.d │ │ ├── helpers.d │ │ ├── sysdef.di │ │ └── unicode_tables.d │ ├── intmath.d │ ├── json.d │ ├── parsing.d │ ├── string.d │ └── unicode.d ├── std │ └── simd.d └── unicode │ └── generator.d └── test ├── fail1.json ├── fail10.json ├── fail11.json ├── fail12.json ├── fail13.json ├── fail14.json ├── fail15.json ├── fail16.json ├── fail17.json ├── fail18.json ├── fail19.json ├── fail2.json ├── fail20.json ├── fail21.json ├── fail22.json ├── fail23.json ├── fail24.json ├── fail25.json ├── fail26.json ├── fail27.json ├── fail28.json ├── fail29.json ├── fail3.json ├── fail30.json ├── fail31.json ├── fail32.json ├── fail33.json ├── fail4.json ├── fail5.json ├── fail6.json ├── fail7.json ├── fail8.json ├── fail9.json ├── pass1.json ├── pass2.json └── pass3.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated documentation 2 | /docs/ 3 | 4 | # Dub cache 5 | /.dub/ 6 | 7 | # Object and executable output directory 8 | /generated/ 9 | 10 | # Generated by OProfile (system wide profiler) 11 | /oprofile_data/ 12 | 13 | # Unicode Character Database files can be downloaded here when tables need to be regenerated 14 | /ucd/ 15 | 16 | # Mono-D user preferences 17 | /mono-d/fast.userprefs 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fast 2 | ==== 3 | 4 | This library aims to provide the fastest possible implementation of some every day routines. 5 | 6 | The contained functions avoid GC allocations and input validation. They may use SSE or stack allocations to reach a high throughput so that in some cases a 20 fold speed increase can be achieved. 
7 | 8 | **[DMD](https://dlang.org/)**, **[GDC](https://gdcproject.org/)** and **[LDC2](https://wiki.dlang.org/LDC)** compilers are supported. Tested with front-end versions **2.068** through **2.079**. 9 | 10 | ### Benchmark 11 | A benchmark is included and can be run through dub, e.g.: 12 | 13 | dub --config=benchmark --build=release --compiler=gdc 14 | 15 | ### Examples 16 | 17 | ##### Read JSON file with coordinates. 18 | ```d 19 | struct Point3D { double x, y, z; } 20 | 21 | void main() 22 | { 23 | import fast.json; 24 | auto points = json.coordinates.read!(Point3D[]); 25 | } 26 | ``` 27 | 28 | ##### SSE3 accelerated splitting around '/' and '\' 29 | ```d 30 | string rest = pathname; 31 | string element; 32 | 33 | import fast.string; 34 | while (rest.split!`or(=\,=/)`(element, rest)) 35 | { 36 | // `element` is now the next directory. 37 | // `rest` is what remains after the \ or /. 38 | } 39 | // `element` is now the file name part of the path. 40 | ``` 41 | 42 | ##### Calling Windows API functions. 43 | ```d 44 | void createHardlink(string from, string to) 45 | { 46 | import fast.cstring : wcharPtr; 47 | CreateHardLinkW(wcharPtr!to, wcharPtr!from, null); 48 | } 49 | ``` 50 | 51 | ##### Calling Linux API functions. 52 | ```d 53 | void createHardlink(string from, string to) 54 | { 55 | import fast.cstring : charPtr; 56 | link(charPtr!from, charPtr!to); 57 | } 58 | ``` 59 | -------------------------------------------------------------------------------- /dub.sdl: -------------------------------------------------------------------------------- 1 | name "fast" 2 | description "A library that aims to provide the fastest possible implementation of some every day routines." 
3 | homepage "http://github.com/mleise/fast" 4 | authors "Marco Leise" 5 | copyright "Copyright © 2017, Marco Leise" 6 | license "GPL-3.0" 7 | 8 | excludedSourceFiles "source/docs/*.d" "source/unicode/*.d" 9 | targetPath "generated" 10 | 11 | configuration "library" { 12 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc" 13 | targetType "library" 14 | } 15 | 16 | configuration "shared-library" { 17 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc" 18 | targetType "dynamicLibrary" 19 | libs "gdruntime" "gphobos" platform="gdc" // Force linking with shared Phobos2, not the non-PIC static objects 20 | } 21 | 22 | configuration "benchmark" { 23 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc" 24 | targetType "executable" 25 | versions "benchmark" 26 | } 27 | 28 | configuration "benchmark-pic" { 29 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc" 30 | targetType "executable" 31 | versions "benchmark" 32 | dflags "-fPIC" 33 | } 34 | -------------------------------------------------------------------------------- /mono-d/docs.dproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Documentation 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60} 9 | ..\source 10 | DMD 11 | true 12 | true 13 | true 14 | 0.3.2 15 | 16 | 17 | ..\generated 18 | ../generated/debug 19 | false 20 | false 21 | generate_docs 22 | Executable 23 | true 24 | 0 25 | true 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /mono-d/fast.dproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Debug 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503} 9 | true 10 | false 11 | True 12 | DMD 13 | 14 | 15 | 16 | 17 | ..\source 18 | 0.3.2 19 | 20 | 21 | ..\generated 22 | Executable 23 | 
benchmark 24 | false 25 | 0 26 | ../generated/release 27 | ../docs 28 | 29 | 30 | benchmark 31 | 32 | 33 | false 34 | false 35 | 36 | 37 | true 38 | ..\generated 39 | false 40 | ../generated/debug 41 | ../docs 42 | 43 | 44 | benchmark 45 | 46 | 47 | false 48 | benchmark-debug 49 | Executable 50 | false 51 | 0 52 | true 53 | 54 | 55 | true 56 | ..\generated 57 | false 58 | Executable 59 | fast-unittest-x86 60 | true 61 | false 62 | 0 63 | ../generated/unittest-x86 64 | ../docs 65 | 66 | 67 | true 68 | ..\generated 69 | false 70 | Executable 71 | fast-unittest-x64 72 | true 73 | false 74 | 0 75 | ../generated/unittest-x64 76 | ../docs 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /mono-d/fast.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "fast", "fast.dproj", "{DFEB5CCB-A636-4971-8302-89CDBF3B4503}" 5 | EndProject 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{87AD35CC-088E-43A6-99E8-A216AABD25F0}" 7 | ProjectSection(SolutionItems) = preProject 8 | ..\README.md = ..\README.md 9 | ..\.gitignore = ..\.gitignore 10 | ..\dub.sdl = ..\dub.sdl 11 | ..\test\fail1.json = ..\test\fail1.json 12 | ..\test\fail2.json = ..\test\fail2.json 13 | ..\test\fail3.json = ..\test\fail3.json 14 | ..\test\fail4.json = ..\test\fail4.json 15 | ..\test\fail5.json = ..\test\fail5.json 16 | ..\test\fail6.json = ..\test\fail6.json 17 | ..\test\fail7.json = ..\test\fail7.json 18 | ..\test\fail8.json = ..\test\fail8.json 19 | ..\test\fail9.json = ..\test\fail9.json 20 | ..\test\fail10.json = ..\test\fail10.json 21 | 
..\test\fail11.json = ..\test\fail11.json 22 | ..\test\fail12.json = ..\test\fail12.json 23 | ..\test\fail13.json = ..\test\fail13.json 24 | ..\test\fail14.json = ..\test\fail14.json 25 | ..\test\fail15.json = ..\test\fail15.json 26 | ..\test\fail16.json = ..\test\fail16.json 27 | ..\test\fail17.json = ..\test\fail17.json 28 | ..\test\fail18.json = ..\test\fail18.json 29 | ..\test\fail19.json = ..\test\fail19.json 30 | ..\test\fail20.json = ..\test\fail20.json 31 | ..\test\fail21.json = ..\test\fail21.json 32 | ..\test\fail22.json = ..\test\fail22.json 33 | ..\test\fail23.json = ..\test\fail23.json 34 | ..\test\fail24.json = ..\test\fail24.json 35 | ..\test\fail25.json = ..\test\fail25.json 36 | ..\test\fail26.json = ..\test\fail26.json 37 | ..\test\fail27.json = ..\test\fail27.json 38 | ..\test\fail28.json = ..\test\fail28.json 39 | ..\test\fail29.json = ..\test\fail29.json 40 | ..\test\fail30.json = ..\test\fail30.json 41 | ..\test\fail31.json = ..\test\fail31.json 42 | ..\test\fail32.json = ..\test\fail32.json 43 | ..\test\fail33.json = ..\test\fail33.json 44 | ..\test\pass1.json = ..\test\pass1.json 45 | ..\test\pass2.json = ..\test\pass2.json 46 | ..\test\pass3.json = ..\test\pass3.json 47 | ..\benchall.sh = ..\benchall.sh 48 | EndProjectSection 49 | EndProject 50 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "docs", "docs.dproj", "{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}" 51 | EndProject 52 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "generate unicode tables", "generate unicode tables.dproj", "{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}" 53 | EndProject 54 | Global 55 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 56 | Benchmark|Any CPU = Benchmark|Any CPU 57 | Debug|Any CPU = Debug|Any CPU 58 | Release|Any CPU = Release|Any CPU 59 | Unittest|x86 = Unittest|x86 60 | Unittest|x64 = Unittest|x64 61 | Unicode tables|Any CPU = Unicode tables|Any CPU 62 | Documentation|Any CPU = Documentation|Any CPU 63 | EndGlobalSection 64 | 
GlobalSection(ProjectConfigurationPlatforms) = postSolution 65 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU 66 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU 67 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Documentation|Any CPU.ActiveCfg = Unicode tables|Any CPU 68 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU 69 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.ActiveCfg = Unicode tables|Any CPU 70 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.Build.0 = Unicode tables|Any CPU 71 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU 72 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU 73 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU 74 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU 75 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU 76 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.Build.0 = Documentation|Any CPU 77 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU 78 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unicode tables|Any CPU.ActiveCfg = Documentation|Any CPU 79 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU 80 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU 81 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.ActiveCfg = Benchmark|Any CPU 82 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.Build.0 = Benchmark|Any CPU 83 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 84 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.Build.0 = Debug|Any CPU 85 | 
{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU 86 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.ActiveCfg = Benchmark|Any CPU 87 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.Build.0 = Benchmark|Any CPU 88 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unicode tables|Any CPU.ActiveCfg = Benchmark|Any CPU 89 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.ActiveCfg = Unittest|x64 90 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.Build.0 = Unittest|x64 91 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.ActiveCfg = Unittest|x86 92 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.Build.0 = Unittest|x86 93 | EndGlobalSection 94 | GlobalSection(NestedProjects) = preSolution 95 | EndGlobalSection 96 | GlobalSection(MonoDevelopProperties) = preSolution 97 | BaseDirectory = .. 98 | Policies = $0 99 | $0.TextStylePolicy = $1 100 | $1.inheritsSet = null 101 | $1.scope = application/json 102 | $0.TextStylePolicy = $2 103 | $2.inheritsSet = Mono 104 | $2.inheritsScope = text/plain 105 | $2.scope = text/plain 106 | $0.TextStylePolicy = $3 107 | $3.FileWidth = 120 108 | $3.TabsToSpaces = False 109 | $3.NoTabsAfterNonTabs = True 110 | $3.inheritsSet = VisualStudio 111 | $3.inheritsScope = text/plain 112 | $3.scope = text/x-d 113 | $0.DFormattingPolicy = $4 114 | $4.inheritsSet = Mono 115 | $4.inheritsScope = text/x-d 116 | $4.scope = text/x-d 117 | $0.StandardHeader = $5 118 | $5.Text = 119 | $5.IncludeInNewFiles = True 120 | $0.NameConventionPolicy = $6 121 | $6.Rules = $7 122 | $7.NamingRule = $8 123 | $8.Name = Namespaces 124 | $8.AffectedEntity = Namespace 125 | $8.VisibilityMask = VisibilityMask 126 | $8.NamingStyle = PascalCase 127 | $8.IncludeInstanceMembers = True 128 | $8.IncludeStaticEntities = True 129 | $7.NamingRule = $9 130 | $9.Name = Types 131 | $9.AffectedEntity = Class, Struct, Enum, Delegate 132 | $9.VisibilityMask = Public 133 | $9.NamingStyle = PascalCase 134 | 
$9.IncludeInstanceMembers = True 135 | $9.IncludeStaticEntities = True 136 | $7.NamingRule = $10 137 | $10.Name = Interfaces 138 | $10.RequiredPrefixes = $11 139 | $11.String = I 140 | $10.AffectedEntity = Interface 141 | $10.VisibilityMask = Public 142 | $10.NamingStyle = PascalCase 143 | $10.IncludeInstanceMembers = True 144 | $10.IncludeStaticEntities = True 145 | $7.NamingRule = $12 146 | $12.Name = Attributes 147 | $12.RequiredSuffixes = $13 148 | $13.String = Attribute 149 | $12.AffectedEntity = CustomAttributes 150 | $12.VisibilityMask = Public 151 | $12.NamingStyle = PascalCase 152 | $12.IncludeInstanceMembers = True 153 | $12.IncludeStaticEntities = True 154 | $7.NamingRule = $14 155 | $14.Name = Event Arguments 156 | $14.RequiredSuffixes = $15 157 | $15.String = EventArgs 158 | $14.AffectedEntity = CustomEventArgs 159 | $14.VisibilityMask = Public 160 | $14.NamingStyle = PascalCase 161 | $14.IncludeInstanceMembers = True 162 | $14.IncludeStaticEntities = True 163 | $7.NamingRule = $16 164 | $16.Name = Exceptions 165 | $16.RequiredSuffixes = $17 166 | $17.String = Exception 167 | $16.AffectedEntity = CustomExceptions 168 | $16.VisibilityMask = VisibilityMask 169 | $16.NamingStyle = PascalCase 170 | $16.IncludeInstanceMembers = True 171 | $16.IncludeStaticEntities = True 172 | $7.NamingRule = $18 173 | $18.Name = Methods 174 | $18.AffectedEntity = Methods 175 | $18.VisibilityMask = Protected, Public 176 | $18.NamingStyle = PascalCase 177 | $18.IncludeInstanceMembers = True 178 | $18.IncludeStaticEntities = True 179 | $7.NamingRule = $19 180 | $19.Name = Static Readonly Fields 181 | $19.AffectedEntity = ReadonlyField 182 | $19.VisibilityMask = Protected, Public 183 | $19.NamingStyle = PascalCase 184 | $19.IncludeInstanceMembers = False 185 | $19.IncludeStaticEntities = True 186 | $7.NamingRule = $20 187 | $20.Name = Fields 188 | $20.AffectedEntity = Field 189 | $20.VisibilityMask = Protected, Public 190 | $20.NamingStyle = PascalCase 191 | 
$20.IncludeInstanceMembers = True 192 | $20.IncludeStaticEntities = True 193 | $7.NamingRule = $21 194 | $21.Name = ReadOnly Fields 195 | $21.AffectedEntity = ReadonlyField 196 | $21.VisibilityMask = Protected, Public 197 | $21.NamingStyle = PascalCase 198 | $21.IncludeInstanceMembers = True 199 | $21.IncludeStaticEntities = False 200 | $7.NamingRule = $22 201 | $22.Name = Constant Fields 202 | $22.AffectedEntity = ConstantField 203 | $22.VisibilityMask = Protected, Public 204 | $22.NamingStyle = PascalCase 205 | $22.IncludeInstanceMembers = True 206 | $22.IncludeStaticEntities = True 207 | $7.NamingRule = $23 208 | $23.Name = Properties 209 | $23.AffectedEntity = Property 210 | $23.VisibilityMask = Protected, Public 211 | $23.NamingStyle = PascalCase 212 | $23.IncludeInstanceMembers = True 213 | $23.IncludeStaticEntities = True 214 | $7.NamingRule = $24 215 | $24.Name = Events 216 | $24.AffectedEntity = Event 217 | $24.VisibilityMask = Protected, Public 218 | $24.NamingStyle = PascalCase 219 | $24.IncludeInstanceMembers = True 220 | $24.IncludeStaticEntities = True 221 | $7.NamingRule = $25 222 | $25.Name = Enum Members 223 | $25.AffectedEntity = EnumMember 224 | $25.VisibilityMask = VisibilityMask 225 | $25.NamingStyle = PascalCase 226 | $25.IncludeInstanceMembers = True 227 | $25.IncludeStaticEntities = True 228 | $7.NamingRule = $26 229 | $26.Name = Parameters 230 | $26.AffectedEntity = Parameter 231 | $26.VisibilityMask = VisibilityMask 232 | $26.NamingStyle = CamelCase 233 | $26.IncludeInstanceMembers = True 234 | $26.IncludeStaticEntities = True 235 | $7.NamingRule = $27 236 | $27.Name = Type Parameters 237 | $27.RequiredPrefixes = $28 238 | $28.String = T 239 | $27.AffectedEntity = TypeParameter 240 | $27.VisibilityMask = VisibilityMask 241 | $27.NamingStyle = PascalCase 242 | $27.IncludeInstanceMembers = True 243 | $27.IncludeStaticEntities = True 244 | $0.VersionControlPolicy = $29 245 | $29.CommitMessageStyle = $30 246 | $30.FileSeparator = ", " 247 | 
$30.IncludeDirectoryPaths = True 248 | $29.inheritsSet = Mono 249 | $0.ChangeLogPolicy = $31 250 | $31.UpdateMode = None 251 | $31.MessageStyle = $32 252 | $32.LineAlign = 0 253 | $31.inheritsSet = Mono 254 | description = A library for D that aims to provide the fastest possible implementation of some every day routines. 255 | version = 0.3.2 256 | outputpath = .. 257 | EndGlobalSection 258 | GlobalSection(SolutionProperties) = preSolution 259 | HideSolutionNode = FALSE 260 | EndGlobalSection 261 | EndGlobal 262 | -------------------------------------------------------------------------------- /mono-d/generate unicode tables.dproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Unicode tables 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1} 9 | ..\source 10 | DMD 11 | true 12 | true 13 | true 14 | 0.3.2 15 | 16 | 17 | ..\generated 18 | ../docs 19 | ../generated/debug 20 | false 21 | false 22 | generate_unicode_tables 23 | Executable 24 | false 25 | 0 26 | true 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /source/fast/buffer.d: -------------------------------------------------------------------------------- 1 | /** 2 | * Fast buffer implementation. 3 | * 4 | * Authors: 5 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 6 | * 7 | * Copyright: 8 | * © 2015 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 9 | * 10 | * License: 11 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 12 | */ 13 | module fast.buffer; nothrow 14 | 15 | import core.stdc.stdint; 16 | import core.stdc.stdlib; 17 | import std.range; 18 | import core.exception; 19 | 20 | 21 | enum allocaLimit = 2048; 22 | 23 | 24 | /******************************************************************************* 25 | * 26 | * Dynamic array using `malloc`, `realloc` and `free` under the hood. 
Note that
 *   memory will be released on scope exit.
 *
 *   Capacities and lengths are counted in elements of `T`, not in bytes.
 *
 *   NOTE(review): there is a destructor but no `@disable this(this)` (unlike
 *   `TempBuffer`), so a by-value copy would `free` the same pointer twice.
 *   Pass instances by `ref`.
 *
 **************************************/
struct RaiiArray(T)
{
private:

	T*     m_ptr;
	size_t m_capacity;


	/// Converts an element count into the byte count handed to the C allocator.
	/// A count whose byte size does not fit into `size_t` is reported as out-of-memory.
	nothrow
	static size_t byteCount(size_t elements)
	{
		if (elements > size_t.max / T.sizeof)
			onOutOfMemoryError();
		return T.sizeof * elements;
	}


public:

	/**
	 * Allocates room for `capacity` elements. A capacity of 0 allocates nothing.
	 *
	 * Params:
	 *   capacity = Number of `T` elements to reserve.
	 */
	nothrow
	this(size_t capacity)
	{
		if (capacity)
		{
			// The allocator works in bytes, the array in elements.
			m_ptr = cast(T*) malloc(byteCount(capacity));
			if (m_ptr is null)
				onOutOfMemoryError();
			m_capacity = capacity;
		}
	}


	/// Releases the memory block, if any.
	nothrow @nogc
	~this()
	{
		if (m_ptr !is null)
			free(m_ptr);
	}


	/// Pointer to the first element, or `null` while the capacity is 0.
	@safe pure nothrow @nogc
	@property inout(T)* ptr() inout
	{
		return m_ptr;
	}


	/// Number of elements the array currently has room for.
	@safe pure nothrow @nogc
	@property size_t capacity() const
	{
		return m_capacity;
	}


	/**
	 * Resizes the array to `value` elements using `realloc`. Setting the
	 * capacity to 0 frees the memory block.
	 *
	 * Params:
	 *   value = New number of `T` elements.
	 */
	nothrow
	@property void capacity(size_t value)
	{
		if (value != 0)
		{
			// Keep the old pointer until realloc succeeded, so nothing leaks on failure.
			if (T* ptrNew = cast(T*) realloc(m_ptr, byteCount(value)))
				m_ptr = ptrNew;
			else onOutOfMemoryError();
		}
		else if (m_ptr)
		{
			free(m_ptr);
			m_ptr = null;
		}
		m_capacity = value;
	}


	/// The array is always "full": its length is its capacity.
	alias length = capacity;


	mixin Slicing;
	mixin CapacityTools;
}


/*******************************************************************************
 * 
 *   Fixed maximum number of items on the stack. Memory is a static stack buffer.
 *   This buffer can be filled up and cleared for reuse.
*
 **************************************/
struct LimitedScopeBuffer(T, size_t n)
{
private:

	T[n]   m_data;  // inline storage for up to `n` elements
	size_t m_used;  // number of elements currently exposed by `length`/`opSlice`


public:

	/// Pointer to the start of the inline storage.
	@safe pure nothrow @nogc
	@property inout(T)* ptr() inout
	{
		return m_data.ptr;
	}


	/// Number of elements currently in use.
	@safe pure nothrow @nogc
	@property size_t length() const
	{
		return m_used;
	}

	/// Sets the number of elements in use; must not exceed `n` (checked by the `in` contract only).
	@safe pure nothrow @nogc
	@property void length(size_t value)
	in
	{
		assert( value <= n );
	}
	body
	{
		m_used = value;
	}


	/// The used part of the storage as a slice.
	@safe pure nothrow @nogc
	inout(T)[] opSlice() inout
	{
		return m_data[0 .. m_used];
	}
}


/*******************************************************************************
 * 
 *   Non-copyable wrapper around a slice whose memory is passed to `free` on
 *   destruction when `callFree` is set. Through `alias ptr this` an instance
 *   implicitly converts to a `T*`.
 *
 **************************************/
struct TempBuffer(T)
{
	T[] slice;      // the wrapped memory
	bool callFree;  // true when `slice.ptr` must be released with `free`

	@disable this(this);

	~this() nothrow
	{
		if (this.callFree)
			free(this.slice.ptr);
	}

	T[] opSlice() @safe pure nothrow { return this.slice[]; }
	T[] opSlice(size_t a, size_t b) @safe pure nothrow { return this.slice[a .. b]; }
	T[] opSliceAssign(const(T)[] value, size_t a, size_t b) @safe pure nothrow { return this.slice[a .. b] = value; }
	ref T opIndex(size_t idx) @safe pure nothrow { return this.slice[idx]; }
	/// Size of the slice in bytes.
	@property size_t size() @safe pure nothrow { return T.sizeof * this.slice.length; }
	/// Size of the slice in elements.
	@property size_t length() @safe pure nothrow { return this.slice.length; }
	alias opDollar = length;
	@property T* ptr() @trusted pure nothrow { return this.slice.ptr; } // must use .ptr here for zero length strings
	alias ptr this;

	/// Returns an output range that writes into the buffer starting at index 0.
	/// Writes go through a raw pointer, so they are not checked against the slice length.
	auto makeOutputRange()
	{
		struct OutputRange
		{
			T* ptr;
			size_t idx;

			// NOTE(review): the template parameter `T` shadows the element type `T` of the enclosing struct.
			void put(T)(auto ref T t) { ptr[idx++] = t; }
			/// Everything written so far.
			T[] opSlice() pure nothrow { return ptr[0 .. idx]; }
		}
		return OutputRange(this.slice.ptr, 0);
	}
}


/*******************************************************************************
 * 
 *   Creates a `TempBuffer!T` with room for `length` elements.
 *
 *   With the default argument, requests of up to `allocaLimit` bytes are taken
 *   from `alloca`, larger ones pass `null`. A `null` buffer makes the function
 *   `malloc` the memory and mark the result to be freed on destruction.
 *
 *   NOTE(review): this presumably relies on the default argument being
 *   evaluated in the caller, so that the `alloca` block lives in the caller's
 *   frame - confirm. The `malloc` result is not checked for `null`.
 *
 **************************************/
TempBuffer!T tempBuffer(T, alias length, size_t allocaLimit = .allocaLimit)
	(void* buffer = (T.sizeof * length <= allocaLimit) ? alloca(T.sizeof * length) : null)
{
	return TempBuffer!T((cast(T*) (
		buffer is null
		? malloc(T.sizeof * length)
		: buffer))[0 .. length],
		buffer is null);
}


/*******************************************************************************
 * 
 *   Returns a structure to your stack that contains a buffer of $(D bytes) size.
 *   Memory is allocated by calling `.alloc!T(count)` on it in order to get
 *   `count` elements of type `T`. The return value will be a RAII structure
 *   that releases the memory back to the stack buffer upon destruction, so it can
 *   be reused. The pointer within that RAII structure is aligned to
 *   `T.alignof`. If the internal buffer isn't enough to fulfill the request
 *   including padding from alignment, then `malloc()` is used instead.
 *
 * Warning:
 *   Always keep the return value of `.alloc()` around on your stack until
 *   you are done with its contents. Never pass it directly into functions as
 *   arguments!
 *
 * Params:
 *   bytes = The size of the buffer on the stack.
 *
 * Returns:
 *   A stack buffer allocator.
 *
 **************************************/
auto stackBuffer(size_t bytes)() @trusted pure
{
	// All that remains of this after inlining is a stack pointer decrement and
	// a mov instruction for the `null`.
	StackBuffer!bytes result = void;
	// `last` is pointed at its own address, reinterpreted as an entry.
	// NOTE(review): presumably the `last`/`sentinel` field pair is meant to double as a
	// terminating pseudo-entry; this depends on field layout and on `result` not being
	// moved when returned - confirm.
	result.last = cast(StackBufferEntry!void*) &result.last;
	result.sentinel = null;
	return result;
}


/// Wraps a raw pointer in an output range that writes consecutive elements starting at `t`.
/// No bounds are known, so writes are unchecked. Slicing the range yields everything written so far.
auto asOutputRange(T)(T* t) @safe pure
{
	struct PointerRange
	{
	private:

		T* start;  // where writing began
		T* ptr;    // next write position

	public:

		void put()(auto ref const(T) t) pure
		{
			*this.ptr++ = t;
		}

		T[] opSlice() pure
		{
			return this.start[0 .. this.ptr - this.start];
		}
	}
	static assert(isOutputRange!(PointerRange, T));
	return PointerRange(t, t);
}


/// Returns the source text of an "alloca or null" expression over the identifiers `size` and `allocaLimit`.
/// NOTE(review): the `size` alias parameter is not used in the body; presumably the string is meant to be mixed in - confirm.
enum bufferArg(alias size)()
{
	return "((size <= allocaLimit) ? alloca(size) : null)";
}



package:

/// Allocator state produced by `stackBuffer`: `bytes` of uninitialized inline space plus
/// a pointer to the most recently handed out entry.
struct StackBuffer(size_t bytes)
{
private:

	void[bytes] space = void;
	StackBufferEntry!void* last;
	void* sentinel;

public:

	@disable this(this);

	/// Hands out room for `howMany` elements of `T`.
	/// The candidate address is `last.ptr` rounded down to a multiple of `T.alignof`. If that lies
	/// above the start of `space` with at least `howMany` elements in between, the block directly
	/// below it is returned and linked in as the new `last`. Otherwise the memory comes from `malloc`
	/// (or is `null` when the byte size would overflow `size_t`).
	@trusted
	StackBufferEntry!T alloc(T)(size_t howMany)
	{
		enum max = size_t.max / T.sizeof;
		alias SBE = StackBufferEntry!T;
		T* target = cast(T*) (cast(uintptr_t) this.last.ptr / T.alignof * T.alignof);
		if (target > this.space.ptr && cast(uintptr_t) (target - cast(T*) this.space.ptr) >= howMany)
			return SBE(target - howMany, this.last);
		else
			// TODO: Respect alignment here as well by padding. Optionally also embed a length in the heap block, so we can provide slicing of the whole thing.
			return SBE(howMany <= max ? cast(T*) malloc(T.sizeof * howMany) : null);
	}
}

/// Handle for memory obtained from `StackBuffer.alloc`. A non-null `prev` marks an entry that was
/// linked into the stack buffer's chain; a null `prev` marks a `malloc` block that the destructor frees.
struct StackBufferEntry(T)
{
private:

	StackBufferEntry!void* prev;

	// Heap case: `prev` stays null.
	this(T* ptr) pure { this.ptr = ptr; }

	// Stack case: remembers the previous head in `prev` and makes this entry the new head.
	// NOTE(review): `last` receives the address of the struct under construction - verify that
	// this address stays valid once the entry has been returned to the caller.
	this(T* ptr, ref StackBufferEntry!void* last) pure
	{
		this.ptr = ptr;
		this.prev = last;
		last = cast(StackBufferEntry!void*) &this;
	}


public:

	T* ptr;

	static if (!is(T == void))
	{
		@disable this(this);

		~this() @trusted
		{
			if (this.prev)
			{
				// Walks to the entry whose `prev` is null ...
				StackBufferEntry!void* it = this.prev;
				while (it.prev) it = it.prev;
				// ... then stores `this.prev` into the `ptr` field of the entry `prev` points to.
				// NOTE(review): `it` is not used after the loop; `&it.ptr` may have been intended - confirm.
				auto last = cast(StackBufferEntry!void**) &prev.ptr;
				*last = this.prev;
			}
			else free(this.ptr);
		}

		@system pure nothrow @nogc
		ref inout(T) opIndex(size_t idx) inout
		{
			return ptr[idx];
		}

		@system pure nothrow @nogc
		inout(T)[] opSlice(size_t a, size_t b) inout
		{
			return ptr[a .. b];
		}

		/// Output range writing into the entry's memory, see `asOutputRange`.
		@safe pure nothrow @nogc
		@property auto range()
		{
			return ptr.asOutputRange();
		}
	}
}



private:

/// Adds indexing and slicing to a type that provides `ptr`, `length` and an element type `T`.
/// Bounds are verified by `in` contracts only.
mixin template Slicing()
{
	public
	{
		@nogc pure nothrow
		ref inout(T) opIndex(size_t idx) inout
		in
		{
			assert(idx < length);
		}
		body
		{
			return ptr[idx];
		}


		@nogc pure nothrow
		inout(T)[] opSlice() inout
		{
			return ptr[0 .. length];
		}


		@nogc pure nothrow
		inout(T)[] opSlice(size_t a, size_t b) inout
		in
		{
			assert(a <= b && b <= length);
		}
		body
		{
			return ptr[a .. b];
		}
	}
}


/// Adds `capacityNeeded` to a type that provides a readable and writable `capacity` property.
mixin template CapacityTools()
{
	public
	{
		/// Grows the capacity to `c` if it is currently smaller; never shrinks.
		nothrow
		void capacityNeeded(size_t c)
		{
			if (capacity < c)
				capacity = c;
		}
	}
}
--------------------------------------------------------------------------------
/source/fast/cstring.d:
--------------------------------------------------------------------------------
/*******************************************************************************
 * 
 * Converts between UTF-8 and UTF-16.
 * 
 * Authors:
 *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 * 
 * Copyright:
 *   © 2013 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 * 
 * License:
 *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 * 
 **************************************/
module fast.cstring; @nogc nothrow:

import core.stdc.stdlib;
import core.stdc.string;
//import std.traits;
import fast.buffer;


/**
 * Converts a string to a wstring using a buffer provided by the user.
 * To get the buffer requirements call $(D string2wstringSize) on your source buffer.
 *
 * Params:
 *   src = The UTF-8 string to convert.
 *   dst = The destination buffer for the conversion.
 *
 * Returns:
 *   The part of the destination buffer used for the conversion as a $(D wchar[]).
 *   A terminating zero is appended, so the result.ptr can be passed into Windows APIs.
*/
pure
wchar[] string2wstring(in char[] src, wchar* dst)
{
	const char* srcEnd = src.ptr + src.length;
	const(char)* srcIt = src.ptr;
	wchar* dstIt = dst;

	// The input is assumed to be well-formed UTF-8; it is not validated.
	while (srcIt !is srcEnd)
	{
		// The number of leading 1 bits in the first byte is the sequence length
		// (0 for plain ASCII). `mask` stops on the first 0 bit.
		int len = 0;
		uint mask = 0b1000_0000;
		while (*srcIt & mask)
		{
			mask >>= 1;
			len++;
		}

		// The payload of the first byte are the bits below that 0 bit.
		uint cp = *srcIt++ & (mask - 1);

		// Every continuation byte contributes 6 more bits.
		while (--len > 0)
			cp = (cp << 6) | (*srcIt++ & 0b0011_1111);

		if (cp > wchar.max)
		{
			// Outside the BMP: a surrogate pair encodes the 20 bits of (cp - 0x10000),
			// high 10 bits first.
			const uint offset = cp - 0x1_0000;
			*dstIt++ = cast(wchar) (0xD800 | (offset >> 10));
			*dstIt++ = cast(wchar) (0xDC00 | (offset & 0b11_1111_1111));
		}
		else
		{
			*dstIt++ = cast(wchar) cp;
		}
	}
	*dstIt = 0;

	return dst[0 .. dstIt - dst];
}

/**
 * Overload for input that already consists of UTF-16 code units. The units are
 * copied to `dst` unchanged and a terminating zero is appended.
 *
 * Params:
 *   src = The UTF-16 code units to copy.
 *   dst = The destination buffer; needs room for `src.length + 1` code units,
 *         which is what $(D string2wstringSize) reports.
 *
 * Returns:
 *   The part of the destination buffer that holds the copy, without the terminator.
 */
pure
wchar[] string2wstring(in ushort[] src, wchar* dst)
{
	// memcpy counts bytes, not code units.
	memcpy(dst, src.ptr, wchar.sizeof * src.length);
	dst[src.length] = 0;
	return dst[0 .. src.length];
}

/**
 * Calculates the required buffer size in bytes for a string to wchar[] conversion.
 * Room for a terminating '\0' is included.
 *
 * Params:
 *   src = The source string.
 *
 * Returns:
 *   The maximum byte count the source string could require, including the terminating '\0'.
 *   If that count does not fit into a $(D size_t), $(D size_t.max) is returned.
 *
 * See_Also:
 *   string2wstring
 *
 */
@safe pure
size_t string2wstringSize(in char[] src)
{
	// One UTF-8 byte never produces more than one UTF-16 code unit.
	enum limit = size_t.max / wchar.sizeof - 1;
	return src.length <= limit ? wchar.sizeof * (src.length + 1) : size_t.max;
}

/// ditto
@safe pure
size_t string2wstringSize(in ushort[] src)
{
	enum limit = size_t.max / wchar.sizeof - 1;
	return src.length <= limit ? wchar.sizeof * (src.length + 1) : size_t.max;
}


/**
 * Converts a wstring to a string using a buffer provided by the user.
 * To get the buffer requirements call $(D wstring2stringSize) on your source buffer.
 *
 * Params:
 *   src = The UTF-16 string to convert.
 *   dst = The destination buffer for the conversion.
 *
 * Returns:
 *   The part of the destination buffer used for the conversion as a $(D char[]).
 *   A terminating zero is appended, so the result.ptr can be passed into C APIs.
 */
pure
char[] wstring2string(in wchar[] src, char* dst)
{
	const wchar* srcEnd = src.ptr + src.length;
	const(wchar)* srcIt = src.ptr;
	char* dstIt = dst;

	while (srcIt !is srcEnd)
	{
		// Exactly one of the four branches runs per iteration.
		const uint unit = *srcIt++;
		if (unit < 0x80)
		{
			*dstIt++ = cast(char) unit;
		}
		else if (unit < 0x800)
		{
			*dstIt++ = cast(char) (0b_11000000 | (unit >> 6));
			*dstIt++ = cast(char) (0b_10000000 | (unit & 0b_00111111));
		}
		else if (unit < 0xD800 || unit > 0xDBFF)
		{
			// anything else within the BMP (<= 0xFFFF), but not a high surrogate
			*dstIt++ = cast(char) (0b_11100000 | (unit >> 12));
			*dstIt++ = cast(char) (0b_10000000 | ((unit >> 6) & 0b_00111111));
			*dstIt++ = cast(char) (0b_10000000 | (unit & 0b_00111111));
		}
		else
		{
			// high surrogate, assume correct encoding and that the next wchar is the low surrogate
			const uint low = *srcIt++;
			// The pair stores (code point - 0x10000), so the bias has to be added back.
			const uint decoded = 0x1_0000 + (((unit & 0b11_1111_1111) << 10) | (low & 0b11_1111_1111));
			*dstIt++ = cast(char) (0b_11110000 | (decoded >> 18));
			*dstIt++ = cast(char) (0b_10000000 | ((decoded >> 12) & 0b_00111111));
			*dstIt++ = cast(char) (0b_10000000 | ((decoded >> 6) & 0b_00111111));
			*dstIt++ = cast(char) (0b_10000000 | (decoded & 0b_00111111));
		}
	}
	*dstIt = 0;

	return dst[0 .. dstIt - dst];
}

/**
 * Calculates the required buffer size in bytes for a wstring to char[] conversion.
 * Room for a terminating '\0' is included.
 *
 * Params:
 *   src = The source string.
 *
 * Returns:
 *   The maximum byte count the source string could require, including the terminating '\0'.
 *
 * See_Also:
 *   wstring2string
 *
 */
@safe pure
size_t wstring2stringSize(in wchar[] src)
{
	// Worst case is 3 bytes of UTF-8 per UTF-16 code unit (a BMP character >= 0x800).
	// A surrogate pair takes 2 code units and becomes 4 bytes, which stays below that bound.
	// `limit` is the largest length for which the multiplication below cannot overflow.
	enum limit = (size_t.max / char.sizeof - 1) / 3;
	return src.length <= limit ? char.sizeof * (3 * src.length + 1) : size_t.max;
}


/**
 * Replaces $(D std.utf.toUTFz) with a version that uses the stack as long as the required bytes for the output are
 * <= $(D allocaLimit). Longer strings use $(D malloc) to create a buffer for the conversion. It is freed at least at
 * the end of the scope.
 *
 * Params:
 *   str = The source string to convert.
 *   buffer = Not meant to be supplied by the caller. D evaluates default arguments at the call site, so the
 *     $(D alloca) in the default value reserves memory in the caller's stack frame. It is $(D null) when the
 *     string is too large for the stack, in which case the function falls back to $(D malloc).
 *
 * Returns:
 *   A $(D TempBuffer!wchar) wrapping the converted, zero-terminated string.
 *
 * See_Also:
 *   toWstring
 *
 * Example:
 * ---
 * string text = "Hello, world!";
 * WinApiW(wcharPtr!text);
 * ---
 */
auto wcharPtr(alias str)(void* buffer = string2wstringSize(str) <= allocaLimit ? alloca(string2wstringSize(str)) : null)
{
	// In any case we have to return a proper InstantBuffer, so that free() is called in the dtor at some point.
	// NOTE(review): the second argument presumably tells TempBuffer that the memory came from malloc() and
	// must be freed - confirm against TempBuffer's definition.
	return TempBuffer!wchar(
		string2wstring(str, cast(wchar*) (buffer ? buffer : malloc(string2wstringSize(str)))),
		buffer is null);
}

/// ditto
immutable(wchar)* wcharPtr(alias wstr)()
	if (is(typeof(wstr) == wstring) && __traits(compiles, { enum wstring e = wstr; }))
{
	// Only matches wstrings whose value is available at compile time (see the constraint), so no copy is needed.
	// D string literals (known at compile time) are always \0-terminated.
	return wstr.ptr;
}

/**
 * $(D char*) version of $(D wcharPtr). Basically it appends a \0 to the input.
 * The function uses $(D malloc) for strings of lengths 1024 and above.
228 | * 229 | * Params: 230 | * str = The source string to convert to a C UTF-8 string 231 | * 232 | * Note: 233 | * Do not use this to call Windows ANSI functions! Always use wide-char 234 | * functions on this operating system unless you want to deal with codepages. 235 | * 236 | * Example: 237 | * --- 238 | * string text = "Hello, world!"; 239 | * linuxApi(charPtr!text); 240 | * --- 241 | */ 242 | auto charPtr(alias str)(void* buffer = alloca(str.length + 1)) 243 | if (is(typeof(str) : const(char)[]) || is(typeof(str) : const(ubyte)[])) 244 | { 245 | char* dst = cast(char*) memcpy(buffer ? buffer : malloc(str.length + 1), str.ptr, str.length); 246 | dst[str.length] = '\0'; 247 | return TempBuffer!char(dst[0 .. str.length], buffer is null); 248 | } 249 | 250 | /// ditto 251 | immutable(char)* charPtr(alias str)() 252 | if (__traits(compiles, { enum string e = str; })) 253 | { 254 | // D string literals (known at compile time) are always \0-terminated. 255 | return str.ptr; 256 | } 257 | 258 | /** 259 | * This overload allocates the required memory from an existing stack buffer. 260 | * 261 | * Params: 262 | * str = The source string to convert to a C UTF-8 string 263 | * sb = The stack buffer to allocate from 264 | * 265 | * Note: 266 | * Always assign the result to an auto variable first for RAII to work correctly. 267 | */ 268 | StackBufferEntry!char charPtr(SB)(const(char)[] str, ref SB sb) 269 | if (is(SB == StackBuffer!bytes, bytes...)) 270 | { 271 | auto buffer = sb.alloc!char(str.length + 1); 272 | memcpy(buffer.ptr, str.ptr, str.length); 273 | buffer[str.length] = '\0'; 274 | return buffer; 275 | } 276 | 277 | /** 278 | * Returns the given $(D ptr) up to but not including the \0 as a $(D char[]). 279 | */ 280 | inout(char)[] asString(inout(char*) ptr) @trusted pure 281 | { 282 | if (ptr is null) return null; 283 | return ptr[0 .. 
strlen(ptr)]; 284 | } -------------------------------------------------------------------------------- /source/fast/format.d: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * 3 | * Functions for formatting data into strings and back. 4 | * 5 | * Authors: 6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 7 | * 8 | * Copyright: 9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * License: 12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 13 | * 14 | **************************************/ 15 | module fast.format; 16 | 17 | import core.stdc.stdlib; 18 | import core.stdc.string; 19 | import core.bitop; 20 | import std.string; 21 | import std.traits; 22 | import std.typecons; 23 | import std.typetuple; 24 | import fast.internal.helpers; 25 | 26 | 27 | /+ 28 | ╔══════════════════════════════════════════════════════════════════════════════ 29 | ║ ⚑ Hex String 30 | ╚══════════════════════════════════════════════════════════════════════════════ 31 | +/ 32 | 33 | /** 34 | * Converts an unsigned type into a fixed width 8 digits hex string using lower-case letters. 35 | * 36 | * Params: 37 | * n = the number to convert 38 | * 39 | * Returns: 40 | * hexadecimal representation of $(D n), lower-case letters 41 | */ 42 | @safe pure nothrow @nogc 43 | char[2 * U.sizeof] hexStrLower(U)(Unqual!U n) if (isUnsigned!U) 44 | { 45 | char[2 * U.sizeof] hex = void; 46 | foreach_reverse (i; 0 .. 2 * U.sizeof) 47 | { 48 | U d = n & U(0xF); 49 | hex[i] = cast(char) (d < 10 ? '0' + d : 'a' + d - 10); 50 | n >>= 4; 51 | } 52 | return hex; 53 | } 54 | 55 | 56 | /** 57 | * Converts an unsigned type into a fixed width 8 digits hex string using upper-case letters. 
58 | * 59 | * Params: 60 | * n = the number to convert 61 | * 62 | * Returns: 63 | * hexadecimal representation of $(D n), upper-case letters 64 | */ 65 | @safe pure nothrow @nogc 66 | char[2 * U.sizeof] hexStrUpper(U)(U n) if (isUnsigned!U) 67 | { 68 | char[2 * U.sizeof] hex = void; 69 | foreach_reverse (i; 0 .. 2 * U.sizeof) 70 | { 71 | U d = n & U(0xF); 72 | hex[i] = cast(char) (d < 10 ? '0' + d : 'A' + d - 10); 73 | n >>= 4; 74 | } 75 | return hex; 76 | } 77 | 78 | 79 | /+ 80 | ╔══════════════════════════════════════════════════════════════════════════════ 81 | ║ ⚑ Decimal String 82 | ╚══════════════════════════════════════════════════════════════════════════════ 83 | +/ 84 | 85 | template decDigits(T) if (isIntegral!T) 86 | { 87 | static if (is(T == ulong)) 88 | enum decDigits = 20; 89 | else static if (is(T == long)) 90 | enum decDigits = 19; 91 | else static if (is(T == uint) || is(T == int)) 92 | enum decDigits = 10; 93 | else static if (is(T == ushort) || is(T == short)) 94 | enum decDigits = 5; 95 | else static if (is(T == ubyte) || is(T == byte)) 96 | enum decDigits = 3; 97 | } 98 | 99 | 100 | enum decChars(T) = decDigits!T + isSigned!T; 101 | 102 | 103 | @safe pure nothrow @nogc 104 | RevFillStr!(decChars!I) decStr(I)(I i) if (isIntegral!I) 105 | { 106 | RevFillStr!(decChars!I) str; 107 | size_t idx = decChars!I; 108 | 109 | static if (isSigned!I) 110 | { 111 | bool signed = i < 0; 112 | UnsignedOf!I u = i < 0 ? 
-i : i; 113 | } 114 | else alias u = i; 115 | 116 | do 117 | { 118 | str ~= char('0' + u % 10); 119 | u /= 10; 120 | } 121 | while (u); 122 | 123 | static if (isSigned!I) if (signed) 124 | str ~= '-'; 125 | 126 | return str; 127 | } 128 | 129 | 130 | /+ 131 | ╔══════════════════════════════════════════════════════════════════════════════ 132 | ║ ⚑ Formatting 133 | ╚══════════════════════════════════════════════════════════════════════════════ 134 | +/ 135 | 136 | template hasKnownSpaceRequirement(T) 137 | { 138 | static if (isIntegral!T || isPointer!T) 139 | enum hasKnownSpaceRequirement = true; 140 | else 141 | enum hasKnownSpaceRequirement = false; 142 | } 143 | 144 | 145 | template spaceRequirement(string format, T) if (hasKnownSpaceRequirement!T) 146 | { 147 | static if (isIntegral!T) 148 | { 149 | static if (format == "%s" || format == "%d") 150 | enum spaceRequirement = decChars!T; 151 | else static if (isUnsigned!T && (format == "%x" || format == "%X")) 152 | enum spaceRequirement = 2 * T.sizeof; 153 | else static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format); 154 | } 155 | else static if (isPointer!T) 156 | { 157 | static if (format == "%s" || format == "%p") 158 | enum spaceRequirement = 2 * T.sizeof; 159 | else static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format); 160 | } 161 | else static assert (0, "Don't know how to handle " ~ T.stringof); 162 | } 163 | 164 | 165 | enum spaceRequirements(string format, Args...)() if (allSatisfy!(hasKnownSpaceRequirement, Args)) 166 | { 167 | size_t sum = 0; 168 | 169 | alias parts = tokenizedFormatString!format; 170 | foreach (i; staticIota!(0, parts.length)) 171 | { 172 | static if (parts[i][1] == size_t.max) 173 | sum += parts[i][0].length; 174 | else 175 | sum += spaceRequirement!(parts[i][0], Args[parts[i][1]]); 176 | } 177 | 178 | return sum; 179 | } 180 | 181 | 182 | template tokenizedFormatString(string format) 183 | { 184 | enum impl() 185 | { 186 | 
Tuple!(string, size_t)[] parts; 187 | size_t i = 0; 188 | string rest = format; 189 | 190 | while (1) 191 | { 192 | ptrdiff_t markerPos = rest.indexOf("%"); 193 | if (markerPos < 0) 194 | return rest.length ? parts ~ tuple(rest, size_t.max) : parts; 195 | 196 | if (markerPos) 197 | { 198 | parts ~= tuple(rest[0 .. markerPos], size_t.max); 199 | rest = rest[markerPos .. $]; 200 | } 201 | 202 | // TODO: more complex formats 203 | parts ~= tuple(rest[0 .. 2], i++); 204 | rest = rest[2 .. $]; 205 | } 206 | } 207 | 208 | enum result = impl(); 209 | static immutable Tuple!(string, size_t)[result.length] tokenizedFormatString = result; 210 | } 211 | 212 | 213 | enum formatStringArgCount(string format)() 214 | { 215 | size_t count = 0; 216 | 217 | alias parts = tokenizedFormatString!format; 218 | foreach (i; staticIota!(0, parts.length)) 219 | if (parts[i][1] != size_t.max && parts[i][1] >= count) 220 | count = parts[i][1] + 1; 221 | 222 | return count; 223 | } 224 | 225 | 226 | template format(string fmt) 227 | { 228 | import std.exception; 229 | 230 | enum argCnt = formatStringArgCount!fmt; 231 | 232 | enum codeGen() 233 | { 234 | string code = `pure nothrow string format(`; 235 | foreach (i; staticIota!(0, argCnt)) 236 | { 237 | if (i) code ~= `, `; 238 | code ~= std.string.format("A%s", i); 239 | } 240 | code ~= `)(`; 241 | foreach (i; staticIota!(0, argCnt)) 242 | { 243 | if (i) code ~= `, `; 244 | code ~= std.string.format("A%s a%s", i, i); 245 | } 246 | code ~= `, char[] buffer = new char[](spaceRequirements!(fmt`; 247 | foreach (i; staticIota!(0, argCnt)) 248 | code ~= std.string.format(", A%s", i); 249 | code ~= `))) { return assumeUnique(formattedWrite!fmt(buffer.ptr`; 250 | foreach (i; staticIota!(0, argCnt)) 251 | code ~= std.string.format(", a%s", i); 252 | code ~= `)); }`; 253 | return code; 254 | } 255 | 256 | mixin(codeGen()); 257 | } 258 | 259 | 260 | template formata(string fmt) 261 | { 262 | enum argCnt = formatStringArgCount!fmt; 263 | 264 | enum 
codeGen() 265 | { 266 | string code = `pure nothrow @nogc char[] formata(`; 267 | foreach (i; staticIota!(0, argCnt)) 268 | { 269 | if (i) code ~= `, `; 270 | code ~= std.string.format("A%s", i); 271 | } 272 | code ~= `)(`; 273 | foreach (i; staticIota!(0, argCnt)) 274 | { 275 | if (i) code ~= `, `; 276 | code ~= std.string.format("A%s a%s", i, i); 277 | } 278 | code ~= `, void* buffer = alloca(spaceRequirements!(fmt`; 279 | foreach (i; staticIota!(0, argCnt)) 280 | code ~= std.string.format(", A%s", i); 281 | code ~= `))) { return formattedWrite!fmt(cast(char*) buffer`; 282 | foreach (i; staticIota!(0, argCnt)) 283 | code ~= std.string.format(", a%s", i); 284 | code ~= `); }`; 285 | return code; 286 | } 287 | 288 | mixin(codeGen()); 289 | } 290 | 291 | 292 | template formats(string fmt) 293 | { 294 | enum argCnt = formatStringArgCount!fmt; 295 | 296 | enum codeGen() 297 | { 298 | string code = `@safe pure nothrow @nogc auto formats(`; 299 | foreach (i; staticIota!(0, argCnt)) 300 | { 301 | if (i) code ~= `, `; 302 | code ~= std.string.format("A%s", i); 303 | } 304 | code ~= `)(`; 305 | foreach (i; staticIota!(0, argCnt)) 306 | { 307 | if (i) code ~= `, `; 308 | code ~= std.string.format("A%s a%s", i, i); 309 | } 310 | code ~= `))) { LimitedScopeBuffer!(char, spaceRequirements!(fmt`; 311 | foreach (i; staticIota!(0, argCnt)) 312 | code ~= std.string.format(", A%s", i); 313 | code ~= `)) buffer; buffer.length = formattedWrite!fmt(buffer.ptr`; 314 | foreach (i; staticIota!(0, argCnt)) 315 | code ~= std.string.format(", a%s", i); 316 | code ~= `).length; return buffer; }`; 317 | return code; 318 | } 319 | 320 | mixin(codeGen()); 321 | } 322 | 323 | 324 | char[] formattedWrite(string format, Args...)(char* buffer, Args args) 325 | { 326 | char* it = buffer; 327 | 328 | alias parts = tokenizedFormatString!format; 329 | foreach (i; staticIota!(0, parts.length)) 330 | { 331 | static if (parts[i][1] == size_t.max) 332 | { 333 | // Direct string copy 334 | memcpy( it, 
parts[i][0].ptr, parts[i][0].length ); 335 | it += parts[i][0].length; 336 | } 337 | else 338 | { 339 | // Formatted argument 340 | it.formattedWriteItem!(parts[i][0])( args[parts[i][1]] ); 341 | } 342 | } 343 | 344 | return buffer[0 .. it - buffer]; 345 | } 346 | 347 | 348 | pure nothrow @nogc 349 | void formattedWriteItem(string format, T)(ref char* buffer, T t) 350 | if (isUnsigned!T && format == "%x") 351 | { 352 | alias RT = ReturnType!(hexStrLower!T); 353 | *cast(RT*) buffer = hexStrLower!T(t); 354 | buffer += RT.length; 355 | } 356 | 357 | 358 | pure nothrow @nogc 359 | void formattedWriteItem(string format, T)(ref char* buffer, T t) 360 | if (isUnsigned!T && format == "%X") 361 | { 362 | alias RT = ReturnType!(hexStrUpper!T); 363 | *cast(RT*) buffer = hexStrUpper!T(t); 364 | buffer += RT.length; 365 | } 366 | 367 | 368 | pure nothrow @nogc 369 | void formattedWriteItem(string format, T)(ref char* buffer, T t) 370 | if (isIntegral!T && (format == "%s" || format == "%d")) 371 | { 372 | auto str = decStr(t); 373 | memcpy( buffer, str.ptr, str.length ); 374 | buffer += str.length; 375 | } 376 | 377 | 378 | pure nothrow @nogc 379 | void formattedWriteItem(string format)(ref char* buffer, void* p) 380 | if (format == "%s" || format == "%p") 381 | { 382 | buffer.formattedWriteItem!"%X"( cast(size_t) p ); 383 | } 384 | 385 | 386 | /+ 387 | ╔══════════════════════════════════════════════════════════════════════════════ 388 | ║ ⚑ Helper Structs 389 | ╚══════════════════════════════════════════════════════════════════════════════ 390 | +/ 391 | 392 | struct RevFillStr(size_t n) 393 | { 394 | private: 395 | 396 | size_t offset = n; 397 | char[n] buffer = '\0'; 398 | 399 | 400 | public: 401 | 402 | alias opSlice this; 403 | 404 | @safe pure nothrow @nogc 405 | void opOpAssign(string op : "~")(char ch) 406 | in 407 | { 408 | assert( offset > 0 ); 409 | } 410 | body 411 | { 412 | buffer[--offset] = ch; 413 | } 414 | 415 | 416 | @safe pure nothrow @nogc 417 | @property 
inout(char)[] opSlice() inout 418 | { 419 | return buffer[offset .. n]; 420 | } 421 | 422 | 423 | @safe pure nothrow @nogc 424 | @property inout(char)* ptr() inout 425 | { 426 | return &buffer[offset]; 427 | } 428 | 429 | 430 | @safe pure nothrow @nogc 431 | @property size_t length() const 432 | { 433 | return n - offset; 434 | } 435 | } -------------------------------------------------------------------------------- /source/fast/internal/benchmarks.d: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * Internal benchmark module. 4 | * 5 | * Authors: 6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 7 | * 8 | * Copyright: 9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * License: 12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 13 | * 14 | **************************************************************************************************/ 15 | module fast.internal.benchmarks; 16 | 17 | version (benchmark): 18 | 19 | void main() 20 | { 21 | import std.stdio; 22 | import core.stdc.string, core.stdc.stddef, core.stdc.stdlib; 23 | import std.array, std.stdio, std.algorithm, std.regex, std.utf, std.conv, std.string, std.range; 24 | import fast.string, fast.cstring, fast.buffer, fast.format, fast.json; 25 | import std.format : formattedWrite; 26 | 27 | static immutable nums = { ulong[1uL << 8] nums = void; foreach (i; 0 .. 
nums.length) nums[i] = (1uL << (64 - 8)) * i; return nums; }(); 28 | static immutable part1 = "C:\\"; 29 | static immutable part2 = "Documents and Settings\\User\\My Documents\\My Downloads\\"; 30 | static immutable part3 = "Fast.zip"; 31 | static immutable pathname = "hello/i_am_a/path_name\\with_several_different\\slashes"; 32 | static immutable zeroterm = "wefwfnqwefnw(eknwoemkf)moorroijqwoijq&oqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwefnw(eknwoemkf)moorroijqwoihqioqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwef\"w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0"; 33 | static pathSepRegex = ctRegex!`[/\\]`; 34 | enum pathnameWStringLength = to!(immutable(wchar_t)[])(pathname).length; 35 | 36 | unicode(); 37 | 38 | jsonCoordinates!true(); 39 | jsonCoordinates!false(); 40 | 41 | run ("Format strings for integers...", 13093, 42 | benchmark ("std.*.format", () { uint check; foreach (ulong num; nums) { string str = format("decimal: %s, hex: %x", num, num); check += str[9]; } return check; } ), 43 | benchmark ("fast.*.format", () { uint check; foreach (ulong num; nums) { string str = fast.format.format!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ), 44 | benchmark ("fast.*.formata", () { uint check; foreach (ulong num; nums) { char[] str = formata!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ), 45 | ); 46 | 47 | run ("Convert 256 numbers to fixed width hex strings...", 0x20, 48 | benchmark ("std.*.formattedWrite", () { Appender!(char[]) app; app.reserve(16); char check = 0; foreach (ulong num; nums) { app.formattedWrite("%016X", num); check += app.data[0]; app.clear(); } return check; }), 49 | benchmark ("fast.*.hexStrUpper", () { char[16] str; char check = 0; foreach (ulong num; nums) { str = hexStrUpper(num); check += str[0]; } return check; }), 50 | ); 51 | 52 | run ("Concatenate a known number of strings...", part1.length + part2.length + part3.length, 53 | benchmark ("std.array.appender", () { auto app = appender(part1); 
app ~= part2; app ~= part3; return app.data.length; }), 54 | benchmark ("~", () { string path = part1 ~ part2 ~ part3; return path.length; }), 55 | benchmark ("fast.string.concat", () { size_t length; { auto path = concat!(part1, part2, part3); length = path.length; } return length; }), 56 | ); 57 | 58 | run ("Allocate a temporary char buffer and fill it with 0xFF...", '\xFF', 59 | benchmark ("new", () { auto str = new char[](zeroterm.length); return str[$-1]; }), 60 | benchmark ("malloc", () { auto ptr = cast(char*) malloc(zeroterm.length); scope(exit) free(ptr); memset(ptr, 0xFF, zeroterm.length); return ptr[zeroterm.length-1]; }), 61 | benchmark ("fast.buffer.tempBuffer", () { char result; { auto buf = tempBuffer!(char, zeroterm.length); memset(buf, 0xFF, zeroterm.length); result = buf[$-1]; } return result; }), 62 | ); 63 | 64 | run("Convert a string to a wchar*...", wchar('\0'), 65 | benchmark ("toUTFz", () { return toUTFz!(wchar*)(pathname)[pathnameWStringLength]; }), 66 | benchmark ("cstring.wcharPtr", () { wchar result; { auto buf = wcharPtr!pathname; result = buf.ptr[pathnameWStringLength]; } return result; }), 67 | ); 68 | 69 | run("Convert a string to a char*...", '\0', 70 | benchmark ("toUTFz", () { return toUTFz!(char*)(pathname)[pathname.length]; }), 71 | benchmark ("toStringz", () { return cast(char) toStringz(pathname)[pathname.length]; }), 72 | benchmark ("cstring.charPtr", () { return cast(char) charPtr!pathname[pathname.length]; }), 73 | ); 74 | 75 | run ("Split a string at each occurance of <, >, & and \"...", "w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0", 76 | benchmark (`while+if with 4 cond.`, () { string before; immutable(char*) stop = zeroterm.ptr + zeroterm.length; immutable(char)* iter = zeroterm.ptr; immutable(char)* done = zeroterm.ptr; if (iter !is stop) do { char c = *iter++; if (c == '<' || c == '>' || c == '&' || c == '"') { before = done[0 .. iter - done]; done = iter; }} while (iter !is stop); return done[0 .. 
stop - done]; }), 77 | benchmark ("fast.string.split", () { string before, after = zeroterm; while (fast.string.split!`or(or(=<,=>),or(=&,="))`(after, before, after)) {} return before; }), 78 | ); 79 | 80 | run ("Split a path by '/' or '\\'...", "slashes", 81 | benchmark ("std.regex.split", () { return split(pathname, pathSepRegex)[$-1]; }), 82 | benchmark ("std.regex.splitter", () { string last; auto range = splitter(pathname, pathSepRegex); while (!range.empty) { last = range.front; range.popFront(); } return last; }), 83 | benchmark ("fast.string.split", () { string before, after = pathname; while (fast.string.split!`or(=\,=/)`(after, before, after)) {} return before; }), 84 | ); 85 | 86 | writeln("Benchmark done!"); 87 | } 88 | 89 | 90 | 91 | private: 92 | 93 | void unicode() 94 | { 95 | import std.range, std.uni, std.string, std.meta; 96 | import fast.unicode; 97 | 98 | static immutable string devanagari = cast(string)"तदपि कही गुर बारंिह बारा। समुझि परी कछु मति अनुसारा।। 99 | भाषाबद्ध करबि मैं सोई। मोरें मन प्रबोध जेंिह होई।। 100 | जस कछु बुधि बिबेक बल मेरें। तस कहिहउँ हियँ हरि के प्रेरें।। 101 | निज संदेह मोह भ्रम हरनी। करउँ कथा भव सरिता तरनी।। 102 | बुध बिश्राम सकल जन रंजनि। रामकथा कलि कलुष बिभंजनि।। 103 | रामकथा कलि पंनग भरनी। पुनि बिबेक पावक कहुँ अरनी।। 104 | रामकथा कलि कामद गाई। सुजन सजीवनि मूरि सुहाई।। 105 | सोइ बसुधातल सुधा तरंगिनि। भय भंजनि भ्रम भेक भुअंगिनि।। 106 | असुर सेन सम नरक निकंदिनि। साधु बिबुध कुल हित गिरिनंदिनि।। 107 | संत समाज पयोधि रमा सी। बिस्व भार भर अचल छमा सी।। 108 | जम गन मुहँ मसि जग जमुना सी। जीवन मुकुति हेतु जनु कासी।। 109 | रामहि प्रिय पावनि तुलसी सी। तुलसिदास हित हियँ हुलसी सी।। 110 | सिवप्रय मेकल सैल सुता सी। सकल सिद्धि सुख संपति रासी।। 111 | सदगुन सुरगन अंब अदिति सी। रघुबर भगति प्रेम परमिति सी।। 112 | ".representation.repeat(10).join.array(); 113 | static immutable string latin = "A gory knife had been found close to the murdered man, and it had been 114 | recognized by somebody as belonging to Muff Potter--so the story ran. 
115 | And it was said that a belated citizen had come upon Potter washing 116 | himself in the \"branch\" about one or two o'clock in the morning, and 117 | that Potter had at once sneaked off--suspicious circumstances, 118 | especially the washing which was not a habit with Potter. It was also 119 | said that the town had been ransacked for this \"murderer\" (the public 120 | are not slow in the matter of sifting evidence and arriving at a 121 | verdict), but that he could not be found. Horsemen had departed down 122 | all the roads in every direction, and the Sheriff \"was confident\" that 123 | he would be captured before night. 124 | ".repeat(10).join.array(); 125 | 126 | void benchCountGraphemes(alias text)(size_t count) 127 | { 128 | run ("Count graphemes in " ~ text.stringof ~ " text...", count, 129 | benchmark ("byGrapheme.walkLength", () { return text.byGrapheme.walkLength(); }), 130 | benchmark ("fast.graphemeCount", () { return text.countGraphemes(); }), 131 | ); 132 | } 133 | benchCountGraphemes!devanagari(5430); 134 | benchCountGraphemes!latin(7210); 135 | } 136 | 137 | 138 | void jsonCoordinates(bool integral)() 139 | { 140 | // A variant of https://github.com/kostya/benchmarks with less coordinate tuples, 141 | // since we repeat the test runs until a time span of one second passed. 142 | import core.memory; 143 | import std.algorithm; 144 | import std.ascii; 145 | import std.format; 146 | import std.random; 147 | import std.range; 148 | import std.typecons; 149 | import fast.internal.sysdef; 150 | 151 | enum coordCount = 10_000; 152 | auto rng = Mt19937(0); 153 | __gshared string text = "{\n \"coordinates\": [\n"; 154 | foreach (i; 0 .. 
coordCount) 155 | { 156 | static if (integral) 157 | { 158 | text ~= format(" {\n \"x\": %s,\n \"y\": %s,\n \"z\": %s,\n" ~ 159 | " \"name\": \"%s %s\",\n \"opts\": {\n \"1\": [\n 1,\n true\n" ~ 160 | " ]\n }\n }", uniform(0, 10_000, rng), uniform(0, 10_000, rng), uniform(0, 10_000, rng), 161 | iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng)); 162 | } 163 | else 164 | { 165 | text ~= format(" {\n \"x\": %.17g,\n \"y\": %.17g,\n \"z\": %.17g,\n" ~ 166 | " \"name\": \"%s %s\",\n \"opts\": {\n \"1\": [\n 1,\n true\n" ~ 167 | " ]\n }\n }", uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng), 168 | iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng)); 169 | } 170 | text ~= (i == coordCount - 1) ? "\n" : ",\n"; 171 | } 172 | text ~= " ],\n \"info\": \"some info\"\n}\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 173 | text = text[0 .. $-16]; 174 | 175 | GC.collect(); 176 | 177 | // Dlang on x86 with optimizations rounds up double additions. 178 | static if (integral) 179 | { 180 | version (X86) 181 | enum expect = tuple(4986L, 4997L, 4988L); 182 | else 183 | enum expect = tuple(5003L, 4979L, 4971L); 184 | } 185 | else static if (isDMD && isX86 && (!isRelease || isRelease && (__VERSION__ < 2069 || __VERSION__ > 2070)) || 186 | isGDC && isX86) 187 | enum expect = tuple(0.49823454184104704, 0.50283215330409059, 0.49828840592580270); 188 | else static if (!isX86 || !isRelease) 189 | enum expect = tuple(0.49683911677479053, 0.50166077554665356, 0.49647639699603635); 190 | else 191 | enum expect = tuple(0.49823454184171062, 0.50283215330485886, 0.49828840592673407); 192 | 193 | run!(1, coordCount)("JSON 3D coordinates (" ~ (integral ? 
"integers" : "floating-point") ~ ")", expect, 194 | benchmark("std.json", { 195 | import std.json; 196 | 197 | auto json = parseJSON(text); 198 | auto coordinates = json["coordinates"].array; 199 | size_t len = coordinates.length; 200 | static if (integral) 201 | long x, y, z; 202 | else 203 | double x = 0, y = 0, z = 0; 204 | foreach (i; 0 .. len) 205 | { 206 | auto coord = coordinates[i]; 207 | static if (integral) 208 | { 209 | x += coord["x"].integer; 210 | y += coord["y"].integer; 211 | z += coord["z"].integer; 212 | } 213 | else 214 | { 215 | x += coord["x"].floating; 216 | y += coord["y"].floating; 217 | z += coord["z"].floating; 218 | } 219 | } 220 | 221 | return tuple(x / long(len), y / long(len), z / long(len)); 222 | }), 223 | // benchmark("stdx.data.json", { 224 | // import stdx.data.json.lexer; 225 | // import stdx.data.json.parser; 226 | // 227 | // auto json = parseJSONStream!(LexOptions.useBigInt)(text); 228 | // json.skipToKey("coordinates"); 229 | // size_t len; 230 | // double x = 0, y = 0, z = 0; 231 | // json.readArray(delegate() @trusted { 232 | // json.readObject!(typeof(json))(delegate(string key) @trusted { 233 | // if (key == "x") 234 | // x += json.readDouble(); 235 | // else if (key == "y") 236 | // y += json.readDouble(); 237 | // else if (key == "z") 238 | // z += json.readDouble(); 239 | // else 240 | // json.skipValue(); 241 | // }); 242 | // len++; 243 | // }); 244 | // 245 | // return tuple(x / len, y / len, z / len); 246 | // }), 247 | benchmark("fast.json", { 248 | import fast.json; 249 | 250 | auto json = Json!(validateAll, true)(text); 251 | long len; 252 | 253 | static if (integral) 254 | { 255 | long x, y, z; 256 | foreach (i; json.coordinates) 257 | { 258 | json.keySwitch!("x", "y", "z")( 259 | { x += json.read!long; }, 260 | { y += json.read!long; }, 261 | { z += json.read!long; } 262 | ); 263 | len++; 264 | } 265 | } 266 | else 267 | { 268 | double x = 0, y = 0, z = 0; 269 | foreach (i; json.coordinates) 270 | { 271 | 
json.keySwitch!("x", "y", "z")( 272 | { x += json.read!double; }, 273 | { y += json.read!double; }, 274 | { z += json.read!double; } 275 | ); 276 | len++; 277 | } 278 | } 279 | 280 | return tuple(x / len, y / len, z / len); 281 | }), 282 | ); 283 | } 284 | 285 | 286 | /******************************************************************************* 287 | * 288 | * Runs a set of `Benchmark`s and prints comparing runtime statistics. The 289 | * functions are always called until at least a second of time has passed. 290 | * 291 | * Params: 292 | * innerLoop = how many iterations to perform without looking at the clock 293 | * mul = typically `1`, unless the called functions repeat an action multiple 294 | * times and you want to see that reflected in the output 295 | * title = short overall title of this comparing benchmark 296 | * expectation = return value, that is expected from all the tested functions 297 | * for validation purposes and to counter dead-code elimination. 298 | * benchmarks = A set of `Benchmark`s to be run and compared. The first one in 299 | * the list acts as a reference timing for the others. 300 | * 301 | **************************************/ 302 | void run(uint innerLoop = 1000, uint mul = 1, R)(in string title, in R expectation, in Benchmark!R[] benchmarks...) 303 | { 304 | import core.time, std.stdio, std.exception, std.string; 305 | 306 | writeln("\x1b[1m", title, "\x1b[0m"); 307 | writeln(); 308 | ulong reference; 309 | foreach (i, ref bm; benchmarks) { 310 | // Check that the result is as expected... 311 | auto actual = bm.run(); 312 | import std.stdio; 313 | //enforce(actual == expectation, format(`Benchmark "%s" did not result as expected in "%s", but in "%s".`, 314 | // bm.title, expectation, actual)); 315 | ulong iters = 0; 316 | immutable t1 = TickDuration.currSystemTick; 317 | TickDuration t2; 318 | do { 319 | foreach (k; 0 .. 
innerLoop) 320 | bm.run(); 321 | iters++; 322 | t2 = TickDuration.currSystemTick; 323 | } while (!(t2 - t1).seconds); 324 | ulong times = iters * innerLoop * mul * 1_000_000_000 / (t2 - t1).nsecs; 325 | if (i == 0) { 326 | reference = times; 327 | writefln(" %-22s: %10s per second", bm.title, times); 328 | } else if (reference <= times) { 329 | writefln("\x1b[1m %-22s: %10s per second (done in %.0f%% of time !)\x1b[0m", bm.title, times, 100.0 * reference / times); 330 | } else { 331 | writefln(" %-22s: %10s per second (slower by factor %.1f)", bm.title, times, 1.0 * reference / times); 332 | } 333 | } 334 | writeln(); 335 | } 336 | 337 | 338 | /******************************************************************************* 339 | * 340 | * Functor to create `Benchmark` structs. 341 | * 342 | * Params: 343 | * title = displayed string when the statistics of `run` are displayed 344 | * run = the benchmarked function 345 | * 346 | * Returns: 347 | * a `Benchmark` from the given information 348 | * 349 | **************************************/ 350 | Benchmark!R benchmark(R)(string title, R function() run) 351 | { 352 | return Benchmark!R(title, run); 353 | } 354 | 355 | 356 | /******************************************************************************* 357 | * 358 | * Information about a benchmarked function. 359 | * 360 | **************************************/ 361 | struct Benchmark(R) 362 | { 363 | string title; 364 | R function() run; 365 | } 366 | -------------------------------------------------------------------------------- /source/fast/internal/helpers.d: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * Helper functions that serve general purposes. 
4 | * 5 | * Authors: 6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 7 | * 8 | * Copyright: 9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * License: 12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 13 | * 14 | **************************************************************************************************/ 15 | module fast.internal.helpers; 16 | 17 | import std.traits; 18 | import fast.internal.sysdef; 19 | 20 | 21 | private enum 一META一PROGRAMMING一; 22 | 23 | // 2.071 fixed visibility rules, so we need to roll our own staticIota. 24 | static if (__VERSION__ >= 2071) 25 | { 26 | import std.meta : AliasSeq; 27 | 28 | template staticIota(int beg, int end) 29 | { 30 | static if (beg + 1 >= end) 31 | { 32 | static if (beg >= end) 33 | { 34 | alias staticIota = AliasSeq!(); 35 | } 36 | else 37 | { 38 | alias staticIota = AliasSeq!(+beg); 39 | } 40 | } 41 | else 42 | { 43 | enum mid = beg + (end - beg) / 2; 44 | alias staticIota = AliasSeq!(staticIota!(beg, mid), staticIota!(mid, end)); 45 | } 46 | } 47 | } 48 | else 49 | { 50 | import std.typecons : staticIota; 51 | } 52 | 53 | 54 | /** 55 | * For any integral type, returns the unsigned type of the same bit-width. 56 | */ 57 | template UnsignedOf(I) if (isIntegral!I) 58 | { 59 | static if (isUnsigned!I) 60 | alias UnsignedOf = I; 61 | else static if (is(I == long)) 62 | alias UnsignedOf = ulong; 63 | else static if (is(I == int)) 64 | alias UnsignedOf = uint; 65 | else static if (is(I == short)) 66 | alias UnsignedOf = ushort; 67 | else static if (is(I == byte)) 68 | alias UnsignedOf = ubyte; 69 | else static assert (0, "Not implemented"); 70 | } 71 | 72 | 73 | /** 74 | * Generates a mixin string for repeating code. It can be used to unroll variadic arguments. 75 | * A format string is instantiated a certain number times with an incrementing parameter. 76 | * The results are then concatenated using an optional joiner. 
77 | * 78 | * Params: 79 | * length = Number of elements you want to join. It is passed into format() as an incrementing number from [0 .. length$(RPAREN). 80 | * fmt = The format string to apply on each instantiation. Use %1$d to refer to the current index multiple times when necessary. 81 | * joiner = Optional string that will be placed between instances. It could be a space or an arithmetic operation. 82 | * 83 | * Returns: 84 | * The combined elements as a mixin string. 85 | * 86 | * See_Also: 87 | * $(LINK2 http://forum.dlang.org/thread/vqfvihyezbmwcjkmpzin@forum.dlang.org, A simple way to do compile time loop unrolling) 88 | */ 89 | enum ctfeJoin(size_t length)(in string fmt, in string joiner = null) 90 | { 91 | import std.range : iota; 92 | import std.string : format; 93 | import std.algorithm : map; 94 | 95 | // BUG: Cannot use join(), as it "cannot access the nested function 'ctfeJoin'". 96 | string result; 97 | foreach (inst; map!(i => format(fmt, i))(iota(length))) { 98 | if (result && joiner) result ~= joiner; 99 | result ~= inst; 100 | } 101 | return result; 102 | } 103 | 104 | 105 | enum getUDA(alias sym, T)() 106 | { 107 | foreach (uda; __traits(getAttributes, sym)) 108 | static if (is(typeof(uda) == T)) 109 | return uda; 110 | return T.init; 111 | } 112 | 113 | 114 | private enum 一BIT一OPERATIONS一; 115 | 116 | static import core.bitop; 117 | 118 | alias bsr = core.bitop.bsr; 119 | alias bsf = core.bitop.bsf; 120 | 121 | /******************************************************************************* 122 | * 123 | * Count leading zeroes. 124 | * 125 | * Params: 126 | * u = the unsigned value to scan 127 | * 128 | * Returns: 129 | * The number of leading zero bits before the first one bit. If `u` is `0`, 130 | * the result is undefined. 
131 | * 132 | **************************************/ 133 | version (DigitalMars) 134 | { 135 | @safe @nogc pure nothrow U 136 | clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t)) 137 | { 138 | pragma(inline, true); 139 | enum U max = 8 * U.sizeof - 1; 140 | return max - bsr(u); 141 | } 142 | 143 | static if (isX86) 144 | { 145 | @safe @nogc pure nothrow uint 146 | clz(U)(U u) if (is(Unqual!U == ulong)) 147 | { 148 | pragma(inline, true); 149 | uint hi = u >> 32; 150 | return hi ? 31 - bsr(hi) : 63 - bsr(cast(uint)u); 151 | } 152 | } 153 | } 154 | else version (GNU) 155 | { 156 | import gcc.builtins; 157 | alias clz = __builtin_clz; 158 | static if (isX86) 159 | { 160 | @safe @nogc pure nothrow uint 161 | clz(ulong u) 162 | { 163 | uint hi = u >> 32; 164 | return hi ? __builtin_clz(hi) : 32 + __builtin_clz(cast(uint)u); 165 | } 166 | } 167 | else alias clz = __builtin_clzl; 168 | } 169 | else version (LDC) 170 | { 171 | @safe @nogc pure nothrow U 172 | clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t)) 173 | { 174 | pragma(inline, true); 175 | import ldc.intrinsics; 176 | return llvm_ctlz(u, false); 177 | } 178 | 179 | static if (isX86) 180 | { 181 | @safe @nogc pure nothrow uint 182 | clz(U)(U u) if (is(Unqual!U == ulong)) 183 | { 184 | pragma(inline, true); 185 | import ldc.intrinsics; 186 | return cast(uint)llvm_ctlz(u, false); 187 | } 188 | } 189 | } 190 | static if (__VERSION__ < 2071) 191 | { 192 | // < 2.071 did not have 64-bit bsr/bsf on x86. 193 | @safe @nogc pure nothrow uint 194 | bsr(U)(U u) if (is(Unqual!U == ulong)) 195 | { 196 | pragma(inline, true); 197 | uint hi = u >> 32; 198 | return hi ? bsr(hi) + 32 : bsr(cast(uint)u); 199 | } 200 | 201 | @safe @nogc pure nothrow uint 202 | bsf(U)(U u) if (is(Unqual!U == ulong)) 203 | { 204 | pragma(inline, true); 205 | uint lo = cast(uint)u; 206 | return lo ? 
bsf(lo) : 32 + bsf(u >> 32); 207 | } 208 | } 209 | unittest 210 | { 211 | assert(clz(uint(0x01234567)) == 7); 212 | assert(clz(ulong(0x0123456701234567)) == 7); 213 | assert(clz(ulong(0x0000000001234567)) == 7+32); 214 | assert(bsr(uint(0x01234567)) == 24); 215 | assert(bsr(ulong(0x0123456701234567)) == 24+32); 216 | assert(bsr(ulong(0x0000000001234567)) == 24); 217 | assert(bsf(uint(0x76543210)) == 4); 218 | assert(bsf(ulong(0x7654321076543210)) == 4); 219 | assert(bsf(ulong(0x7654321000000000)) == 4+32); 220 | } 221 | 222 | 223 | private enum 一UNITTESTING一; 224 | 225 | // Insert a dummy main when unittesting outside of dub. 226 | version (VibeCustomMain) {} else version (unittest) void main() {} 227 | 228 | 229 | private enum 一MISCELLANEOUS一; 230 | 231 | pure nothrow @nogc 232 | { 233 | /** 234 | * Aligns a pointer to the closest multiple of $(D pot) (a power of two), 235 | * which is equal to or larger than $(D ptr). 236 | */ 237 | T* alignPtrNext(T)(scope T* ptr, in size_t pot) 238 | in { assert(pot > 0 && pot.isPowerOf2); } 239 | body { return cast(T*) ((cast(size_t) ptr + (pot - 1)) & -pot); } 240 | unittest { assert(alignPtrNext(cast(void*) 65, 64) == cast(void*) 128); } 241 | } 242 | 243 | 244 | @nogc @safe pure nothrow 245 | { 246 | /// Returns whether the (positive) argument is an integral power of two. 247 | @property bool isPowerOf2(in size_t n) 248 | in { assert(n > 0); } 249 | body { return (n & n - 1) == 0; } 250 | 251 | version (LDC) { 252 | import core.simd; 253 | pragma(LDC_intrinsic, "llvm.x86.sse2.pmovmskb.128") 254 | uint moveMask(ubyte16); 255 | } else version (GNU) { 256 | import gcc.builtins; 257 | alias moveMask = __builtin_ia32_pmovmskb128; 258 | } 259 | 260 | template SIMDFromScalar(V, alias scalar) 261 | { 262 | // This wrapper is needed for optimal performance with LDC and 263 | // doesn't hurt GDC's inlining. 
264 | V SIMDFromScalar() { 265 | enum V asVectorEnum = scalar; 266 | return asVectorEnum; 267 | } 268 | } 269 | 270 | 271 | template SIMDFromString(string str) if (str.length <= 16) 272 | { 273 | import core.simd, std.algorithm, std.range, std.string; 274 | 275 | private enum data = chain(str.representation, 0.repeat(16 - str.length)).array; 276 | 277 | static if (!isDMD) 278 | immutable ubyte16 SIMDFromString = data; 279 | else version (D_PIC) 280 | { 281 | import std.format; 282 | void SIMDFromString() @safe @nogc pure nothrow 283 | { 284 | mixin(format("asm @trusted @nogc pure nothrow { naked; db %(%s,%); }", data)); 285 | } 286 | } 287 | else static if (isX86) 288 | align(16) __gshared ubyte[16] SIMDFromString = data; 289 | else 290 | __gshared ubyte16 SIMDFromString = data; 291 | } 292 | } 293 | -------------------------------------------------------------------------------- /source/fast/internal/sysdef.di: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * Definitions that abstract from the architecture or operating system. 4 | * 5 | * As far as possible these will alias existing definitons from OS headers to facilitate integration 6 | * with other code. 
7 | * 8 | * Authors: 9 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * Copyright: 12 | * © 2016 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 13 | * 14 | * License: 15 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 16 | * 17 | **************************************************************************************************/ 18 | module fast.internal.sysdef; 19 | 20 | 21 | private enum 一ARCHITECTURE一; 22 | 23 | version (X86_64) { 24 | enum isAMD64 = true; 25 | enum isX86 = false; 26 | } else version (X86) { 27 | enum isAMD64 = false; 28 | enum isX86 = true; 29 | } 30 | 31 | version (X86_64) 32 | enum hasSSE2 = true; 33 | else 34 | enum hasSSE2 = false; 35 | 36 | 37 | private enum 一OPERATING一SYSTEM一; 38 | 39 | version (Posix) 40 | enum isPosix = true; 41 | else 42 | enum isPosix = false; 43 | 44 | version (Windows) 45 | enum isWindows = true; 46 | else 47 | enum isWindows = false; 48 | 49 | /******************************************************************************* 50 | * 51 | * Despite Phobos' use of `char[]` UTF-8 strings for file names, their internal 52 | * representation in the operating system is a sequence of 8- or 16-bit values. 53 | * On Windows this means that one could get invalid surrogate pairings and on 54 | * Linux, a file name can have any 8-bit encoding that keeps '/' at the same 55 | * code point as ASCII. That's why portable file names should only use a subset 56 | * of ASCII that is interpreted the same in all supported encodings. 
57 | * 58 | * MSDN mentions that file paths should be treated as a sequence of `WCHAR`: 59 | * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath 60 | * 61 | **************************************/ 62 | static if (isPosix) 63 | alias FileChar = ubyte; 64 | else version (Windows) 65 | alias FileChar = ushort; 66 | else static assert(0, "Not implemented"); 67 | 68 | alias Filename = FileChar[]; 69 | 70 | 71 | private enum 一COMPILER一UNIFICATION一; 72 | 73 | version (LDC) { 74 | enum isLDC = true; 75 | enum isGDC = false; 76 | enum isDMD = false; 77 | } else version (GNU) { 78 | enum isLDC = false; 79 | enum isGDC = true; 80 | enum isDMD = false; 81 | } else version (DigitalMars) { 82 | enum isLDC = false; 83 | enum isGDC = false; 84 | enum isDMD = true; 85 | } 86 | 87 | version (DigitalMars) 88 | { 89 | enum noinline; 90 | enum forceinline; 91 | enum sse4_2; enum sse4; // `sse4_2` matches the name declared in the GDC and LDC branches; `sse4` is kept for backward compatibility 92 | } 93 | else version (GNU) 94 | { 95 | import gcc.attribute; 96 | enum noinline = gcc.attribute.attribute("noinline"); 97 | enum forceinline = gcc.attribute.attribute("forceinline"); 98 | enum sse4_2 = gcc.attribute.attribute("target", "sse4.2"); 99 | } 100 | else version (LDC) 101 | { 102 | import ldc.attributes; 103 | enum noinline; 104 | enum forceinline; 105 | enum sse4_2 = ldc.attributes.target("+sse4.2"); 106 | } 107 | 108 | version (assert) 109 | enum isRelease = false; 110 | else 111 | enum isRelease = true; 112 | 113 | version (D_PIC) 114 | enum isPIC = true; 115 | else 116 | enum isPIC = false; 117 | -------------------------------------------------------------------------------- /source/fast/intmath.d: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * Supplementary integer math functions. 
4 | * 5 | * Authors: 6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 7 | * 8 | * Copyright: 9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * License: 12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 13 | * 14 | **************************************************************************************************/ 15 | module fast.intmath; 16 | 17 | import fast.internal.helpers; 18 | import fast.internal.sysdef; 19 | 20 | 21 | version (LDC) 22 | { 23 | @safe @nogc pure nothrow 24 | ulong mulu(ulong x, ulong y, ref bool overflow) 25 | { 26 | import ldc.intrinsics; 27 | auto res = llvm_umul_with_overflow(x, y); 28 | overflow = res.overflow; 29 | return res.result; 30 | } 31 | } 32 | else static if (isPosix && isGDC && (isAMD64 || isX86)) 33 | { 34 | @nogc pure nothrow 35 | ulong mulu(ulong x, ulong y, ref bool overflow) 36 | { 37 | version (GNU) 38 | { 39 | ulong lo; 40 | version (X86) asm { " 41 | cmp $0, 4+%2 42 | je 1f 43 | cmp $0, 4%3 44 | je 1f 45 | movb $1, %1 46 | 1: 47 | mov 4+%2, %%eax 48 | mull %3 49 | jno 2f 50 | movb $1, %1 51 | 2: 52 | mov %%eax, %%ecx 53 | mov %2, %%eax 54 | mull 4%3 55 | jno 3f 56 | movb $1, %1 57 | 3: 58 | add %%eax, %%ecx 59 | jno 4f 60 | movb $1, %1 61 | 4: 62 | mov %2, %%eax 63 | mull %3 64 | add %%ecx, %%edx 65 | jnc 5f 66 | movb $1, %1 67 | 5: 68 | " : "=&A" lo, "+*m" overflow : "m" x, "m" y : "ecx"; } 69 | else asm { "mul %3\njno 1f\nmovb $1, %1\n1:\n" : "=a" lo, "+*m" overflow : "a" x, "r" y : "rdx"; } 70 | return lo; 71 | } 72 | } 73 | } 74 | else 75 | { 76 | // DMD is already faster than my ASM code above, no need to improve. Good job Walter et al. 
77 | import core.checkedint; 78 | alias mulu = core.checkedint.mulu; 79 | } 80 | -------------------------------------------------------------------------------- /source/fast/json.d: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * A fast JSON parser implementing RFC 7159. 4 | * 5 | * The most prominent change compared to the initial revision is the allowance of all data types as 6 | * root values, not just objects and arrays. 7 | * 8 | * Usage_Hints: 9 | * $(UL 10 | * $(LI This parser only supports UTF-8 without BOM.) 11 | * $(LI When a JSON object has duplicate keys, the last one in the set will determine the value 12 | * of associative-array entries or struct fields.) 13 | * $(LI `BigInt` and large number parsing are not implemented currently, but all integral types 14 | * as well as minimal exact representations of many `double` values are supported.) 15 | * ) 16 | * 17 | * Authors: 18 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 19 | * 20 | * Copyright: 21 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 22 | * 23 | * License: 24 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 25 | * 26 | **************************************************************************************************/ 27 | module fast.json; 28 | 29 | import core.stdc.string; 30 | 31 | import std.ascii; 32 | import std.conv; 33 | import std.exception; 34 | import std.file; 35 | import std.json; 36 | import std.range; 37 | import std.string : representation, format; 38 | import std.traits; 39 | import std.uni; 40 | 41 | import fast.buffer; 42 | import fast.cstring; 43 | import fast.internal.sysdef; 44 | import fast.parsing; 45 | 46 | 47 | /******************************************************************************* 48 | * 49 | * Loads a file as JSON text and validates the used parts. 
This includes a UTF-8 50 | * validation on strings. 51 | * 52 | * Params: 53 | * fname = The file name to load. 54 | * 55 | * Returns: 56 | * A JSON file object exposing the `Json` API. 57 | * 58 | **************************************/ 59 | auto parseJSONFile(uint vl = validateUsed)(in char[] fname) 60 | { return Json!vl.File(fname); } 61 | 62 | /// ditto 63 | auto parseJSONFile(uint vl = validateUsed)(in Filename fname) 64 | { return Json!vl.File(fname); } 65 | 66 | 67 | /******************************************************************************* 68 | * 69 | * Loads a JSON string and validates the used parts. `validateUtf8` is passed as 70 | * `false`, so UTF-8 validation of strings is skipped (NOTE(review): confirm intent). 71 | * 72 | * Params: 73 | * text = The string to load. 74 | * 75 | * Returns: 76 | * A `Json` struct. 77 | * 78 | **************************************/ 79 | auto parseJSON(uint vl = validateUsed, T : const(char)[])(T text) nothrow 80 | { return Json!(vl, false)(text); } 81 | 82 | 83 | /******************************************************************************* 84 | * 85 | * Load a file as JSON text that is considered 100% correct. No checks will be 86 | * performed, not even if you try to read a number as a string. 87 | * 88 | * Params: 89 | * fname = The file name to load. 90 | * 91 | * Returns: 92 | * A JSON file object exposing the `Json` API. 93 | * 94 | **************************************/ 95 | Json!trustedSource.File parseTrustedJSONFile(in char[] fname) 96 | { return Json!trustedSource.File(fname); } 97 | 98 | /// ditto 99 | version(Windows){}else 100 | Json!trustedSource.File parseTrustedJSONFile(in Filename fname) 101 | { return Json!trustedSource.File(fname); } 102 | 103 | 104 | /******************************************************************************* 105 | * 106 | * Load a JSON string that is considered 100% correct. No checks will be 107 | * performed, not even if you try to read a number as a string. 108 | * 109 | * Params: 110 | * text = The string to load. 
111 | * 112 | * Returns: 113 | * A `Json` struct. 114 | * 115 | **************************************/ 116 | auto parseTrustedJSON(T : const(char)[])(T text) nothrow 117 | { return Json!(trustedSource, false)(text); } 118 | 119 | 120 | /******************************************************************************* 121 | * 122 | * Validates a JSON text file. 123 | * 124 | * Params: 125 | * fname = The file name to load. 126 | * 127 | * Throws: 128 | * JSONException on validation errors. 129 | * 130 | **************************************/ 131 | void validateJSONFile(in char[] fname) 132 | { Json!(validateAll, true).File(fname).skipValue(); } 133 | 134 | /// ditto 135 | version(Windows){} else 136 | void validateJSONFile(in Filename fname) 137 | { Json!(validateAll, true).File(fname).skipValue(); } 138 | 139 | 140 | /******************************************************************************* 141 | * 142 | * Validates a JSON string. 143 | * 144 | * Params: 145 | * text = The string to load. 146 | * 147 | * Throws: 148 | * JSONException on validation errors. 149 | * 150 | **************************************/ 151 | void validateJSON(T : const(char)[])(T text) 152 | { Json!(validateAll, true)(text).skipValue(); } 153 | 154 | 155 | /// JSON data types returned by `peek`. 156 | enum DataType : ubyte 157 | { 158 | string, number, object, array, boolean, null_ 159 | } 160 | 161 | 162 | /// Validation strength of JSON parser 163 | enum 164 | { 165 | trustedSource, /// Assume 100% correct JSON and speed up parsing. 166 | validateUsed, /// Ignore errors in skipped portions. 167 | validateAll, /// Do a complete validation of the JSON data. 168 | } 169 | 170 | 171 | /// A UDA used to remap enum members or struct field names to JSON strings. 172 | struct JsonMapping { string[string] map; } 173 | 174 | 175 | /// JSON parser state returned by the `state` property. 
176 | struct JsonParserState { 177 | const(char)* text; 178 | size_t nesting; 179 | } 180 | 181 | 182 | /******************************************************************************* 183 | * 184 | * This is a forward JSON parser for picking off items of interest on the go. 185 | * It neither produces a node structure, nor does it produce events. Instead you 186 | * can peek at the value type that lies ahead and/or directly consume a JSON 187 | * value from the parser. Objects and arrays can be iterated over via `foreach`, 188 | * while you can also directly ask for one or multiple keys of an object. 189 | * 190 | * Params: 191 | * vl = Validation level. Any of `trustedSource`, `validateUsed` or 192 | * `validateAll`. 193 | * validateUtf8 = If validation is enabled, this also checks UTF-8 encoding 194 | * of JSON strings. 195 | * 196 | **************************************/ 197 | struct Json(uint vl = validateUsed, bool validateUtf8 = vl > trustedSource) 198 | if (vl > trustedSource || !validateUtf8) 199 | { 200 | private: 201 | 202 | enum isTrusted = vl == trustedSource; 203 | enum skipAllInter = vl == trustedSource; 204 | enum isValidating = vl >= validateUsed; 205 | enum isValidateAll = vl == validateAll; 206 | 207 | const(char*) m_start = void; 208 | const(char)* m_text = void; 209 | size_t m_nesting = 0; 210 | RaiiArray!char m_mem; 211 | bool m_isString = false; 212 | 213 | 214 | public: 215 | 216 | @disable this(); 217 | @disable this(this); 218 | 219 | 220 | /******************************************************************************* 221 | * 222 | * Constructor taking a `string` for fast slicing. 223 | * 224 | * JSON strings without escape sequences can be returned as slices. 225 | * 226 | * Params: 227 | * text = The JSON text to parse. 228 | * simdPrep = Set this to `No.simdPrep` to indicate that `text` is already 229 | * suffixed by 16 zero bytes as required for SIMD processing. 
230 | * 231 | **************************************/ 232 | nothrow 233 | this(string text, Flag!"simdPrep" simdPrep = Yes.simdPrep) 234 | { 235 | import core.memory; 236 | m_isString = GC.query(text.ptr) !is ReturnType!(GC.query).init; 237 | this(cast(const(char)[]) text, simdPrep); 238 | } 239 | 240 | 241 | /******************************************************************************* 242 | * 243 | * Constructor taking a `const char[]`. 244 | * 245 | * JSON strings allocate on the GC heap when returned. 246 | * 247 | * Params: 248 | * text = The JSON text to parse. 249 | * simdPrep = Set this to `No.simdPrep` to indicate that `text` is already 250 | * suffixed by 16 zero bytes as required for SIMD processing. 251 | * 252 | **************************************/ 253 | pure nothrow 254 | this(const(char)[] text, Flag!"simdPrep" simdPrep = Yes.simdPrep) 255 | { 256 | if (simdPrep) 257 | { 258 | // We need to append 16 zero bytes for SSE to work, and if that reallocates the char[] 259 | // we can declare it unique/immutable and don't need to allocate when returning JSON strings. 260 | auto oldPtr = text.ptr; 261 | text ~= "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 262 | m_isString |= oldPtr !is text.ptr; 263 | } 264 | m_start = m_text = text.ptr; 265 | skipWhitespace!false(); 266 | } 267 | 268 | 269 | /+ 270 | ╔══════════════════════════════════════════════════════════════════════════════ 271 | ║ ⚑ String 272 | ╚══════════════════════════════════════════════════════════════════════════════ 273 | +/ 274 | 275 | /******************************************************************************* 276 | * 277 | * Reads a string off the JSON text. 278 | * 279 | * Params: 280 | * allowNull = Allow `null` as a valid option for the string. 281 | * 282 | * Returns: 283 | * A GC managed string. 
284 | * 285 | **************************************/ 286 | string read(T)(bool allowNull = true) if (is(T == string)) 287 | { 288 | if (!allowNull || peek == DataType.string) 289 | { 290 | auto borrowed = borrowString(); 291 | return m_isString ? borrowed.assumeUnique() : borrowed.idup; 292 | } 293 | return readNull(); 294 | } 295 | 296 | 297 | /******************************************************************************* 298 | * 299 | * Reads an enumeration off the JSON text. 300 | * 301 | **************************************/ 302 | T read(T)() if (is(T == enum)) 303 | { 304 | enum mapping = buildRemapTable!T; 305 | auto oldPos = m_text; 306 | auto text = borrowString(); 307 | foreach (m; mapping) 308 | if (text.length == m.json.length && memcmp(text.ptr, m.json.ptr, m.json.length) == 0) 309 | return m.d; 310 | m_text = oldPos; 311 | static if (isValidating) 312 | handleError(format("Could not find enum member `%s` in `%s`", text, T.stringof)); 313 | assert(0); 314 | } 315 | 316 | 317 | /******************************************************************************* 318 | * 319 | * Reads a string off the JSON text with limited lifetime. 320 | * 321 | * The reference to this slice is not guaranteed to be valid after the JSON 322 | * parser has been destroyed or another object key or string value has been 323 | * parsed. So make a copy before you continue parsing. 324 | * 325 | * Returns: 326 | * If the string had no escape sequences in it, the returned array is a 327 | * slice of the JSON text buffer, otherwise temporary copy. 328 | * 329 | **************************************/ 330 | const(char)[] borrowString() 331 | { 332 | expect('"', "at start of string"); 333 | auto escFreeStart = m_text; 334 | 335 | if (scanString!validateUtf8()) 336 | { 337 | // Fast path here is to return a slice of the JSON if it doesn't contain escapes. 338 | size_t length = m_text - escFreeStart; 339 | skipOnePlusWhitespace!skipAllInter(); 340 | return escFreeStart[0 .. 
length]; 341 | } 342 | else 343 | { 344 | // Otherwise we copy to a separate memory area managed by this parser instance. 345 | size_t length = 0; 346 | bool eos = false; 347 | goto CopyToBuffer; 348 | do 349 | { 350 | do 351 | { 352 | m_mem.capacityNeeded( length + 4 ); 353 | uint decoded = decodeEscape( &m_mem[length] ); 354 | length += decoded; 355 | } 356 | while (*m_text == '\\'); 357 | 358 | escFreeStart = m_text; 359 | eos = scanString!validateUtf8(); 360 | CopyToBuffer: 361 | size_t escFreeLength = m_text - escFreeStart; 362 | m_mem.capacityNeeded( length + escFreeLength ); 363 | memcpy( m_mem.ptr + length, escFreeStart, escFreeLength ); 364 | length += escFreeLength; 365 | } 366 | while (!eos); 367 | skipOnePlusWhitespace!skipAllInter(); 368 | return m_mem[0 .. length]; 369 | } 370 | } 371 | 372 | 373 | private bool scanString(bool validate)() 374 | { 375 | static if (validate) 376 | { 377 | import core.bitop; 378 | 379 | while (true) 380 | { 381 | // Stop for control-characters, \, " and anything non-ASCII. 382 | m_text.seekToRanges!"\0\x1F\"\"\\\\\x7F\xFF"; 383 | 384 | // Handle printable ASCII range 385 | if (*m_text == '"') 386 | return true; 387 | if (*m_text == '\\') 388 | return false; 389 | 390 | // Anything else better be UTF-8 391 | uint u = *cast(uint*) m_text; 392 | version (LittleEndian) u = bswap(u); 393 | 394 | // Filter overlong ASCII and missing follow byte. 395 | if ( 396 | (u & 0b111_00000_11_000000_00000000_00000000) == 0b110_00000_10_000000_00000000_00000000 && 397 | (u > 0b110_00001_10_111111_11111111_11111111)) 398 | m_text += 2; 399 | // Handle overlong representation, UTF-16 surrogate pairs and missing follow bytes. 
400 | else if ( 401 | (u & 0b1111_0000_11_000000_11_000000_00000000) == 0b1110_0000_10_000000_10_000000_00000000 && 402 | (u & 0b0000_1111_00_100000_00_000000_00000000) != 0b0000_1101_00_100000_00_000000_00000000 && 403 | (u > 0b1110_0000_10_011111_10_111111_11111111)) 404 | m_text += 3; 405 | // Handle missing follow bytes, Handle overlong representation and out of valid range (max. 0x10FFFF) 406 | else if ( 407 | (u & 0b11111_000_11_000000_11_000000_11_000000) == 0b11110_000_10_000000_10_000000_10_000000 && 408 | (u > 0b11110_000_10_001111_10_111111_10_111111) && (u < 0b11110_100_10_010000_10_000000_10_000000)) 409 | m_text += 4; 410 | // Handle invalid code units. 411 | else if (*m_text < ' ' || *m_text == 0x7F) 412 | expectNot("is a disallowed control character in strings"); 413 | else if (*m_text >= 0x80 && *m_text <= 0xBF) 414 | expectNot("is a UTF-8 follow byte and cannot start a sequence"); 415 | else 416 | expectNot("is not a valid UTF-8 sequence start"); 417 | } 418 | } 419 | else 420 | { 421 | m_text.seekToAnyOf!("\\\"\0"); 422 | return *m_text == '"'; 423 | } 424 | } 425 | 426 | 427 | private int matchString(string key)() 428 | { 429 | return m_text.fixedTermStrCmp!(char, key, "\"\0", "\\")(&stringCompareCallback); 430 | } 431 | 432 | 433 | private bool stringCompareCallback(ref immutable(char)* key, ref const(char)* str) 434 | { 435 | do 436 | { 437 | auto key4 = cast(char[4]*) key; 438 | char[4] buf = *key4; 439 | uint bytes = decodeEscape(buf.ptr); 440 | if (buf != *key4) 441 | return false; 442 | key += bytes; 443 | } 444 | while (str[0] == '\\'); 445 | return true; 446 | } 447 | 448 | 449 | private static immutable escapes = { 450 | char[256] result = '\0'; 451 | result['"'] = '"'; 452 | result['\\'] = '\\'; 453 | result['/'] = '/'; 454 | result['b'] = '\b'; 455 | result['f'] = '\f'; 456 | result['n'] = '\n'; 457 | result['r'] = '\r'; 458 | result['t'] = '\t'; 459 | return result; 460 | }(); 461 | 462 | 463 | private void skipEscape() 464 | { 465 | 
static if (isValidateAll) 466 | { 467 | if (m_text[1] != 'u') 468 | { 469 | // Normal escape sequence. 2 bytes removed. 470 | if (!escapes[*++m_text]) 471 | expectNot("in escape sequence"); 472 | m_text++; 473 | } 474 | else 475 | { 476 | // UTF-16 477 | m_text += 2; 478 | decodeUtf16HexToCodepoint(); 479 | } 480 | } 481 | else m_text += 2; 482 | } 483 | 484 | 485 | private uint decodeEscape(scope char* dst) 486 | { 487 | if (m_text[1] != 'u') 488 | { 489 | // Normal escape sequence. 2 bytes removed. 490 | dst[0] = escapes[m_text[1]]; 491 | static if (isValidating) 492 | if (!dst[0]) 493 | handleError("Invalid escape sequence"); 494 | m_text += 2; 495 | return 1; 496 | } 497 | else 498 | { 499 | // UTF-16 500 | m_text += 2; 501 | uint cp = decodeUtf16HexToCodepoint(); 502 | 503 | if (cp >= 0xD800 && cp <= 0xDBFF) 504 | { 505 | dst[0] = cast(char)(0b11110_000 | cp >> 18); 506 | dst[1] = cast(char)(0b10_000000 | cp >> 12 & 0b00_111111); 507 | dst[2] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111); 508 | dst[3] = cast(char)(0b10_000000 | cp & 0b00_111111); 509 | return 4; 510 | } 511 | else if (cp >= 0x800) 512 | { 513 | dst[0] = cast(char)(0b1110_0000 | cp >> 12); 514 | dst[1] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111); 515 | dst[2] = cast(char)(0b10_000000 | cp & 0b00_111111); 516 | return 3; 517 | } 518 | else if (cp >= 0x80) 519 | { 520 | dst[0] = cast(char)(0b110_00000 | cp >> 6); 521 | dst[1] = cast(char)(0b10_000000 | cp & 0b00_111111); 522 | return 2; 523 | } 524 | else 525 | { 526 | dst[0] = cast(char)(cp); 527 | return 1; 528 | } 529 | } 530 | } 531 | 532 | 533 | private dchar decodeUtf16HexToCodepoint() 534 | { 535 | import fast.internal.helpers; 536 | 537 | uint cp, hi; 538 | foreach (i; staticIota!(0, 2)) 539 | { 540 | static if (isValidating) 541 | { 542 | if (auto badByte = hexDecode4(m_text, cp)) 543 | { 544 | m_text = badByte; 545 | expectNot("is not a hex digit"); 546 | } 547 | } 548 | else 549 | { 550 | cp = hexDecode4(m_text); 551 | } 552 
| 553 | static if (i == 0) 554 | { 555 | // Is this a high surrogate (followed by a low surrogate) or not ? 556 | if (cp < 0xD800 || cp > 0xDBFF) 557 | break; 558 | hi = cp - 0xD800 + 0x40 << 10; 559 | } 560 | else static if (i == 1) 561 | { 562 | static if (isValidating) 563 | { 564 | if (cp < 0xDC00 || cp > 0xDFFF) 565 | handleError("The UTF-16 escape produced an invalid code point."); 566 | cp -= 0xDC00; 567 | } 568 | cp |= hi; 569 | } 570 | } 571 | 572 | static if (isValidating) 573 | if (cp > 0x10FFFF || cp >= 0xD800 && cp <= 0xDFFF) 574 | handleError("The UTF-16 escape produced an invalid code point."); 575 | 576 | return cp; 577 | } 578 | 579 | 580 | private void skipString(bool skipInter)() 581 | { 582 | m_text++; 583 | skipRestOfString!skipInter(); 584 | } 585 | 586 | 587 | private void skipRestOfString(bool skipInter)() 588 | { 589 | while (!scanString!isValidateAll()) 590 | skipEscape(); 591 | skipOnePlusWhitespace!skipInter(); 592 | } 593 | 594 | 595 | /+ 596 | ╔══════════════════════════════════════════════════════════════════════════════ 597 | ║ ⚑ Number 598 | ╚══════════════════════════════════════════════════════════════════════════════ 599 | +/ 600 | 601 | /******************************************************************************* 602 | * 603 | * Reads a number off the JSON text. 604 | * 605 | * If you ask for an unsigned value, no minus sign will be accepted in the JSON, 606 | * otherwise all features of JSON numbers will be available. In particular large 607 | * integers can be given in scientific notation. 608 | * 609 | * Params: 610 | * N = Built-in numerical type that should be returned. 611 | * 612 | * Returns: 613 | * The parsed number. 614 | * 615 | * Throws: 616 | * JSONException, on invalid JSON or integer overflow. 
617 | * 618 | **************************************/ 619 | N read(N)() if (isNumeric!N && !is(N == enum)) 620 | { 621 | N n = void; 622 | static if (isUnsigned!N) 623 | enum NumberOptions opt = {}; 624 | else 625 | enum NumberOptions opt = { minus:true }; 626 | if (parseNumber!opt(m_text, n)) 627 | skipWhitespace!skipAllInter(); 628 | else static if (isValidating) 629 | handleError(format("Could not convert JSON number to `%s`", N.stringof)); 630 | return n; 631 | } 632 | 633 | 634 | private void skipNumber(bool skipInter)() 635 | { 636 | static if (isValidateAll) 637 | { 638 | if (*m_text == '-') 639 | m_text++; 640 | if (*m_text == '0') 641 | m_text++; 642 | else 643 | trySkipDigits(); 644 | if (*m_text == '.') 645 | { 646 | m_text++; 647 | trySkipDigits(); 648 | } 649 | if ((*m_text | 0x20) == 'e') 650 | { 651 | m_text++; 652 | if (*m_text == '+' || *m_text == '-') 653 | m_text++; 654 | trySkipDigits(); 655 | } 656 | skipWhitespace!false(); 657 | } 658 | else 659 | { 660 | m_text.skipCharRanges!"\t\n\r\r ++-.09EEee"; 661 | static if (skipInter) 662 | m_text.skipAllOf!"\t\n\r ,"; 663 | } 664 | } 665 | 666 | 667 | static if (isValidateAll) 668 | { 669 | private void trySkipDigits() 670 | { 671 | if (*m_text - '0' > 9) 672 | expectNot("in number literal"); 673 | m_text.skipAllOf!"0123456789"; 674 | } 675 | } 676 | 677 | 678 | /+ 679 | ╔══════════════════════════════════════════════════════════════════════════════ 680 | ║ ⚑ Object 681 | ╚══════════════════════════════════════════════════════════════════════════════ 682 | +/ 683 | 684 | /******************************************************************************* 685 | * 686 | * Reads a plain old data struct off the JSON text. 687 | * 688 | * Params: 689 | * T = Type of struct that should be returned. 690 | * 691 | * Returns: 692 | * A struct of type `T`. 
693 | * 694 | **************************************/ 695 | T read(T)() if (is(T == struct) && __traits(isPOD, T)) 696 | { 697 | nest('{', "on start of object"); 698 | 699 | T t; 700 | if (*m_text != '}') while (true) 701 | { 702 | auto key = borrowString(); 703 | static if (!skipAllInter) 704 | { 705 | expect(':', "between key and value"); 706 | skipWhitespace!false(); 707 | } 708 | 709 | enum mapping = buildRemapTable!T; 710 | foreach (m; mapping) 711 | { 712 | if (key.length == m.json.length && memcmp(key.ptr, m.json.ptr, m.json.length) == 0) 713 | { 714 | mixin("alias keyT = typeof(T." ~ m.d ~ ");"); 715 | mixin("t." ~ m.d ~ " = read!keyT;"); 716 | goto Success; 717 | } 718 | } 719 | skipValue(); 720 | 721 | Success: 722 | if (*m_text == '}') 723 | break; 724 | 725 | static if (!skipAllInter) 726 | { 727 | expect(',', "between key-value pairs"); 728 | skipWhitespace!false(); 729 | } 730 | } 731 | 732 | unnest(); 733 | return t; 734 | } 735 | 736 | 737 | /******************************************************************************* 738 | * 739 | * Reads a plain old data struct or `null` off the JSON text. 740 | * 741 | * Params: 742 | * T = Type of struct pointer that should be returned. 743 | * 744 | * Returns: 745 | * A pointer to a newly filled struct of type `T` on the GC heap. 746 | * 747 | **************************************/ 748 | T read(T)() if (is(PointerTarget!T == struct) && __traits(isPOD, PointerTarget!T)) 749 | { 750 | if (peek == DataType.null_) 751 | return readNull(); 752 | T tp = new PointerTarget!T; 753 | *tp = read!(PointerTarget!T)(); 754 | return tp; 755 | } 756 | 757 | 758 | /******************************************************************************* 759 | * 760 | * Reads an associative-array off a JSON text. 761 | * 762 | * The key type must be `string`, the value type can be any type otherwise 763 | * supported by the parser. 764 | * 765 | * Params: 766 | * T = The type of AA to return. 
767 | * 768 | * Returns: 769 | * A newly filled associative array. 770 | * 771 | **************************************/ 772 | T read(T)() if (is(KeyType!T == string)) 773 | { 774 | T aa; 775 | foreach (key; byKey) 776 | aa[m_isString ? cast(immutable)key : key.idup] = read!(ValueType!T)(); 777 | return aa; 778 | } 779 | 780 | 781 | /******************************************************************************* 782 | * 783 | * An alias to the `singleKey` method. Instead of `json.singleKey!"something"` 784 | * you can write `json.something`. Read the notes on `singleKey`. 785 | * 786 | **************************************/ 787 | alias opDispatch = singleKey; 788 | 789 | 790 | /******************************************************************************* 791 | * 792 | * Skips all keys of an object except the first occurence with the given key 793 | * name. 794 | * 795 | * Params: 796 | * name = the key name of interest 797 | * 798 | * Returns: 799 | * A temporary struct, a proxy to the parser, that will automatically seek to 800 | * the end of the current JSON object on destruction. 801 | * 802 | * Throws: 803 | * JSONException when the key is not found in the object or parsing errors 804 | * occur. 805 | * 806 | * Note: 807 | * Since this is an on the fly parser, you can only get one key from an 808 | * object with this method. Use `keySwitch` or `foreach(key; json)` to get 809 | * values from multiple keys. 
810 | * 811 | * See_Also: 812 | * keySwitch 813 | * 814 | **************************************/ 815 | @property SingleKey singleKey(string name)() 816 | { 817 | nest('{', "on start of object"); 818 | 819 | if (*m_text != '}') while (true) 820 | { 821 | auto key = borrowString(); 822 | static if (!skipAllInter) 823 | { 824 | expect(':', "between key and value"); 825 | skipWhitespace!false(); 826 | } 827 | 828 | if (key.length == name.length && memcmp(key.ptr, name.ptr, name.length) == 0) 829 | return SingleKey(this); 830 | 831 | skipValueImpl!skipAllInter(); 832 | 833 | if (*m_text == '}') 834 | break; 835 | 836 | static if (!skipAllInter) 837 | { 838 | expect(',', "between key-value pairs"); 839 | skipWhitespace!false(); 840 | } 841 | } 842 | 843 | unnest(); 844 | static if (isValidating) 845 | handleError("Key not found."); 846 | assert(0); 847 | } 848 | 849 | 850 | /******************************************************************************* 851 | * 852 | * Selects from a set of given keys in an object and calls the corresponding 853 | * delegate. The difference to `singleKey` when invoked with a single key is 854 | * that `keySwitch` will not error out if the key is non-existent and may 855 | * trigger the delegate multiple times, if the JSON object has duplicate keys. 856 | * 857 | * Params: 858 | * Args = the names of the keys 859 | * dlg = the delegates corresponding to the keys 860 | * 861 | * Throws: 862 | * JSONException when the key is not found in the object or parsing errors 863 | * occur. 864 | * 865 | **************************************/ 866 | void keySwitch(Args...)(scope void delegate()[Args.length] dlg...) 
867 | { 868 | nest('{', "on start of object"); 869 | 870 | if (*m_text != '}') while (true) 871 | { 872 | auto key = borrowString(); 873 | static if (!skipAllInter) 874 | { 875 | expect(':', "between key and value"); 876 | skipWhitespace!false(); 877 | } 878 | 879 | auto oldPos = m_text; 880 | foreach (i, arg; Args) 881 | { 882 | if (key.length == arg.length && memcmp(key.ptr, arg.ptr, arg.length) == 0) 883 | { 884 | dlg[i](); 885 | goto Next; 886 | } 887 | } 888 | skipValue(); 889 | 890 | Next: 891 | if (*m_text == '}') 892 | break; 893 | 894 | static if (!skipAllInter) if (oldPos !is m_text) 895 | { 896 | expect(',', "after key-value pair"); 897 | skipWhitespace!false(); 898 | } 899 | } 900 | 901 | unnest(); 902 | } 903 | 904 | 905 | private int byKeyImpl(scope int delegate(ref const char[]) foreachBody) 906 | { 907 | nest('{', "at start of foreach over object"); 908 | 909 | int result = 0; 910 | if (*m_text != '}') while (true) 911 | { 912 | auto key = borrowString(); 913 | static if (!skipAllInter) 914 | { 915 | expect(':', "between key and value"); 916 | skipWhitespace!false; 917 | } 918 | 919 | if (iterationGuts!"{}"(result, key, foreachBody, "after key-value pair")) 920 | break; 921 | } 922 | 923 | unnest(); 924 | return result; 925 | } 926 | 927 | 928 | /******************************************************************************* 929 | * 930 | * Iterate the keys of a JSON object with `foreach`. 931 | * 932 | * Notes: 933 | * $(UL 934 | * $(LI If you want to store the key, you need to duplicate it.) 
935 | * ) 936 | * 937 | * Example: 938 | * --- 939 | * uint id; 940 | * foreach (key; json.byKey) 941 | * if (key == "id") 942 | * id = json.read!uint; 943 | * --- 944 | **************************************/ 945 | @safe @nogc pure nothrow 946 | @property int delegate(scope int delegate(ref const char[])) byKey() 947 | { 948 | return &byKeyImpl; 949 | } 950 | 951 | 952 | /+ 953 | ╔══════════════════════════════════════════════════════════════════════════════ 954 | ║ ⚑ Array handling 955 | ╚══════════════════════════════════════════════════════════════════════════════ 956 | +/ 957 | 958 | /******************************************************************************* 959 | * 960 | * Reads a dynamic array off the JSON text. 961 | * 962 | **************************************/ 963 | T read(T)() if (isDynamicArray!T && !isSomeString!T) 964 | { 965 | import std.array; 966 | Appender!T app; 967 | foreach (i; this) 968 | app.put(read!(typeof(T.init[0]))); 969 | return app.data; 970 | } 971 | 972 | 973 | /******************************************************************************* 974 | * 975 | * Reads a static array off the JSON text. 976 | * 977 | * When validation is enabled, it is an error if the JSON array has a different 978 | * length lengths don't match up. Otherwise unset elements receive their initial 979 | * value. 980 | * 981 | **************************************/ 982 | T read(T)() if (isStaticArray!T) 983 | { 984 | T sa = void; 985 | size_t cnt; 986 | foreach (i; this) 987 | { 988 | if (i < T.length) 989 | sa[i] = read!(typeof(T.init[0])); 990 | cnt = i + 1; 991 | } 992 | static if (isValidating) 993 | { 994 | if (cnt != T.length) 995 | handleError(format("Static array size mismatch. Expected %s, got %s", T.length, cnt)); 996 | } 997 | else 998 | { 999 | foreach (i; cnt .. 
T.length) 1000 | sa[i] = T.init; 1001 | } 1002 | return sa; 1003 | } 1004 | 1005 | 1006 | /******************************************************************************* 1007 | * 1008 | * Iterate over a JSON array via `foreach`. 1009 | * 1010 | **************************************/ 1011 | int opApply(scope int delegate(const size_t) foreachBody) 1012 | { 1013 | nest('[', "at start of foreach over array"); 1014 | 1015 | int result = 0; 1016 | if (*m_text != ']') for (size_t idx = 0; true; idx++) 1017 | if (iterationGuts!"[]"(result, idx, foreachBody, "after array element")) 1018 | break; 1019 | 1020 | unnest(); 1021 | return result; 1022 | } 1023 | 1024 | 1025 | /+ 1026 | ╔══════════════════════════════════════════════════════════════════════════════ 1027 | ║ ⚑ Boolean 1028 | ╚══════════════════════════════════════════════════════════════════════════════ 1029 | +/ 1030 | 1031 | /******************************************************************************* 1032 | * 1033 | * Reads a boolean value off the JSON text. 1034 | * 1035 | **************************************/ 1036 | bool read(T)() if (is(T == bool)) 1037 | { 1038 | return skipBoolean!(skipAllInter, isValidating)(); 1039 | } 1040 | 1041 | 1042 | private bool skipBoolean(bool skipInter, bool validate = isValidateAll)() 1043 | { 1044 | static immutable char[4][2] keywords = [ "true", "alse" ]; 1045 | auto isFalse = *m_text == 'f'; 1046 | static if (validate) 1047 | if (*cast(char[4]*) &m_text[isFalse] != keywords[isFalse]) 1048 | handleError("`true` or `false` expected."); 1049 | m_text += isFalse ? 
5 : 4; 1050 | skipWhitespace!skipInter(); 1051 | return !isFalse; 1052 | } 1053 | 1054 | 1055 | /+ 1056 | ╔══════════════════════════════════════════════════════════════════════════════ 1057 | ║ ⚑ Null 1058 | ╚══════════════════════════════════════════════════════════════════════════════ 1059 | +/ 1060 | 1061 | /******************************************************************************* 1062 | * 1063 | * Reads `null` off the JSON text. 1064 | * 1065 | **************************************/ 1066 | typeof(null) readNull() 1067 | { 1068 | skipNull!(skipAllInter, isValidating)(); 1069 | return null; 1070 | } 1071 | 1072 | 1073 | private void skipNull(bool skipInter, bool validate = isValidateAll)() 1074 | { 1075 | static if (validate) 1076 | if (*cast(const uint*) m_text != *cast(const uint*) "null".ptr) 1077 | handleError("`null` expected."); 1078 | m_text += 4; 1079 | skipWhitespace!skipInter(); 1080 | } 1081 | 1082 | 1083 | /+ 1084 | ╔══════════════════════════════════════════════════════════════════════════════ 1085 | ║ ⚑ Helpers and Error Handling 1086 | ╚══════════════════════════════════════════════════════════════════════════════ 1087 | +/ 1088 | 1089 | /******************************************************************************* 1090 | * 1091 | * Skips the next JSON value if you are not interested. 
1092 | * 1093 | **************************************/ 1094 | void skipValue() 1095 | { 1096 | skipValueImpl!skipAllInter(); 1097 | } 1098 | 1099 | 1100 | private void skipValueImpl(bool skipInter)() 1101 | { 1102 | with (DataType) final switch (peek) 1103 | { 1104 | case string: 1105 | skipString!skipInter(); 1106 | break; 1107 | case number: 1108 | skipNumber!skipInter(); 1109 | break; 1110 | case object: 1111 | static if (isValidateAll) 1112 | { 1113 | foreach (_; this.byKey) 1114 | break; 1115 | } 1116 | else 1117 | { 1118 | m_text++; 1119 | seekObjectEnd(); 1120 | skipOnePlusWhitespace!skipInter(); 1121 | } 1122 | break; 1123 | case array: 1124 | static if (isValidateAll) 1125 | { 1126 | foreach (_; this) 1127 | break; 1128 | } 1129 | else 1130 | { 1131 | m_text++; 1132 | seekArrayEnd(); 1133 | skipOnePlusWhitespace!skipInter(); 1134 | } 1135 | break; 1136 | case boolean: 1137 | skipBoolean!skipInter(); 1138 | break; 1139 | case null_: 1140 | skipNull!skipInter(); 1141 | break; 1142 | } 1143 | } 1144 | 1145 | 1146 | /******************************************************************************* 1147 | * 1148 | * Returns the type of data that is up next in the JSON text. 1149 | * 1150 | **************************************/ 1151 | @property DataType peek() 1152 | { 1153 | static immutable trans = { 1154 | DataType[256] result = cast(DataType) ubyte.max; 1155 | result['{'] = DataType.object; 1156 | result['['] = DataType.array; 1157 | result['-'] = DataType.number; 1158 | foreach (i; '0' .. 
'9'+1) 1159 | result[i] = DataType.number; 1160 | result['"'] = DataType.string; 1161 | result['t'] = DataType.boolean; 1162 | result['f'] = DataType.boolean; 1163 | result['n'] = DataType.null_; 1164 | return result; 1165 | }(); 1166 | 1167 | DataType vt = trans[*m_text]; 1168 | static if (isValidating) 1169 | if (vt == ubyte.max) 1170 | expectNot("while peeking at next value type"); 1171 | return vt; 1172 | } 1173 | 1174 | 1175 | /******************************************************************************* 1176 | * 1177 | * Save or restore the parser's internal state. 1178 | * 1179 | * If you want to read only a certain object from the JSON, but exactly which 1180 | * depends on the value of some key, this is where saving and restoring the 1181 | * parser state helps. 1182 | * 1183 | * Before each candidate you save the parser state. Then you perform just the 1184 | * minimal work to test if the candidate matches some criteria. If it does, 1185 | * restore the parser state and read the elements in full. Of it doesn't, just 1186 | * skip to the next. 
1187 | * 1188 | **************************************/ 1189 | @property const(JsonParserState) state() const 1190 | { 1191 | return JsonParserState(m_text, m_nesting); 1192 | } 1193 | 1194 | @property void state(const JsonParserState oldState) 1195 | { 1196 | m_text = oldState.text; 1197 | m_nesting = oldState.nesting; 1198 | } 1199 | 1200 | 1201 | private void nest(char c, string msg) 1202 | { 1203 | expect(c, msg); 1204 | skipWhitespace!false(); 1205 | m_nesting++; 1206 | } 1207 | 1208 | 1209 | private void unnest() 1210 | in { assert(m_nesting > 0); } 1211 | body 1212 | { 1213 | if (--m_nesting == 0) 1214 | { 1215 | skipOnePlusWhitespace!false(); 1216 | static if (isValidating) 1217 | if (*m_text != '\0') 1218 | handleError("Expected end of JSON."); 1219 | } 1220 | else skipOnePlusWhitespace!skipAllInter(); 1221 | } 1222 | 1223 | 1224 | private bool iterationGuts(char[2] braces, T, D)(ref int result, T idx, scope D dlg, 1225 | string missingCommaMsg) 1226 | { 1227 | auto oldPos = m_text; 1228 | static if (isValidateAll) 1229 | { 1230 | if (result) 1231 | { 1232 | skipValueImpl!(!isValidateAll)(); 1233 | goto PastValue; 1234 | } 1235 | } 1236 | result = dlg(idx); 1237 | if (oldPos is m_text) 1238 | skipValueImpl!(!isValidateAll)(); 1239 | 1240 | PastValue: 1241 | if (*m_text == braces[1]) 1242 | return true; 1243 | 1244 | static if (!isValidateAll) if (result) 1245 | { 1246 | seekAggregateEnd!braces(); 1247 | return true; 1248 | } 1249 | 1250 | static if (!skipAllInter) if (oldPos !is m_text) 1251 | { 1252 | expect(',', missingCommaMsg); 1253 | skipWhitespace!false(); 1254 | } 1255 | return false; 1256 | } 1257 | 1258 | 1259 | static if (!isValidateAll) 1260 | { 1261 | private void seekObjectEnd() 1262 | { 1263 | seekAggregateEnd!"{}"(); 1264 | } 1265 | 1266 | 1267 | private void seekArrayEnd() 1268 | { 1269 | seekAggregateEnd!"[]"(); 1270 | } 1271 | 1272 | 1273 | private void seekAggregateEnd(immutable char[2] parenthesis)() 1274 | { 1275 | size_t nesting = 1; 
1276 | while (true) 1277 | { 1278 | m_text.seekToAnyOf!(parenthesis ~ "\"\0"); 1279 | final switch (*m_text) 1280 | { 1281 | case parenthesis[0]: 1282 | m_text++; 1283 | nesting++; 1284 | break; 1285 | case parenthesis[1]: 1286 | if (--nesting == 0) 1287 | return; 1288 | m_text++; 1289 | break; 1290 | case '"': 1291 | // Could skip ':' or ',' here by passing `true`, but we skip it above anyways. 1292 | skipString!false(); 1293 | } 1294 | } 1295 | } 1296 | } 1297 | 1298 | 1299 | /// This also increments the JSON read pointer. 1300 | private void expect(char c, string msg) 1301 | { 1302 | static if (isValidating) 1303 | if (*m_text != c) 1304 | expectImpl(c, msg); 1305 | m_text++; 1306 | } 1307 | 1308 | 1309 | private void expectNot(char c, string msg) 1310 | { 1311 | static if (isValidating) 1312 | if (*m_text == c) 1313 | expectNot(msg); 1314 | } 1315 | 1316 | 1317 | static if (isValidating) 1318 | { 1319 | @noinline 1320 | private void expectNot(string msg) 1321 | { 1322 | string tmpl = isPrintable(*m_text) 1323 | ? "Character '%s' %s." 1324 | : "Byte 0x%02x %s."; 1325 | handleError(format(tmpl, *m_text, msg)); 1326 | } 1327 | 1328 | 1329 | @noinline 1330 | private void expectImpl(char c, string msg) 1331 | { 1332 | string tmpl = isPrintable(*m_text) 1333 | ? "Expected '%s', but found '%s' %s." 1334 | : "Expected '%s', but found byte 0x%02x %s."; 1335 | handleError(format(tmpl, c, *m_text, msg)); 1336 | } 1337 | 1338 | 1339 | @noinline 1340 | private void handleError(string msg) 1341 | { 1342 | import fast.unicode; 1343 | 1344 | size_t line; 1345 | const(char)* p = m_start; 1346 | const(char)* last = m_start; 1347 | while (p < m_text) 1348 | { 1349 | last = p; 1350 | p.skipToNextLine(); 1351 | line++; 1352 | } 1353 | line += p is m_text; 1354 | size_t column = last[0 .. 
m_text - last].countGraphemes() + 1; 1355 | 1356 | throw new JSONException(msg, line.to!int, column.to!int); 1357 | } 1358 | } 1359 | 1360 | 1361 | @forceinline @nogc pure nothrow 1362 | private void skipOnePlusWhitespace(bool skipInter)() 1363 | { 1364 | m_text++; 1365 | skipWhitespace!skipInter(); 1366 | } 1367 | 1368 | 1369 | @forceinline @nogc pure nothrow 1370 | private void skipWhitespace(bool skipInter)() 1371 | { 1372 | static if (skipInter) 1373 | m_text.skipAllOf!"\t\n\r ,:"; 1374 | else 1375 | m_text.skipAsciiWhitespace(); 1376 | } 1377 | 1378 | 1379 | private static struct SingleKey 1380 | { 1381 | alias json this; 1382 | 1383 | private Json* m_pjson; 1384 | private const(char*) m_oldPos; 1385 | 1386 | @safe @nogc pure nothrow 1387 | @property ref Json json() 1388 | { 1389 | return *m_pjson; 1390 | } 1391 | 1392 | this(ref Json json) 1393 | { 1394 | m_pjson = &json; 1395 | m_oldPos = json.m_text; 1396 | } 1397 | 1398 | ~this() 1399 | { 1400 | static if (isValidateAll) 1401 | { 1402 | if (*json.m_text != '}') 1403 | { 1404 | if (m_oldPos !is json.m_text) 1405 | { 1406 | json.expect(',', "after key-value pair"); 1407 | json.skipWhitespace!false(); 1408 | } 1409 | while (true) 1410 | { 1411 | json.skipString!false(); 1412 | json.expect(':', "between key and value"); 1413 | json.skipWhitespace!false(); 1414 | json.skipValueImpl!false(); 1415 | 1416 | if (*json.m_text == '}') 1417 | break; 1418 | 1419 | json.expect(',', "after key-value pair"); 1420 | json.skipWhitespace!false(); 1421 | } 1422 | } 1423 | } 1424 | else 1425 | { 1426 | json.seekObjectEnd(); 1427 | } 1428 | json.unnest(); 1429 | } 1430 | } 1431 | 1432 | 1433 | private static struct File 1434 | { 1435 | alias m_json this; 1436 | 1437 | Json m_json; 1438 | private size_t m_len; 1439 | private bool m_isMapping; 1440 | 1441 | @disable this(); 1442 | @disable this(this); 1443 | 1444 | this(const Filename fname) 1445 | { 1446 | version (Posix) 1447 | { 1448 | import core.sys.posix.fcntl; 1449 | 
import core.sys.posix.sys.mman; 1450 | import core.sys.posix.unistd; 1451 | 1452 | version (CRuntime_Glibc) 1453 | enum O_CLOEXEC = octal!2000000; 1454 | else version (OSX) // Requires at least OS X 10.7 Lion 1455 | enum O_CLOEXEC = 0x1000000; 1456 | else version(FreeBSD) 1457 | enum O_CLOEXEC = octal!2000000; 1458 | else static assert(0, "Not implemented"); 1459 | 1460 | int fd = { return open(charPtr!fname, O_RDONLY | O_NOCTTY | O_CLOEXEC); }(); 1461 | assert(fcntl(fd, F_GETFD) & FD_CLOEXEC, "Could not set O_CLOEXEC."); 1462 | 1463 | if (fd == -1) 1464 | throw new ErrnoException("Could not open JSON file for reading."); 1465 | scope(exit) close(fd); 1466 | 1467 | // Get the file size 1468 | stat_t info; 1469 | if (fstat(fd, &info) == -1) 1470 | throw new ErrnoException("Could not get JSON file size."); 1471 | 1472 | // Ensure we have 16 extra bytes 1473 | size_t pagesize = sysconf(_SC_PAGESIZE); 1474 | ulong fsize = ulong(info.st_size + pagesize - 1) / pagesize * pagesize; 1475 | bool zeroPage = fsize < info.st_size + 16; 1476 | if (zeroPage) 1477 | fsize += pagesize; 1478 | if (fsize > size_t.max) 1479 | throw new Exception("JSON file too large to be mapped in RAM."); 1480 | m_len = cast(size_t) fsize; 1481 | 1482 | // Map the file 1483 | void* mapping = mmap(null, m_len, PROT_READ, MAP_PRIVATE, fd, 0); 1484 | if (mapping == MAP_FAILED) 1485 | throw new ErrnoException("Could not map JSON file."); 1486 | scope(failure) 1487 | munmap(mapping, m_len); 1488 | 1489 | // Get a zero-page up behind the JSON text 1490 | if (zeroPage) 1491 | { 1492 | void* offs = mapping + m_len - pagesize; 1493 | if (mmap(offs, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED) 1494 | throw new ErrnoException("Could not map zero-page behind JSON text."); 1495 | } 1496 | 1497 | // Initialize the parser on the JSON text 1498 | m_json = Json((cast(char*) mapping)[0 .. 
cast(size_t) info.st_size], No.simdPrep); 1499 | } 1500 | else version (Windows) 1501 | { 1502 | import core.sys.windows.winnt; 1503 | import core.sys.windows.winbase; 1504 | 1505 | HANDLE hnd = { return CreateFileW( wcharPtr!fname, GENERIC_READ, FILE_SHARE_READ, null, 1506 | OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, null ); }(); 1507 | 1508 | if (hnd == INVALID_HANDLE_VALUE) 1509 | throw new FileException("Could not open JSON file for reading."); 1510 | scope(exit) 1511 | CloseHandle( hnd ); 1512 | 1513 | // Get the file size 1514 | LARGE_INTEGER fileSize = void; 1515 | if (!GetFileSizeEx( hnd, &fileSize )) 1516 | throw new Exception("Could not get JSON file size."); 1517 | 1518 | // Map the file 1519 | HANDLE mapping = CreateFileMapping( hnd, null, PAGE_READONLY, fileSize.HighPart, fileSize.LowPart, null ); 1520 | if (mapping == INVALID_HANDLE_VALUE) 1521 | throw new Exception("Could not create file mapping for JSON file."); 1522 | scope(exit) CloseHandle( mapping ); 1523 | 1524 | // View the mapping 1525 | void* view = MapViewOfFile( mapping, FILE_MAP_READ, 0, 0, 0 ); 1526 | if (view is null) 1527 | throw new Exception("Could not map view of JSON file."); 1528 | scope(failure) 1529 | UnmapViewOfFile( view ); 1530 | 1531 | // Missing 64-bit version in druntime (2.071) 1532 | version (X86_64) struct MEMORY_BASIC_INFORMATION { 1533 | PVOID BaseAddress; 1534 | PVOID AllocationBase; 1535 | DWORD AllocationProtect; 1536 | DWORD __alignment1; 1537 | ULONGLONG RegionSize; 1538 | DWORD State; 1539 | DWORD Protect; 1540 | DWORD Type; 1541 | DWORD __alignment2; 1542 | } 1543 | 1544 | // Check if the view is 16 bytes larger than the file 1545 | MEMORY_BASIC_INFORMATION query = void; 1546 | if (!VirtualQuery( view, cast(PMEMORY_BASIC_INFORMATION)&query, query.sizeof )) 1547 | throw new Exception("VirtualQuery failed."); 1548 | 1549 | // Initialize the parser on the JSON text 1550 | char[] slice = (cast(char*) view)[0 .. 
cast(size_t)fileSize.QuadPart];
				// The view is used in place only if the mapped region extends at least 16 bytes
				// past the file contents. Otherwise the Json is built with `Yes.simdPrep` and the
				// view is released right away.
				// NOTE(review): presumably `Yes.simdPrep` makes Json take a padded copy - confirm.
				if (query.RegionSize >= fileSize.QuadPart + 16)
				{
					m_json = Json(slice, No.simdPrep);
					m_isMapping = true;
				}
				else
				{
					m_json = Json(slice, Yes.simdPrep);
					UnmapViewOfFile( view );
				}
			}
			else static assert(0, "Not implemented");
		}


		/// Opens a file by its UTF-8 name. On Posix the name's bytes are forwarded as they are, on
		/// Windows the name is first converted to UTF-16 in a stack (`alloca`) buffer.
		this(const(char)[] fname)
		{
			import std.string;

			version (Posix)
				this( fname.representation );
			else version (Windows)
			{
				import core.stdc.stdlib;
				auto buf = cast(wchar*)alloca(string2wstringSize(fname));
				auto fnameW = string2wstring(fname, buf);
				this( fnameW.representation );
			}
			else static assert(0, "Not implemented");
		}


		/// Releases the file mapping. On Windows this only happens if the view was kept alive by
		/// the constructor (`m_isMapping`).
		nothrow
		~this()
		{
			version (Posix)
			{
				import core.sys.posix.sys.mman;
				munmap(cast(void*)m_json.m_start, m_len);
			}
			else version (Windows)
			{
				import core.sys.windows.winnt;
				import core.sys.windows.winbase;
				if (m_isMapping)
					UnmapViewOfFile( cast(LPCVOID)m_json.m_start );
			}
			else static assert(0, "Not implemented");
		}
	}
}


/*
 * Builds a compile-time tuple of `Remap` values for `T`, one per enum member or struct field.
 * Each entry pairs the D side (enum value or field name) with the JSON name. The JSON name is
 * looked up in the `map` of the `JsonMapping` UDA on `T` and falls back to the D name if there is
 * no entry for it.
 */
private template buildRemapTable(T)
{
	import std.typetuple;
	import fast.internal.helpers;

	static if (is(T == enum))
	{
		struct Remap { T d; string json; }
		enum members = EnumMembers!T;
	}
	else
	{
		struct Remap { string d; string json; }
		enum members = FieldNameTuple!T;
	}
	enum mapping = getUDA!(T, JsonMapping).map;

	// Expands the half-open member index range [a, b) by recursive halving.
	template Impl(size_t a, size_t b)
	{
		static if (b - a > 1)
		{
			alias Impl = TypeTuple!(Impl!(a, (b + a) / 2), Impl!((b + a) / 2, b));
		}
		else static if (b - a == 1)
		{
			// The lookup key is always a string: the member's name.
			static if (is(T == enum))
				enum key = members[a].to!string;
			else
				alias key = members[a];
			static if ((key in mapping) !is null)
				enum mapped = mapping[key];
			else
				alias mapped = key;
			alias Impl = TypeTuple!(Remap(members[a], mapped));
		}
		else alias Impl = TypeTuple!();
	}

	alias buildRemapTable = Impl!(0, members.length);
}


// Runs the validating parser over the JSON test files and checks struct deserialization.
unittest
{
	// Number of values seen per JSON data type, plus the number of object keys.
	struct Counter
	{
		size_t array, object, key, string, number, boolean, null_;
	}

	// Recursively walks the value at the current read position and counts everything in it.
	void valueHandler(ref Json!validateAll.File json, ref Counter ctr)
	{
		with (DataType) final switch (json.peek)
		{
		case array:
			ctr.array++;
			foreach (_; json)
				valueHandler(json, ctr);
			break;
		case object:
			ctr.object++;
			foreach(key; json.byKey)
			{
				ctr.key++;
				valueHandler(json, ctr);
			}
			break;
		case string:
			ctr.string++;
			json.skipValue();
			break;
		case number:
			ctr.number++;
			json.skipValue();
			break;
		case boolean:
			ctr.boolean++;
			json.skipValue();
			break;
		case null_:
			ctr.null_++;
			json.skipValue();
			break;
		}
	}

	// The file must parse and yield exactly the expected counts.
	void passFile(string fname, Counter valid)
	{
		auto json = parseJSONFile!validateAll(fname);
		Counter ctr;
		valueHandler(json, ctr);
		assert(ctr == valid, fname);
	}

	// Walking the file must throw a JSONException.
	void failFile(string fname)
	{
		auto json = parseJSONFile!validateAll(fname);
		Counter ctr;
		assertThrown!JSONException(valueHandler(json, ctr), fname);
	}

	// Tests that need to pass according to RFC 7159
	// ("fail1" and "fail18" from the test suite are expected to parse here as well.)
	passFile("test/pass1.json",  Counter( 6,  4, 33, 21, 32,  4,  2));
	passFile("test/pass2.json",  Counter(19,  0,  0,  1,  0,  0,  0));
	passFile("test/pass3.json",  Counter( 0,  2,  3,  2,  0,  0,  0));
	passFile("test/fail1.json",  Counter( 0,  0,  0,  1,  0,  0,  0));
	passFile("test/fail18.json", Counter(20,  0,  0,  1,  0,  0,  0));

	// Tests that need to fail
	foreach (i; chain(iota(2, 18), iota(19, 34)))
		failFile("test/fail" ~ i.to!string ~ ".json");

	// Deserialization
	struct Test
	{
		string text1;
		string text2;
		string text3;
		double dbl = 0;
		float flt = 0;
		ulong ul;
		uint ui;
		ushort us;
		ubyte ub;
		long lm, lp;
		int im, ip;
		short sm, sp;
		byte bm, bp;
		bool t, f;
		Test* tp1, tp2;
		int[2] sa;
		int[] da;
		Test[string] aa;
		SearchPolicy e;
	}

	Test t1 = {
		text1 : "abcde",
		text2 : "",
		text3 : null,
		dbl   : 1.1,
		flt   : -1.1,
		ul    : ulong.max,
		ui    : uint.max,
		us    : ushort.max,
		ub    : ubyte.max,
		lm    : long.min,
		lp    : long.max,
		im    : int.min,
		ip    : int.max,
		sm    : short.min,
		sp    : short.max,
		bm    : byte.min,
		bp    : byte.max,
		t     : true,
		f     : false,
		tp1   : null,
		tp2   : new Test("This is", "a", "test."),
		sa    : [ 33, 44 ],
		da    : [ 5, 6, 7 ],
		aa    : [ "hash" : Test("x", "y", "z") ],
		e     : SearchPolicy.linear
	};
	Test t2 = parseJSON(`{
		"text1" : "abcde",
		"text2" : "",
		"text3" : null,
		"dbl"   : 1.1,
		"flt"   : -1.1,
		"ul"    : ` ~ ulong.max.to!string ~ `,
		"ui"    : ` ~ uint.max.to!string ~ `,
		"us"    : ` ~ ushort.max.to!string ~ `,
		"ub"    : ` ~ ubyte.max.to!string ~ `,
		"lm"    : ` ~ long.min.to!string ~ `,
		"lp"    : ` ~ long.max.to!string ~ `,
		"im"    : ` ~ int.min.to!string ~ `,
		"ip"    : ` ~ int.max.to!string ~ `,
		"sm"    : ` ~ short.min.to!string ~ `,
		"sp"    : ` ~ short.max.to!string ~ `,
		"bm"    : ` ~ byte.min.to!string ~ `,
		"bp"    : ` ~ byte.max.to!string ~ `,
		"t"     : true,
		"f"     : false,
		"tp1"   : null,
		"tp2"   : { "text1": "This is", "text2": "a", "text3": "test." },
		"sa"    : [ 33, 44 ],
		"da"    : [ 5, 6, 7 ],
		"aa"    : { "hash" : { "text1":"x", "text2":"y", "text3":"z" } },
		"e"     : "linear"
	}`).read!Test;

	// Pointer, dynamic array and AA fields are compared by content first and then aliased to the
	// same data, so that the final struct comparison can succeed.
	assert(t2.tp2 && *t1.tp2 == *t2.tp2);
	assert(t1.da == t2.da);
	assert(t1.aa == t2.aa);
	t2.tp2 = t1.tp2;
	t2.da = t1.da;
	t2.aa = t1.aa;
	assert(t1 == t2);
}

// Test case for Issue #4
// Reading a JSON `null` as a string must yield a null string.
unittest
{
	auto str = `{"initiator_carrier_code":null,"a":"b"}`;
	auto js = parseTrustedJSON(str);
	foreach(key; js.byKey)
	{
		if(key == "initiator_carrier_code")
		{
			auto t = js.read!string;
			assert(t is null);
		}
	}
}

// Test case for Issue #5
// Multi-byte UTF-8 sequences of various lengths must pass validation.
unittest
{
	import std.utf;
	auto str = `{"a":"SΛNNO𐍈€한"}`;
	str.validate;
	validateJSON(str);
}
--------------------------------------------------------------------------------
/source/fast/parsing.d:
--------------------------------------------------------------------------------
/***************************************************************************************************
 *
 * Text parsing functionality.
4 | * 5 | * Authors: 6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 7 | * 8 | * Copyright: 9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 10 | * 11 | * License: 12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 13 | * 14 | **************************************************************************************************/ 15 | module fast.parsing; 16 | 17 | import std.traits; 18 | import fast.internal.sysdef; 19 | 20 | 21 | /+ 22 | ╔══════════════════════════════════════════════════════════════════════════════ 23 | ║ ⚑ Hexadecimal 24 | ╚══════════════════════════════════════════════════════════════════════════════ 25 | +/ 26 | 27 | /******************************************************************************* 28 | * 29 | * Decodes a single hexadecimal character. 30 | * 31 | * Params: 32 | * c = The hexadecimal digit. 33 | * 34 | * Returns: 35 | * `c` converted to an integer. 36 | * 37 | **************************************/ 38 | @safe @nogc pure nothrow 39 | uint hexDecode(char c) 40 | { 41 | return c + 9 * (c >> 6) & 15; 42 | } 43 | 44 | 45 | @nogc pure nothrow 46 | uint hexDecode4(ref const(char)* hex) 47 | { 48 | uint x = *cast(uint*) &hex; 49 | hex += 4; 50 | x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101); 51 | version (LittleEndian) 52 | { 53 | return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000; 54 | } 55 | else 56 | { 57 | x = (x | x >> 4) & 0x00FF00FF; 58 | return (x | x >> 8) & 0x0000FFFF; 59 | } 60 | } 61 | 62 | 63 | @nogc pure nothrow 64 | inout(char)* hexDecode4(ref inout(char)* hex, out uint result) 65 | { 66 | foreach (i; 0 .. 
4) 67 | { 68 | result *= 16; 69 | char ch = cast(char) (hex[i] - '0'); 70 | if (ch <= 9) 71 | { 72 | result += ch; 73 | } 74 | else 75 | { 76 | ch = cast(char) ((ch | 0x20) - 0x31); 77 | if (ch <= 5) 78 | result += ch + 10; 79 | else 80 | return hex + i; 81 | } 82 | } 83 | hex += 4; 84 | return null; 85 | } 86 | unittest 87 | { 88 | string x = "aF09"; 89 | const(char)* p = x.ptr; 90 | uint result; 91 | hexDecode4(p, result); 92 | assert(result == 0xAF09); 93 | } 94 | 95 | 96 | /+ 97 | ╔══════════════════════════════════════════════════════════════════════════════ 98 | ║ ⚑ Numbers 99 | ╚══════════════════════════════════════════════════════════════════════════════ 100 | +/ 101 | 102 | 103 | /// Options for `parseNumber`. 104 | struct NumberOptions 105 | { 106 | /// Allows the minus sign as the first character and thus negative numbers. 107 | bool minus; 108 | } 109 | 110 | 111 | /******************************************************************************* 112 | * 113 | * Parse a number from a character read pointer. 114 | * 115 | * On success, the read pointer is set behind the number. 116 | * 117 | * Params: 118 | * opt = Selects features for the implementation. Less features make the 119 | * parser faster. 120 | * str = The read pointer. 121 | * n = A reference to a number to be overwritten with the result. 122 | * 123 | * Returns: 124 | * An indication of success. Typically the function fails when a number cannot 125 | * be stored in an integer of the given size or invalid characters are 126 | * encountered. 127 | * 128 | **************************************/ 129 | @nogc pure nothrow 130 | bool parseNumber(NumberOptions opt, N)(ref const(char)* str, ref N n) if (isNumeric!N) 131 | { 132 | import fast.internal.helpers; 133 | import std.range; 134 | 135 | // Integer types larger than the mantissa of N. 
136 | static if (N.sizeof <= size_t.sizeof) 137 | { 138 | alias U = size_t; 139 | alias I = ptrdiff_t; 140 | } 141 | else 142 | { 143 | alias U = ulong; 144 | alias I = long; 145 | } 146 | 147 | // Largest value of type U that can be multiplied by 10 and have a digit added without overflow. 148 | enum canHoldOneMoreDigit = (U.max - 9) / 10; 149 | static if (isFloatingPoint!N) 150 | { 151 | enum significandRightShift = 8 * U.sizeof - N.mant_dig + 1; 152 | enum lastSignificandBit = U(2) << 8 * U.sizeof - N.mant_dig; 153 | enum firstFractionBit = U(1) << 8 * U.sizeof - N.mant_dig; 154 | enum remainderBits = U.max - N.mant_dig + 1; 155 | enum expShift = N.mant_dig - 1; 156 | enum expBias = N.max_exp - 1; 157 | } 158 | 159 | static if (isFloatingPoint!N) 160 | { 161 | alias pow5Max = PowData!(U, 5).powMax; 162 | alias pow5 = PowData!(U, 5).pows; 163 | 164 | // Largest power of 10 that fits into a float of type N. The factor 5 here is correct, as the 2s 165 | // go in as an increment in the exponent, that is neglectable here. 166 | enum pow10MaxF = { 167 | U v = 1; uint exp; 168 | while (v <= ((U(1) << N.mant_dig) - 1) / 5) { v *= 5; exp++; } 169 | return exp; 170 | }(); 171 | 172 | static immutable N[pow10MaxF] pow10F = N(10).recurrence!((a, n) => 10 * a[n-1]).take(pow10MaxF).array; 173 | } 174 | else 175 | { 176 | alias pow10Max = PowData!(U, 10).powMax; 177 | alias pow10 = PowData!(U, 10).pows; 178 | } 179 | 180 | const(char)* p = str; 181 | const(char)* point = null; 182 | U significand = 0; 183 | size_t exponent = 0; 184 | size_t expAdjust = void; 185 | bool expSign = void; 186 | static if (isFloatingPoint!N) 187 | { 188 | U exp2 = void; 189 | bool roundUp = false; 190 | } 191 | 192 | /////////////////// SIGN BIT HANDLING /////////////////// 193 | 194 | // Check for the sign. 
195 | static if (opt.minus) 196 | { 197 | bool sign = (*p == '-'); 198 | if (sign) 199 | p++; 200 | } 201 | 202 | /////////////////// INTEGRAL PART OF SIGNIFICAND /////////////////// 203 | 204 | uint digit = *p - '0'; 205 | if (digit == 0) 206 | { 207 | // We have a single zero. 208 | p++; 209 | } 210 | else if (digit <= 9) 211 | { 212 | // Regular case of one or more digits. 213 | do 214 | { 215 | if (significand > canHoldOneMoreDigit) 216 | goto BigMantissa; 217 | BigMantissaNotSoMuch: 218 | significand = 10 * significand + digit; 219 | digit = *++p - '0'; 220 | } 221 | while (digit <= 9); 222 | } 223 | else return false; 224 | 225 | /////////////////// FRACTIONAL PART OF SIGNIFICAND /////////////////// 226 | 227 | if (*p == '.') 228 | { 229 | point = ++p; 230 | digit = *p - '0'; 231 | if (digit > 9) 232 | return false; 233 | do 234 | { 235 | if (significand > canHoldOneMoreDigit) 236 | goto BigMantissa; 237 | significand = 10 * significand + digit; 238 | digit = *++p - '0'; 239 | } 240 | while (digit <= 9); 241 | } 242 | 243 | /////////////////// EXPONENT HANDLING /////////////////// 244 | 245 | expAdjust = (point is null) ? 0 : p - point; 246 | if ((*p | 0x20) == 'e') 247 | { 248 | p++; 249 | expSign = (*p == '-'); 250 | if (expSign || *p == '+') 251 | p++; 252 | digit = *p - '0'; 253 | if (digit > 9) 254 | return false; 255 | do 256 | { 257 | if (exponent > canHoldOneMoreDigit) 258 | goto BigExponent; 259 | exponent = 10 * exponent + digit; 260 | digit = *++p - '0'; 261 | } 262 | while (digit <= 9); 263 | } 264 | 265 | if (expAdjust) 266 | { 267 | if (expSign) 268 | { 269 | if (exponent > size_t.max - expAdjust) 270 | goto BigExponentAdjustForDecimalPoint; 271 | exponent += expAdjust; 272 | } 273 | else if (exponent >= expAdjust) 274 | { 275 | exponent -= expAdjust; 276 | } 277 | else 278 | { 279 | // Amount of fraction digits turns exponent from positive to negative. 
280 | expAdjust -= exponent; 281 | exponent = expAdjust; 282 | expSign = true; 283 | } 284 | } 285 | 286 | /////////////////// RESULT ASSEMBLY /////////////////// 287 | 288 | static if (isFloatingPoint!N) 289 | { 290 | if (significand == 0 || exponent == 0) 291 | { 292 | // The significand is the unsigned result. 293 | static if (opt.minus) 294 | if (sign) 295 | n = -N(significand); 296 | n = +N(significand); 297 | str = p; 298 | return true; 299 | } 300 | 301 | // Try the floating-point fast path: The significand's bits, as well as the 10^x exponent can be expressed 302 | // accurately as a float of type N. We just need to divide or multiply them based on the signedness of the 303 | // exponent. 304 | exp2 = bsr(significand); 305 | if (exp2 - bsf(significand) < N.mant_dig && exponent <= pow10MaxF) 306 | { 307 | N b = pow10F[exponent - 1]; 308 | static if (opt.minus) 309 | if (sign) 310 | b = -b; 311 | n = expSign ? significand / b : significand * b; 312 | str = p; 313 | return true; 314 | } 315 | else if (exponent <= pow5Max) 316 | { 317 | // Special case, mostly to handle the little bit of extra precision that comes from 318 | // converting a double to its string representation. The last base-10 digit doesn't quite 319 | // fit back into a double, but we don't need to resort to arbitrary precision math just yet. 320 | if (expSign) 321 | { 322 | U divisor = pow5[exponent - 1]; 323 | static if (isAMD64 && (isLDC || isGDC)) 324 | { 325 | // AMD64 can divide 128-bit numbers by 64-bit numbers directly. 326 | size_t expDivisor = clz(divisor); 327 | divisor <<= expDivisor; 328 | exp2 = expDivisor - exponent - bigDiv(significand, divisor); 329 | significand <<= 1; 330 | } 331 | else 332 | { 333 | // We perform an iterative division. 
334 | U dividend = significand << 8 * U.sizeof - 1 - exp2; 335 | U quotient = dividend / divisor; 336 | dividend %= divisor; 337 | 338 | size_t lzs = clz(quotient); 339 | exp2 -= exponent + lzs; 340 | significand = quotient << ++lzs; 341 | size_t accuracy = 8 * U.sizeof - lzs; 342 | while (accuracy < N.mant_dig) 343 | { 344 | lzs = clz(dividend); 345 | dividend <<= lzs; 346 | quotient = dividend / divisor; 347 | dividend %= divisor; 348 | significand |= quotient << (8 * U.sizeof - lzs) >> accuracy; 349 | accuracy += lzs; 350 | } 351 | } 352 | 353 | // Assemble floating point value from bits. 354 | roundUp = (significand & firstFractionBit) != 0; 355 | significand >>= significandRightShift; 356 | if (roundUp) 357 | { 358 | significand++; 359 | significand &= ~(U(1) << N.mant_dig - 1); 360 | if (significand == 0) 361 | ++exp2; 362 | } 363 | 364 | U* result = cast(U*) &n; 365 | *result = exp2 + expBias << expShift | significand; 366 | static if (opt.minus) 367 | *result |= U(sign) << U.sizeof * 8 - 1; 368 | str = p; 369 | return true; 370 | } 371 | else assert(0, "Not implemented"); 372 | } 373 | else assert(0, "Not implemented"); 374 | } 375 | else 376 | { 377 | import fast.intmath; 378 | 379 | if (exponent && significand) 380 | { 381 | // We need to account for the exponent. 382 | U pow = pow10[exponent - 1]; 383 | if (expSign) 384 | { 385 | // Negative exponent, if we get a fractional result, abort. 386 | if (significand % pow) 387 | return false; 388 | significand /= pow; 389 | } 390 | else static if (U.sizeof < ulong.sizeof) 391 | { 392 | // Multiply using a bigger result type 393 | ulong prod = ulong(significand) * pow; 394 | if (prod > U.max) 395 | return false; 396 | significand = cast(U) prod; 397 | } 398 | else 399 | { 400 | // If the multiply will overflow, abort. 
401 | bool overflowed; 402 | significand = mulu(significand, pow, overflowed); 403 | if (overflowed) 404 | return false; 405 | } 406 | } 407 | 408 | n = cast(N) significand; 409 | static if (isSigned!N && opt.minus) 410 | { 411 | if (significand > U(N.max) + sign) 412 | return false; 413 | if (sign) 414 | n = cast(N)-n; 415 | } 416 | else if (significand > N.max) 417 | return false; 418 | str = p; 419 | return true; 420 | } 421 | 422 | BigMantissa: 423 | if (significand <= (significand.max - digit) / 10) 424 | goto BigMantissaNotSoMuch; 425 | // assert(0, "Not implemented"); 426 | 427 | BigExponent: 428 | // assert(0, "Not implemented"); 429 | 430 | BigExponentAdjustForDecimalPoint: 431 | // assert(0, "Not implemented"); 432 | return false; 433 | } 434 | 435 | 436 | private template PowData(U, U base) 437 | { 438 | import std.range; 439 | 440 | // Largest power of `base` that fits into an integer of type U. 441 | enum powMax = { U v = 1; uint exp; while (v <= U.max / base) { v *= base; exp++; } return exp; }(); 442 | 443 | // Table of powers of `base`. (We skip base^0) 444 | static immutable U[powMax] pows = base.recurrence!((a, n) => base * a[n-1]).take(powMax).array; 445 | } 446 | 447 | 448 | static if (isAMD64 && (isLDC || isGDC)) 449 | { 450 | @nogc pure nothrow 451 | private size_t bigDiv(ref size_t a, size_t b) 452 | in 453 | { 454 | assert(b > size_t.max / 2, "High bit of divisor must be set."); 455 | } 456 | body 457 | { 458 | // Make sure that the division will yield exactly 32 or 64 significant bits. 
459 | import fast.internal.helpers; 460 | size_t lza = clz(a); 461 | version (LDC) 462 | { 463 | import ldc.llvmasm; 464 | a <<= lza; 465 | if (a >= b) { a >>= 1; lza--; } 466 | a = __asm!ulong(" 467 | xor %rax, %rax 468 | divq $2 469 | ", "={rax},{rdx},rm", a, b); 470 | } 471 | else version (GNU) 472 | { 473 | size_t dividend = a << lza; 474 | if (dividend >= b) { dividend >>= 1; lza--; } 475 | asm { " 476 | xor %%rax, %%rax 477 | divq %3 478 | " : "=&a" a, "=d" dividend : "d" dividend, "rm" b; } 479 | } 480 | return ++lza; 481 | } 482 | 483 | unittest 484 | { 485 | size_t a = size_t.max / 11; 486 | size_t b = size_t.max / 5; 487 | version (X86_64) 488 | { 489 | import fast.internal.helpers; 490 | long exp = clz(b); // Positive base-2 exponent 491 | b <<= exp; 492 | exp -= bigDiv(a, b); 493 | assert(a == 0xE8BA2E8BA2E8BA2AUL); 494 | assert(exp == -2); 495 | } 496 | } 497 | } 498 | 499 | 500 | /+ 501 | ╔══════════════════════════════════════════════════════════════════════════════ 502 | ║ ⚑ String Scanning and Comparison 503 | ╚══════════════════════════════════════════════════════════════════════════════ 504 | +/ 505 | 506 | /******************************************************************************* 507 | * 508 | * Compares a string of unknown length against a statically known key. 509 | * 510 | * This function also handles escapes and requires one or more terminator chars. 511 | * 512 | * Params: 513 | * C = Character with. 514 | * key = The static key string. 515 | * terminators = A list of code units that terminate the string. 516 | * special = A list of code units that are handled by the user callback. Use 517 | * this for escape string handling. Default is `null`. 518 | * p_str = Pointer to the string for the comparison. After the function call 519 | * it will be behind the last matching character. 520 | * callback = User callback to handle special escape characters if `special` 521 | * is non-empty. 
522 | * 523 | * Returns: 524 | * A code with following meanings: -1 = not equal, terminator character hit, 525 | * 0 = not equal, but string not exhausted, 1 = string equals key. 526 | * 527 | **************************************/ 528 | int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null) 529 | (ref const(C)* p_str, scope bool delegate(ref immutable(char)*, ref const(char)*) callback = null) 530 | in 531 | { 532 | assert(special.length == 0 || callback !is null); 533 | } 534 | body 535 | { 536 | import std.algorithm, std.range; 537 | 538 | static immutable byte[256] classify = 539 | iota(256).map!(c => terminators.canFind(c) ? byte(-1) : special.canFind(c) ? 1 : 0).array; 540 | 541 | immutable(C)* p_key = key.ptr; 542 | immutable C* e_key = p_key + key.length; 543 | 544 | while (p_key !is e_key) 545 | { 546 | int clazz = *p_str <= 0xFF ? classify[*p_str] : 0; 547 | 548 | if (clazz < 0) 549 | { 550 | return clazz; 551 | } 552 | else if (clazz == 0) 553 | { 554 | if (*p_str != *p_key) 555 | return clazz; 556 | 557 | p_str++; 558 | p_key++; 559 | } 560 | else if (clazz > 0) 561 | { 562 | if (!callback(p_key, p_str)) 563 | return 0; 564 | } 565 | } 566 | 567 | return classify[*p_str & 0xFF] < 0; 568 | } 569 | 570 | 571 | /* 572 | @nogc nothrow 573 | void fixedStringCompareSSE4() 574 | { 575 | enum words = key.length / 16; 576 | enum remainder = key.length % 16; 577 | enum contains0 = key.canFind('\0'); // For SSE4.2 string search. 578 | static assert(!contains0, "Not implemented"); 579 | 580 | size_t remaining = e - b; 581 | auto p = b; 582 | 583 | foreach (i; staticIota!(0, words)) 584 | { 585 | auto backup = p; 586 | p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid); 587 | p = backup; 588 | p.vpcmpistri!(char, key[16 * i .. 
16 * i + 16], Operation.equalElem, Polarity.negateValid); 589 | } 590 | } 591 | */ 592 | 593 | 594 | @forceinline @nogc nothrow pure 595 | void seekToAnyOf(string cs)(ref const(char)* p) 596 | { 597 | p.vpcmpistri!(char, sanitizeChars(cs), Operation.equalAnyElem); 598 | } 599 | 600 | 601 | @forceinline @nogc nothrow pure 602 | void seekToRanges(string cs)(ref const(char)* p) 603 | { 604 | p.vpcmpistri!(char, sanitizeRanges(cs), Operation.inRanges); 605 | } 606 | 607 | 608 | /******************************************************************************* 609 | * 610 | * Searches for a specific character known to appear in the stream and skips the 611 | * read pointer over it. 612 | * 613 | * Params: 614 | * c = the character 615 | * p = the read pointer 616 | * 617 | **************************************/ 618 | @forceinline @nogc nothrow pure 619 | void seekPast(char c)(ref const(char)* p) 620 | { 621 | p.vpcmpistri!(char, c.repeat(16).to!string, Operation.equalElem); 622 | p++; 623 | } 624 | 625 | 626 | /******************************************************************************* 627 | * 628 | * Skips the read pointer over characters that fall into any of up to 8 ranges 629 | * of characters. The first character in `cs` is the start of the first range, 630 | * the second character is the end. This is repeated for any other character 631 | * pair. A character falls into a range from `a` to `b` if `a <= *p <= b`. 632 | * 633 | * Params: 634 | * cs = the character ranges 635 | * p = the read pointer 636 | * 637 | **************************************/ 638 | @forceinline @nogc nothrow pure 639 | void skipCharRanges(string cs)(ref const(char)* p) 640 | { 641 | p.vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate); 642 | } 643 | 644 | 645 | /******************************************************************************* 646 | * 647 | * Skips the read pointer over all and any of the given characters. 
 *
 * Params:
 *   cs = the characters to skip over
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAllOf(string cs)(ref const(char)* p)
{
	p.vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate);
}


/*******************************************************************************
 *
 * Skips the read pointer over ASCII white-space comprising '\t', '\r', '\n' and
 * ' '.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAsciiWhitespace(ref const(char)* p)
{
	// Cheap checks first: a single blank followed by a non-white-space character.
	if (*p == ' ')
		p++;
	if (*p > ' ')
		return;
	p.skipAllOf!" \t\r\n";
}


/*******************************************************************************
 *
 * Sets the read pointer to the start of the next line.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipToNextLine(ref const(char)* p)
{
	// Stop at next \r, \n or \0.
	// (The ranges 0x01-0x09, 0x0B-0x0C and 0x0E-0xFF cover every other char value.)
	p.vpcmpistri!(char, "\x01\x09\x0B\x0C\x0E\xFF", Operation.inRanges, Polarity.negate);
	if (p[0] == '\r') p++;
	if (p[0] == '\n') p++;
}


/*
 * Prepares a character set for use as a `vpcmpistri` needle: asserts that the caller listed '\0'
 * and returns the remaining characters in ascending order with the '\0' removed.
 */
private enum sanitizeChars(string cs)
{
	import std.exception;

	bool has0 = false;
	foreach (c; cs) if (!c) { has0 = true; break; }
	assert(has0, "Parsers are required to also check for \0 when looking for specific chars.");

	char[] result;
	// Iterating over the values 1 .. 255 sorts the characters and drops '\0'.
	foreach (i; 1 .. 256) foreach (c; cs) if (i == c)
		result ~= c;
	return result.assumeUnique;
}


/*
 * Prepares a list of character ranges (start/end pairs) for use as a `vpcmpistri` needle: asserts
 * that one range starts at '\0' and rewrites such ranges to start at '\x01' or drops them if
 * they also end at '\0'.
 */
private enum sanitizeRanges(string cs)
{
	import std.exception;

	bool has0 = false;
	foreach (i; 0 .. cs.length / 2) if (!cs[2*i]) { has0 = true; break; }
	assert(has0, "Parsers are required to also check for \0 when looking for specific chars.");

	char[] result;
	foreach (i; 0 .. cs.length / 2)
	{
		if (cs[2*i])
			result ~= cs[2*i .. 2*i+2];
		else if (cs[2*i+1])
			result ~= ['\x01', cs[2*i+1]];
	}
	return result.assumeUnique;
}


// Comparison operation bits that are or'ed into the immediate `mode` operand of `vpcmpistri`.
private enum Operation
{
	equalAnyElem = 0b0_00_00_00,
	inRanges     = 0b0_00_01_00,
	equalElem    = 0b0_00_10_00,
	substrPos    = 0b0_00_11_00,
}


// Result polarity bits that are or'ed into the immediate `mode` operand of `vpcmpistri`.
private enum Polarity
{
	keep        = 0b0_00_00_00,
	negate      = 0b0_01_00_00,
	negateValid = 0b0_11_00_00,
}


/*
 * Advances `p` in steps of 16 bytes using the `pcmpistri` instruction with the compile-time
 * needle `cs` until the instruction reports an index other than 16 in ECX; that index is then
 * added to the pointer.
 *
 * The immediate operand is assembled from the element type `C`, the operation `op`, the polarity
 * `pol` and `lastIndex` (bit 6).
 * NOTE(review): bit 6 presumably selects the most instead of the least significant match - confirm
 * against the instruction reference.
 */
@forceinline @nogc nothrow pure
private void vpcmpistri(C, immutable(C[]) cs, Operation op, Polarity pol = Polarity.keep, bool lastIndex = false)
	(ref const(char)* p)
	if (is(C == char) || is(C == ubyte) || is(C == wchar) || is(C == ushort) || is(C == byte) || is(C == short))
{
	import fast.internal.helpers;

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53712
	// Element type bits of the immediate operand.
	static if (is(C == char) || is(C == ubyte))
		enum ct = 0b00;
	else static if (is(C == wchar) || is(C == ushort))
		enum ct = 0b01;
	else static if (is(C == byte))
		enum ct = 0b10;
	else
		enum ct = 0b11;

	enum mode = ct | op | pol | (!!lastIndex << 6);

	// Pointer-sized name of the count register, needed to add the index to the pointer.
	version (X86_64)
		enum creg = "rcx";
	else version (X86)
		enum creg = "ecx";
	else static assert(0, "Not implemented");

	version (LDC)
	{
		import ldc.llvmasm;

		p = __asm!(const(char*))("
			1:
			pcmpistri $2, ($1), $3
			add       $$16, $1
			cmp       $$16, %ecx
			je        1b
			sub       $$16, $1
			add       %" ~ creg ~ ", $1
			", "=r,0,K,x,~{ecx}", p, mode, SIMDFromString!cs);
	}
	else version (GNU)
	{
		asm { "
			1:
			pcmpistri %2, (%1), %3
			add       $16, %1
			cmp       $16, %%ecx
			je        1b
			sub       $16, %1
			add       %%" ~ creg ~ ", %1
			" : "=r" p : "0" p, "K" mode, "x" SIMDFromString!cs : "ecx"; }
	}
	else
	{
		alias csXMM = SIMDFromString!cs;
		version (D_InlineAsm_X86_64)
		{
			version (Posix)
			{
				// Naked functions: the address of `p` is taken from RDI.
				version (D_PIC) asm @nogc pure nothrow
				{
					naked;
					lea         RAX, csXMM;
					mov         RAX, [RAX];
					movdqu      XMM0, [RAX];
					mov         RAX, [RDI];
				L1:
					vpcmpistri  XMM0, [RAX], mode;
					add         RAX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         RAX, 16;
					add         RAX, RCX;
					mov         [RDI], RAX;
					ret;
				}
				else asm @nogc pure nothrow
				{
					naked;
					movdqa      XMM0, csXMM;
					mov         RAX, [RDI];
				L1:
					vpcmpistri  XMM0, [RAX], mode;
					add         RAX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         RAX, 16;
					add         RAX, RCX;
					mov         [RDI], RAX;
					ret;
				}
			}
			else static assert(0, "Not implemented");
		}
		else version (D_InlineAsm_X86)
		{
			version (Posix)
			{
				// Naked functions: the address of `p` is taken from EAX.
				version (D_PIC) asm @nogc pure nothrow
				{
					naked;
					mov         EDX, CS:csXMM[EBX];
					movdqu      XMM0, [EDX];
					mov         EDX, [EAX];
				L1:
					vpcmpistri  XMM0, [EDX], mode;
					add         EDX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         EDX, 16;
					add         EDX, ECX;
					mov         [EAX], EDX;
					ret;
				}
				else asm @nogc pure nothrow
				{
					naked;
					movdqa      XMM0, csXMM;
					mov         EDX, [EAX];
				L1:
					vpcmpistri  XMM0, [EDX], mode;
					add         EDX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         EDX, 16;
					add         EDX, ECX;
					mov         [EAX], EDX;
					ret;
				}
			}
			else static assert(0, "Not implemented");
		}
		else static assert(0, "Not implemented");
	}
}
--------------------------------------------------------------------------------
/source/fast/string.d:
--------------------------------------------------------------------------------
/**
 * Fast,
non-allocating string functions. 3 | * 4 | * Authors: 5 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 6 | * 7 | * Copyright: 8 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 9 | * 10 | * License: 11 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 12 | */ 13 | module fast.string; 14 | 15 | import core.bitop; 16 | import core.simd; 17 | import core.stdc.stdlib; 18 | 19 | version (GNU) import gcc.attribute; 20 | 21 | import std.algorithm; 22 | import std.range; 23 | import std.stdio; 24 | import std.string; 25 | import std.traits; 26 | 27 | import fast.buffer; 28 | 29 | 30 | /** 31 | * Splits a string in two around one or more compile-time known code units. 32 | * 33 | * Params: 34 | * match = An expression that matches all characters around which a split should occur. 35 | * str = The string to scan. 36 | * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 37 | * after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here. 38 | * splitter = If not $(D null), this pointer will receive a copy of the splitting char. 39 | * 40 | * Returns: 41 | * $(D true), iff a split occured. 42 | */ 43 | bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null) 44 | { 45 | immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length)); 46 | before = str[0 .. pos]; 47 | if (pos < str.length) { 48 | after = str[pos+1 .. $]; 49 | if (splitter) *splitter = str[pos]; 50 | return true; 51 | } 52 | after = null; 53 | return false; 54 | } 55 | 56 | /** 57 | * Similar to the overload for strings, this function works a little faster as it lacks boundary checks. 58 | * It assumes that one of the characters in $(D match) is actually contained in the string. 
59 | * 60 | * Params: 61 | * match = An expression that matches all characters around which a split should occur. 62 | * ptr = The string to scan. 63 | * before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here. 64 | * after = The pointer to the part after the split is stored here. 65 | * 66 | * Returns: 67 | * The char that caused the split. (From $(D match).) 68 | */ 69 | char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after) 70 | { 71 | immutable pos = SimdMatcher!match.find(str.ptr); 72 | before = ptr[0 .. pos]; 73 | after = ptr + pos + 1; 74 | return ptr[pos]; 75 | } 76 | 77 | 78 | /******************************************************************************* 79 | * 80 | * Finds the first occurrence of a set of compile-time known code units in a 81 | * string. While the algorithm is `O(n)` in relation to the count of given code 82 | * units, the overhead when using it on short strings weights more for only 1 or 83 | * 2 code units. 84 | * 85 | * Params: 86 | * match = An expression that matches all characters around which a split 87 | * should occur. 88 | * str = The string to search for a code unit. 89 | * 90 | * Returns: 91 | * If a match is found, the index into the string is returned. 92 | * Otherwise an invalid index is returned. Check with 93 | * `if (result < str.length)`. 
94 | * 95 | * See_Also: 96 | * split, 97 | * $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/, 98 | * The Generic SSE2 Loop) 99 | * 100 | * Example: 101 | * --- 102 | * // Check if there is a '/' or '\' in the string 103 | * auto pos = str.find!(`or(=/,=\)`); 104 | * if (pos < str.length) { } 105 | * --- 106 | **************************************/ 107 | size_t find(string match)(in char[] str) pure nothrow 108 | { 109 | return SimdMatcher!match.find(str.ptr, str.ptr + str.length); 110 | } 111 | 112 | /******************************************************************************* 113 | * 114 | * Same as the overload for strings, but with only a char*, making it faster as 115 | * it cannot do a boundary check. 116 | * 117 | * Sometimes when looking for a character it is helpful to append it as a 118 | * sentinel to the char buffer and then use this function instead of the slower 119 | * one that checks the boundary constantly. 120 | * 121 | * Example: 122 | * --- 123 | * // Find a ']' in a buffer of 1024 bytes using an additional sentinel. 
124 | * size_t length = 1024; 125 | * char[] buffer = new char[](length+1); 126 | * buffer[length] = ']'; 127 | * auto pos = buffer.ptr.find!("=]"); 128 | * if (pos < length) { // was an actual find before the sentinel } 129 | * --- 130 | **************************************/ 131 | inout(char)* find(string match)(inout(char*) ptr) pure nothrow 132 | { 133 | return SimdMatcher!match.find(ptr); 134 | } 135 | 136 | 137 | bool keyword1(string key)(in char[] str, 138 | scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null) 139 | { 140 | auto strPtr = str.ptr; 141 | auto keyPtr = key.ptr; 142 | auto keyEnd = keyPtr + key.length; 143 | 144 | while (keyPtr !is keyEnd) 145 | { 146 | while (*strPtr == '\\') 147 | if (!mismatcher(keyPtr, strPtr)) 148 | return false; 149 | 150 | if (*strPtr == '"' || *strPtr != *keyPtr) 151 | return false; 152 | 153 | strPtr++; 154 | keyPtr++; 155 | } 156 | return true; 157 | } 158 | 159 | 160 | bool keyword2(string key)(in char[] str, 161 | scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null) 162 | { 163 | version (LDC) import ldc.gccbuiltins_x86; 164 | 165 | /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when 166 | * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD 167 | * register. 168 | * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after, 169 | * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we 170 | * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties: 171 | * - It consists of as many SIMD words as the key is long. 
172 | * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using 173 | * disjunct prime factors for the key length and the SIMD word size. 174 | * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor 175 | * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that 176 | * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift). 177 | */ 178 | 179 | // 'key' length rounded up to next odd value is the number of SIMD words we need. 180 | enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ? 181 | align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation 182 | .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array; 183 | align(16) static immutable char[Word.sizeof] dquote = '"'; 184 | align(16) static immutable char[Word.sizeof] bslash = '\\'; 185 | enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }(); 186 | 187 | const(char)* strPtr = str.ptr; 188 | immutable(char)* keyPtr = keyData.ptr; 189 | auto bsWord = *cast(immutable Word*) &bslash; 190 | auto dqWord = *cast(immutable Word*) &dquote; 191 | 192 | do 193 | { 194 | // writeln("enter loop"); 195 | // Calculate SSE word boundary before 'str' 196 | size_t strOff = cast(size_t) strPtr % Word.sizeof; 197 | Word strWord = *cast(Word*) (strPtr - strOff); 198 | size_t keyPos = keyPtr - keyData.ptr; 199 | size_t keyOff = (strOff - keyPos) % Word.sizeof; 200 | Word keyWord = (cast(Word*) keyData.ptr)[keyOff * mul % keyLenOdd + (keyOff + keyPos) / Word.sizeof]; 201 | 202 | // Escape seqences have priority. 'key' may contain backslashes as part of the text, but in 'str' a backslash 203 | // at the same position is actually the begin of the escape sequence "\\". 
204 | Word bsMask = strWord.maskEqual(bsWord); 205 | // If after processing backslashes there is a double-quote in 'str' we must not match it with a double-quote in 206 | // 'key', since it is the delimiter of 'str'. 207 | Word dqMask = strWord.maskEqual(dqWord); 208 | // How many bytes of 'key' and 'str' match in our 'Word' ? 209 | Word missMask = strWord.maskNotEqual(keyWord); 210 | // Merge mismatch, backslash and double-quote masks and move them into a non-SSE register. 211 | Word allMasks = or(missMask, or(bsMask, dqMask)); 212 | uint skip = bsf((__builtin_ia32_pmovmskb128(allMasks) | 1 << Word.sizeof) >> strOff); 213 | // writeln(keyPtr[0 .. 5]); 214 | // writeln(strPtr[0 .. 5]); 215 | // writeln(skip); 216 | strPtr += skip; 217 | keyPtr += skip; 218 | 219 | // Have we matched enough bytes to reach the end of 'key' ? 220 | if (keyPtr - keyData.ptr >= key.length) 221 | return true; 222 | 223 | // When we find a mismatch between 'key' and 'str', we try to call a provided helper function. 224 | // It may decode escape sequences in 'str' and recover from the state. 225 | // If that fails we accept the mismatch and return 'false'. 
226 | // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 227 | // const(char*) strPtrOld = strPtr; 228 | // immutable(char*) keyPtrOld = keyPtr; 229 | if (strOff + skip < Word.sizeof && !(mismatcher && mismatcher(keyPtr, strPtr))) 230 | { 231 | // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 232 | return false; 233 | } 234 | // writefln("Key: %s, Str %s", *keyPtr, *strPtr); 235 | } 236 | while (keyPtr - keyData.ptr < key.length); 237 | 238 | return true; 239 | } 240 | 241 | 242 | bool keyword3(string key)(in char[] str, bool function(ref immutable(char)*, ref const(char)*) mismatcher = null) 243 | { 244 | version (LDC) import ldc.gccbuiltins_x86; 245 | version (GNU) import gcc.builtins; 246 | 247 | /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when 248 | * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD 249 | * register. 250 | * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after, 251 | * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we 252 | * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties: 253 | * - It consists of as many SIMD words as the key is long. 254 | * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using 255 | * disjunct prime factors for the key length and the SIMD word size. 256 | * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor 257 | * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that 258 | * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift). 
259 | */ 260 | 261 | // 'key' length rounded up to next odd value is the number of SIMD words we need. 262 | enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ? 263 | align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation 264 | .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array; 265 | align(16) static immutable char[Word.sizeof] dqbs = `\"""""""""""""""`; 266 | enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }(); 267 | 268 | // Calculate SSE word boundary before 'str' 269 | uint off = cast(uint) str.ptr % Word.sizeof; 270 | // SSE aligned pointer <= 'str.ptr'. 271 | auto strPtr = cast(const(Word)*) (str.ptr - off); 272 | auto keyPtr = cast(immutable(Word)*) keyData.ptr + off * mul % keyLenOdd; 273 | auto keyStart = cast(immutable(char)*) keyPtr + off; 274 | Word strWord = *strPtr; 275 | 276 | LoadKey: 277 | auto keyEnd = keyStart + key.length; 278 | 279 | Compare: 280 | // Get bitmask of special characters in 'str'. 281 | uint escMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*cast(Word*) &dqbs, strWord, 0b_0_00_00_00)); 282 | // writeln("Called a"); 283 | // Get bitmask of characters from 'key' and 'str' that don't match. 284 | uint missMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*keyPtr, strWord, 0b_0_01_10_00)); 285 | // writeln("Called b"); 286 | // Create a merged mask for both and an additional bit at position 16, serving as a delimiter for 'bsf'. 287 | uint mask = (escMask | missMask) & (uint.max << off); 288 | 289 | // No bit set means all 16 bytes are equal and there are no escape characters. That's as good as it gets. 290 | if (!mask) 291 | { 292 | // Jump forward by a word size and see if we successfully compared all bytes to the end of our 'key'. 293 | keyPtr += 16; 294 | if (cast(immutable(char)*) keyPtr >= keyEnd) 295 | return true; 296 | // Otherwise continue with the next set of 16 bytes. 
297 | strPtr += 16; 298 | off = 0; 299 | goto Compare; 300 | } 301 | 302 | // One of two cases ... 303 | off = bsf(mask); 304 | 305 | // 1) Did the mismatch occur past the end of 'key' ? Then we compared succesfully. 306 | if (cast(immutable(char)*) keyPtr + off >= keyEnd) 307 | return true; 308 | 309 | // 2) It must be a special character or actual mismatch, let 'mismatcher' decide. 310 | // writefln("Skipping: %s", (cast(const(char)*) strPtr)[0 .. off]); 311 | auto strChP = cast(const(char)*) strPtr + off; 312 | auto strChPOld = strChP; 313 | auto keyChP = cast(immutable(char)*) keyPtr + off; 314 | bool goodToGo = mismatcher(keyChP, strChP); 315 | 316 | // writefln("Mismatcher used %s key chars, %s str chars and returned: %s", keyAdd, strAdd, goodToGo); 317 | if (keyChP >= keyEnd) 318 | return true; 319 | if (!goodToGo) 320 | return false; 321 | 322 | // Arriving here we just decoded an escape sequence and have to adjust our pointers. 323 | auto keyPos = keyChP - keyStart; 324 | off += strChP - strChPOld; 325 | if (off >= 16) 326 | { 327 | strPtr += off / 16; 328 | strWord = *strPtr; 329 | off %= 16; 330 | } 331 | auto baseOff = (off - keyPos) & 15; 332 | keyPtr = cast(immutable(Word)*) keyData.ptr + baseOff * mul % keyLenOdd; 333 | keyStart = cast(immutable(char)*) keyPtr + baseOff; 334 | keyPtr += (baseOff + keyPos) / 16; 335 | goto LoadKey; 336 | } 337 | 338 | 339 | size_t equalLength(scope inout(char[]) a, scope inout(char[]) b) 340 | { 341 | return 0; 342 | } 343 | 344 | 345 | /******************************************************************************* 346 | * 347 | * Concatenates a series of strings. 348 | * 349 | * Params: 350 | * Strs = a series of string symbols or literals to be concatenated 351 | * buffer = optional buffer, implicitly allocated 352 | * 353 | * Returns: 354 | * A $(D TempBuffer!char) containing the concatenated string. It is kept alive 355 | * for as long as it is in scope. 
356 | * 357 | **************************************/ 358 | nothrow @nogc 359 | template concat(Strs...) 360 | { 361 | import core.stdc.string : memcpy; 362 | import fast.internal.helpers; 363 | 364 | enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1"; 365 | 366 | auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null) 367 | { 368 | immutable length = mixin(allocExpr); 369 | auto result = TempBuffer!char( 370 | (cast(char*) (buffer is null ? malloc(length) : buffer))[0 .. length - 1], 371 | buffer is null); 372 | 373 | char* p = result.ptr; 374 | foreach (const(char[]) str; Strs) 375 | { 376 | memcpy (p, str.ptr, str.length); 377 | p += str.length; 378 | } 379 | *p = '\0'; 380 | 381 | return result; 382 | } 383 | } 384 | 385 | 386 | 387 | private: 388 | 389 | template SimdMatcher(string match) 390 | { 391 | import core.simd; 392 | import std.string; 393 | import fast.internal.sysdef; 394 | 395 | static if (match != strip(match)) { 396 | // Reinstanciate the template with any whitespace stripped from the match string. 397 | alias SimdMatcher = SimdMatcher!(strip(match)); 398 | } else { 399 | /* For SSE in DMD I am blocked by: 400 | * https://d.puremagic.com/issues/show_bug.cgi?id=8047 401 | * https://d.puremagic.com/issues/show_bug.cgi?id=11585 402 | */ 403 | enum isUsingSSE = hasSSE2 && (isLDC || isGDC); 404 | enum isSingleChar = match.length == 2 && match[0] == '='; 405 | static if (isSingleChar) enum singleChar = match[1]; 406 | static if (isUsingSSE) { 407 | // Using MOVMSKB we get one boolean per bit in a 16-bit value. 408 | alias Word = ubyte16; 409 | alias Mask = uint; 410 | enum sparseness = 1; 411 | } else { 412 | // The fallback is to work with machine words and tricky bit-twiddling algorithms. 413 | // As a result we get machine words where matching bytes have the high bit set. 
414 | alias Word = size_t; 415 | alias Mask = size_t; 416 | enum sparseness = 8; 417 | } 418 | enum matchCode = genMatchCode!isUsingSSE("*wp"); 419 | // Used in generic comparison code 420 | enum lows = size_t.max / 0xFF; 421 | enum highs = lows * 0x80; 422 | 423 | enum betterUseTables = (isDMD && matchCode.complexity >= 4) 424 | || (isGDC && matchCode.complexity >= 18) 425 | || (isLDC && matchCode.complexity >= 18); 426 | 427 | static if (betterUseTables) 428 | { 429 | immutable matchTable = genMatchTable(); 430 | 431 | size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc 432 | { 433 | import core.stdc.string; 434 | import fast.internal.helpers; 435 | 436 | // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 437 | static if (isSingleChar) { 438 | return memchr(b, singleChar, e - b) - b; 439 | } else { 440 | if (b >= e) return 0; 441 | 442 | size_t off = cast(size_t) b % ushort.sizeof; 443 | ushort* wp = cast(ushort*) (b - off); 444 | ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof); 445 | if (off) { 446 | // Throw away bytes from before start of the string 447 | if (auto mask = matchTable[*wp] >> off) 448 | return bsf(mask); 449 | if (++wp is we) return size_t.max; 450 | } 451 | 452 | do { 453 | if (auto mask = matchTable[*wp]) 454 | return bsf(mask) + (cast(char*) wp - b); 455 | } while (++wp !is we); 456 | return size_t.max; 457 | } 458 | } 459 | 460 | inout(char)* find(scope inout(char*) b) pure nothrow @nogc 461 | { 462 | import core.stdc.string; 463 | // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 464 | static if (isSingleChar && singleChar == '\0') { 465 | return strlen(b) + b; 466 | } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code. 
467 | return memchr(b, singleChar, e - b) - b; 468 | } else { 469 | size_t off = cast(size_t) b % ushort.sizeof; 470 | ushort* wp = cast(ushort*) (b - off); 471 | if (off) { 472 | // Throw away bytes from before start of the string 473 | if (auto mask = matchTable[*wp] >> off) 474 | return b + bsf(mask); 475 | } 476 | 477 | do { 478 | if (auto mask = matchTable[*wp]) 479 | return cast(inout(char)*) wp + bsf(mask); 480 | } while (true); 481 | } 482 | } 483 | } 484 | else 485 | { 486 | import core.stdc.string, core.simd; 487 | import std.simd; 488 | import fast.internal.helpers; 489 | 490 | version (LDC) { 491 | import ldc.gccbuiltins_x86; 492 | } else version (GNU) { 493 | import gcc.builtins; 494 | } 495 | 496 | size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow 497 | { 498 | // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 499 | static if (isSingleChar) { 500 | return memchr(b, singleChar, e - b) - b; 501 | } else { 502 | if (b >= e) return 0; 503 | 504 | size_t off = cast(size_t) b % Word.sizeof; 505 | Word* wp = cast(Word*) (b - off); 506 | Word* we = cast(Word*) alignPtrNext(e, Word.sizeof); 507 | if (off) { 508 | // Throw away bytes from before start of the string 509 | if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 510 | return bsf(mask) / sparseness; 511 | if (++wp is we) return size_t.max; 512 | } 513 | 514 | do { 515 | if (auto mask = mixin(matchCode.code)) 516 | return bsf(mask) / sparseness + (cast(char*) wp - b); 517 | } while (++wp !is we); 518 | return size_t.max; 519 | } 520 | } 521 | 522 | inout(char)* find(scope inout(char*) b) pure nothrow 523 | { 524 | // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins. 525 | static if (isSingleChar && singleChar == '\0') { 526 | return strlen(b) + b; 527 | } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code. 
528 | return cast(inout(char*)) memchr(b, singleChar, size_t.max); 529 | } else { 530 | size_t off = cast(size_t) b % Word.sizeof; 531 | Word* wp = cast(Word*) (b - off); 532 | if (off) { 533 | // Throw away bytes from before start of the string 534 | if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) 535 | return b + bsf(mask) / sparseness; 536 | ++wp; 537 | } 538 | 539 | do { 540 | if (auto mask = mixin(matchCode.code)) 541 | return cast(inout(char)*) wp + bsf(mask) / sparseness; 542 | ++wp; 543 | } while (true); 544 | } 545 | } 546 | } 547 | 548 | enum genMatchCode(bool sse)(string var) 549 | { 550 | import std.ascii, std.exception; 551 | 552 | struct Code { 553 | string code; 554 | size_t complexity = 1; 555 | } 556 | Code result; 557 | string[] nesting; 558 | 559 | with (result) { 560 | for (size_t i = 0; i < match.length;) { 561 | string handleChar() { 562 | char c = match[i+1]; 563 | switch (c) { 564 | case 0: 565 | return `'\0'`; 566 | case '\\': 567 | return `'\\'`; 568 | case "'"[0]: 569 | return `'\''`; 570 | case '\t': 571 | return `'\t'`; 572 | case '\r': 573 | return `'\r'`; 574 | case '\n': 575 | return `'\n'`; 576 | default: 577 | return `'` ~ c ~ `'`; 578 | } 579 | } 580 | 581 | if (match[i] == '=') { 582 | static if (sse) { 583 | code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 584 | } else if (match[i+1] == 0) { 585 | code ~= "" ~ var ~ " - lows & ~" ~ var; 586 | } else { 587 | code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")"; 588 | } 589 | i += 2; 590 | } else if (match[i] == '!') { 591 | static if (sse) { 592 | code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 593 | } else if (match[i+1] == 0) { 594 | code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")"; 595 | } else { 596 | code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))"; 597 | } 598 | i += 2; 599 | 
} else if (match[i] == '<') { 600 | static if (sse) 601 | code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")"; 602 | else 603 | code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 604 | i += 2; 605 | } else if (match[i] == '>') { 606 | static if (sse) 607 | code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))"; 608 | else 609 | code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")"; 610 | i += 2; 611 | } else if (match[i .. $].startsWith("or(")) { 612 | static if (sse) { 613 | nesting ~= ", "; 614 | code ~= "or("; 615 | } else { 616 | nesting ~= " | "; 617 | } 618 | complexity++; 619 | i += 3; 620 | } else if (match[i .. $].startsWith("and(")) { 621 | static if (sse) { 622 | nesting ~= ", "; 623 | code ~= "and("; 624 | } else { 625 | nesting ~= " & "; 626 | } 627 | complexity++; 628 | i += 4; 629 | } else if (match[i] == ',') { 630 | enforce(nesting.length, "',' on top level"); 631 | code ~= nesting[$-1]; 632 | i++; 633 | } else if (match[i] == ')') { 634 | enforce(nesting.length, "Unbalanced closing parenthesis"); 635 | nesting.length--; 636 | static if (sse) { 637 | code ~= ")"; 638 | } 639 | i++; 640 | } else if (match[i].isWhite) { 641 | i++; 642 | } else { 643 | throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i])); 644 | } 645 | } 646 | static if (sse) { 647 | code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")"; 648 | } else { 649 | code = "(" ~ code ~ ") & highs"; 650 | } 651 | } 652 | return result; 653 | } 654 | 655 | enum genMatchTable() 656 | { 657 | ubyte[1 << 16] table; 658 | ubyte[256] lut; 659 | foreach (uint i; 0 .. 256) { 660 | lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1; 661 | } 662 | foreach (i; 0 .. 256) foreach (k; 0 .. 
256) { 663 | table[i * 256 + k] = cast(ubyte) (lut[i] << 1 | lut[k]); 664 | } 665 | return table; 666 | } 667 | } 668 | } 669 | 670 | /** 671 | * Template for searching a fixed value in a word sized memory block (i.e. 1, 2, 4 or 8 bytes). 672 | * 673 | * Params: 674 | * value = The value you are looking for. 675 | * word = The data word to search for the value. 676 | * 677 | * Returns: 678 | * non-zero, iff the value is contained in the data word. 679 | * Specifically it returns 0x80 for every byte of the word that was a match and 0x00 for others. 680 | * 681 | * See_Also: 682 | * http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord 683 | */ 684 | T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow 685 | if (isUnsigned!T) 686 | { 687 | // This value results in 0x01 for each byte of a T value. 688 | enum lows = T.max / 0xFF; 689 | static if (value == 0) { 690 | enum highs = lows * 0x80; 691 | return (word - lows) & ~word & highs; 692 | } else { 693 | enum xor = lows * value; 694 | return maskEqualGeneric!0(word ^ xor); 695 | } 696 | } 697 | 698 | T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow 699 | if (isUnsigned!T && value <= 128) 700 | { 701 | enum lows = T.max / 0xFF; 702 | enum highs = lows * 0x80; 703 | return (word - lows * value) & ~word & highs; 704 | } 705 | 706 | T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow 707 | if (isUnsigned!T && value <= 127) 708 | { 709 | enum lows = T.max / 0xFF; 710 | enum highs = lows * 0x80; 711 | return (word + lows * (127 - value) | word) & highs; 712 | } 713 | 714 | T orGeneric(T)(T a, T b) @safe pure nothrow 715 | if (isUnsigned!T) 716 | { 717 | return a | b; 718 | } 719 | -------------------------------------------------------------------------------- /source/fast/unicode.d: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * 3 | * 
Functions to work with the Unicode Transformation Format. 4 | * 5 | * Grapheme clusters: 6 | * A grapheme cluster is roughly speaking what the user would perceive as the smallest unit in a 7 | * writing system. Their count can be thought of as a caret position in a text editor. In 8 | * particular at grapheme cluster level, different normalization forms (NFC, NFD) become 9 | * transparent. The default definition used here is independent of the user's locale. 10 | * 11 | * Authors: 12 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 13 | * 14 | * Copyright: 15 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise) 16 | * 17 | * License: 18 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0) 19 | * 20 | **************************************************************************************************/ 21 | module fast.unicode; 22 | 23 | import fast.internal.unicode_tables; 24 | import fast.internal.sysdef; 25 | import std.simd; 26 | 27 | 28 | /******************************************************************************* 29 | * 30 | * Enumeration for the Unicode "General Category" used to roughly classify 31 | * codepoints into letters, punctuation etc. 32 | * 33 | **************************************/ 34 | alias GeneralCategory = DerivedGeneralCategory.Enum; 35 | 36 | 37 | /******************************************************************************* 38 | * 39 | * A customizable structure providing information on a code point. It consists 40 | * of a Unicode `property` in the form of an `enum` (e.g. `GeneralCategory`) and 41 | * a `length` in bytes of the code point in UTF-8. 42 | * 43 | **************************************/ 44 | struct CodePointInfo(Enum) 45 | { 46 | alias property this; 47 | size_t length; 48 | Enum property; 49 | } 50 | 51 | 52 | /******************************************************************************* 53 | * 54 | * Counts the number of grapheme clusters (character count) in a UTF string. 
55 | * 56 | * This function uses "extended grapheme clusters" as defined in Unicode: 57 | * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries 58 | * 59 | * When invalid byte sequences are encountered, each byte that does not make up 60 | * a code point will be counted as one grapheme as visual representations of 61 | * such broken strings will often show a square with the hexadecimal byte value 62 | * in them. 63 | * 64 | * Params: 65 | * str = the UTF-8 string 66 | * 67 | * Returns: 68 | * the number of grapheme clusters 69 | * 70 | **************************************/ 71 | @nogc @trusted pure nothrow size_t 72 | countGraphemes(scope const(char)[] str) 73 | { 74 | enum numValues = GraphemeBreakProperty.Enum.max + 1; 75 | static immutable graphemeBreakRules = 76 | { 77 | // GB999 78 | byte[numValues][numValues] graphemeBreaks = true; 79 | with (GraphemeBreakProperty.Enum) 80 | { 81 | // GB12 + GB13 (special handling) 82 | foreach (i; 0 .. numValues) 83 | graphemeBreaks[i][Regional_Indicator] = -1; 84 | // GB11 85 | graphemeBreaks[ZWJ][Glue_After_Zwj] = false; 86 | graphemeBreaks[ZWJ][E_Base_GAZ] = false; 87 | // GB10 (special handling) 88 | graphemeBreaks[E_Base] [E_Modifier] = false; 89 | graphemeBreaks[E_Base_GAZ][E_Modifier] = false; 90 | graphemeBreaks[Extend] [E_Modifier] = -1; 91 | // GB9b 92 | foreach (i; 0 .. numValues) 93 | graphemeBreaks[Prepend][i] = false; 94 | // GB9a 95 | foreach (i; 0 .. numValues) 96 | graphemeBreaks[i][SpacingMark] = false; 97 | // GB9 98 | foreach (i; 0 .. 
numValues) 99 | { 100 | graphemeBreaks[i][Extend] = false; 101 | graphemeBreaks[i][ZWJ] = false; 102 | } 103 | graphemeBreaks[E_Base] [Extend] = -1; 104 | graphemeBreaks[E_Base_GAZ][Extend] = -1; 105 | // GB8 106 | graphemeBreaks[LVT][T] = false; 107 | graphemeBreaks[T] [T] = false; 108 | // GB7 109 | graphemeBreaks[LV][V] = false; 110 | graphemeBreaks[LV][T] = false; 111 | graphemeBreaks[V] [V] = false; 112 | graphemeBreaks[V] [T] = false; 113 | // GB6 114 | graphemeBreaks[L][L] = false; 115 | graphemeBreaks[L][V] = false; 116 | graphemeBreaks[L][LV] = false; 117 | graphemeBreaks[L][LVT] = false; 118 | // GB5 119 | foreach (i; 0 .. numValues) 120 | { 121 | graphemeBreaks[i][Control] = true; 122 | graphemeBreaks[i][CR] = true; 123 | graphemeBreaks[i][LF] = true; 124 | } 125 | // GB4 126 | foreach (i; 0 .. numValues) 127 | { 128 | graphemeBreaks[Control][i] = true; 129 | graphemeBreaks[CR] [i] = true; 130 | graphemeBreaks[LF] [i] = true; 131 | } 132 | // GB3 133 | graphemeBreaks[CR][LF] = false; 134 | // Additional homebrew top level rule to break before and after invalid characters 135 | foreach (i; 0 .. numValues) 136 | { 137 | graphemeBreaks[i][__] = true; 138 | graphemeBreaks[__][i] = true; 139 | } 140 | } 141 | return graphemeBreaks; 142 | }(); 143 | 144 | size_t graphemeCount = 0; 145 | auto p = str.ptr; 146 | auto graphemeStart = p; 147 | GraphemeBreakProperty.Enum last, next; 148 | bool riEven, inEmojiBaseExtension; 149 | 150 | @noinline @safe @nogc pure nothrow bool 151 | complexRules() 152 | { 153 | pragma(inline, false); 154 | with (GraphemeBreakProperty.Enum) 155 | { 156 | if (next == Regional_Indicator) 157 | { 158 | // For GB12 + GB13 we need break only after a complete country code (2 indicators). 
159 | if (last == Regional_Indicator) 160 | return riEven = !riEven; 161 | riEven = true; 162 | return false; 163 | } 164 | else if (next == Extend) 165 | { 166 | inEmojiBaseExtension = true; 167 | return false; 168 | } 169 | else if (inEmojiBaseExtension) 170 | { 171 | return inEmojiBaseExtension = false; 172 | } 173 | return true; 174 | } 175 | } 176 | 177 | @forceinline void 178 | graphemeCountImpl(S)(ref S str) 179 | { 180 | version (LDC) pragma(inline, true); 181 | auto cpi = getProperty!GraphemeBreakProperty(str); 182 | auto next = cpi.property; 183 | byte isBoundary = graphemeBreakRules[last][next]; 184 | if (isBoundary < 0 ? complexRules() : isBoundary) 185 | { 186 | graphemeCount++; 187 | static if (is(S == const(char)*)) 188 | graphemeStart = str; 189 | else 190 | graphemeStart = str.ptr; 191 | inEmojiBaseExtension = false; 192 | } 193 | static if (is(S == const(char)*)) 194 | str += cpi.length; 195 | else 196 | str = str[cpi.length..$]; 197 | last = next; 198 | } 199 | 200 | if (str.length >= 4) 201 | { 202 | const e = str.ptr + str.length - 4; 203 | do 204 | graphemeCountImpl(p); 205 | while (p <= e); 206 | str = str[p - str.ptr..$]; 207 | } 208 | while (str.length) 209 | graphemeCountImpl(str); 210 | return graphemeCount; 211 | } 212 | 213 | 214 | /******************************************************************************* 215 | * 216 | * Retrieves the "General Category" of the first code point in some UTF-8 217 | * string. For broken UTF-8, the property is set to `GeneralCategory.__` (`0`). 218 | * 219 | * Params: 220 | * str = the UTF-8 encoded text, which must not be empty 221 | * 222 | * Returns: 223 | * a code point information struct consisting of a the fields `property`, 224 | * containing the `GeneralCategory` enumeration and the `length` of the code 225 | * point in bytes. 
 *
 **************************************/
@property @safe @nogc pure nothrow CodePointInfo!GeneralCategory
generalCategory(scope const(char)[] str)
{
	return getProperty!DerivedGeneralCategory(str);
}
unittest
{
	assert("क".generalCategory == GeneralCategory.Other_Letter);
	assert("̸".generalCategory == GeneralCategory.Nonspacing_Mark);
	assert("\xFF".generalCategory == GeneralCategory.__);
}



private:

/*
 * Looks up a Unicode property for the first code point of `str` and returns it together with the
 * length of that code point in UTF-8 code units.
 *
 * `Property` supplies the enumeration `Property.Enum` and the lookup tables `level0` to `level3`,
 * each indexed by the next UTF-8 code unit. A table value up to `Enum.max` is the final property;
 * a larger value selects a table of the next level.
 *
 * `S` is either a slice (checked against truncated sequences) or a raw pointer (unchecked; up to 4
 * bytes are read).
 *
 * For an invalid sequence the result has length 1 and property 0.
 */
@forceinline pure @nogc nothrow auto
getProperty(Property, S)(scope S str) if (is(S == const(char)*) || is(S == const(char)[]))
in
{
	static if (is(S == const(char)[]))
		assert(str.length != 0, "No code units passed in.");
}
out
{
	assert(__result <= Property.Enum.max);
}
body
{
	version (LDC) pragma(inline, true);
	import fast.internal.helpers;

	alias Enum = Property.Enum;
	alias CPI = CodePointInfo!Enum;
	// Fast path for ASCII.
	size_t idx = Property.level0[0][str[0]];
	if (byte(str[0]) >= 0) return CPI(1, cast(Enum)idx);
	// On multi-byte sequences, set the length to 1 for invalid sequences (idx == 0).
	// The number of leading one bits in the first code unit is the encoded length.
	size_t length = clz(str[0] ^ 0xFFu) - 24;
	// Safely return invalid code point of 1 byte length if string exhausted.
	static if (is(S == const(char)[]))
		if (length > str.length)
			return CPI(1, cast(Enum)0);
	// Otherwise use lookup table hierarchy to determine if code units form a valid code point
	if (idx > Enum.max) {
		idx = Property.level1[idx - Enum.max - 1][str[1]];
		if (idx > Enum.max) {
			idx = Property.level2[idx - Enum.max - 1][str[2]];
			if (idx > Enum.max)
				idx = Property.level3[idx - Enum.max - 1][str[3]];
		}
	}
	if (idx)
		return CPI(length, cast(Enum)idx);
	else
		return CPI(1, cast(Enum)0);
}

--------------------------------------------------------------------------------
/source/unicode/generator.d:
--------------------------------------------------------------------------------
/***************************************************************************************************
 *
 * Helper program to generate the lookup tables required for certain Unicode algorithms.
 * This code is conforming with Unicode 10.0.0.
/**************************************************************************************************
 *
 * Authors:
 *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 *
 * Copyright:
 *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 *
 * License:
 *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 *
 **************************************************************************************************/
module unicode.generator;
import std.conv;
import std.exception;
import core.bitop;
import std.stdio;
import std.string;
import std.algorithm;
import std.meta;
import std.path;

/// Kind of property a data file is parsed as. Only `enumeration` is implemented by this generator.
enum PropertyType
{
	catalog, enumeration, binary, string, numeric, miscellaneous
}

/// A single name/value pair attached to a code point.
struct Property
{
	string name;
	string value;
}

/// Everything known about one code point.
struct Entry
{
	/// `true` once a default or an explicit data line has covered this code point.
	bool isSet = false;
	Property[] properties;
}

/// One parsed data line: a code point range plus the remaining `;` separated fields.
struct Line
{
	uint rangeStart;
	/// Exclusive end of the range (the parser stores the inclusive end from the file plus one).
	uint rangeEnd;
	string[] properties;
}

/**
 * In-memory form of one Unicode Character Database text file together with the code generator
 * that turns it into byte indexed lookup tables.
 */
struct UnicodeCharacterDatabase
{
	PropertyType type;
	/// One entry per code point, `0 .. 0x110000`.
	Entry[] entries;
	/// Maps each enumeration value name to its ordinal. The `null` key is always ordinal 0.
	size_t[string] enumerationValues;
	/// Base name of the parsed file without ".txt"; used as the name of the generated struct.
	string varName;

	/**
	 * Parses `filename` and fills `entries`.
	 *
	 * Lines starting with "# @missing: " provide defaults, which are applied first so that
	 * regular data lines override them. A "# Property: NAME" line switches the parser into a
	 * mode where every following "# NAME=VALUE" comment replaces the value field of the data
	 * lines after it with VALUE.
	 *
	 * Params:
	 *   filename = path of the UCD text file
	 *   type     = how to interpret the file; anything but `PropertyType.enumeration` asserts
	 *
	 * Throws:
	 *   `Exception` (via `enforce`) on malformed ranges, on lines with an unexpected number of
	 *   fields and when a code point is left without a value.
	 */
	this(string filename, PropertyType type)
	{
		import std.algorithm;
		import std.stdio;
		import std.uni;

		this.type = type;
		this.entries = new Entry[](0x110000);
		this.enumerationValues[null] = 0;
		this.varName = baseName(filename, ".txt");
		Line[] defaults;
		Line[] actuals;
		bool abbreviates = false;
		string enumOverridePrefix;
		string enumOverride;

		foreach (line; File(filename).byLine())
		{
			bool isDefault = false;
			char[] code;
			Line data;

			// Special @missing line syntax ?
			static immutable isMissingStr = "# @missing: ";
			static immutable propNameStr = "# Property: ";
			if (line.startsWith(isMissingStr))
			{
				isDefault = true;
				code = line[isMissingStr.length .. $];
			}
			else if (line.startsWith(propNameStr))
			{
				abbreviates = true;
				// Strip so that trailing white-space (e.g. a '\r') cannot prevent later matches.
				enumOverridePrefix = "# " ~ strip!isWhite(line[propNameStr.length .. $]).idup ~ "=";
			}
			else if (abbreviates && line.startsWith(enumOverridePrefix))
			{
				enumOverride = strip!isWhite(line[enumOverridePrefix.length .. $]).idup;
			}
			else
			{
				// Split between code and comment section
				auto commentSplit = findSplit(line, "#");
				code = commentSplit[0];
			}
			code = strip!isWhite(code);
			if (code.length == 0)
				continue;

			uint fieldIdx = 0;
			foreach (field; splitter(code, ';'))
			{
				field = strip!isWhite(field);
				switch (fieldIdx)
				{
					case 0: // Code point(s)
						auto range = findSplit(field, "..");
						data.rangeStart = to!uint(range[0], 16);
						data.rangeEnd = range[1] == ".." ? to!uint(range[2], 16) : data.rangeStart;
						enforce(data.rangeEnd <= 0x10FFFF);
						enforce(data.rangeStart <= data.rangeEnd);
						data.rangeEnd++; // make the end exclusive
						break;
					default:
						string ifield = enumOverride !is null ? enumOverride : field.idup;
						data.properties ~= ifield;
						if (type == PropertyType.enumeration)
						{
							if (ifield !in enumerationValues)
							{
								// Read the length first: inserting and reading the same
								// associative array in one expression depends on evaluation order.
								immutable nextOrdinal = enumerationValues.length;
								enumerationValues[ifield] = nextOrdinal;
							}
						}
				}
				fieldIdx++;
			}
			if (type == PropertyType.enumeration)
				enforce(fieldIdx >= 2);
			else assert(0, "Not implemented");

			if (isDefault)
				defaults ~= data;
			else
				actuals ~= data;
		}

		// Defaults go first, so explicit data lines overwrite them.
		foreach (set; [defaults, actuals])
		{
			foreach (ref definition; set)
			{
				final switch (type) with (PropertyType)
				{
					case catalog:
						assert(0, "Not implemented");
					case enumeration:
					{
						enforce(definition.properties.length == 1);
						// One property list per range, shared by all of its code points.
						// Nothing in this module mutates it afterwards.
						auto props = [Property(null, definition.properties[0])];
						foreach (cp; definition.rangeStart .. definition.rangeEnd)
						{
							entries[cp].properties = props;
							entries[cp].isSet = true;
						}
						break;
					}
					case binary:
					case string:
					case numeric:
					case miscellaneous:
						assert(0, "Not implemented");
				}
			}
		}

		foreach (cp; 0 .. 0x110000)
			enforce(entries[cp].isSet, format("%s: no value for code point U+%04X", filename, cp));
	}

	/// One cell of a `Table`: either a final enumeration value or a link to the next level.
	struct TableEntry
	{
		/// Byte sequences that ended in this cell; needed to redistribute them when it is split.
		ubyte[][] byteSeqs;
		string enumerationValue;
		Table* subEntries;

		string toString()
		{
			if (subEntries)
				return subEntries.to!string();
			else
				return enumerationValue;
		}
	}

	/// A 256 way branch of the lookup trie, indexed by one byte of the encoded code point.
	struct Table
	{
		uint level, idx;
		TableEntry[256] entries;

		/// Hashes the level and, per cell, either the linked table's `idx` or the value name.
		size_t toHash() const nothrow
		{
			size_t result;
			foreach (i; 0 .. 256)
			{
				if (entries[i].subEntries)
					result = hashOf(entries[i].subEntries.idx, result);
				else
					result = hashOf(entries[i].enumerationValue, result);
			}
			return hashOf(level, result);
		}

		/// Two tables are equal when they have the same level and every cell links to a table
		/// with the same `idx` or carries the same value name.
		bool opEquals(ref const Table key) const
		{
			foreach (i; 0 .. 256)
			{
				if ((this.entries[i].subEntries is null) != (key.entries[i].subEntries is null))
					return false;
				if (this.entries[i].subEntries)
				{
					if (this.entries[i].subEntries.idx != key.entries[i].subEntries.idx)
						return false;
				}
				else if (this.entries[i].enumerationValue != key.entries[i].enumerationValue)
				{
					return false;
				}
			}
			return this.level == key.level;
		}
	}

	/// Returns the UTF-8 style byte sequence for `cp`. No validation is performed, so every
	/// value in `0 .. 0x110000` (surrogates included) yields a sequence.
	private static ubyte[] utf8Bytes(uint cp)
	{
		if (cp < 128)
			return [cast(ubyte) cp];

		// Filled back to front: continuation bytes first, lead byte last.
		ubyte[4] buf;
		size_t pos = buf.length;
		uint topBit = 6;
		uint bits = cp;
		do
		{
			buf[--pos] = cast(ubyte)((bits & 0x3F) | 0x80);
			bits >>= 6;
			topBit--;
		}
		while (bits && bsr(bits) >= topBit);
		buf[--pos] = cast(ubyte)((0xFE << topBit) | bits);
		return buf[pos .. $].dup;
	}

	/**
	 * Generates D source for a struct named `varName` that contains an `Enum` with all
	 * enumeration values and up to four levels of `ubyte[256]` tables.
	 *
	 * The tables form a trie over the bytes of each encoded code point. A cell value below
	 * `enumerationValues.length` is the ordinal of the final value; any other cell value is the
	 * index of the table to use at the next level plus `enumerationValues.length`. Identical
	 * tables of the same level are emitted only once.
	 *
	 * Also prints the number and total size of the tables to stdout.
	 *
	 * Returns: the generated source code
	 * Throws: `Exception` when table indices and enumeration ordinals do not fit into a `ubyte`
	 */
	string generateEnumerationCode()
	{
		auto lookup = new Table;
		uint[4] levelAssignments;
		foreach (dchar cp; 0 .. 0x110000)
		{
			ubyte[] byteSeq = utf8Bytes(cp);
			auto value = entries[cp].properties[0].value;
			auto table = lookup;
			foreach (i, cu; byteSeq)
			{
				auto entry = &table.entries[cu];
				if (entry.subEntries is null)
				{
					if (entry.enumerationValue is null)
					{
						// Unused cell: claim it for this value.
						entry.byteSeqs = [byteSeq];
						entry.enumerationValue = value;
						break;
					}
					if (entry.enumerationValue == value)
					{
						// Same value as everything else below this prefix.
						entry.byteSeqs ~= byteSeq;
						break;
					}
					// Conflicting value: push what was collected so far one level down. Several
					// sequences may share their next byte, so they must be appended; assigning
					// would keep only the last one and lose the rest on a later split.
					auto subTable = new Table(cast(uint)(i + 1));
					foreach (byteSeq2; entry.byteSeqs)
					{
						auto moved = &subTable.entries[byteSeq2[i + 1]];
						moved.enumerationValue = entry.enumerationValue;
						moved.byteSeqs ~= byteSeq2;
					}
					entry.byteSeqs = null;
					entry.enumerationValue = null;
					entry.subEntries = subTable;
				}
				table = entry.subEntries;
			}
		}

		Table*[Table] tableSet;
		Table*[uint][4] tableByIdx;
		tableByIdx[0][0] = lookup;

		// Numbers the tables of each level bottom-up and merges identical ones.
		void assignIndices(Table* table, uint level = 0)
		{
			// `ref` is essential: the redirect to an already known table must be stored in the
			// parent table, not in a copy of the cell.
			foreach (ref entry; table.entries)
			{
				if (entry.subEntries is null)
					continue;
				assignIndices(entry.subEntries, level + 1);
				if (auto dup = *entry.subEntries in tableSet)
				{
					entry.subEntries = *dup;
				}
				else
				{
					entry.subEntries.idx = levelAssignments[level + 1]++;
					tableByIdx[level + 1][entry.subEntries.idx] = entry.subEntries;
					tableSet[*entry.subEntries] = entry.subEntries;
				}
			}
		}
		assignIndices(lookup);
		levelAssignments[0] = 1;

		// Each table is 256 bytes, i.e. a quarter KiB.
		writefln("%s: Using %s tables with a total size: %s KiB",
			varName, sum(levelAssignments[]), sum(levelAssignments[]) / 4f);
		stdout.flush(); // in case we are buffered

		auto level0 = new ubyte[256][](levelAssignments[0]);
		auto level1 = new ubyte[256][](levelAssignments[1]);
		auto level2 = new ubyte[256][](levelAssignments[2]);
		auto level3 = new ubyte[256][](levelAssignments[3]);

		foreach (level, bin; AliasSeq!(level0, level1, level2, level3))
		{
			enforce(levelAssignments[level] + enumerationValues.length <= 256,
				format("Sum of tables and enumerations at level %s exceeds ubyte storage capacity", level));
			foreach (idx; 0 .. levelAssignments[level])
			{
				Table* table = tableByIdx[level][idx];
				enforce(table.idx == idx);
				enforce(table.level == level);
				foreach (i, ref entry; table.entries)
				{
					if (entry.subEntries)
						bin[idx][i] = cast(ubyte)(entry.subEntries.idx + enumerationValues.length);
					else
						bin[idx][i] = cast(ubyte)enumerationValues[entry.enumerationValue];
				}
			}
		}

		// Write struct with enum
		string code = "struct " ~ varName ~ "\n{\n";
		auto sortedEnum = new string[](enumerationValues.length);
		foreach (key, value; enumerationValues)
			sortedEnum[value] = key;
		code ~= "\tenum Enum : size_t\n\t{\n\t\t";
		foreach (key, value; sortedEnum)
			code ~= (value ? value : "__") ~ ", ";
		code ~= "\n\t}\n\n";
		foreach (k, bin; AliasSeq!(level0, level1, level2, level3))
		{
			code ~= "\tstatic immutable ubyte[256][" ~ to!string(bin.length) ~ "] level" ~ to!string(k) ~ " = [\n";
			foreach (i; 0 .. bin.length)
				code ~= "\t\t[" ~ format("%(%s,%)", bin[i]) ~ "],\n";
			code ~= "\t];\n";
		}
		code ~= "}\n\n";
		return code;
	}
}

alias UCD = UnicodeCharacterDatabase;

/// Regenerates `fast/internal/unicode_tables.d` from the UCD files below `../ucd/`.
void main()
{
	string code = "module fast.internal.unicode_tables;\n\n";
	UCD ucd;

	ucd = UCD("../ucd/auxiliary/GraphemeBreakProperty.txt", PropertyType.enumeration);
	code ~= ucd.generateEnumerationCode();
	ucd = UCD("../ucd/extracted/DerivedGeneralCategory.txt", PropertyType.enumeration);
	code ~= ucd.generateEnumerationCode();
	ucd = UCD("../ucd/extracted/DerivedLineBreak.txt", PropertyType.enumeration);
	code ~= ucd.generateEnumerationCode();

	auto tableFile = File("../source/fast/internal/unicode_tables.d", "w");
	tableFile.write(code);
}
-------------------------------------------------------------------------------- /test/fail10.json: -------------------------------------------------------------------------------- 1 | {"Extra value after close": true} "misplaced quoted value" -------------------------------------------------------------------------------- /test/fail11.json: -------------------------------------------------------------------------------- 1 | {"Illegal expression": 1 + 2} -------------------------------------------------------------------------------- /test/fail12.json: -------------------------------------------------------------------------------- 1 | {"Illegal invocation": alert()} -------------------------------------------------------------------------------- /test/fail13.json: -------------------------------------------------------------------------------- 1 | {"Numbers cannot have leading zeroes": 013} -------------------------------------------------------------------------------- /test/fail14.json: -------------------------------------------------------------------------------- 1 | {"Numbers cannot be hex": 0x14} -------------------------------------------------------------------------------- /test/fail15.json: -------------------------------------------------------------------------------- 1 | ["Illegal backslash escape: \x15"] -------------------------------------------------------------------------------- /test/fail16.json: -------------------------------------------------------------------------------- 1 | [\naked] -------------------------------------------------------------------------------- /test/fail17.json: -------------------------------------------------------------------------------- 1 | ["Illegal backslash escape: \017"] -------------------------------------------------------------------------------- /test/fail18.json: -------------------------------------------------------------------------------- 1 | [[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]] 
-------------------------------------------------------------------------------- /test/fail19.json: -------------------------------------------------------------------------------- 1 | {"Missing colon" null} -------------------------------------------------------------------------------- /test/fail2.json: -------------------------------------------------------------------------------- 1 | ["Unclosed array" -------------------------------------------------------------------------------- /test/fail20.json: -------------------------------------------------------------------------------- 1 | {"Double colon":: null} -------------------------------------------------------------------------------- /test/fail21.json: -------------------------------------------------------------------------------- 1 | {"Comma instead of colon", null} -------------------------------------------------------------------------------- /test/fail22.json: -------------------------------------------------------------------------------- 1 | ["Colon instead of comma": false] -------------------------------------------------------------------------------- /test/fail23.json: -------------------------------------------------------------------------------- 1 | ["Bad value", truth] -------------------------------------------------------------------------------- /test/fail24.json: -------------------------------------------------------------------------------- 1 | ['single quote'] -------------------------------------------------------------------------------- /test/fail25.json: -------------------------------------------------------------------------------- 1 | [" tab character in string "] -------------------------------------------------------------------------------- /test/fail26.json: -------------------------------------------------------------------------------- 1 | ["tab\ character\ in\ string\ "] -------------------------------------------------------------------------------- /test/fail27.json: 
-------------------------------------------------------------------------------- 1 | ["line 2 | break"] -------------------------------------------------------------------------------- /test/fail28.json: -------------------------------------------------------------------------------- 1 | ["line\ 2 | break"] -------------------------------------------------------------------------------- /test/fail29.json: -------------------------------------------------------------------------------- 1 | [0e] -------------------------------------------------------------------------------- /test/fail3.json: -------------------------------------------------------------------------------- 1 | {unquoted_key: "keys must be quoted"} -------------------------------------------------------------------------------- /test/fail30.json: -------------------------------------------------------------------------------- 1 | [0e+] -------------------------------------------------------------------------------- /test/fail31.json: -------------------------------------------------------------------------------- 1 | [0e+-1] -------------------------------------------------------------------------------- /test/fail32.json: -------------------------------------------------------------------------------- 1 | {"Comma instead if closing brace": true, -------------------------------------------------------------------------------- /test/fail33.json: -------------------------------------------------------------------------------- 1 | ["mismatch"} -------------------------------------------------------------------------------- /test/fail4.json: -------------------------------------------------------------------------------- 1 | ["extra comma",] -------------------------------------------------------------------------------- /test/fail5.json: -------------------------------------------------------------------------------- 1 | ["double extra comma",,] 
-------------------------------------------------------------------------------- /test/fail6.json: -------------------------------------------------------------------------------- 1 | [ , "<-- missing value"] -------------------------------------------------------------------------------- /test/fail7.json: -------------------------------------------------------------------------------- 1 | ["Comma after the close"], -------------------------------------------------------------------------------- /test/fail8.json: -------------------------------------------------------------------------------- 1 | ["Extra close"]] -------------------------------------------------------------------------------- /test/fail9.json: -------------------------------------------------------------------------------- 1 | {"Extra comma": true,} -------------------------------------------------------------------------------- /test/pass1.json: -------------------------------------------------------------------------------- 1 | [ 2 | "JSON Test Pattern pass1", 3 | {"object with 1 member":["array with 1 element"]}, 4 | {}, 5 | [], 6 | -42, 7 | true, 8 | false, 9 | null, 10 | { 11 | "integer": 1234567890, 12 | "real": -9876.543210, 13 | "e": 0.123456789e-12, 14 | "E": 1.234567890E+34, 15 | "": 23456789012E66, 16 | "zero": 0, 17 | "one": 1, 18 | "space": " ", 19 | "quote": "\"", 20 | "backslash": "\\", 21 | "controls": "\b\f\n\r\t", 22 | "slash": "/ & \/", 23 | "alpha": "abcdefghijklmnopqrstuvwyz", 24 | "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", 25 | "digit": "0123456789", 26 | "0123456789": "digit", 27 | "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", 28 | "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", 29 | "true": true, 30 | "false": false, 31 | "null": null, 32 | "array":[ ], 33 | "object":{ }, 34 | "address": "50 St. 
James Street", 35 | "url": "http://www.JSON.org/", 36 | "comment": "// /* */": " ", 38 | " s p a c e d " :[1,2 , 3 39 | 40 | , 41 | 42 | 4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], 43 | "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", 44 | "quotes": "" \u0022 %22 0x22 034 "", 45 | "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" 46 | : "A key can be any string" 47 | }, 48 | 0.5 ,98.6 49 | , 50 | 99.44 51 | , 52 | 53 | 1066, 54 | 1e1, 55 | 0.1e1, 56 | 1e-1, 57 | 1e00,2e+00,2e-00 58 | ,"rosebud"] -------------------------------------------------------------------------------- /test/pass2.json: -------------------------------------------------------------------------------- 1 | [[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] -------------------------------------------------------------------------------- /test/pass3.json: -------------------------------------------------------------------------------- 1 | { 2 | "JSON Test Pattern pass3": { 3 | "The outermost value": "must be an object or array.", 4 | "In this test": "It is an object." 5 | } 6 | } 7 | --------------------------------------------------------------------------------