├── .gitignore
├── README.md
├── dub.sdl
├── mono-d
├── docs.dproj
├── fast.dproj
├── fast.sln
└── generate unicode tables.dproj
├── source
├── fast
│ ├── buffer.d
│ ├── cstring.d
│ ├── format.d
│ ├── internal
│ │ ├── benchmarks.d
│ │ ├── helpers.d
│ │ ├── sysdef.di
│ │ └── unicode_tables.d
│ ├── intmath.d
│ ├── json.d
│ ├── parsing.d
│ ├── string.d
│ └── unicode.d
├── std
│ └── simd.d
└── unicode
│ └── generator.d
└── test
├── fail1.json
├── fail10.json
├── fail11.json
├── fail12.json
├── fail13.json
├── fail14.json
├── fail15.json
├── fail16.json
├── fail17.json
├── fail18.json
├── fail19.json
├── fail2.json
├── fail20.json
├── fail21.json
├── fail22.json
├── fail23.json
├── fail24.json
├── fail25.json
├── fail26.json
├── fail27.json
├── fail28.json
├── fail29.json
├── fail3.json
├── fail30.json
├── fail31.json
├── fail32.json
├── fail33.json
├── fail4.json
├── fail5.json
├── fail6.json
├── fail7.json
├── fail8.json
├── fail9.json
├── pass1.json
├── pass2.json
└── pass3.json
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated documentation
2 | /docs/
3 |
4 | # Dub cache
5 | /.dub/
6 |
7 | # Object and executable output directory
8 | /generated/
9 |
10 | # Generated by OProfile (system wide profiler)
11 | /oprofile_data/
12 |
13 | # Unicode Character Database files can be downloaded here when tables need to be regenerated
14 | /ucd/
15 |
16 | # Mono-D user preferences
17 | /mono-d/fast.userprefs
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | fast
2 | ====
3 |
4 | This library aims to provide the fastest possible implementation of some everyday routines.
5 |
6 | The contained functions avoid GC allocations and input validation. They may use SSE or stack allocations to reach a high throughput, so that in some cases a 20-fold speed increase can be achieved.
7 |
8 | **[DMD](https://dlang.org/)**, **[GDC](https://gdcproject.org/)** and **[LDC2](https://wiki.dlang.org/LDC)** compilers are supported. Tested with front-end versions **2.068** through **2.079**.
9 |
10 | ### Benchmark
11 | A benchmark is included and can be run through dub, e.g.:
12 |
13 | dub --config=benchmark --build=release --compiler=gdc
14 |
15 | ### Examples
16 |
17 | ##### Read JSON file with coordinates.
18 | ```d
19 | struct Point3D { double x, y, z; }
20 |
21 | void main()
22 | {
23 | import fast.json;
24 | auto points = json.coordinates.read!(Point3D[]);
25 | }
26 | ```
27 |
28 | ##### SSE3 accelerated splitting around '/' and '\'
29 | ```d
30 | string rest = pathname;
31 | string element;
32 |
33 | import fast.string;
34 | while (rest.split!`or(=\,=/)`(element, rest))
35 | {
36 | // `element` is now the next directory.
37 | // `rest` is what remains after the \ or /.
38 | }
39 | // `element` is now the file name part of the path.
40 | ```
41 |
42 | ##### Calling Windows API functions.
43 | ```d
44 | void createHardlink(string from, string to)
45 | {
46 | import fast.cstring : wcharPtr;
47 | CreateHardLinkW(wcharPtr!to, wcharPtr!from, null);
48 | }
49 | ```
50 |
51 | ##### Calling Linux API functions.
52 | ```d
53 | void createHardlink(string from, string to)
54 | {
55 | import fast.cstring : charPtr;
56 | link(charPtr!from, charPtr!to);
57 | }
58 | ```
59 |
--------------------------------------------------------------------------------
/dub.sdl:
--------------------------------------------------------------------------------
1 | name "fast"
2 | description "A library that aims to provide the fastest possible implementation of some every day routines."
3 | homepage "http://github.com/mleise/fast"
4 | authors "Marco Leise"
5 | copyright "Copyright © 2017, Marco Leise"
6 | license "GPL-3.0"
7 |
8 | excludedSourceFiles "source/docs/*.d" "source/unicode/*.d"
9 | targetPath "generated"
10 |
11 | configuration "library" {
12 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
13 | targetType "library"
14 | }
15 |
16 | configuration "shared-library" {
17 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
18 | targetType "dynamicLibrary"
19 | libs "gdruntime" "gphobos" platform="gdc" // Force linking with shared Phobos2, not the non-PIC static objects
20 | }
21 |
22 | configuration "benchmark" {
23 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
24 | targetType "executable"
25 | versions "benchmark"
26 | }
27 |
28 | configuration "benchmark-pic" {
29 | platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
30 | targetType "executable"
31 | versions "benchmark"
32 | dflags "-fPIC"
33 | }
34 |
--------------------------------------------------------------------------------
/mono-d/docs.dproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Documentation
5 | AnyCPU
6 | 8.0.30703
7 | 2.0
8 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}
9 | ..\source
10 | DMD
11 | true
12 | true
13 | true
14 | 0.3.2
15 |
16 |
17 | ..\generated
18 | ../generated/debug
19 | false
20 | false
21 | generate_docs
22 | Executable
23 | true
24 | 0
25 | true
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/mono-d/fast.dproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | AnyCPU
6 | 8.0.30703
7 | 2.0
8 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}
9 | true
10 | false
11 | True
12 | DMD
13 |
14 |
15 |
16 |
17 | ..\source
18 | 0.3.2
19 |
20 |
21 | ..\generated
22 | Executable
23 | benchmark
24 | false
25 | 0
26 | ../generated/release
27 | ../docs
28 |
29 |
30 | benchmark
31 |
32 |
33 | false
34 | false
35 |
36 |
37 | true
38 | ..\generated
39 | false
40 | ../generated/debug
41 | ../docs
42 |
43 |
44 | benchmark
45 |
46 |
47 | false
48 | benchmark-debug
49 | Executable
50 | false
51 | 0
52 | true
53 |
54 |
55 | true
56 | ..\generated
57 | false
58 | Executable
59 | fast-unittest-x86
60 | true
61 | false
62 | 0
63 | ../generated/unittest-x86
64 | ../docs
65 |
66 |
67 | true
68 | ..\generated
69 | false
70 | Executable
71 | fast-unittest-x64
72 | true
73 | false
74 | 0
75 | ../generated/unittest-x64
76 | ../docs
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/mono-d/fast.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 11.00
3 | # Visual Studio 2010
4 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "fast", "fast.dproj", "{DFEB5CCB-A636-4971-8302-89CDBF3B4503}"
5 | EndProject
6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{87AD35CC-088E-43A6-99E8-A216AABD25F0}"
7 | ProjectSection(SolutionItems) = preProject
8 | ..\README.md = ..\README.md
9 | ..\.gitignore = ..\.gitignore
10 | ..\dub.sdl = ..\dub.sdl
11 | ..\test\fail1.json = ..\test\fail1.json
12 | ..\test\fail2.json = ..\test\fail2.json
13 | ..\test\fail3.json = ..\test\fail3.json
14 | ..\test\fail4.json = ..\test\fail4.json
15 | ..\test\fail5.json = ..\test\fail5.json
16 | ..\test\fail6.json = ..\test\fail6.json
17 | ..\test\fail7.json = ..\test\fail7.json
18 | ..\test\fail8.json = ..\test\fail8.json
19 | ..\test\fail9.json = ..\test\fail9.json
20 | ..\test\fail10.json = ..\test\fail10.json
21 | ..\test\fail11.json = ..\test\fail11.json
22 | ..\test\fail12.json = ..\test\fail12.json
23 | ..\test\fail13.json = ..\test\fail13.json
24 | ..\test\fail14.json = ..\test\fail14.json
25 | ..\test\fail15.json = ..\test\fail15.json
26 | ..\test\fail16.json = ..\test\fail16.json
27 | ..\test\fail17.json = ..\test\fail17.json
28 | ..\test\fail18.json = ..\test\fail18.json
29 | ..\test\fail19.json = ..\test\fail19.json
30 | ..\test\fail20.json = ..\test\fail20.json
31 | ..\test\fail21.json = ..\test\fail21.json
32 | ..\test\fail22.json = ..\test\fail22.json
33 | ..\test\fail23.json = ..\test\fail23.json
34 | ..\test\fail24.json = ..\test\fail24.json
35 | ..\test\fail25.json = ..\test\fail25.json
36 | ..\test\fail26.json = ..\test\fail26.json
37 | ..\test\fail27.json = ..\test\fail27.json
38 | ..\test\fail28.json = ..\test\fail28.json
39 | ..\test\fail29.json = ..\test\fail29.json
40 | ..\test\fail30.json = ..\test\fail30.json
41 | ..\test\fail31.json = ..\test\fail31.json
42 | ..\test\fail32.json = ..\test\fail32.json
43 | ..\test\fail33.json = ..\test\fail33.json
44 | ..\test\pass1.json = ..\test\pass1.json
45 | ..\test\pass2.json = ..\test\pass2.json
46 | ..\test\pass3.json = ..\test\pass3.json
47 | ..\benchall.sh = ..\benchall.sh
48 | EndProjectSection
49 | EndProject
50 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "docs", "docs.dproj", "{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}"
51 | EndProject
52 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "generate unicode tables", "generate unicode tables.dproj", "{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}"
53 | EndProject
54 | Global
55 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
56 | Benchmark|Any CPU = Benchmark|Any CPU
57 | Debug|Any CPU = Debug|Any CPU
58 | Release|Any CPU = Release|Any CPU
59 | Unittest|x86 = Unittest|x86
60 | Unittest|x64 = Unittest|x64
61 | Unicode tables|Any CPU = Unicode tables|Any CPU
62 | Documentation|Any CPU = Documentation|Any CPU
63 | EndGlobalSection
64 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
65 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU
66 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU
67 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Documentation|Any CPU.ActiveCfg = Unicode tables|Any CPU
68 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU
69 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.ActiveCfg = Unicode tables|Any CPU
70 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.Build.0 = Unicode tables|Any CPU
71 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU
72 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU
73 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU
74 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU
75 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU
76 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.Build.0 = Documentation|Any CPU
77 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU
78 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unicode tables|Any CPU.ActiveCfg = Documentation|Any CPU
79 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU
80 | {CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU
81 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.ActiveCfg = Benchmark|Any CPU
82 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.Build.0 = Benchmark|Any CPU
83 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
84 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.Build.0 = Debug|Any CPU
85 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU
86 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.ActiveCfg = Benchmark|Any CPU
87 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.Build.0 = Benchmark|Any CPU
88 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unicode tables|Any CPU.ActiveCfg = Benchmark|Any CPU
89 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.ActiveCfg = Unittest|x64
90 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.Build.0 = Unittest|x64
91 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.ActiveCfg = Unittest|x86
92 | {DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.Build.0 = Unittest|x86
93 | EndGlobalSection
94 | GlobalSection(NestedProjects) = preSolution
95 | EndGlobalSection
96 | GlobalSection(MonoDevelopProperties) = preSolution
97 | BaseDirectory = ..
98 | Policies = $0
99 | $0.TextStylePolicy = $1
100 | $1.inheritsSet = null
101 | $1.scope = application/json
102 | $0.TextStylePolicy = $2
103 | $2.inheritsSet = Mono
104 | $2.inheritsScope = text/plain
105 | $2.scope = text/plain
106 | $0.TextStylePolicy = $3
107 | $3.FileWidth = 120
108 | $3.TabsToSpaces = False
109 | $3.NoTabsAfterNonTabs = True
110 | $3.inheritsSet = VisualStudio
111 | $3.inheritsScope = text/plain
112 | $3.scope = text/x-d
113 | $0.DFormattingPolicy = $4
114 | $4.inheritsSet = Mono
115 | $4.inheritsScope = text/x-d
116 | $4.scope = text/x-d
117 | $0.StandardHeader = $5
118 | $5.Text =
119 | $5.IncludeInNewFiles = True
120 | $0.NameConventionPolicy = $6
121 | $6.Rules = $7
122 | $7.NamingRule = $8
123 | $8.Name = Namespaces
124 | $8.AffectedEntity = Namespace
125 | $8.VisibilityMask = VisibilityMask
126 | $8.NamingStyle = PascalCase
127 | $8.IncludeInstanceMembers = True
128 | $8.IncludeStaticEntities = True
129 | $7.NamingRule = $9
130 | $9.Name = Types
131 | $9.AffectedEntity = Class, Struct, Enum, Delegate
132 | $9.VisibilityMask = Public
133 | $9.NamingStyle = PascalCase
134 | $9.IncludeInstanceMembers = True
135 | $9.IncludeStaticEntities = True
136 | $7.NamingRule = $10
137 | $10.Name = Interfaces
138 | $10.RequiredPrefixes = $11
139 | $11.String = I
140 | $10.AffectedEntity = Interface
141 | $10.VisibilityMask = Public
142 | $10.NamingStyle = PascalCase
143 | $10.IncludeInstanceMembers = True
144 | $10.IncludeStaticEntities = True
145 | $7.NamingRule = $12
146 | $12.Name = Attributes
147 | $12.RequiredSuffixes = $13
148 | $13.String = Attribute
149 | $12.AffectedEntity = CustomAttributes
150 | $12.VisibilityMask = Public
151 | $12.NamingStyle = PascalCase
152 | $12.IncludeInstanceMembers = True
153 | $12.IncludeStaticEntities = True
154 | $7.NamingRule = $14
155 | $14.Name = Event Arguments
156 | $14.RequiredSuffixes = $15
157 | $15.String = EventArgs
158 | $14.AffectedEntity = CustomEventArgs
159 | $14.VisibilityMask = Public
160 | $14.NamingStyle = PascalCase
161 | $14.IncludeInstanceMembers = True
162 | $14.IncludeStaticEntities = True
163 | $7.NamingRule = $16
164 | $16.Name = Exceptions
165 | $16.RequiredSuffixes = $17
166 | $17.String = Exception
167 | $16.AffectedEntity = CustomExceptions
168 | $16.VisibilityMask = VisibilityMask
169 | $16.NamingStyle = PascalCase
170 | $16.IncludeInstanceMembers = True
171 | $16.IncludeStaticEntities = True
172 | $7.NamingRule = $18
173 | $18.Name = Methods
174 | $18.AffectedEntity = Methods
175 | $18.VisibilityMask = Protected, Public
176 | $18.NamingStyle = PascalCase
177 | $18.IncludeInstanceMembers = True
178 | $18.IncludeStaticEntities = True
179 | $7.NamingRule = $19
180 | $19.Name = Static Readonly Fields
181 | $19.AffectedEntity = ReadonlyField
182 | $19.VisibilityMask = Protected, Public
183 | $19.NamingStyle = PascalCase
184 | $19.IncludeInstanceMembers = False
185 | $19.IncludeStaticEntities = True
186 | $7.NamingRule = $20
187 | $20.Name = Fields
188 | $20.AffectedEntity = Field
189 | $20.VisibilityMask = Protected, Public
190 | $20.NamingStyle = PascalCase
191 | $20.IncludeInstanceMembers = True
192 | $20.IncludeStaticEntities = True
193 | $7.NamingRule = $21
194 | $21.Name = ReadOnly Fields
195 | $21.AffectedEntity = ReadonlyField
196 | $21.VisibilityMask = Protected, Public
197 | $21.NamingStyle = PascalCase
198 | $21.IncludeInstanceMembers = True
199 | $21.IncludeStaticEntities = False
200 | $7.NamingRule = $22
201 | $22.Name = Constant Fields
202 | $22.AffectedEntity = ConstantField
203 | $22.VisibilityMask = Protected, Public
204 | $22.NamingStyle = PascalCase
205 | $22.IncludeInstanceMembers = True
206 | $22.IncludeStaticEntities = True
207 | $7.NamingRule = $23
208 | $23.Name = Properties
209 | $23.AffectedEntity = Property
210 | $23.VisibilityMask = Protected, Public
211 | $23.NamingStyle = PascalCase
212 | $23.IncludeInstanceMembers = True
213 | $23.IncludeStaticEntities = True
214 | $7.NamingRule = $24
215 | $24.Name = Events
216 | $24.AffectedEntity = Event
217 | $24.VisibilityMask = Protected, Public
218 | $24.NamingStyle = PascalCase
219 | $24.IncludeInstanceMembers = True
220 | $24.IncludeStaticEntities = True
221 | $7.NamingRule = $25
222 | $25.Name = Enum Members
223 | $25.AffectedEntity = EnumMember
224 | $25.VisibilityMask = VisibilityMask
225 | $25.NamingStyle = PascalCase
226 | $25.IncludeInstanceMembers = True
227 | $25.IncludeStaticEntities = True
228 | $7.NamingRule = $26
229 | $26.Name = Parameters
230 | $26.AffectedEntity = Parameter
231 | $26.VisibilityMask = VisibilityMask
232 | $26.NamingStyle = CamelCase
233 | $26.IncludeInstanceMembers = True
234 | $26.IncludeStaticEntities = True
235 | $7.NamingRule = $27
236 | $27.Name = Type Parameters
237 | $27.RequiredPrefixes = $28
238 | $28.String = T
239 | $27.AffectedEntity = TypeParameter
240 | $27.VisibilityMask = VisibilityMask
241 | $27.NamingStyle = PascalCase
242 | $27.IncludeInstanceMembers = True
243 | $27.IncludeStaticEntities = True
244 | $0.VersionControlPolicy = $29
245 | $29.CommitMessageStyle = $30
246 | $30.FileSeparator = ", "
247 | $30.IncludeDirectoryPaths = True
248 | $29.inheritsSet = Mono
249 | $0.ChangeLogPolicy = $31
250 | $31.UpdateMode = None
251 | $31.MessageStyle = $32
252 | $32.LineAlign = 0
253 | $31.inheritsSet = Mono
254 | description = A library for D that aims to provide the fastest possible implementation of some every day routines.
255 | version = 0.3.2
256 | outputpath = ..
257 | EndGlobalSection
258 | GlobalSection(SolutionProperties) = preSolution
259 | HideSolutionNode = FALSE
260 | EndGlobalSection
261 | EndGlobal
262 |
--------------------------------------------------------------------------------
/mono-d/generate unicode tables.dproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Unicode tables
5 | AnyCPU
6 | 8.0.30703
7 | 2.0
8 | {BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}
9 | ..\source
10 | DMD
11 | true
12 | true
13 | true
14 | 0.3.2
15 |
16 |
17 | ..\generated
18 | ../docs
19 | ../generated/debug
20 | false
21 | false
22 | generate_unicode_tables
23 | Executable
24 | false
25 | 0
26 | true
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/source/fast/buffer.d:
--------------------------------------------------------------------------------
1 | /**
2 | * Fast buffer implementation.
3 | *
4 | * Authors:
5 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
6 | *
7 | * Copyright:
8 | * © 2015 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
9 | *
10 | * License:
11 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
12 | */
13 | module fast.buffer; nothrow
14 |
15 | import core.stdc.stdint;
16 | import core.stdc.stdlib;
17 | import std.range;
18 | import core.exception;
19 |
20 |
21 | enum allocaLimit = 2048;
22 |
23 |
24 | /*******************************************************************************
25 | *
26 | * Dynamic array of `T` using `malloc`, `realloc` and `free` under the hood.
27 | * The capacity counts elements of `T`, not bytes. Memory is released on scope
28 | * exit; do not copy instances, as every copy would free the same block.
29 | **************************************/
30 | struct RaiiArray(T)
31 | {
32 | private:
33 |
34 | T* m_ptr;
35 | size_t m_capacity;
36 |
37 |
38 | public:
39 | /// Allocates room for `capacity` elements; raises an out-of-memory error if `malloc` fails.
40 | nothrow
41 | this(size_t capacity)
42 | {
43 | if (capacity)
44 | {
45 | m_ptr = cast(T*) malloc(T.sizeof * capacity); // malloc counts bytes, the capacity counts elements
46 | if (m_ptr is null)
47 | onOutOfMemoryError();
48 | m_capacity = capacity;
49 | }
50 | }
51 |
52 | /// Releases the block, if there is one.
53 | nothrow @nogc
54 | ~this()
55 | {
56 | if (m_ptr !is null)
57 | free(m_ptr);
58 | }
59 |
60 | /// Pointer to the first element; null while the capacity is 0.
61 | @safe pure nothrow @nogc
62 | @property inout(T)* ptr() inout
63 | {
64 | return m_ptr;
65 | }
66 |
67 | /// Number of elements the block has room for.
68 | @safe pure nothrow @nogc
69 | @property size_t capacity() const
70 | {
71 | return m_capacity;
72 | }
73 |
74 | /// Resizes the block to `value` elements via `realloc`; a value of 0 frees it.
75 | nothrow
76 | @property void capacity(size_t value)
77 | {
78 | if (value != 0)
79 | {
80 | if (T* ptrNew = cast(T*) realloc(m_ptr, T.sizeof * value)) // byte size, see constructor
81 | m_ptr = ptrNew;
82 | else onOutOfMemoryError(); // the old block is still owned by m_ptr at this point
83 | }
84 | else if (m_ptr)
85 | {
86 | free(m_ptr);
87 | m_ptr = null;
88 | }
89 | m_capacity = value;
90 | }
91 |
92 | /// The whole capacity is exposed as the array length.
93 | alias length = capacity;
94 |
95 |
96 | mixin Slicing;
97 | mixin CapacityTools;
98 | }
99 |
100 |
101 | /*******************************************************************************
102 | *
103 | * Buffer with inline storage for at most `n` items of `T` and a fill level.
104 | * The fill level can be raised up to `n` and reset to reuse the storage.
105 | *
106 | **************************************/
107 | struct LimitedScopeBuffer(T, size_t n)
108 | {
109 | private:
110 |
111 | T[n] m_data;
112 | size_t m_used;
113 |
114 |
115 | public:
116 |
117 | /// Pointer to the start of the inline storage.
118 | @property inout(T)* ptr() inout @safe pure nothrow @nogc
119 | {
120 | return m_data.ptr;
121 | }
122 |
123 |
124 | /// Number of items currently in use.
125 | @property size_t length() const @safe pure nothrow @nogc
126 | {
127 | return m_used;
128 | }
129 |
130 | /// Sets the number of items in use; `value` must not exceed `n`.
131 | @property void length(size_t value) @safe pure nothrow @nogc
132 | in
133 | {
134 | assert( n >= value );
135 | }
136 | body
137 | {
138 | m_used = value;
139 | }
140 |
141 |
142 | /// The used part of the storage as a slice.
143 | inout(T)[] opSlice() inout @safe pure nothrow @nogc
144 | {
145 | return m_data[0 .. m_used];
146 | }
147 | }
148 |
149 | /// Slice of `T` over temporary memory; the destructor releases it with `free()` when `callFree` is set.
150 | struct TempBuffer(T)
151 | {
152 | T[] slice; // the elements exposed by this buffer
153 | bool callFree; // whether `slice.ptr` is passed to `free()` on destruction
154 | // Copies are forbidden, so two instances can never free the same block.
155 | @disable this(this);
156 |
157 | ~this() nothrow
158 | {
159 | if (this.callFree)
160 | free(this.slice.ptr);
161 | }
162 | // Slicing, indexing and size queries below all forward to `slice`.
163 | T[] opSlice() @safe pure nothrow { return this.slice[]; }
164 | T[] opSlice(size_t a, size_t b) @safe pure nothrow { return this.slice[a .. b]; }
165 | T[] opSliceAssign(const(T)[] value, size_t a, size_t b) @safe pure nothrow { return this.slice[a .. b] = value; }
166 | ref T opIndex(size_t idx) @safe pure nothrow { return this.slice[idx]; }
167 | @property size_t size() @safe pure nothrow { return T.sizeof * this.slice.length; } // size in bytes
168 | @property size_t length() @safe pure nothrow { return this.slice.length; } // size in elements
169 | alias opDollar = length;
170 | @property T* ptr() @trusted pure nothrow { return this.slice.ptr; } // must use .ptr here for zero length strings
171 | alias ptr this; // lets a TempBuffer be used where a `T*` is expected
172 | // Returns an output range that writes consecutively into the buffer, starting at index 0.
173 | auto makeOutputRange()
174 | {
175 | struct OutputRange
176 | {
177 | T* ptr; // start of the destination memory
178 | size_t idx; // number of elements written so far
179 |
180 | void put(T)(auto ref T t) { ptr[idx++] = t; } // no bounds check; this `T` shadows the struct's `T`
181 | T[] opSlice() pure nothrow { return ptr[0 .. idx]; } // the part written so far
182 | }
183 | return OutputRange(this.slice.ptr, 0);
184 | }
185 | }
186 |
187 | /// Builds a `TempBuffer!T` of `length` elements on top of `buffer`, or on a `malloc`ed block (freed by the TempBuffer) when `buffer` is null.
188 | TempBuffer!T tempBuffer(T, alias length, size_t allocaLimit = .allocaLimit)
189 | (void* buffer = (T.sizeof * length <= allocaLimit) ? alloca(T.sizeof * length) : null) // default: `alloca` if the byte size is within `allocaLimit`, else null
190 | {
191 | return TempBuffer!T((cast(T*) (
192 | buffer is null
193 | ? malloc(T.sizeof * length) // NOTE(review): the result is not checked for null
194 | : buffer))[0 .. length],
195 | buffer is null); // callFree: only a block obtained from `malloc` is freed
196 | }
197 |
198 |
199 | /*******************************************************************************
200 | *
201 | * Returns a structure to your stack that contains a buffer of $(D bytes) size.
202 | * Memory is allocated by calling `.alloc!T(count)` on it in order to get
203 | * `count` elements of type `T`. The return value will be a RAII structure
204 | * that releases the memory back to the stack buffer upon destruction, so it can
205 | * be reused. The pointer within that RAII structure is aligned to
206 | * `T.alignof`. If the internal buffer isn't enough to fulfill the request
207 | * including padding from alignment, then `malloc()` is used instead.
208 | *
209 | * Warning:
210 | * Always keep the return value of `.alloc()` around on your stack until
211 | * you are done with its contents. Never pass it directly into functions as
212 | * arguments!
213 | *
214 | * Params:
215 | * bytes = The size of the buffer on the stack.
216 | *
217 | * Returns:
218 | * A stack buffer allocator.
219 | *
220 | **************************************/
221 | auto stackBuffer(size_t bytes)() @trusted pure
222 | {
223 | // All that remains of this after inlining is a stack pointer decrement and
224 | // a mov instruction for the `null`.
225 | StackBuffer!bytes result = void; // skip default initialization of the whole buffer
226 | result.last = cast(StackBufferEntry!void*) &result.last; // `last` starts out pointing at its own storage, viewed as an entry
227 | result.sentinel = null; // the field declared right after `last` in StackBuffer
228 | return result; // NOTE(review): `last` holds an address inside `result`; presumably relies on the value being built in place at the caller — confirm
229 | }
230 |
231 |
232 | auto asOutputRange(T)(T* t) @safe pure
233 | {
234 | struct PointerRange
235 | {
236 | private:
237 | // `start` marks where writing began, `ptr` is the next write position.
238 | T* start;
239 | T* ptr;
240 |
241 | public:
242 | // Stores one element at the write position and advances it.
243 | void put()(auto ref const(T) item) pure
244 | {
245 | *ptr++ = item;
246 | }
247 | // Everything written so far.
248 | T[] opSlice() pure
249 | {
250 | return start[0 .. ptr - start];
251 | }
252 | }
253 | static assert(isOutputRange!(PointerRange, T));
254 | return PointerRange(t, t);
255 | }
256 |
257 | /// Yields the text of an expression that `alloca`s `size` bytes if `size <= allocaLimit` and is `null` otherwise.
258 | enum bufferArg(alias size)()
259 | { // NOTE(review): the template parameter is not used here; the text names a symbol literally called `size` — presumably meant for a string mixin at the call site, confirm
260 | return "((size <= allocaLimit) ? alloca(size) : null)";
261 | }
262 |
263 |
264 |
265 | package:
266 | /// Fixed-size byte storage plus a link to the latest entry handed out by `alloc`; created through `stackBuffer()`.
267 | struct StackBuffer(size_t bytes)
268 | {
269 | private:
270 |
271 | void[bytes] space = void; // raw storage, deliberately left uninitialized
272 | StackBufferEntry!void* last; // latest entry; `stackBuffer()` first points it at this very field
273 | void* sentinel; // set to null by `stackBuffer()`
274 |
275 | public:
276 |
277 | @disable this(this);
278 | // Returns room for `howMany` elements of `T`, taken from `space` when it fits, otherwise from `malloc`.
279 | @trusted
280 | StackBufferEntry!T alloc(T)(size_t howMany)
281 | {
282 | enum max = size_t.max / T.sizeof; // largest count whose byte size fits in size_t
283 | alias SBE = StackBufferEntry!T;
284 | T* target = cast(T*) (cast(uintptr_t) this.last.ptr / T.alignof * T.alignof); // latest entry's pointer rounded down to a multiple of T.alignof
285 | if (target > this.space.ptr && cast(uintptr_t) (target - cast(T*) this.space.ptr) >= howMany) // room for `howMany` elements between the start of `space` and `target`?
286 | return SBE(target - howMany, this.last); // block ends at `target`; the constructor makes the new entry the `last` one
287 | else
288 | // TODO: Respect alignment here as well by padding. Optionally also embed a length in the heap block, so we can provide slicing of the whole thing.
289 | return SBE(howMany <= max ? cast(T*) malloc(T.sizeof * howMany) : null); // heap fallback; null pointer if the byte count would overflow
290 | }
291 | }
292 | /// Handle for one allocation made by `StackBuffer.alloc`; for non-void `T` the destructor unlinks it or frees its heap block.
293 | struct StackBufferEntry(T)
294 | {
295 | private:
296 |
297 | StackBufferEntry!void* prev; // entry that was `last` before this one; stays null for `malloc`ed blocks
298 | // Heap-backed entry: `prev` is left null, so the destructor calls `free`.
299 | this(T* ptr) pure { this.ptr = ptr; }
300 | // Buffer-backed entry: remembers the previous `last` and registers itself as the new one.
301 | this(T* ptr, ref StackBufferEntry!void* last) pure
302 | {
303 | this.ptr = ptr;
304 | this.prev = last;
305 | last = cast(StackBufferEntry!void*) &this; // NOTE(review): stores the address of the object under construction — presumably assumes it is not moved afterwards; confirm
306 | }
307 |
308 |
309 | public:
310 |
311 | T* ptr; // start of the allocated elements
312 |
313 | static if (!is(T == void))
314 | {
315 | @disable this(this);
316 | // Buffer-backed entries are unlinked on destruction, heap-backed ones are freed.
317 | ~this() @trusted
318 | {
319 | if (this.prev)
320 | {
321 | StackBufferEntry!void* it = this.prev;
322 | while (it.prev) it = it.prev; // walk the chain to the entry whose `prev` is null
323 | auto last = cast(StackBufferEntry!void**) &prev.ptr; // NOTE(review): `it` is not used after the loop; `&it.ptr` may have been intended — confirm
324 | *last = this.prev; // writes the previous entry's address into the slot addressed above
325 | }
326 | else free(this.ptr); // no `prev`: the block came from `malloc`
327 | }
328 | // Unchecked element access through the raw pointer.
329 | @system pure nothrow @nogc
330 | ref inout(T) opIndex(size_t idx) inout
331 | {
332 | return ptr[idx];
333 | }
334 | // Unchecked slice `[a .. b]` of the allocation.
335 | @system pure nothrow @nogc
336 | inout(T)[] opSlice(size_t a, size_t b) inout
337 | {
338 | return ptr[a .. b];
339 | }
340 | // Output range that writes into this allocation from its start.
341 | @safe pure nothrow @nogc
342 | @property auto range()
343 | {
344 | return ptr.asOutputRange();
345 | }
346 | }
347 | }
348 |
349 |
350 |
351 | private:
352 |
353 | mixin template Slicing()
354 | {
355 | public
356 | {
357 | /// Element access; `idx` has to be below `length` (checked by the contract).
358 | ref inout(T) opIndex(size_t idx) inout @nogc pure nothrow
359 | in
360 | {
361 | assert(length > idx);
362 | }
363 | body
364 | {
365 | return *(ptr + idx);
366 | }
367 |
368 |
369 | /// All `length` elements as a slice.
370 | inout(T)[] opSlice() inout @nogc pure nothrow
371 | {
372 | return ptr[0 .. length];
373 | }
374 |
375 |
376 | /// Elements `a` up to, but excluding, `b`; requires `a <= b <= length`.
377 | inout(T)[] opSlice(size_t a, size_t b) inout @nogc pure nothrow
378 | in
379 | {
380 | assert(b <= length && a <= b);
381 | }
382 | body
383 | {
384 | return ptr[a .. b];
385 | }
386 | }
387 | }
388 |
389 |
390 | mixin template CapacityTools()
391 | {
392 | public
393 | {
394 | /// Grows `capacity` to `c` if it is currently smaller; never shrinks it.
395 | void capacityNeeded(size_t c) nothrow
396 | {
397 | if (c > capacity)
398 | capacity = c;
399 | }
400 | }
401 | }
402 |
--------------------------------------------------------------------------------
/source/fast/cstring.d:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | *
3 | * Converts between UTF-8 and UTF-16.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2013 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************/
15 | module fast.cstring; @nogc nothrow:
16 |
17 | import core.stdc.stdlib;
18 | import core.stdc.string;
19 | //import std.traits;
20 | import fast.buffer;
21 |
22 |
23 | /**
24 | * Converts a UTF-8 string to UTF-16 using a buffer provided by the user.
25 | * To get the buffer requirements call $(D string2wstringSize) on your source buffer.
26 | *
27 | * Params:
28 | * src = The UTF-8 string to convert. It is assumed to be well-formed; no validation is done.
29 | * dst = The destination buffer for the conversion.
30 | *
31 | * Returns:
32 | * The part of the destination buffer used for the conversion as a $(D wchar[]).
33 | * A terminating zero is appended, so the result.ptr can be passed into Windows APIs.
34 | */
35 | pure
36 | wchar[] string2wstring(in char[] src, wchar* dst)
37 | {
38 | const char* srcEnd = src.ptr + src.length;
39 | const(char)* srcIt = src.ptr;
40 | wchar* dstIt = dst;
41 |
42 | while (srcIt !is srcEnd)
43 | {
44 | // how long is the byte sequence (count the leading 1 bits of the first byte)
45 | int len = 0;
46 | uint mask = 0b1000_0000;
47 | while(*srcIt & mask)
48 | {
49 | mask >>= 1;
50 | len++;
51 | }
52 |
53 | // get payload of first byte
54 | dchar ch = *srcIt++ & (mask - 1);
55 |
56 | while (--len > 0)
57 | {
58 | // make space for 6 more bits
59 | ch <<= 6;
60 | ch |= *srcIt++ & 0b0011_1111;
61 | }
62 |
63 | // do we need to store a surrogate pair ?
64 | static if (is(wchar == dchar))
65 | {
66 | *dstIt++ = ch;
67 | }
68 | else if (ch > wchar.max)
69 | {
70 | *dstIt++ = cast(wchar) (0xD800 | (ch - 0x1_0000) >> 10); // high surrogate: top 10 bits of (code point - 0x10000)
71 | *dstIt++ = (ch & 0b11_1111_1111) | 0xDC00; // low surrogate: low 10 bits, which the 0x10000 offset leaves untouched
72 | }
73 | else
74 | {
75 | *dstIt++ = cast(wchar) ch;
76 | }
77 | }
78 | *dstIt = 0; // terminator; not part of the returned slice
79 |
80 | return dst[0 .. dstIt - dst];
81 | }
82 | pure // Overload for input that already consists of UTF-16 code units: copies them to `dst` unchanged and returns the copy.
83 | wchar[] string2wstring(in ushort[] src, wchar* dst)
84 | {
85 | memcpy(dst, src.ptr, wchar.sizeof * src.length); // memcpy counts bytes, not code units
86 | return dst[0 .. src.length]; // NOTE(review): unlike the char[] overload no terminating zero is written — confirm callers do not rely on one
87 | }
88 | /**
89 | * Computes how many bytes a destination buffer needs for converting a string to wchar[].
90 | * The terminating '\0' is accounted for.
91 | *
92 | * Params:
93 | * src = The string that is going to be converted.
94 | *
95 | * Returns:
96 | * An upper bound of the bytes needed for the converted string plus its terminating '\0',
97 | * or $(D size_t.max) if that number does not fit into a $(D size_t).
98 | * See_Also:
99 | * string2wstring
100 | *
101 | */
102 | size_t string2wstringSize(in char[] src) @safe pure
103 | {
104 | enum maxUnits = size_t.max / wchar.sizeof - 1; // longest input whose result size still fits
105 | if (src.length > maxUnits) return size_t.max;
106 | return (src.length + 1) * wchar.sizeof;
107 | }
108 | size_t string2wstringSize(in ushort[] src) @safe pure // same bound for input given as UTF-16 code units
109 | {
110 | enum maxUnits = size_t.max / wchar.sizeof - 1; // longest input whose result size still fits
111 | if (src.length > maxUnits) return size_t.max;
112 | return (src.length + 1) * wchar.sizeof;
113 | }
114 |
115 |
116 | /**
117 | * Converts a wstring to a string using a buffer provided by the user.
118 | * To get the buffer requirements call $(D wstring2stringSize) on your source buffer.
119 | *
120 | * Params:
121 | * src = The UTF-16 string to convert. It is assumed to be well-formed; no validation is done.
122 | * dst = The destination buffer for the conversion.
123 | *
124 | * Returns:
125 | * The part of the destination buffer used for the conversion as a $(D char[]).
126 | * A terminating zero is appended, so the result.ptr can be passed into C APIs.
127 | */
128 | pure
129 | char[] wstring2string(in wchar[] src, char* dst)
130 | {
131 | const wchar* srcEnd = src.ptr + src.length;
132 | const(wchar)* srcIt = src.ptr;
133 | char* dstIt = dst;
134 |
135 | while (srcIt !is srcEnd)
136 | {
137 | if (*srcIt < 0x80)
138 | {
139 | *dstIt++ = cast(char) *srcIt++;
140 | }
141 | else if (*srcIt < 0x800)
142 | {
143 | *dstIt++ = cast(char) (0b_11000000 | *srcIt >> 6);
144 | *dstIt++ = 0b_10000000 | 0b_00111111 & *srcIt++;
145 | }
146 | else if (*srcIt < 0xD800 || *srcIt > 0xDBFF) // must be `else if`: exactly one branch may consume the current code unit
147 | {
148 | // anything else within the BMP (<= 0xFFFF), but not a high surrogate
149 | *dstIt++ = 0b_11100000 | *srcIt >> 12;
150 | *dstIt++ = 0b_10000000 | 0b_00111111 & *srcIt >> 6;
151 | *dstIt++ = 0b_10000000 | 0b_00111111 & *srcIt++;
152 | }
153 | else
154 | {
155 | // high surrogate, assume correct encoding and that the next wchar is the low surrogate
156 | dchar decoded;
157 | decoded = 0x1_0000 + ((*srcIt++ & 0b11_1111_1111) << 10); // surrogate pairs encode (code point - 0x10000)
158 | decoded |= (*srcIt++ & 0b11_1111_1111);
159 | *dstIt++ = 0b_11110000 | decoded >> 18;
160 | *dstIt++ = 0b_10000000 | 0b_00111111 & decoded >> 12;
161 | *dstIt++ = 0b_10000000 | 0b_00111111 & decoded >> 6;
162 | *dstIt++ = 0b_10000000 | 0b_00111111 & decoded;
163 | }
164 | }
165 | *dstIt = 0; // terminator; not part of the returned slice
166 |
167 | return dst[0 .. dstIt - dst];
168 | }
169 |
/**
 * Computes how many bytes a destination buffer needs so that $(D wstring2string) can convert
 * the given wstring into it. The terminating '\0' is accounted for.
 *
 * Params:
 *   src = The source string.
 *
 * Returns:
 *   An upper bound (three bytes per source $(D wchar) plus the terminating '\0') for the
 *   byte count of the converted string. If that number is not representable,
 *   $(D size_t.max) is returned.
 *
 * See_Also:
 *   wstring2string
 *
 */
@safe pure
size_t wstring2stringSize(in wchar[] src)
{
    // Largest source length for which the computation below cannot overflow.
    enum maxLength = (size_t.max / char.sizeof - 1) / 3;

    if (src.length > maxLength)
        return size_t.max;

    immutable bytes = 3 * src.length + 1;
    return char.sizeof * bytes;
}
190 |
191 |
/**
 * Replaces $(D std.utf.toUTFz) with a version that uses the stack as long as the required bytes for the output are
 * within $(D allocaLimit). Longer strings use $(D malloc) to create a buffer for the conversion. According to the
 * original author it is freed at the latest at the end of the scope; that is the job of the returned
 * $(D TempBuffer), whose implementation lives outside this function.
 *
 * Params:
 *   str = The source string to convert.
 *   buffer = Conversion buffer. The default argument is evaluated at the call site, so the $(D alloca) memory
 *            belongs to the caller's stack frame. $(D null) selects the $(D malloc) path.
 *
 * Returns:
 *   A $(D TempBuffer!wchar) wrapping the converted, zero-terminated string.
 *
 * See_Also:
 *   toWstring
 *
 * Example:
 * ---
 * string text = "Hello, world!";
 * WinApiW(wcharPtr!text);
 * ---
 */
auto wcharPtr(alias str)(void* buffer = string2wstringSize(str) <= allocaLimit ? alloca(string2wstringSize(str)) : null)
{
    // In any case we have to return a proper TempBuffer, so that free() is called in the dtor at some point.
    // The second argument is true exactly when the memory came from malloc().
    // NOTE(review): presumably that flag tells TempBuffer to free() the memory - confirm in fast.buffer.
    return TempBuffer!wchar(
        string2wstring(str, cast(wchar*) (buffer ? buffer : malloc(string2wstringSize(str)))),
        buffer is null);
}
216 |
/// ditto
// Overload for wstrings whose value is known at compile time (the constraint checks that the
// argument can initialize an enum). No conversion or copy takes place, the pointer to the
// string's own storage is returned.
immutable(wchar)* wcharPtr(alias wstr)()
    if (is(typeof(wstr) == wstring) && __traits(compiles, { enum wstring e = wstr; }))
{
    // D string literals (known at compile time) are always \0-terminated.
    return wstr.ptr;
}
224 |
/**
 * $(D char*) version of $(D wcharPtr). Basically it appends a \0 to the input.
 * Short strings are copied into stack memory obtained from $(D alloca). Once the string length reaches
 * $(D allocaLimit) the function uses $(D malloc) instead, so long inputs cannot exhaust the stack.
 *
 * Params:
 *   str = The source string to convert to a C UTF-8 string
 *   buffer = Destination memory with room for $(D str.length + 1) bytes. The default argument is evaluated
 *            at the call site, so the $(D alloca) memory belongs to the caller's stack frame.
 *            $(D null) selects the $(D malloc) path.
 *
 * Returns:
 *   A $(D TempBuffer!char) wrapping the zero-terminated copy.
 *
 * Note:
 *   Do not use this to call Windows ANSI functions! Always use wide-char
 *   functions on this operating system unless you want to deal with codepages.
 *
 * Example:
 * ---
 * string text = "Hello, world!";
 * linuxApi(charPtr!text);
 * ---
 */
auto charPtr(alias str)(void* buffer = str.length < allocaLimit ? alloca(str.length + 1) : null)
    if (is(typeof(str) : const(char)[]) || is(typeof(str) : const(ubyte)[]))
{
    char* dst = cast(char*) memcpy(buffer ? buffer : malloc(str.length + 1), str.ptr, str.length);
    dst[str.length] = '\0';
    // The second argument is true exactly when the memory came from malloc().
    return TempBuffer!char(dst[0 .. str.length], buffer is null);
}
249 |
/// ditto
// Overload for strings whose value is known at compile time (the constraint checks that the
// argument can initialize an enum). No copy takes place, the pointer to the string's own
// storage is returned.
immutable(char)* charPtr(alias str)()
    if (__traits(compiles, { enum string e = str; }))
{
    // D string literals (known at compile time) are always \0-terminated.
    return str.ptr;
}
257 |
/**
 * This overload allocates the required memory from an existing stack buffer.
 * The string is copied into that memory and a terminating \0 is appended.
 *
 * Params:
 *   str = The source string to convert to a C UTF-8 string
 *   sb = The stack buffer to allocate from
 *
 * Returns:
 *   The $(D StackBufferEntry!char) obtained from $(D sb), holding $(D str.length + 1) chars.
 *
 * Note:
 *   Always assign the result to an auto variable first for RAII to work correctly.
 */
StackBufferEntry!char charPtr(SB)(const(char)[] str, ref SB sb)
    if (is(SB == StackBuffer!bytes, bytes...))
{
    // One extra char for the terminator.
    auto buffer = sb.alloc!char(str.length + 1);
    memcpy(buffer.ptr, str.ptr, str.length);
    buffer[str.length] = '\0';
    return buffer;
}
276 |
/**
 * Views a zero-terminated C string as a D slice.
 *
 * Params:
 *   ptr = Pointer to a \0-terminated string, may be $(D null).
 *
 * Returns:
 *   The chars from $(D ptr) up to but not including the \0 as a $(D char[]),
 *   or $(D null) if $(D ptr) is $(D null).
 */
inout(char)[] asString(inout(char*) ptr) @trusted pure
{
    return ptr is null ? null : ptr[0 .. strlen(ptr)];
}
--------------------------------------------------------------------------------
/source/fast/format.d:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | *
3 | * Functions for formatting data into strings and back.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************/
15 | module fast.format;
16 |
17 | import core.stdc.stdlib;
18 | import core.stdc.string;
19 | import core.bitop;
20 | import std.string;
21 | import std.traits;
22 | import std.typecons;
23 | import std.typetuple;
24 | import fast.internal.helpers;
25 |
26 |
27 | /+
28 | ╔══════════════════════════════════════════════════════════════════════════════
29 | ║ ⚑ Hex String
30 | ╚══════════════════════════════════════════════════════════════════════════════
31 | +/
32 |
/**
 * Converts an unsigned integer into a fixed width hex string using lower-case letters.
 * The width is two digits per byte of $(D U), e.g. 8 digits for a $(D uint). Leading zeros are kept.
 *
 * Params:
 *   n = the number to convert
 *
 * Returns:
 *   hexadecimal representation of $(D n), lower-case letters
 */
@safe pure nothrow @nogc
char[2 * U.sizeof] hexStrLower(U)(Unqual!U n) if (isUnsigned!U)
{
    static immutable char[16] digits = "0123456789abcdef";

    char[2 * U.sizeof] hex = void;
    // Fill from the least significant nibble at the back to the most significant at the front.
    for (size_t pos = hex.length; pos != 0; n >>= 4)
        hex[--pos] = digits[n & 0xF];
    return hex;
}
54 |
55 |
/**
 * Converts an unsigned integer into a fixed width hex string using upper-case letters.
 * The width is two digits per byte of $(D U), e.g. 8 digits for a $(D uint). Leading zeros are kept.
 *
 * Params:
 *   n = the number to convert
 *
 * Returns:
 *   hexadecimal representation of $(D n), upper-case letters
 */
@safe pure nothrow @nogc
char[2 * U.sizeof] hexStrUpper(U)(U n) if (isUnsigned!U)
{
    static immutable char[16] digits = "0123456789ABCDEF";

    char[2 * U.sizeof] hex = void;
    // Fill from the least significant nibble at the back to the most significant at the front.
    for (size_t pos = hex.length; pos != 0; n >>= 4)
        hex[--pos] = digits[n & 0xF];
    return hex;
}
77 |
78 |
79 | /+
80 | ╔══════════════════════════════════════════════════════════════════════════════
81 | ║ ⚑ Decimal String
82 | ╚══════════════════════════════════════════════════════════════════════════════
83 | +/
84 |
/**
 * Maximum number of decimal digits needed to print any value of the integral type $(D T).
 * A minus sign is not included. Only the unqualified built-in types from $(D byte) to
 * $(D ulong) are covered.
 */
template decDigits(T) if (isIntegral!T)
{
    static if (is(T == byte) || is(T == ubyte))
        enum decDigits = 3;     // 255
    else static if (is(T == short) || is(T == ushort))
        enum decDigits = 5;     // 65535
    else static if (is(T == int) || is(T == uint))
        enum decDigits = 10;    // 4294967295
    else static if (is(T == long))
        enum decDigits = 19;    // 9223372036854775807
    else static if (is(T == ulong))
        enum decDigits = 20;    // 18446744073709551615
}
98 |
99 |
/// Maximum number of chars needed to print any value of $(D T) in decimal, including a minus sign for signed types.
enum decChars(T) = isSigned!T ? decDigits!T + 1 : decDigits!T;
101 |
102 |
/**
 * Converts an integer into its decimal string representation without allocating.
 *
 * Params:
 *   i = the number to convert
 *
 * Returns:
 *   A $(D RevFillStr) holding the digits of $(D i), preceded by a '-' for negative numbers.
 */
@safe pure nothrow @nogc
RevFillStr!(decChars!I) decStr(I)(I i) if (isIntegral!I)
{
    RevFillStr!(decChars!I) result;

    static if (isSigned!I)
    {
        // Work on the magnitude and remember the sign for later.
        immutable bool negative = i < 0;
        UnsignedOf!I magnitude = i < 0 ? -i : i;
    }
    else alias magnitude = i;

    // The buffer is filled back to front, so the least significant digit is appended first.
    for (;;)
    {
        result ~= char('0' + magnitude % 10);
        magnitude /= 10;
        if (!magnitude)
            break;
    }

    static if (isSigned!I) if (negative)
        result ~= '-';

    return result;
}
128 |
129 |
130 | /+
131 | ╔══════════════════════════════════════════════════════════════════════════════
132 | ║ ⚑ Formatting
133 | ╚══════════════════════════════════════════════════════════════════════════════
134 | +/
135 |
/// Evaluates to $(D true) for types whose formatted length has a compile-time upper bound: integers and pointers.
enum hasKnownSpaceRequirement(T) = isIntegral!T || isPointer!T;
143 |
144 |
/**
 * Upper bound for the number of chars that an argument of type $(D T) occupies when it is
 * written with the format specifier $(D format).
 *
 * Integers support "%s" and "%d"; unsigned integers additionally "%x" and "%X".
 * Pointers support "%s" and "%p". Any other combination is rejected at compile time.
 */
template spaceRequirement(string format, T) if (hasKnownSpaceRequirement!T)
{
    static if (isPointer!T)
    {
        // Pointers are printed as fixed width hex numbers.
        static if (format == "%s" || format == "%p")
            enum spaceRequirement = 2 * T.sizeof;
        else static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
    }
    else static if (isIntegral!T)
    {
        static if (format == "%s" || format == "%d")
            enum spaceRequirement = decChars!T;
        else static if (isUnsigned!T && (format == "%x" || format == "%X"))
            enum spaceRequirement = 2 * T.sizeof;
        else static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
    }
    else static assert (0, "Don't know how to handle " ~ T.stringof);
}
163 |
164 |
/**
 * Upper bound for the number of chars needed to format arguments of the types $(D Args)
 * with the format string $(D format): the literal text plus the space requirement of
 * every formatted argument.
 */
enum spaceRequirements(string format, Args...)() if (allSatisfy!(hasKnownSpaceRequirement, Args))
{
    size_t total = 0;

    alias tokens = tokenizedFormatString!format;
    foreach (idx; staticIota!(0, tokens.length))
    {
        // An argument index of size_t.max marks literal text.
        static if (tokens[idx][1] != size_t.max)
            total += spaceRequirement!(tokens[idx][0], Args[tokens[idx][1]]);
        else
            total += tokens[idx][0].length;
    }

    return total;
}
180 |
181 |
/**
 * Splits a format string at compile time into literal text and format specifiers.
 *
 * The result is a static array of $(D Tuple!(string, size_t)). For literal text the second
 * field is $(D size_t.max). For a format specifier the first field is the two-character
 * specifier (e.g. "%s") and the second field is the running index of the argument it formats.
 */
template tokenizedFormatString(string format)
{
    enum impl()
    {
        Tuple!(string, size_t)[] tokens;
        size_t argIdx = 0;
        string remaining = format;

        for (;;)
        {
            immutable ptrdiff_t marker = remaining.indexOf("%");
            if (marker < 0)
            {
                // No more specifiers; whatever is left is literal text.
                if (remaining.length)
                    tokens ~= tuple(remaining, size_t.max);
                return tokens;
            }

            if (marker > 0)
            {
                // Literal text in front of the specifier.
                tokens ~= tuple(remaining[0 .. marker], size_t.max);
                remaining = remaining[marker .. $];
            }

            // TODO: more complex formats
            // Every specifier is assumed to be exactly two characters long.
            tokens ~= tuple(remaining[0 .. 2], argIdx);
            ++argIdx;
            remaining = remaining[2 .. $];
        }
    }

    enum result = impl();
    static immutable Tuple!(string, size_t)[result.length] tokenizedFormatString = result;
}
211 |
212 |
/// Number of arguments that the format string $(D format) consumes: the highest referenced argument index plus one.
enum formatStringArgCount(string format)()
{
    size_t argCount = 0;

    alias tokens = tokenizedFormatString!format;
    foreach (idx; staticIota!(0, tokens.length))
    {
        // size_t.max marks literal text, which consumes no argument.
        if (tokens[idx][1] == size_t.max)
            continue;
        if (tokens[idx][1] >= argCount)
            argCount = tokens[idx][1] + 1;
    }

    return argCount;
}
224 |
225 |
/**
 * Formats the arguments according to the compile-time format string $(D fmt) into a new $(D string).
 *
 * The template generates and mixes in a function of the form
 * ---
 * pure nothrow string format(A0, A1)(A0 a0, A1 a1, char[] buffer = new char[](spaceRequirements!(fmt, A0, A1)))
 * ---
 * with one type and one value parameter per argument of the format string. The buffer is sized
 * for the worst case and the slice actually written is returned as a $(D string).
 */
template format(string fmt)
{
    import std.exception;

    enum argCnt = formatStringArgCount!fmt;

    // Builds the source code of the function described above.
    enum codeGen()
    {
        string code = `pure nothrow string format(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s", i);
        }
        code ~= `)(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s a%s", i, i);
        }
        // Only separate the buffer parameter with a comma if value parameters precede it,
        // otherwise a format string without arguments yields a syntax error.
        if (argCnt) code ~= `, `;
        code ~= `char[] buffer = new char[](spaceRequirements!(fmt`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", A%s", i);
        code ~= `))) { return assumeUnique(formattedWrite!fmt(buffer.ptr`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", a%s", i);
        code ~= `)); }`;
        return code;
    }

    mixin(codeGen());
}
258 |
259 |
/**
 * Formats the arguments according to the compile-time format string $(D fmt) into memory
 * obtained from $(D alloca).
 *
 * The template generates and mixes in a function of the form
 * ---
 * pure nothrow @nogc char[] formata(A0, A1)(A0 a0, A1 a1, void* buffer = alloca(spaceRequirements!(fmt, A0, A1)))
 * ---
 * with one type and one value parameter per argument of the format string. The $(D alloca)
 * call is a default argument, which D evaluates at the call site.
 */
template formata(string fmt)
{
    enum argCnt = formatStringArgCount!fmt;

    // Builds the source code of the function described above.
    enum codeGen()
    {
        string code = `pure nothrow @nogc char[] formata(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s", i);
        }
        code ~= `)(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s a%s", i, i);
        }
        // Only separate the buffer parameter with a comma if value parameters precede it,
        // otherwise a format string without arguments yields a syntax error.
        if (argCnt) code ~= `, `;
        code ~= `void* buffer = alloca(spaceRequirements!(fmt`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", A%s", i);
        code ~= `))) { return formattedWrite!fmt(cast(char*) buffer`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", a%s", i);
        code ~= `); }`;
        return code;
    }

    mixin(codeGen());
}
290 |
291 |
/**
 * Formats the arguments according to the compile-time format string $(D fmt) into a
 * $(D LimitedScopeBuffer) that is returned by value.
 *
 * The template generates and mixes in a function of the form
 * ---
 * @safe pure nothrow @nogc auto formats(A0, A1)(A0 a0, A1 a1)
 * {
 *     LimitedScopeBuffer!(char, spaceRequirements!(fmt, A0, A1)) buffer;
 *     buffer.length = formattedWrite!fmt(buffer.ptr, a0, a1).length;
 *     return buffer;
 * }
 * ---
 * with one type and one value parameter per argument of the format string.
 */
template formats(string fmt)
{
    enum argCnt = formatStringArgCount!fmt;

    // Builds the source code of the function described above.
    // NOTE(review): the generated function is declared @safe, but formattedWrite does pointer
    // arithmetic and is not marked @trusted - confirm that instantiations pass the safety check.
    enum codeGen()
    {
        string code = `@safe pure nothrow @nogc auto formats(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s", i);
        }
        code ~= `)(`;
        foreach (i; staticIota!(0, argCnt))
        {
            if (i) code ~= `, `;
            code ~= std.string.format("A%s a%s", i, i);
        }
        // A single parenthesis closes the parameter list. There is no default argument here
        // whose nested parentheses would have to be closed as well.
        code ~= `) { LimitedScopeBuffer!(char, spaceRequirements!(fmt`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", A%s", i);
        code ~= `)) buffer; buffer.length = formattedWrite!fmt(buffer.ptr`;
        foreach (i; staticIota!(0, argCnt))
            code ~= std.string.format(", a%s", i);
        code ~= `).length; return buffer; }`;
        return code;
    }

    mixin(codeGen());
}
322 |
323 |
/**
 * Writes the arguments into a raw buffer according to the compile-time format string.
 * The buffer size is not checked. Use $(D spaceRequirements) to size it.
 *
 * Params:
 *   buffer = start of the destination memory
 *   args = the values to format, one per format specifier
 *
 * Returns:
 *   The part of $(D buffer) that was written to.
 */
char[] formattedWrite(string format, Args...)(char* buffer, Args args)
{
    char* cursor = buffer;

    alias tokens = tokenizedFormatString!format;
    foreach (idx; staticIota!(0, tokens.length))
    {
        static if (tokens[idx][1] != size_t.max)
        {
            // Formatted argument; the callee advances the cursor.
            formattedWriteItem!(tokens[idx][0])( cursor, args[tokens[idx][1]] );
        }
        else
        {
            // Direct string copy
            enum literal = tokens[idx][0];
            memcpy( cursor, literal.ptr, literal.length );
            cursor += literal.length;
        }
    }

    return buffer[0 .. cursor - buffer];
}
346 |
347 |
/// Writes $(D t) as a fixed width lower-case hex number at $(D buffer) and advances $(D buffer) past it.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
    if (isUnsigned!T && format == "%x")
{
    immutable hex = hexStrLower!T(t);
    buffer[0 .. hex.length] = hex[];
    buffer += hex.length;
}
356 |
357 |
/// Writes $(D t) as a fixed width upper-case hex number at $(D buffer) and advances $(D buffer) past it.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
    if (isUnsigned!T && format == "%X")
{
    immutable hex = hexStrUpper!T(t);
    buffer[0 .. hex.length] = hex[];
    buffer += hex.length;
}
366 |
367 |
/// Writes $(D t) as a decimal number at $(D buffer) and advances $(D buffer) past it.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
    if (isIntegral!T && (format == "%s" || format == "%d"))
{
    auto digits = decStr(t);
    immutable count = digits.length;
    memcpy( buffer, digits.ptr, count );
    buffer += count;
}
376 |
377 |
/// Writes the address held by $(D p) as a fixed width upper-case hex number at $(D buffer) and advances $(D buffer) past it.
pure nothrow @nogc
void formattedWriteItem(string format)(ref char* buffer, void* p)
    if (format == "%s" || format == "%p")
{
    immutable address = cast(size_t) p;
    formattedWriteItem!"%X"( buffer, address );
}
384 |
385 |
386 | /+
387 | ╔══════════════════════════════════════════════════════════════════════════════
388 | ║ ⚑ Helper Structs
389 | ╚══════════════════════════════════════════════════════════════════════════════
390 | +/
391 |
/**
 * Fixed capacity string of at most $(D n) chars that is filled from the back to the front:
 * every appended char is placed in front of the ones stored before. This suits algorithms
 * that produce the least significant digit of a number first.
 */
struct RevFillStr(size_t n)
{
private:

    // Index of the first used char; equals `n` while nothing is stored.
    size_t offset = n;
    char[n] buffer = '\0';


public:

    alias opSlice this;

    /// Number of chars stored so far.
    @safe pure nothrow @nogc
    @property size_t length() const
    {
        return n - offset;
    }


    /// Pointer to the first stored char. Indexes the buffer at $(D offset), so at least one char must be stored.
    @safe pure nothrow @nogc
    @property inout(char)* ptr() inout
    {
        return &buffer[offset];
    }


    /// The stored chars as a slice of the internal buffer.
    @safe pure nothrow @nogc
    @property inout(char)[] opSlice() inout
    {
        return buffer[offset .. $];
    }


    /// Places $(D ch) in front of the chars stored so far. The buffer must not be full.
    @safe pure nothrow @nogc
    void opOpAssign(string op : "~")(char ch)
    in
    {
        assert( offset > 0 );
    }
    body
    {
        offset -= 1;
        buffer[offset] = ch;
    }
}
--------------------------------------------------------------------------------
/source/fast/internal/benchmarks.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Internal benchmark module.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************************************************************************/
15 | module fast.internal.benchmarks;
16 |
17 | version (benchmark):
18 |
/**
 * Entry point of the benchmark build (`version (benchmark)`).
 *
 * Sets up the shared test data and then runs one comparison per routine. In each `run` call
 * the first `benchmark` is the reference implementation and the following ones are compared
 * against it. The second argument of `run` is the value every benchmarked function is
 * expected to return.
 */
void main()
{
    import std.stdio;
    import core.stdc.string, core.stdc.stddef, core.stdc.stdlib;
    import std.array, std.stdio, std.algorithm, std.regex, std.utf, std.conv, std.string, std.range;
    import fast.string, fast.cstring, fast.buffer, fast.format, fast.json;
    import std.format : formattedWrite;

    // 256 numbers spread evenly over the ulong range: i * 2^56 for i in 0 .. 256.
    static immutable nums = { ulong[1uL << 8] nums = void; foreach (i; 0 .. nums.length) nums[i] = (1uL << (64 - 8)) * i; return nums; }();
    // Three path fragments for the concatenation benchmark.
    static immutable part1 = "C:\\";
    static immutable part2 = "Documents and Settings\\User\\My Documents\\My Downloads\\";
    static immutable part3 = "Fast.zip";
    // Path with mixed separators for the conversion and split benchmarks.
    static immutable pathname = "hello/i_am_a/path_name\\with_several_different\\slashes";
    // Long text with an explicit trailing \0 that contains one '&' and one '"' as split points.
    static immutable zeroterm = "wefwfnqwefnw(eknwoemkf)moorroijqwoijq&oqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwefnw(eknwoemkf)moorroijqwoihqioqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwef\"w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0";
    static pathSepRegex = ctRegex!`[/\\]`;
    // Length of `pathname` in wchar_t units; index of the terminator in the converted string.
    enum pathnameWStringLength = to!(immutable(wchar_t)[])(pathname).length;

    unicode();

    jsonCoordinates!true();
    jsonCoordinates!false();

    // Each function sums up the char at index 9 (the first decimal digit) of every formatted string.
    run ("Format strings for integers...", 13093,
        benchmark ("std.*.format", () { uint check; foreach (ulong num; nums) { string str = format("decimal: %s, hex: %x", num, num); check += str[9]; } return check; } ),
        benchmark ("fast.*.format", () { uint check; foreach (ulong num; nums) { string str = fast.format.format!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ),
        benchmark ("fast.*.formata", () { uint check; foreach (ulong num; nums) { char[] str = formata!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ),
        );

    // Each function sums up the first hex digit of every number in a (wrapping) char.
    run ("Convert 256 numbers to fixed width hex strings...", 0x20,
        benchmark ("std.*.formattedWrite", () { Appender!(char[]) app; app.reserve(16); char check = 0; foreach (ulong num; nums) { app.formattedWrite("%016X", num); check += app.data[0]; app.clear(); } return check; }),
        benchmark ("fast.*.hexStrUpper", () { char[16] str; char check = 0; foreach (ulong num; nums) { str = hexStrUpper(num); check += str[0]; } return check; }),
        );

    run ("Concatenate a known number of strings...", part1.length + part2.length + part3.length,
        benchmark ("std.array.appender", () { auto app = appender(part1); app ~= part2; app ~= part3; return app.data.length; }),
        benchmark ("~", () { string path = part1 ~ part2 ~ part3; return path.length; }),
        benchmark ("fast.string.concat", () { size_t length; { auto path = concat!(part1, part2, part3); length = path.length; } return length; }),
        );

    // The "new" variant relies on char.init being 0xFF instead of an explicit memset.
    run ("Allocate a temporary char buffer and fill it with 0xFF...", '\xFF',
        benchmark ("new", () { auto str = new char[](zeroterm.length); return str[$-1]; }),
        benchmark ("malloc", () { auto ptr = cast(char*) malloc(zeroterm.length); scope(exit) free(ptr); memset(ptr, 0xFF, zeroterm.length); return ptr[zeroterm.length-1]; }),
        benchmark ("fast.buffer.tempBuffer", () { char result; { auto buf = tempBuffer!(char, zeroterm.length); memset(buf, 0xFF, zeroterm.length); result = buf[$-1]; } return result; }),
        );

    // Both functions return the terminator that follows the converted string.
    run("Convert a string to a wchar*...", wchar('\0'),
        benchmark ("toUTFz", () { return toUTFz!(wchar*)(pathname)[pathnameWStringLength]; }),
        benchmark ("cstring.wcharPtr", () { wchar result; { auto buf = wcharPtr!pathname; result = buf.ptr[pathnameWStringLength]; } return result; }),
        );

    run("Convert a string to a char*...", '\0',
        benchmark ("toUTFz", () { return toUTFz!(char*)(pathname)[pathname.length]; }),
        benchmark ("toStringz", () { return cast(char) toStringz(pathname)[pathname.length]; }),
        benchmark ("cstring.charPtr", () { return cast(char) charPtr!pathname[pathname.length]; }),
        );

    // The expected value is the part of `zeroterm` behind its last split character.
    run ("Split a string at each occurance of <, >, & and \"...", "w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0",
        benchmark (`while+if with 4 cond.`, () { string before; immutable(char*) stop = zeroterm.ptr + zeroterm.length; immutable(char)* iter = zeroterm.ptr; immutable(char)* done = zeroterm.ptr; if (iter !is stop) do { char c = *iter++; if (c == '<' || c == '>' || c == '&' || c == '"') { before = done[0 .. iter - done]; done = iter; }} while (iter !is stop); return done[0 .. stop - done]; }),
        benchmark ("fast.string.split", () { string before, after = zeroterm; while (fast.string.split!`or(or(=<,=>),or(=&,="))`(after, before, after)) {} return before; }),
        );

    // The expected value is the last path component.
    run ("Split a path by '/' or '\\'...", "slashes",
        benchmark ("std.regex.split", () { return split(pathname, pathSepRegex)[$-1]; }),
        benchmark ("std.regex.splitter", () { string last; auto range = splitter(pathname, pathSepRegex); while (!range.empty) { last = range.front; range.popFront(); } return last; }),
        benchmark ("fast.string.split", () { string before, after = pathname; while (fast.string.split!`or(=\,=/)`(after, before, after)) {} return before; }),
        );

    writeln("Benchmark done!");
}
88 |
89 |
90 |
91 | private:
92 |
93 | void unicode()
94 | {
95 | import std.range, std.uni, std.string, std.meta;
96 | import fast.unicode;
97 |
98 | static immutable string devanagari = cast(string)"तदपि कही गुर बारंिह बारा। समुझि परी कछु मति अनुसारा।।
99 | भाषाबद्ध करबि मैं सोई। मोरें मन प्रबोध जेंिह होई।।
100 | जस कछु बुधि बिबेक बल मेरें। तस कहिहउँ हियँ हरि के प्रेरें।।
101 | निज संदेह मोह भ्रम हरनी। करउँ कथा भव सरिता तरनी।।
102 | बुध बिश्राम सकल जन रंजनि। रामकथा कलि कलुष बिभंजनि।।
103 | रामकथा कलि पंनग भरनी। पुनि बिबेक पावक कहुँ अरनी।।
104 | रामकथा कलि कामद गाई। सुजन सजीवनि मूरि सुहाई।।
105 | सोइ बसुधातल सुधा तरंगिनि। भय भंजनि भ्रम भेक भुअंगिनि।।
106 | असुर सेन सम नरक निकंदिनि। साधु बिबुध कुल हित गिरिनंदिनि।।
107 | संत समाज पयोधि रमा सी। बिस्व भार भर अचल छमा सी।।
108 | जम गन मुहँ मसि जग जमुना सी। जीवन मुकुति हेतु जनु कासी।।
109 | रामहि प्रिय पावनि तुलसी सी। तुलसिदास हित हियँ हुलसी सी।।
110 | सिवप्रय मेकल सैल सुता सी। सकल सिद्धि सुख संपति रासी।।
111 | सदगुन सुरगन अंब अदिति सी। रघुबर भगति प्रेम परमिति सी।।
112 | ".representation.repeat(10).join.array();
113 | static immutable string latin = "A gory knife had been found close to the murdered man, and it had been
114 | recognized by somebody as belonging to Muff Potter--so the story ran.
115 | And it was said that a belated citizen had come upon Potter washing
116 | himself in the \"branch\" about one or two o'clock in the morning, and
117 | that Potter had at once sneaked off--suspicious circumstances,
118 | especially the washing which was not a habit with Potter. It was also
119 | said that the town had been ransacked for this \"murderer\" (the public
120 | are not slow in the matter of sifting evidence and arriving at a
121 | verdict), but that he could not be found. Horsemen had departed down
122 | all the roads in every direction, and the Sheriff \"was confident\" that
123 | he would be captured before night.
124 | ".repeat(10).join.array();
125 |
// Compares Phobos' grapheme walker with fast.unicode's countGraphemes on the given text.
//
// Params:
//   text = (template alias) the string to count graphemes in; its symbol name is used in the title
//   count = the number of graphemes both implementations are expected to return
void benchCountGraphemes(alias text)(size_t count)
{
    run ("Count graphemes in " ~ text.stringof ~ " text...", count,
        benchmark ("byGrapheme.walkLength", () { return text.byGrapheme.walkLength(); }),
        benchmark ("fast.graphemeCount", () { return text.countGraphemes(); }),
        );
}
133 | benchCountGraphemes!devanagari(5430);
134 | benchCountGraphemes!latin(7210);
135 | }
136 |
137 |
/**
 * Benchmarks JSON parsing with std.json against fast.json.
 *
 * A JSON document with `coordCount` coordinate objects is generated from a fixed random seed.
 * Every benchmarked function parses it and returns the averages of the "x", "y" and "z" values
 * as a tuple.
 *
 * Params:
 *   integral = (template parameter) `true` generates and sums integer coordinates,
 *              `false` floating-point coordinates
 */
void jsonCoordinates(bool integral)()
{
    // A variant of https://github.com/kostya/benchmarks with fewer coordinate tuples,
    // since we repeat the test runs until a time span of one second passed.
    import core.memory;
    import std.algorithm;
    import std.ascii;
    import std.format;
    import std.random;
    import std.range;
    import std.typecons;
    import fast.internal.sysdef;

    enum coordCount = 10_000;
    // Fixed seed, so every run generates the same document.
    auto rng = Mt19937(0);
    // Static storage lets the function literals below access the text without a closure.
    __gshared string text = "{\n \"coordinates\": [\n";
    foreach (i; 0 .. coordCount)
    {
        static if (integral)
        {
            text ~= format(" {\n \"x\": %s,\n \"y\": %s,\n \"z\": %s,\n" ~
                " \"name\": \"%s %s\",\n \"opts\": {\n \"1\": [\n 1,\n true\n" ~
                " ]\n }\n }", uniform(0, 10_000, rng), uniform(0, 10_000, rng), uniform(0, 10_000, rng),
                iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng));
        }
        else
        {
            text ~= format(" {\n \"x\": %.17g,\n \"y\": %.17g,\n \"z\": %.17g,\n" ~
                " \"name\": \"%s %s\",\n \"opts\": {\n \"1\": [\n 1,\n true\n" ~
                " ]\n }\n }", uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng),
                iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng));
        }
        // No comma behind the last array element.
        text ~= (i == coordCount - 1) ? "\n" : ",\n";
    }
    // 16 zero bytes are appended and then sliced off again, so they remain in memory behind the text.
    // NOTE(review): presumably padding that fast.json may read past the end of the text - confirm.
    text ~= " ],\n \"info\": \"some info\"\n}\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
    text = text[0 .. $-16];

    GC.collect();

    // Dlang on x86 with optimizations rounds up double additions.
    // The expected averages therefore depend on compiler, architecture and build type.
    // NOTE(review): isDMD, isGDC, isX86 and isRelease presumably come from fast.internal.sysdef - confirm.
    static if (integral)
    {
        version (X86)
            enum expect = tuple(4986L, 4997L, 4988L);
        else
            enum expect = tuple(5003L, 4979L, 4971L);
    }
    else static if (isDMD && isX86 && (!isRelease || isRelease && (__VERSION__ < 2069 || __VERSION__ > 2070)) ||
        isGDC && isX86)
        enum expect = tuple(0.49823454184104704, 0.50283215330409059, 0.49828840592580270);
    else static if (!isX86 || !isRelease)
        enum expect = tuple(0.49683911677479053, 0.50166077554665356, 0.49647639699603635);
    else
        enum expect = tuple(0.49823454184171062, 0.50283215330485886, 0.49828840592673407);

    // innerLoop = 1: parsing is slow enough to check the clock after every call.
    // mul = coordCount: the statistics count parsed coordinates instead of parsed documents.
    run!(1, coordCount)("JSON 3D coordinates (" ~ (integral ? "integers" : "floating-point") ~ ")", expect,
        benchmark("std.json", {
                import std.json;

                // Builds the complete document tree first, then walks the coordinates array.
                auto json = parseJSON(text);
                auto coordinates = json["coordinates"].array;
                size_t len = coordinates.length;
                static if (integral)
                    long x, y, z;
                else
                    double x = 0, y = 0, z = 0;
                foreach (i; 0 .. len)
                {
                    auto coord = coordinates[i];
                    static if (integral)
                    {
                        x += coord["x"].integer;
                        y += coord["y"].integer;
                        z += coord["z"].integer;
                    }
                    else
                    {
                        x += coord["x"].floating;
                        y += coord["y"].floating;
                        z += coord["z"].floating;
                    }
                }

                return tuple(x / long(len), y / long(len), z / long(len));
            }),
//      benchmark("stdx.data.json", {
//              import stdx.data.json.lexer;
//              import stdx.data.json.parser;
//
//              auto json = parseJSONStream!(LexOptions.useBigInt)(text);
//              json.skipToKey("coordinates");
//              size_t len;
//              double x = 0, y = 0, z = 0;
//              json.readArray(delegate() @trusted {
//                      json.readObject!(typeof(json))(delegate(string key) @trusted {
//                              if (key == "x")
//                                  x += json.readDouble();
//                              else if (key == "y")
//                                  y += json.readDouble();
//                              else if (key == "z")
//                                  z += json.readDouble();
//                              else
//                                  json.skipValue();
//                          });
//                      len++;
//                  });
//
//              return tuple(x / len, y / len, z / len);
//          }),
        benchmark("fast.json", {
                import fast.json;

                auto json = Json!(validateAll, true)(text);
                long len;

                // Iterates the "coordinates" array and dispatches on the keys of each object.
                static if (integral)
                {
                    long x, y, z;
                    foreach (i; json.coordinates)
                    {
                        json.keySwitch!("x", "y", "z")(
                            { x += json.read!long; },
                            { y += json.read!long; },
                            { z += json.read!long; }
                            );
                        len++;
                    }
                }
                else
                {
                    double x = 0, y = 0, z = 0;
                    foreach (i; json.coordinates)
                    {
                        json.keySwitch!("x", "y", "z")(
                            { x += json.read!double; },
                            { y += json.read!double; },
                            { z += json.read!double; }
                            );
                        len++;
                    }
                }

                return tuple(x / len, y / len, z / len);
            }),
        );
}
285 |
/*******************************************************************************
 *
 * Executes a group of `Benchmark`s and prints how their throughput compares.
 * Each function is called once up front and then repeatedly until at least one
 * second has elapsed.
 *
 * Params:
 *   innerLoop = number of calls made between two looks at the clock
 *   mul = factor applied to the printed call count; typically `1`, unless one
 *     call of the function performs an action several times and the output
 *     should reflect that
 *   title = short overall title of this comparing benchmark
 *   expectation = return value, that is expected from all the tested functions
 *     for validation purposes and to counter dead-code elimination. The check
 *     itself is currently commented out.
 *   benchmarks = A set of `Benchmark`s to be run and compared. The first one in
 *     the list acts as a reference timing for the others.
 *
 **************************************/
void run(uint innerLoop = 1000, uint mul = 1, R)(in string title, in R expectation, in Benchmark!R[] benchmarks...)
{
    import core.time, std.stdio, std.exception, std.string;

    writeln("\x1b[1m", title, "\x1b[0m");
    writeln();
    // Throughput of the first benchmark, which all others are compared to.
    ulong baseline;
    foreach (idx, ref candidate; benchmarks)
    {
        // Check that the result is as expected...
        auto actual = candidate.run();
        //enforce(actual == expectation, format(`Benchmark "%s" did not result as expected in "%s", but in "%s".`,
        //        bm.title, expectation, actual));
        ulong rounds = 0;
        immutable started = TickDuration.currSystemTick;
        TickDuration stopped;
        do
        {
            foreach (k; 0 .. innerLoop)
                candidate.run();
            rounds++;
            stopped = TickDuration.currSystemTick;
        }
        while (!(stopped - started).seconds);

        // Calls per second, scaled by `mul`.
        ulong perSecond = rounds * innerLoop * mul * 1_000_000_000 / (stopped - started).nsecs;
        if (idx == 0)
        {
            baseline = perSecond;
            writefln("  %-22s: %10s per second", candidate.title, perSecond);
            continue;
        }

        if (baseline <= perSecond)
            writefln("\x1b[1m  %-22s: %10s per second (done in %.0f%% of time !)\x1b[0m", candidate.title, perSecond, 100.0 * baseline / perSecond);
        else
            writefln("  %-22s: %10s per second (slower by factor %.1f)", candidate.title, perSecond, 1.0 * baseline / perSecond);
    }
    writeln();
}
336 |
337 |
338 | /*******************************************************************************
339 | *
340 | * Functor to create `Benchmark` structs.
341 | *
342 | * Params:
343 | * title = displayed string when the statistics of `run` are displayed
344 | * run = the benchmarked function
345 | *
346 | * Returns:
347 | * a `Benchmark` from the given information
348 | *
349 | **************************************/
350 | Benchmark!R benchmark(R)(string title, R function() run)
351 | {
352 |     return typeof(return)(title, run); // bundle label and function pointer, field by field
353 | }
354 |
355 |
356 | /*******************************************************************************
357 | *
358 | * Information about a benchmarked function.
359 | *
360 | **************************************/
361 | struct Benchmark(R)
362 | {
363 | string title; /// Label printed in front of the measured rate.
364 | R function() run; /// The function to time; `run` (the driver) calls it without arguments and ignores the result while timing.
365 | }
366 |
--------------------------------------------------------------------------------
/source/fast/internal/helpers.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Helper functions that serve general purposes.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************************************************************************/
15 | module fast.internal.helpers;
16 |
17 | import std.traits;
18 | import fast.internal.sysdef;
19 |
20 |
21 | private enum 一META一PROGRAMMING一;
22 |
23 | // Front-ends from 2.071 on enforce the fixed visibility rules, so a local staticIota is supplied for them.
24 | static if (__VERSION__ < 2071)
25 | {
26 |     import std.typecons : staticIota;
27 | }
28 | else
29 | {
30 |     import std.meta : AliasSeq;
31 |     /// Yields the compile-time sequence `beg, beg + 1, ..., end - 1`; it is empty when `beg >= end`.
32 |     template staticIota(int beg, int end)
33 |     {
34 |         static if (beg >= end)
35 |         {
36 |             // Nothing left in the range.
37 |             alias staticIota = AliasSeq!();
38 |         }
39 |         else static if (beg + 1 == end)
40 |         {
41 |             // Exactly one element; the unary plus turns `beg` into an expression.
42 |             alias staticIota = AliasSeq!(+beg);
43 |         }
44 |         else
45 |         {
46 |             // Split the range in the middle and concatenate the two halves.
47 |             enum mid = beg + (end - beg) / 2;
48 |             alias staticIota = AliasSeq!(staticIota!(beg, mid), staticIota!(mid, end));
49 |         }
50 |     }
51 | }
52 |
53 |
54 | /**
55 | * For any integral type, returns the unsigned type of the same bit-width.
56 | */
57 | template UnsignedOf(I) if (isIntegral!I)
58 | {
59 |     // Signed built-ins map to their unsigned twin; unsigned types map to themselves.
60 |     static if (is(I == byte))       alias UnsignedOf = ubyte;
61 |     else static if (is(I == short)) alias UnsignedOf = ushort;
62 |     else static if (is(I == int))   alias UnsignedOf = uint;
63 |     else static if (is(I == long))  alias UnsignedOf = ulong;
64 |     else static if (isUnsigned!I)   alias UnsignedOf = I;
65 |     else
66 |     {
67 |         // Any integral type not matched above is rejected at compile time.
68 |         static assert (0, "Not implemented");
69 |     }
70 | }
71 |
72 |
73 | /**
74 | * Generates a mixin string for repeating code. It can be used to unroll variadic arguments.
75 | * A format string is instantiated a certain number of times with an incrementing parameter.
76 | * The results are then concatenated using an optional joiner.
77 | *
78 | * Params:
79 | * length = Number of elements you want to join. It is passed into format() as an incrementing number from [0 .. length$(RPAREN).
80 | * fmt = The format string to apply on each instantiation. Use %1$d to refer to the current index multiple times when necessary.
81 | * joiner = Optional string that will be placed between instances. It could be a space or an arithmetic operation.
82 | *
83 | * Returns:
84 | * The combined elements as a mixin string.
85 | *
86 | * See_Also:
87 | * $(LINK2 http://forum.dlang.org/thread/vqfvihyezbmwcjkmpzin@forum.dlang.org, A simple way to do compile time loop unrolling)
88 | */
89 | enum ctfeJoin(size_t length)(in string fmt, in string joiner = null)
90 | {
91 |     import std.string : format;
92 |
93 |     // A plain counting loop works in CTFE and needs neither std.range nor std.algorithm.
94 |     string result;
95 |     foreach (i; 0 .. length) {
96 |         // The joiner belongs strictly between instances. Deciding by position (instead of by
97 |         // `result` being non-null) keeps it in place even if an instance expands to nothing.
98 |         if (i) result ~= joiner;
99 |         result ~= format(fmt, i);
100 |     }
101 |     return result;
102 | }
103 |
104 | /// Returns the first user-defined attribute value of type `T` attached to `sym`, or `T.init` if there is none.
105 | enum getUDA(alias sym, T)()
106 | {
107 |     foreach (attribute; __traits(getAttributes, sym)) {
108 |         static if (is(typeof(attribute) == T)) return attribute;
109 |     }
110 |     return T.init;
111 | }
112 |
113 |
114 | private enum 一BIT一OPERATIONS一;
115 |
116 | static import core.bitop;
117 |
118 | alias bsr = core.bitop.bsr;
119 | alias bsf = core.bitop.bsf;
120 |
121 | /*******************************************************************************
122 | *
123 | * Count leading zeroes.
124 | *
125 | * Params:
126 | * u = the unsigned value to scan
127 | *
128 | * Returns:
129 | * The number of leading zero bits before the first one bit. If `u` is `0`,
130 | * the result is undefined.
131 | *
132 | **************************************/
133 | version (DigitalMars) // DMD: derived from bsr, the index of the highest set bit
134 | {
135 | @safe @nogc pure nothrow U
136 | clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t))
137 | {
138 | pragma(inline, true);
139 | enum U max = 8 * U.sizeof - 1; // index of the most significant bit of U
140 | return max - bsr(u);
141 | }
142 |
143 | static if (isX86) // 32-bit x86: size_t is uint there, so ulong gets its own overload working on 32-bit halves
144 | {
145 | @safe @nogc pure nothrow uint
146 | clz(U)(U u) if (is(Unqual!U == ulong))
147 | {
148 | pragma(inline, true);
149 | uint hi = u >> 32;
150 | return hi ? 31 - bsr(hi) : 63 - bsr(cast(uint)u); // a non-zero upper half decides; otherwise 32 zeroes plus those of the lower half
151 | }
152 | }
153 | }
154 | else version (GNU) // GDC: forwards to the GCC builtins
155 | {
156 | import gcc.builtins;
157 | alias clz = __builtin_clz;
158 | static if (isX86)
159 | {
160 | @safe @nogc pure nothrow uint
161 | clz(ulong u)
162 | {
163 | uint hi = u >> 32;
164 | return hi ? __builtin_clz(hi) : 32 + __builtin_clz(cast(uint)u); // same split into 32-bit halves as in the DMD branch
165 | }
166 | }
167 | else alias clz = __builtin_clzl; // adds a second overload to `clz` when not on 32-bit x86
168 | }
169 | else version (LDC) // LDC: forwards to the LLVM ctlz intrinsic
170 | {
171 | @safe @nogc pure nothrow U
172 | clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t))
173 | {
174 | pragma(inline, true);
175 | import ldc.intrinsics;
176 | return llvm_ctlz(u, false); // NOTE(review): the second argument is presumably LLVM's is_zero_undef flag - confirm in ldc.intrinsics
177 | }
178 |
179 | static if (isX86)
180 | {
181 | @safe @nogc pure nothrow uint
182 | clz(U)(U u) if (is(Unqual!U == ulong))
183 | {
184 | pragma(inline, true);
185 | import ldc.intrinsics;
186 | return cast(uint)llvm_ctlz(u, false); // the intrinsic returns the operand type; narrowed to uint like the other ulong overloads
187 | }
188 | }
189 | }
190 | static if (__VERSION__ < 2071)
191 | {
192 |     // < 2.071 did not have 64-bit bsr/bsf on x86, so both are put together from the 32-bit versions.
193 |     @safe @nogc pure nothrow uint
194 |     bsr(U)(U u) if (is(Unqual!U == ulong))
195 |     {
196 |         pragma(inline, true);
197 |         uint hi = u >> 32;
198 |         return hi ? bsr(hi) + 32 : bsr(cast(uint)u); // highest set bit: a non-zero upper half wins
199 |     }
200 |
201 |     @safe @nogc pure nothrow uint
202 |     bsf(U)(U u) if (is(Unqual!U == ulong))
203 |     {
204 |         pragma(inline, true);
205 |         uint lo = cast(uint)u;
206 |         return lo ? bsf(lo) : 32 + bsf(cast(uint)(u >> 32)); // the cast keeps the call on the 32-bit bsf; a ulong argument would re-enter this template (endlessly for u == 0)
207 |     }
208 | }
209 | unittest // checks clz, bsr and bsf against known bit positions for 32- and 64-bit operands
210 | {
211 | assert(clz(uint(0x01234567)) == 7); // top byte 0x01 = 0b0000_0001: seven leading zero bits
212 | assert(clz(ulong(0x0123456701234567)) == 7);
213 | assert(clz(ulong(0x0000000001234567)) == 7+32); // empty upper half adds 32
214 | assert(bsr(uint(0x01234567)) == 24); // highest set bit is bit 24
215 | assert(bsr(ulong(0x0123456701234567)) == 24+32);
216 | assert(bsr(ulong(0x0000000001234567)) == 24);
217 | assert(bsf(uint(0x76543210)) == 4); // lowest nibble is 0, the next one is 1: lowest set bit is bit 4
218 | assert(bsf(ulong(0x7654321076543210)) == 4);
219 | assert(bsf(ulong(0x7654321000000000)) == 4+32); // empty lower half adds 32
220 | }
221 |
222 |
223 | private enum 一UNITTESTING一;
224 |
225 | // Insert a dummy main when unittesting outside of dub.
226 | version (VibeCustomMain) {} else version (unittest) void main() {}
227 |
228 |
229 | private enum 一MISCELLANEOUS一;
230 |
231 | pure nothrow @nogc
232 | {
233 | /**
234 | * Aligns a pointer to the closest multiple of $(D pot) (a power of two),
235 | * which is equal to or larger than $(D ptr).
236 | */
237 | T* alignPtrNext(T)(scope T* ptr, in size_t pot)
238 | in { assert(pot > 0 && pot.isPowerOf2); }
239 | body { return cast(T*) ((cast(size_t) ptr + (pot - 1)) & -pot); } // add pot-1, then clear the low bits: for a power of two, -pot equals ~(pot-1)
240 | unittest { assert(alignPtrNext(cast(void*) 65, 64) == cast(void*) 128); }
241 | }
242 |
243 |
244 | @nogc @safe pure nothrow
245 | {
246 | /// Tells whether `n`, which must be positive, has exactly one bit set, i.e. is an integral power of two.
247 | @property bool isPowerOf2(in size_t n)
248 | in { assert(n > 0); }
249 | body { immutable withoutLowestBit = n & (n - 1); return !withoutLowestBit; }
250 | // moveMask: binds the 128-bit SSE2 `pmovmskb` operation under LDC and GDC; no definition is provided for other compilers.
251 | version (LDC) {
252 | import core.simd;
253 | pragma(LDC_intrinsic, "llvm.x86.sse2.pmovmskb.128")
254 | uint moveMask(ubyte16);
255 | } else version (GNU) {
256 | import gcc.builtins;
257 | alias moveMask = __builtin_ia32_pmovmskb128;
258 | }
259 | /// Returns the compile-time constant `scalar` converted to the vector type `V`.
260 | template SIMDFromScalar(V, alias scalar)
261 | {
262 | // This wrapper is needed for optimal performance with LDC and
263 | // doesn't hurt GDC's inlining.
264 | V SIMDFromScalar() {
265 | enum V asVectorEnum = scalar; // the conversion to V happens at compile time
266 | return asVectorEnum;
267 | }
268 | }
269 |
270 | /// Exposes `str`, zero-padded to 16 bytes, as constant data; the kind of symbol depends on compiler, PIC mode and architecture.
271 | template SIMDFromString(string str) if (str.length <= 16)
272 | {
273 | import core.simd, std.algorithm, std.range, std.string;
274 |
275 | private enum data = chain(str.representation, 0.repeat(16 - str.length)).array; // the bytes of str followed by zeroes, 16 elements in total
276 |
277 | static if (!isDMD)
278 | immutable ubyte16 SIMDFromString = data;
279 | else version (D_PIC)
280 | {
281 | import std.format;
282 | void SIMDFromString() @safe @nogc pure nothrow
283 | {
284 | mixin(format("asm @trusted @nogc pure nothrow { naked; db %(%s,%); }", data)); // DMD with PIC: a naked function whose body is just the 16 data bytes
285 | }
286 | }
287 | else static if (isX86)
288 | align(16) __gshared ubyte[16] SIMDFromString = data; // DMD on 32-bit x86: a 16-byte aligned plain array
289 | else
290 | __gshared ubyte16 SIMDFromString = data;
291 | }
292 | }
293 |
--------------------------------------------------------------------------------
/source/fast/internal/sysdef.di:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Definitions that abstract from the architecture or operating system.
4 | *
5 | * As far as possible these will alias existing definitions from OS headers to facilitate integration
6 | * with other code.
7 | *
8 | * Authors:
9 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * Copyright:
12 | * © 2016 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
13 | *
14 | * License:
15 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
16 | *
17 | **************************************************************************************************/
18 | module fast.internal.sysdef;
19 |
20 |
21 | private enum 一ARCHITECTURE一;
22 |
23 | // At most one of these is true. On architectures other than x86 both are now defined as `false`,
24 | // so `static if (isX86)` / `static if (isAMD64)` compile there instead of hitting an undefined name.
25 | version (X86_64) enum isAMD64 = true;
26 | else             enum isAMD64 = false;
27 | version (X86) enum isX86 = true;
28 | else          enum isX86 = false;
29 |
30 | // SSE2 is part of the x86-64 baseline; every other target, including 32-bit x86, is treated as lacking it.
31 | version (X86_64)
32 |     enum hasSSE2 = true;
33 | else
34 |     enum hasSSE2 = false;
35 |
36 |
37 | private enum 一OPERATING一SYSTEM一;
38 |
39 | // Compile-time booleans mirroring the predefined `Posix` and `Windows` version identifiers.
40 | version (Posix) {
41 |     enum isPosix = true;
42 | } else {
43 |     enum isPosix = false;
44 | }
45 |
46 | version (Windows) { enum isWindows = true; }
47 | else              { enum isWindows = false; }
48 |
49 | /*******************************************************************************
50 | *
51 | * Despite Phobos' use of `char[]` UTF-8 strings for file names, their internal
52 | * representation in the operating system is a sequence of 8- or 16-bit values.
53 | * On Windows this means that one could get invalid surrogate pairings and on
54 | * Linux, a file name can have any 8-bit encoding that keeps '/' at the same
55 | * code point as ASCII. That's why portable file names should only use a subset
56 | * of ASCII that is interpreted the same in all supported encodings.
57 | *
58 | * MSDN mentions that file paths should be treated as a sequence of `WCHAR`:
59 | * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
60 | *
61 | **************************************/
62 | static if (isPosix)
63 | alias FileChar = ubyte; // 8-bit units on Posix systems
64 | else version (Windows)
65 | alias FileChar = ushort; // 16-bit units on Windows
66 | else static assert(0, "Not implemented");
67 |
68 | alias Filename = FileChar[]; /// A file name as a sequence of raw operating system code units.
69 |
70 |
71 | private enum 一COMPILER一UNIFICATION一;
72 |
73 | // One flag per supported compiler; exactly one of them is true when built with LDC, GDC or DMD.
74 | version (LDC)
75 | {
76 |     enum isLDC = true, isGDC = false, isDMD = false;
77 | }
78 | else version (GNU)
79 | {
80 |     enum isLDC = false, isGDC = true, isDMD = false;
81 | }
82 | else version (DigitalMars)
83 | {
84 |     enum isLDC = false, isGDC = false, isDMD = true;
85 | }
86 |
87 | version (DigitalMars)
88 | {
89 |     enum noinline; // DMD: empty placeholder declarations, nothing is attached to them
90 |     enum forceinline;
91 |     enum sse4_2; // same name as under GDC and LDC, so `@sse4_2` resolves with every compiler
92 |     enum sse4; // former DMD-only spelling, kept for code that still refers to it
93 | }
94 | else version (GNU)
95 | {
96 |     import gcc.attribute;
97 |     enum noinline = gcc.attribute.attribute("noinline");
98 |     enum forceinline = gcc.attribute.attribute("forceinline");
99 |     enum sse4_2 = gcc.attribute.attribute("target", "sse4.2");
100 | }
101 | else version (LDC)
102 | {
103 |     import ldc.attributes;
104 |     enum noinline; enum forceinline; // placeholders under LDC as well
105 |     enum sse4_2 = ldc.attributes.target("+sse4.2");
106 | }
107 |
108 | // `isRelease` is true when the `assert` version identifier is not set.
109 | version (assert) { enum isRelease = false; }
110 | else             { enum isRelease = true; }
111 |
112 | // `isPIC` mirrors the `D_PIC` version identifier.
113 | version (D_PIC)
114 |     enum isPIC = true;
115 | else
116 |     enum isPIC = false;
117 |
--------------------------------------------------------------------------------
/source/fast/intmath.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Supplementary integer math functions.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************************************************************************/
15 | module fast.intmath;
16 |
17 | import fast.internal.helpers;
18 | import fast.internal.sysdef;
19 |
20 |
21 | version (LDC)
22 | {
23 |     // `overflow` is sticky like in the other two implementations: set on overflow, otherwise left untouched.
24 |     @safe @nogc pure nothrow ulong mulu(ulong x, ulong y, ref bool overflow)
25 |     {
26 |         import ldc.intrinsics;
27 |         auto res = llvm_umul_with_overflow(x, y);
28 |         // Accumulate instead of assign, so an overflow flagged by an earlier call is not cleared.
29 |         overflow |= res.overflow;
30 |         return res.result;
31 |     }
32 | else static if (isPosix && isGDC && (isAMD64 || isX86))
33 | {
34 |     @nogc pure nothrow ulong mulu(ulong x, ulong y, ref bool overflow)
35 |     {
36 |         // `overflow` is sticky: the assembly only ever stores 1 into it. On 32-bit x86 the product is assembled
37 |         // from 32x32-bit partial products; `4+%N` addresses the upper 32 bits of the memory operand N.
38 |         version (GNU) {
39 |             ulong lo;
40 |             version (X86) asm { "
41 | cmpl $0, 4+%2
42 | je 1f
43 | cmpl $0, 4+%3
44 | je 1f
45 | movb $1, %1
46 | 1:
47 | mov 4+%2, %%eax
48 | mull %3
49 | jno 2f
50 | movb $1, %1
51 | 2:
52 | mov %%eax, %%ecx
53 | mov %2, %%eax
54 | mull 4+%3
55 | jno 3f
56 | movb $1, %1
57 | 3:
58 | add %%eax, %%ecx
59 | jno 4f
60 | movb $1, %1
61 | 4:
62 | mov %2, %%eax
63 | mull %3
64 | add %%ecx, %%edx
65 | jnc 5f
66 | movb $1, %1
67 | 5:
68 | " : "=&A" lo, "+*m" overflow : "m" x, "m" y : "ecx"; }
69 |             else asm { "mul %3\njno 1f\nmovb $1, %1\n1:\n" : "=a" lo, "+*m" overflow : "a" x, "r" y : "rdx"; }
70 |             return lo;
71 |         }
72 |     }
73 | }
74 | else
75 | {
76 |     // DMD is already faster than my ASM code above, no need to improve. Good job Walter et al.
77 |     import core.checkedint;
78 |     alias mulu = core.checkedint.mulu;
79 | }
80 |
--------------------------------------------------------------------------------
/source/fast/json.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * A fast JSON parser implementing RFC 7159.
4 | *
5 | * The most prominent change compared to the initial revision is the allowance of all data types as
6 | * root values, not just objects and arrays.
7 | *
8 | * Usage_Hints:
9 | * $(UL
10 | * $(LI This parser only supports UTF-8 without BOM.)
11 | * $(LI When a JSON object has duplicate keys, the last one in the set will determine the value
12 | * of associative-array entries or struct fields.)
13 | * $(LI `BigInt` and large number parsing are not implemented currently, but all integral types
14 | * as well as minimal exact representations of many `double` values are supported.)
15 | * )
16 | *
17 | * Authors:
18 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
19 | *
20 | * Copyright:
21 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
22 | *
23 | * License:
24 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
25 | *
26 | **************************************************************************************************/
27 | module fast.json;
28 |
29 | import core.stdc.string;
30 |
31 | import std.ascii;
32 | import std.conv;
33 | import std.exception;
34 | import std.file;
35 | import std.json;
36 | import std.range;
37 | import std.string : representation, format;
38 | import std.traits;
39 | import std.uni;
40 |
41 | import fast.buffer;
42 | import fast.cstring;
43 | import fast.internal.sysdef;
44 | import fast.parsing;
45 |
46 |
47 | /*******************************************************************************
48 | *
49 | * Loads a file as JSON text. At the default level `validateUsed` the parts that are read are validated, including the UTF-8 encoding of strings.
50 | *
51 | * Params:
52 | * vl = Validation level; one of `trustedSource`, `validateUsed` (default) or `validateAll`.
53 | * fname = The file name to load.
54 | *
55 | * Returns:
56 | * A JSON file object exposing the `Json` API.
57 | *
58 | **************************************/
59 | auto parseJSONFile(uint vl = validateUsed)(in char[] fname)
60 | { return Json!vl.File(fname); }
61 |
62 | /// ditto
63 | auto parseJSONFile(uint vl = validateUsed)(in Filename fname)
64 | { return Json!vl.File(fname); }
65 |
66 |
67 | /*******************************************************************************
68 | *
69 | * Loads a JSON string and validates the used parts. UTF-8 validation of strings is switched off here (`validateUtf8` is passed as `false`).
70 | *
71 | * Params:
72 | * vl = Validation level; one of `trustedSource`, `validateUsed` (default) or `validateAll`.
73 | * text = The string to load.
74 | *
75 | * Returns:
76 | * A `Json` struct.
77 | *
78 | **************************************/
79 | auto parseJSON(uint vl = validateUsed, T : const(char)[])(T text) nothrow
80 | { return Json!(vl, false)(text); }
81 |
82 |
83 | /*******************************************************************************
84 | *
85 | * Load a file as JSON text that is considered 100% correct. No checks will be
86 | * performed, not even if you try to read a number as a string.
87 | *
88 | * Params:
89 | * fname = The file name to load.
90 | *
91 | * Returns:
92 | * A JSON file object exposing the `Json` API.
93 | *
94 | **************************************/
95 | Json!trustedSource.File parseTrustedJSONFile(in char[] fname)
96 | { return Json!trustedSource.File(fname); }
97 |
98 | /// ditto
99 | version(Windows){}else // the `Filename` overload is compiled out on Windows
100 | Json!trustedSource.File parseTrustedJSONFile(in Filename fname)
101 | { return Json!trustedSource.File(fname); }
102 |
103 |
104 | /*******************************************************************************
105 | *
106 | * Load a JSON string that is considered 100% correct. No checks will be
107 | * performed, not even if you try to read a number as a string.
108 | *
109 | * Params:
110 | * text = The string to load.
111 | *
112 | * Returns:
113 | * A `Json` struct.
114 | *
115 | **************************************/
116 | auto parseTrustedJSON(T : const(char)[])(T text) nothrow
117 | { return Json!(trustedSource, false)(text); } // `trustedSource` only combines with validateUtf8 == false (see the template constraint of `Json`)
118 |
119 |
120 | /*******************************************************************************
121 | *
122 | * Validates a JSON text file.
123 | *
124 | * Params:
125 | * fname = The file name to load.
126 | *
127 | * Throws:
128 | * JSONException on validation errors.
129 | *
130 | **************************************/
131 | void validateJSONFile(in char[] fname)
132 | { Json!(validateAll, true).File(fname).skipValue(); } // strictest level with UTF-8 checking on; the root value is skipped and nothing is returned
133 |
134 | /// ditto
135 | version(Windows){} else // the `Filename` overload is compiled out on Windows
136 | void validateJSONFile(in Filename fname)
137 | { Json!(validateAll, true).File(fname).skipValue(); }
138 |
139 |
140 | /*******************************************************************************
141 | *
142 | * Validates a JSON string.
143 | *
144 | * Params:
145 | * text = The string to load.
146 | *
147 | * Throws:
148 | * JSONException on validation errors.
149 | *
150 | **************************************/
151 | void validateJSON(T : const(char)[])(T text)
152 | { Json!(validateAll, true)(text).skipValue(); } // unlike `parseJSON`, UTF-8 validation is enabled here
153 |
154 |
155 | /// JSON data types returned by `peek`.
156 | enum DataType : ubyte
157 | {
158 | string, number, object, array, boolean, null_ // `null_` has a trailing underscore because `null` is a D keyword
159 | }
160 |
161 |
162 | /// Validation strength of JSON parser
163 | enum // anonymous enum; the order matters because levels are compared numerically (`vl > trustedSource`, `vl >= validateUsed`)
164 | {
165 | trustedSource, /// Assume 100% correct JSON and speed up parsing.
166 | validateUsed, /// Ignore errors in skipped portions.
167 | validateAll, /// Do a complete validation of the JSON data.
168 | }
169 |
170 |
171 | /// A UDA used to remap enum members or struct field names to JSON strings.
172 | struct JsonMapping { string[string] map; } // NOTE(review): presumably keyed by the D identifier with the JSON string as value - confirm against buildRemapTable
173 |
174 |
175 | /// JSON parser state returned by the `state` property.
176 | struct JsonParserState {
177 | const(char)* text; // NOTE(review): presumably a copy of the parser's read position (`m_text`) - confirm in `state`
178 | size_t nesting; // NOTE(review): presumably a copy of `m_nesting` - confirm in `state`
179 | }
180 |
181 |
182 | /*******************************************************************************
183 | *
184 | * This is a forward JSON parser for picking off items of interest on the go.
185 | * It neither produces a node structure, nor does it produce events. Instead you
186 | * can peek at the value type that lies ahead and/or directly consume a JSON
187 | * value from the parser. Objects and arrays can be iterated over via `foreach`,
188 | * while you can also directly ask for one or multiple keys of an object.
189 | *
190 | * Params:
191 | * vl = Validation level. Any of `trustedSource`, `validateUsed` or
192 | * `validateAll`.
193 | * validateUtf8 = If validation is enabled, this also checks UTF-8 encoding
194 | * of JSON strings.
195 | *
196 | **************************************/
197 | struct Json(uint vl = validateUsed, bool validateUtf8 = vl > trustedSource)
198 | if (vl > trustedSource || !validateUtf8)
199 | {
200 | private:
201 |
202 | enum isTrusted = vl == trustedSource;
203 | enum skipAllInter = vl == trustedSource;
204 | enum isValidating = vl >= validateUsed; // true for `validateUsed` and `validateAll`
205 | enum isValidateAll = vl == validateAll;
206 |
207 | const(char*) m_start = void; // set once in the constructor to the first byte of the JSON text
208 | const(char)* m_text = void; // current read position within the JSON text
209 | size_t m_nesting = 0;
210 | RaiiArray!char m_mem; // buffer that receives strings containing escape sequences (see borrowString)
211 | bool m_isString = false; // set by the constructors when the text is GC-owned or was reallocated; read!string then skips the copy
213 |
214 | public:
215 |
216 | @disable this();
217 | @disable this(this);
218 |
219 |
220 | /*******************************************************************************
221 | *
222 | * Constructor taking a `string` for fast slicing.
223 | *
224 | * JSON strings without escape sequences can be returned as slices.
225 | *
226 | * Params:
227 | * text = The JSON text to parse.
228 | * simdPrep = Set this to `No.simdPrep` to indicate that `text` is already
229 | * suffixed by 16 zero bytes as required for SIMD processing.
230 | *
231 | **************************************/
232 | nothrow
233 | this(string text, Flag!"simdPrep" simdPrep = Yes.simdPrep)
234 | {
235 | import core.memory;
236 | m_isString = GC.query(text.ptr) !is ReturnType!(GC.query).init; // true when the GC reports a block for `text`
237 | this(cast(const(char)[]) text, simdPrep); // the general constructor may additionally set m_isString if appending the padding reallocates
238 | }
239 |
240 |
241 | /*******************************************************************************
242 | *
243 | * Constructor taking a `const char[]`.
244 | *
245 | * JSON strings allocate on the GC heap when returned.
246 | *
247 | * Params:
248 | * text = The JSON text to parse.
249 | * simdPrep = Set this to `No.simdPrep` to indicate that `text` is already
250 | * suffixed by 16 zero bytes as required for SIMD processing.
251 | *
252 | **************************************/
253 | pure nothrow
254 | this(const(char)[] text, Flag!"simdPrep" simdPrep = Yes.simdPrep)
255 | {
256 |     if (simdPrep)
257 |     {
258 |         // SSE processing requires 16 trailing zero bytes. If appending them moves the text to fresh memory,
259 |         // that copy can be declared unique/immutable and JSON strings can be returned without allocating.
260 |         const(char)* before = text.ptr;
261 |         text ~= "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
262 |         if (text.ptr !is before) m_isString = true;
263 |     }
264 |     m_start = m_text = text.ptr;
265 |     skipWhitespace!false();
266 | }
267 |
268 |
269 | /+
270 | ╔══════════════════════════════════════════════════════════════════════════════
271 | ║ ⚑ String
272 | ╚══════════════════════════════════════════════════════════════════════════════
273 | +/
274 |
275 | /*******************************************************************************
276 | *
277 | * Reads a string off the JSON text.
278 | *
279 | * Params:
280 | * allowNull = Allow `null` as a valid option for the string.
281 | *
282 | * Returns:
283 | * A GC managed string.
284 | *
285 | **************************************/
286 | string read(T)(bool allowNull = true) if (is(T == string))
287 | {
288 |     if (allowNull && peek != DataType.string)
289 |         return readNull();
290 |     auto borrowed = borrowString();
291 |     // Strings with escapes are handed out from the reusable buffer `m_mem` and must always be copied;
292 |     // only a slice of the JSON text itself may be returned as is, and only if that text is immutable.
293 |     return (m_isString && borrowed.ptr !is m_mem.ptr) ? borrowed.assumeUnique() : borrowed.idup;
294 | }
295 |
296 |
297 | /*******************************************************************************
298 | *
299 | * Reads an enumeration off the JSON text.
300 | *
301 | **************************************/
302 | T read(T)() if (is(T == enum))
303 | {
304 |     enum table = buildRemapTable!T;
305 |     auto rewindTo = m_text;
306 |     auto name = borrowString();
307 |     foreach (entry; table)
308 |         if (entry.json.length == name.length && memcmp(entry.json.ptr, name.ptr, name.length) == 0)
309 |             return entry.d;
310 |     m_text = rewindTo; // no member matched: go back to where the string started
311 |     static if (isValidating)
312 |         handleError(format("Could not find enum member `%s` in `%s`", name, T.stringof));
313 |     assert(0);
314 | }
315 |
316 |
317 | /*******************************************************************************
318 | *
319 | * Reads a string off the JSON text with limited lifetime.
320 | *
321 | * The reference to this slice is not guaranteed to be valid after the JSON
322 | * parser has been destroyed or another object key or string value has been
323 | * parsed. So make a copy before you continue parsing.
324 | *
325 | * Returns:
326 | * If the string had no escape sequences in it, the returned array is a
327 | * slice of the JSON text buffer, otherwise temporary copy.
328 | *
329 | **************************************/
330 | const(char)[] borrowString()
331 | {
332 | expect('"', "at start of string");
333 | auto escFreeStart = m_text;
334 |
335 | if (scanString!validateUtf8()) // true: the scan reached the closing quote without meeting a backslash
336 | {
337 | // Fast path here is to return a slice of the JSON if it doesn't contain escapes.
338 | size_t length = m_text - escFreeStart;
339 | skipOnePlusWhitespace!skipAllInter();
340 | return escFreeStart[0 .. length];
341 | }
342 | else
343 | {
344 | // Otherwise we copy to a separate memory area managed by this parser instance.
345 | size_t length = 0;
346 | bool eos = false;
347 | goto CopyToBuffer; // first copy the escape-free prefix found by the scan above; eos is still false, so the escape loop runs next
348 | do
349 | {
350 | do
351 | {
352 | m_mem.capacityNeeded( length + 4 ); // decodeEscape writes at most 4 bytes
353 | uint decoded = decodeEscape( &m_mem[length] );
354 | length += decoded;
355 | }
356 | while (*m_text == '\\'); // consecutive escapes are decoded back to back
357 |
358 | escFreeStart = m_text;
359 | eos = scanString!validateUtf8();
360 | CopyToBuffer:
361 | size_t escFreeLength = m_text - escFreeStart;
362 | m_mem.capacityNeeded( length + escFreeLength );
363 | memcpy( m_mem.ptr + length, escFreeStart, escFreeLength );
364 | length += escFreeLength;
365 | }
366 | while (!eos); // eos: the most recent scan reported the end of the string
367 | skipOnePlusWhitespace!skipAllInter();
368 | return m_mem[0 .. length];
369 | }
370 | }
371 |
372 | /// Advances m_text through string content. Returns true if the scan stopped at a `"`, false otherwise; with `validate` set, UTF-8 sequences and control characters are checked on the way.
373 | private bool scanString(bool validate)()
374 | {
375 | static if (validate)
376 | {
377 | import core.bitop;
378 |
379 | while (true)
380 | {
381 | // Stop for control-characters, \, " and anything non-ASCII.
382 | m_text.seekToRanges!"\0\x1F\"\"\\\\\x7F\xFF";
383 |
384 | // Handle printable ASCII range
385 | if (*m_text == '"')
386 | return true;
387 | if (*m_text == '\\')
388 | return false;
389 |
390 | // Anything else better be UTF-8
391 | uint u = *cast(uint*) m_text; // reads four bytes at once; presumably covered by the 16 zero bytes of padding - confirm
392 | version (LittleEndian) u = bswap(u); // first byte becomes the most significant one, so the bit patterns below read left to right
393 |
394 | // Filter overlong ASCII and missing follow byte.
395 | if (
396 | (u & 0b111_00000_11_000000_00000000_00000000) == 0b110_00000_10_000000_00000000_00000000 &&
397 | (u > 0b110_00001_10_111111_11111111_11111111))
398 | m_text += 2;
399 | // Handle overlong representation, UTF-16 surrogate pairs and missing follow bytes.
400 | else if (
401 | (u & 0b1111_0000_11_000000_11_000000_00000000) == 0b1110_0000_10_000000_10_000000_00000000 &&
402 | (u & 0b0000_1111_00_100000_00_000000_00000000) != 0b0000_1101_00_100000_00_000000_00000000 &&
403 | (u > 0b1110_0000_10_011111_10_111111_11111111))
404 | m_text += 3;
405 | // Handle missing follow bytes, Handle overlong representation and out of valid range (max. 0x10FFFF)
406 | else if (
407 | (u & 0b11111_000_11_000000_11_000000_11_000000) == 0b11110_000_10_000000_10_000000_10_000000 &&
408 | (u > 0b11110_000_10_001111_10_111111_10_111111) && (u < 0b11110_100_10_010000_10_000000_10_000000))
409 | m_text += 4;
410 | // Handle invalid code units.
411 | else if (*m_text < ' ' || *m_text == 0x7F) // NOTE(review): RFC 7159 permits an unescaped U+007F inside strings; it is rejected here - confirm this is intended
412 | expectNot("is a disallowed control character in strings");
413 | else if (*m_text >= 0x80 && *m_text <= 0xBF)
414 | expectNot("is a UTF-8 follow byte and cannot start a sequence");
415 | else
416 | expectNot("is not a valid UTF-8 sequence start");
417 | }
418 | }
419 | else
420 | {
421 | m_text.seekToAnyOf!("\\\"\0"); // without validation only backslash, quote and NUL stop the scan
422 | return *m_text == '"';
423 | }
424 | }
425 |
426 | /// Forwards to `fixedTermStrCmp` to match the text at m_text against the compile-time `key`; NOTE(review): the meaning of the int result is defined by `fixedTermStrCmp` - confirm there.
427 | private int matchString(string key)()
428 | {
429 | return m_text.fixedTermStrCmp!(char, key, "\"\0", "\\")(&stringCompareCallback); // presumably: `"` and NUL terminate, `\` hands over to the callback
430 | }
431 |
432 | /// Callback passed to `fixedTermStrCmp`: decodes the escape sequence(s) at m_text and checks that `key` continues with the same bytes.
433 | private bool stringCompareCallback(ref immutable(char)* key, ref const(char)* str)
434 | {
435 | do
436 | {
437 | auto key4 = cast(char[4]*) key; // NOTE(review): always views 4 bytes of the key, however many remain - confirm the key is padded
438 | char[4] buf = *key4; // pre-filled with the key's bytes, so positions decodeEscape leaves untouched still compare equal
439 | uint bytes = decodeEscape(buf.ptr);
440 | if (buf != *key4)
441 | return false;
442 | key += bytes;
443 | }
444 | while (str[0] == '\\'); // NOTE(review): decodeEscape advances m_text while this tests `str` - presumably both refer to the same pointer
445 | return true;
446 | }
447 |
448 | /// Lookup table indexed by the character that follows a backslash: holds the unescaped character, or '\0' for anything that is not a two-character escape.
449 | private static immutable escapes = {
450 | char[256] result = '\0';
451 | result['"'] = '"';
452 | result['\\'] = '\\';
453 | result['/'] = '/';
454 | result['b'] = '\b';
455 | result['f'] = '\f';
456 | result['n'] = '\n';
457 | result['r'] = '\r';
458 | result['t'] = '\t';
459 | return result;
460 | }();
461 |
462 |
463 | private void skipEscape()
464 | {
465 |     // Steps over the escape sequence at m_text; its content is only inspected when validating everything.
466 |     static if (!isValidateAll)
467 |         m_text += 2;
468 |     else if (m_text[1] == 'u')
469 |     {
470 |         // UTF-16: step over `\u`, then let the decoder consume the hex digits.
471 |         m_text += 2;
472 |         decodeUtf16HexToCodepoint();
473 |     }
474 |     else
475 |     {
476 |         // Normal escape sequence. 2 bytes removed.
477 |         m_text++;
478 |         if (!escapes[*m_text])
479 |             expectNot("in escape sequence");
480 |         m_text++;
481 |     }
482 | }
483 |
484 | /// Decodes the escape sequence at m_text into UTF-8 at `dst` (room for 4 bytes is needed) and returns the number of bytes written.
485 | private uint decodeEscape(scope char* dst)
486 | {
487 |     if (m_text[1] != 'u')
488 |     {
489 |         // Normal escape sequence. 2 bytes removed.
490 |         dst[0] = escapes[m_text[1]];
491 |         static if (isValidating)
492 |             if (!dst[0])
493 |                 handleError("Invalid escape sequence");
494 |         m_text += 2;
495 |         return 1;
496 |     }
497 |     else
498 |     {
499 |         // UTF-16
500 |         m_text += 2;
501 |         uint cp = decodeUtf16HexToCodepoint();
502 |         // Surrogate pairs arrive here already combined, so the 4-byte form is selected by cp >= 0x10000.
503 |         if (cp >= 0x10000)
504 |         {
505 |             dst[0] = cast(char)(0b11110_000 | cp >> 18);
506 |             dst[1] = cast(char)(0b10_000000 | cp >> 12 & 0b00_111111);
507 |             dst[2] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111);
508 |             dst[3] = cast(char)(0b10_000000 | cp & 0b00_111111);
509 |             return 4;
510 |         }
511 |         else if (cp >= 0x800)
512 |         {
513 |             dst[0] = cast(char)(0b1110_0000 | cp >> 12);
514 |             dst[1] = cast(char)(0b10_000000 | cp >> 6 & 0b00_111111);
515 |             dst[2] = cast(char)(0b10_000000 | cp & 0b00_111111);
516 |             return 3;
517 |         }
518 |         else if (cp >= 0x80)
519 |         {
520 |             dst[0] = cast(char)(0b110_00000 | cp >> 6);
521 |             dst[1] = cast(char)(0b10_000000 | cp & 0b00_111111);
522 |             return 2;
523 |         }
524 |         else
525 |         {
526 |             dst[0] = cast(char)(cp);
527 |             return 1;
528 |         }
529 |     }
530 | }
531 |
532 | /// Decodes the 4 hex digits at m_text, plus a following `\uXXXX` low surrogate if they denote a high surrogate, and returns the code point.
533 | private dchar decodeUtf16HexToCodepoint()
534 | {
535 |     import fast.internal.helpers;
536 |
537 |     uint cp, hi;
538 |     foreach (i; staticIota!(0, 2))
539 |     {
540 |         static if (i == 1)
541 |         {
542 |             // The low surrogate is a separate escape: its `\u` prefix has to be consumed before the digits.
543 |             static if (isValidating)
544 |                 if (m_text[0] != '\\' || m_text[1] != 'u')
545 |                     handleError("A UTF-16 high surrogate must be followed by an escaped low surrogate.");
546 |             m_text += 2;
547 |         }
548 |         static if (isValidating)
549 |         {
550 |             if (auto badByte = hexDecode4(m_text, cp))
551 |             {
552 |                 m_text = badByte;
553 |                 expectNot("is not a hex digit");
554 |             }
555 |         }
556 |         else cp = hexDecode4(m_text);
557 |         static if (i == 0)
558 |         {
559 |             // Is this a high surrogate (followed by a low surrogate) or not ?
560 |             if (cp < 0xD800 || cp > 0xDBFF)
561 |                 break;
562 |             hi = cp - 0xD800 + 0x40 << 10;
563 |         }
564 |         else
565 |         {
566 |             static if (isValidating)
567 |                 if (cp < 0xDC00 || cp > 0xDFFF)
568 |                     handleError("The UTF-16 escape produced an invalid code point.");
569 |             // Keep only the 10 payload bits, in every validation mode, before merging with the high half.
570 |             cp = hi | (cp & 0x3FF);
571 |         }
572 |     }
573 |     static if (isValidating)
574 |         if (cp > 0x10FFFF || cp >= 0xD800 && cp <= 0xDFFF)
575 |             handleError("The UTF-16 escape produced an invalid code point.");
576 |     return cp;
577 | }
578 |
579 |
580 | private void skipString(bool skipInter)() // Skips a whole string value starting at its first byte.
581 | {
582 | m_text++; // step past the first byte (the opening quote when reached via peek or seekAggregateEnd)
583 | skipRestOfString!skipInter();
584 | }
585 |
586 |
587 | private void skipRestOfString(bool skipInter)()
588 | {
589 |     // Alternate between scanning and skipping one escape until scanString reports success.
590 |     for (bool done = scanString!isValidateAll(); !done; done = scanString!isValidateAll()) skipEscape();
591 |     skipOnePlusWhitespace!skipInter(); // step over one more byte, then whitespace (and separators if skipInter)
592 | }
593 |
594 |
595 | /+
596 | ╔══════════════════════════════════════════════════════════════════════════════
597 | ║ ⚑ Number
598 | ╚══════════════════════════════════════════════════════════════════════════════
599 | +/
600 |
601 | /*******************************************************************************
602 | *
603 | * Reads a number off the JSON text.
604 | *
605 | * If you ask for an unsigned value, no minus sign will be accepted in the JSON,
606 | * otherwise all features of JSON numbers will be available. In particular large
607 | * integers can be given in scientific notation.
608 | *
609 | * Params:
610 | * N = Built-in numerical type that should be returned.
611 | *
612 | * Returns:
613 | * The parsed number.
614 | *
615 | * Throws:
616 | * JSONException, on invalid JSON or integer overflow.
617 | *
618 | **************************************/
619 | N read(N)() if (isNumeric!N && !is(N == enum))
620 | {
621 | N n = void; // deliberately uninitialized; parseNumber receives it to store the result
622 | static if (isUnsigned!N)
623 | enum NumberOptions opt = {}; // default options for unsigned targets
624 | else
625 | enum NumberOptions opt = { minus:true }; // all other numeric targets accept a leading minus
626 | if (parseNumber!opt(m_text, n))
627 | skipWhitespace!skipAllInter();
628 | else static if (isValidating)
629 | handleError(format("Could not convert JSON number to `%s`", N.stringof));
630 | return n; // NOTE(review): without validation a failed parse returns `n` as parseNumber left it — possibly uninitialized
631 | }
632 |
633 |
634 | private void skipNumber(bool skipInter)() // Advances m_text past a number without converting it.
635 | {
636 | static if (isValidateAll)
637 | {
638 | if (*m_text == '-') // optional sign
639 | m_text++;
640 | if (*m_text == '0') // a leading zero stands alone as the integer part
641 | m_text++;
642 | else
643 | trySkipDigits();
644 | if (*m_text == '.') // optional fraction, at least one digit required
645 | {
646 | m_text++;
647 | trySkipDigits();
648 | }
649 | if ((*m_text | 0x20) == 'e') // setting bit 0x20 folds 'E' onto 'e'
650 | {
651 | m_text++;
652 | if (*m_text == '+' || *m_text == '-')
653 | m_text++;
654 | trySkipDigits();
655 | }
656 | skipWhitespace!false(); // only whitespace is skipped here; separators stay for the caller to check
657 | }
658 | else
659 | {
660 | m_text.skipCharRanges!"\t\n\r\r ++-.09EEee"; // NOTE(review): this literal has an odd number of characters as shown — confirm it matches what skipCharRanges expects
661 | static if (skipInter)
662 | m_text.skipAllOf!"\t\n\r ,";
663 | }
664 | }
665 |
666 |
667 | static if (isValidateAll)
668 | {
669 |     private void trySkipDigits() // Requires at least one ASCII digit at m_text, then skips the whole run of digits.
670 |     {
671 |         if (cast(uint)(*m_text - '0') > 9) // unsigned compare: bytes below '0' wrap to large values instead of going negative
672 |             expectNot("in number literal");
673 |         m_text.skipAllOf!"0123456789";
674 |     }
675 | }
676 |
677 |
678 | /+
679 | ╔══════════════════════════════════════════════════════════════════════════════
680 | ║ ⚑ Object
681 | ╚══════════════════════════════════════════════════════════════════════════════
682 | +/
683 |
684 | /*******************************************************************************
685 | *
686 | * Reads a plain old data struct off the JSON text.
687 | *
688 | * Params:
689 | * T = Type of struct that should be returned.
690 | *
691 | * Returns:
692 | * A struct of type `T`.
693 | *
694 | **************************************/
695 | T read(T)() if (is(T == struct) && __traits(isPOD, T))
696 | {
697 | nest('{', "on start of object");
698 |
699 | T t; // fields without a matching JSON key keep their default value
700 | if (*m_text != '}') while (true)
701 | {
702 | auto key = borrowString();
703 | static if (!skipAllInter)
704 | {
705 | expect(':', "between key and value");
706 | skipWhitespace!false();
707 | }
708 |
709 | enum mapping = buildRemapTable!T; // compile-time entries pairing a D field name (`d`) with its JSON key (`json`)
710 | foreach (m; mapping) // `m` must be a compile-time value for the mixins below
711 | {
712 | if (key.length == m.json.length && memcmp(key.ptr, m.json.ptr, m.json.length) == 0)
713 | {
714 | mixin("alias keyT = typeof(T." ~ m.d ~ ");");
715 | mixin("t." ~ m.d ~ " = read!keyT;"); // dispatch to the read overload for the field's type
716 | goto Success;
717 | }
718 | }
719 | skipValue(); // no field matched this key: ignore its value
720 |
721 | Success:
722 | if (*m_text == '}')
723 | break;
724 |
725 | static if (!skipAllInter)
726 | {
727 | expect(',', "between key-value pairs");
728 | skipWhitespace!false();
729 | }
730 | }
731 |
732 | unnest();
733 | return t;
734 | }
735 |
736 |
737 | /*******************************************************************************
738 | *
739 | * Reads a plain old data struct or `null` off the JSON text.
740 | *
741 | * Params:
742 | * T = Type of struct pointer that should be returned.
743 | *
744 | * Returns:
745 | * A pointer to a newly filled struct of type `T` on the GC heap.
746 | *
747 | **************************************/
748 | T read(T)() if (is(PointerTarget!T == struct) && __traits(isPOD, PointerTarget!T))
749 | {
750 |     alias Target = PointerTarget!T;
751 |     if (peek == DataType.null_) return readNull(); // JSON `null` becomes a null pointer
752 |     T fresh = new Target; // storage for the struct about to be read
753 |     *fresh = read!Target();
754 |     return fresh;
755 | }
756 |
757 |
758 | /*******************************************************************************
759 | *
760 | * Reads an associative-array off a JSON text.
761 | *
762 | * The key type must be `string`, the value type can be any type otherwise
763 | * supported by the parser.
764 | *
765 | * Params:
766 | * T = The type of AA to return.
767 | *
768 | * Returns:
769 | * A newly filled associative array.
770 | *
771 | **************************************/
772 | T read(T)() if (is(KeyType!T == string))
773 | {
774 | T aa;
775 | foreach (key; byKey) // `key` is a borrowed slice (see byKey's note on duplicating keys)
776 | aa[m_isString ? cast(immutable)key : key.idup] = read!(ValueType!T)(); // reuse the key slice when m_isString is set, otherwise copy it
777 | return aa;
778 | }
779 |
780 |
781 | /*******************************************************************************
782 | *
783 | * An alias to the `singleKey` method. Instead of `json.singleKey!"something"`
784 | * you can write `json.something`. Read the notes on `singleKey`.
785 | *
786 | **************************************/
787 | alias opDispatch = singleKey; // unknown member names on the parser are forwarded to singleKey!"name"
788 |
789 |
790 | /*******************************************************************************
791 | *
792 | * Skips all keys of an object except the first occurrence with the given key
793 | * name.
794 | *
795 | * Params:
796 | * name = the key name of interest
797 | *
798 | * Returns:
799 | * A temporary struct, a proxy to the parser, that will automatically seek to
800 | * the end of the current JSON object on destruction.
801 | *
802 | * Throws:
803 | * JSONException when the key is not found in the object or parsing errors
804 | * occur.
805 | *
806 | * Note:
807 | * Since this is an on the fly parser, you can only get one key from an
808 | * object with this method. Use `keySwitch` or `foreach(key; json)` to get
809 | * values from multiple keys.
810 | *
811 | * See_Also:
812 | * keySwitch
813 | *
814 | **************************************/
815 | @property SingleKey singleKey(string name)()
816 | {
817 | nest('{', "on start of object");
818 |
819 | if (*m_text != '}') while (true)
820 | {
821 | auto key = borrowString();
822 | static if (!skipAllInter)
823 | {
824 | expect(':', "between key and value");
825 | skipWhitespace!false();
826 | }
827 |
828 | if (key.length == name.length && memcmp(key.ptr, name.ptr, name.length) == 0)
829 | return SingleKey(this); // positioned on the value; the proxy finishes the object when destroyed
830 |
831 | skipValueImpl!skipAllInter(); // not the key we want: skip its value
832 |
833 | if (*m_text == '}')
834 | break;
835 |
836 | static if (!skipAllInter)
837 | {
838 | expect(',', "between key-value pairs");
839 | skipWhitespace!false();
840 | }
841 | }
842 |
843 | unnest(); // reached only when the object ended without a match
844 | static if (isValidating)
845 | handleError("Key not found.");
846 | assert(0); // non-validating parsers treat a missing key as a programming error
847 | }
848 |
849 |
850 | /*******************************************************************************
851 | *
852 | * Selects from a set of given keys in an object and calls the corresponding
853 | * delegate. The difference to `singleKey` when invoked with a single key is
854 | * that `keySwitch` will not error out if the key is non-existent and may
855 | * trigger the delegate multiple times, if the JSON object has duplicate keys.
856 | *
857 | * Params:
858 | * Args = the names of the keys
859 | * dlg = the delegates corresponding to the keys
860 | *
861 | * Throws:
862 | * JSONException when parsing errors occur. A key that is missing from the
863 | * object is not an error.
864 | *
865 | **************************************/
866 | void keySwitch(Args...)(scope void delegate()[Args.length] dlg...)
867 | {
868 | nest('{', "on start of object");
869 |
870 | if (*m_text != '}') while (true)
871 | {
872 | auto key = borrowString();
873 | static if (!skipAllInter)
874 | {
875 | expect(':', "between key and value");
876 | skipWhitespace!false();
877 | }
878 |
879 | auto oldPos = m_text; // start of the value, compared below to see whether the read position moved
880 | foreach (i, arg; Args) // one comparison per key name given at compile time
881 | {
882 | if (key.length == arg.length && memcmp(key.ptr, arg.ptr, arg.length) == 0)
883 | {
884 | dlg[i](); // the delegate at the same index as the matching key name
885 | goto Next;
886 | }
887 | }
888 | skipValue(); // key not in Args: ignore its value
889 |
890 | Next:
891 | if (*m_text == '}')
892 | break;
893 |
894 | static if (!skipAllInter) if (oldPos !is m_text) // a comma is only demanded once the position has moved past oldPos
895 | {
896 | expect(',', "after key-value pair");
897 | skipWhitespace!false();
898 | }
899 | }
900 |
901 | unnest();
902 | }
903 |
904 |
905 | private int byKeyImpl(scope int delegate(ref const char[]) foreachBody) // opApply-style driver behind `foreach (key; json.byKey)`
906 | {
907 | nest('{', "at start of foreach over object");
908 |
909 | int result = 0; // non-zero once the foreach body asks to stop
910 | if (*m_text != '}') while (true)
911 | {
912 | auto key = borrowString();
913 | static if (!skipAllInter)
914 | {
915 | expect(':', "between key and value");
916 | skipWhitespace!false;
917 | }
918 |
919 | if (iterationGuts!"{}"(result, key, foreachBody, "after key-value pair")) // true means iteration is finished
920 | break;
921 | }
922 |
923 | unnest();
924 | return result;
925 | }
926 |
927 |
928 | /*******************************************************************************
929 | *
930 | * Iterate the keys of a JSON object with `foreach`.
931 | *
932 | * Notes:
933 | * $(UL
934 | * $(LI If you want to store the key, you need to duplicate it.)
935 | * )
936 | *
937 | * Example:
938 | * ---
939 | * uint id;
940 | * foreach (key; json.byKey)
941 | * if (key == "id")
942 | * id = json.read!uint;
943 | * ---
944 | **************************************/
945 | @safe @nogc pure nothrow
946 | @property int delegate(scope int delegate(ref const char[])) byKey()
947 | {
948 | return &byKeyImpl; // a delegate with an opApply-compatible signature, which is what `foreach` accepts
949 | }
950 |
951 |
952 | /+
953 | ╔══════════════════════════════════════════════════════════════════════════════
954 | ║ ⚑ Array handling
955 | ╚══════════════════════════════════════════════════════════════════════════════
956 | +/
957 |
958 | /*******************************************************************************
959 | *
960 | * Reads a dynamic array off the JSON text.
961 | *
962 | **************************************/
963 | T read(T)() if (isDynamicArray!T && !isSomeString!T)
964 | {
965 |     import std.array;
966 |     alias Element = typeof(T.init[0]);
967 |     Appender!T collected; // elements accumulate here while the JSON array is iterated
968 |     foreach (_; this) collected.put(read!Element);
969 |     return collected.data;
970 | }
971 |
972 |
973 | /*******************************************************************************
974 | *
975 | * Reads a static array off the JSON text.
976 | *
977 | * When validation is enabled, it is an error if the length of the JSON array
978 | * does not match the length of the static array. Otherwise unset elements
979 | * receive their initial value.
980 | *
981 | **************************************/
982 | T read(T)() if (isStaticArray!T)
983 | {
984 |     T sa = void; // every slot is either written below or the call throws
985 |     size_t cnt; // number of elements present in the JSON array
986 |     foreach (i; this)
987 |     {
988 |         if (i < T.length) // surplus JSON elements are left unread; the iteration skips them
989 |             sa[i] = read!(typeof(T.init[0]));
990 |         cnt = i + 1;
991 |     }
992 |     static if (isValidating)
993 |     {
994 |         if (cnt != T.length)
995 |             handleError(format("Static array size mismatch. Expected %s, got %s", T.length, cnt));
996 |     }
997 |     else
998 |     {
999 |         foreach (i; cnt .. T.length)
1000 |             sa[i] = typeof(T.init[0]).init; // element default; `T.init` is the whole array and cannot be assigned to one slot
1001 |     }
1002 |     return sa;
1003 | }
1004 |
1005 |
1006 | /*******************************************************************************
1007 | *
1008 | * Iterate over a JSON array via `foreach`.
1009 | *
1010 | **************************************/
1011 | int opApply(scope int delegate(const size_t) foreachBody)
1012 | {
1013 |     nest('[', "at start of foreach over array");
1014 |
1015 |     int status = 0;
1016 |     size_t index = 0;
1017 |     if (*m_text != ']') // an empty array has no elements to hand out
1018 |         while (!iterationGuts!"[]"(status, index++, foreachBody, "after array element")) {}
1019 |
1020 |     unnest();
1021 |     return status;
1022 | }
1023 |
1024 |
1025 | /+
1026 | ╔══════════════════════════════════════════════════════════════════════════════
1027 | ║ ⚑ Boolean
1028 | ╚══════════════════════════════════════════════════════════════════════════════
1029 | +/
1030 |
1031 | /*******************************************************************************
1032 | *
1033 | * Reads a boolean value off the JSON text.
1034 | *
1035 | **************************************/
1036 | bool read(T)() if (is(T == bool))
1037 | {
1038 | return skipBoolean!(skipAllInter, isValidating)(); // the keyword is checked whenever the parser validates at all
1039 | }
1040 |
1041 |
1042 | private bool skipBoolean(bool skipInter, bool validate = isValidateAll)() // Steps over `true`/`false` and returns the value.
1043 | {
1044 | static immutable char[4][2] keywords = [ "true", "alse" ]; // index 1 is `false` without its first letter
1045 | auto isFalse = *m_text == 'f'; // without validation any other first byte is treated as `true`
1046 | static if (validate)
1047 | if (*cast(char[4]*) &m_text[isFalse] != keywords[isFalse]) // compares 4 bytes, starting one byte later for `false`
1048 | handleError("`true` or `false` expected.");
1049 | m_text += isFalse ? 5 : 4;
1050 | skipWhitespace!skipInter();
1051 | return !isFalse;
1052 | }
1053 |
1054 |
1055 | /+
1056 | ╔══════════════════════════════════════════════════════════════════════════════
1057 | ║ ⚑ Null
1058 | ╚══════════════════════════════════════════════════════════════════════════════
1059 | +/
1060 |
1061 | /*******************************************************************************
1062 | *
1063 | * Reads `null` off the JSON text.
1064 | *
1065 | **************************************/
1066 | typeof(null) readNull()
1067 | {
1068 | skipNull!(skipAllInter, isValidating)(); // the keyword is checked whenever the parser validates at all
1069 | return null;
1070 | }
1071 |
1072 |
1073 | private void skipNull(bool skipInter, bool validate = isValidateAll)() // Steps over the 4-byte `null` keyword.
1074 | {
1075 | static if (validate)
1076 | if (*cast(const uint*) m_text != *cast(const uint*) "null".ptr) // compares all four bytes with one 32-bit load
1077 | handleError("`null` expected.");
1078 | m_text += 4;
1079 | skipWhitespace!skipInter();
1080 | }
1081 |
1082 |
1083 | /+
1084 | ╔══════════════════════════════════════════════════════════════════════════════
1085 | ║ ⚑ Helpers and Error Handling
1086 | ╚══════════════════════════════════════════════════════════════════════════════
1087 | +/
1088 |
1089 | /*******************************************************************************
1090 | *
1091 | * Skips the next JSON value if you are not interested.
1092 | *
1093 | **************************************/
1094 | void skipValue()
1095 | {
1096 | skipValueImpl!skipAllInter(); // public entry point: uses the parser's own separator-skipping setting
1097 | }
1098 |
1099 |
1100 | private void skipValueImpl(bool skipInter)() // Skips one value of whatever type `peek` reports.
1101 | {
1102 | with (DataType) final switch (peek)
1103 | {
1104 | case string:
1105 | skipString!skipInter();
1106 | break;
1107 | case number:
1108 | skipNumber!skipInter();
1109 | break;
1110 | case object:
1111 | static if (isValidateAll)
1112 | {
1113 | foreach (_; this.byKey) // breaking at once still walks the whole object: iterationGuts skips every remaining value
1114 | break;
1115 | }
1116 | else
1117 | {
1118 | m_text++; // step past '{'
1119 | seekObjectEnd();
1120 | skipOnePlusWhitespace!skipInter();
1121 | }
1122 | break;
1123 | case array:
1124 | static if (isValidateAll)
1125 | {
1126 | foreach (_; this) // same technique as for objects, via opApply
1127 | break;
1128 | }
1129 | else
1130 | {
1131 | m_text++; // step past '['
1132 | seekArrayEnd();
1133 | skipOnePlusWhitespace!skipInter();
1134 | }
1135 | break;
1136 | case boolean:
1137 | skipBoolean!skipInter();
1138 | break;
1139 | case null_:
1140 | skipNull!skipInter();
1141 | break;
1142 | }
1143 | }
1144 |
1145 |
1146 | /*******************************************************************************
1147 | *
1148 | * Returns the type of data that is up next in the JSON text.
1149 | *
1150 | **************************************/
1151 | @property DataType peek()
1152 | {
1153 | static immutable trans = { // 256-entry lookup table from first byte to value type, built at compile time
1154 | DataType[256] result = cast(DataType) ubyte.max; // ubyte.max marks bytes that cannot start a value
1155 | result['{'] = DataType.object;
1156 | result['['] = DataType.array;
1157 | result['-'] = DataType.number;
1158 | foreach (i; '0' .. '9'+1)
1159 | result[i] = DataType.number;
1160 | result['"'] = DataType.string;
1161 | result['t'] = DataType.boolean;
1162 | result['f'] = DataType.boolean;
1163 | result['n'] = DataType.null_;
1164 | return result;
1165 | }();
1166 |
1167 | DataType vt = trans[*m_text];
1168 | static if (isValidating)
1169 | if (vt == ubyte.max)
1170 | expectNot("while peeking at next value type");
1171 | return vt; // without validation the ubyte.max marker is returned as is
1172 | }
1173 |
1174 |
1175 | /*******************************************************************************
1176 | *
1177 | * Save or restore the parser's internal state.
1178 | *
1179 | * If you want to read only a certain object from the JSON, but exactly which
1180 | * depends on the value of some key, this is where saving and restoring the
1181 | * parser state helps.
1182 | *
1183 | * Before each candidate you save the parser state. Then you perform just the
1184 | * minimal work to test if the candidate matches some criteria. If it does,
1185 | * restore the parser state and read the elements in full. If it doesn't, just
1186 | * skip to the next.
1187 | *
1188 | **************************************/
1189 | @property const(JsonParserState) state() const
1190 | {
1191 | return JsonParserState(m_text, m_nesting); // snapshot of the read position and the nesting depth
1192 | }
1193 |
1194 | @property void state(const JsonParserState oldState)
1195 | {
1196 | m_text = oldState.text; // rewind (or forward) the read position
1197 | m_nesting = oldState.nesting; // and restore the matching nesting depth
1198 | }
1199 |
1200 |
1201 | private void nest(char c, string msg) // Enters an aggregate: consumes its opening byte `c` and bumps the nesting depth.
1202 | {
1203 | expect(c, msg); // checks for `c` when validating, then steps past it
1204 | skipWhitespace!false();
1205 | m_nesting++;
1206 | }
1207 |
1208 |
1209 | private void unnest()
1210 | in { assert(m_nesting > 0); }
1211 | body
1212 | {
1213 |     // Inner aggregate closed: step over the closing byte and skip as configured by skipAllInter.
1214 |     if (--m_nesting != 0)
1215 |         return skipOnePlusWhitespace!skipAllInter();
1216 |     // Outermost aggregate closed: only whitespace may follow before the terminating NUL.
1217 |     skipOnePlusWhitespace!false();
1218 |     static if (isValidating)
1219 |         if (*m_text != '\0')
1220 |             handleError("Expected end of JSON.");
1221 | }
1222 |
1223 |
1224 | private bool iterationGuts(char[2] braces, T, D)(ref int result, T idx, scope D dlg, // One foreach step shared by object and array iteration.
1225 | string missingCommaMsg) // Returns true when iteration is over.
1226 | {
1227 | auto oldPos = m_text; // start of the value, used to detect whether the foreach body consumed it
1228 | static if (isValidateAll)
1229 | {
1230 | if (result) // the body already asked to stop: keep skipping values without calling it again
1231 | {
1232 | skipValueImpl!(!isValidateAll)();
1233 | goto PastValue;
1234 | }
1235 | }
1236 | result = dlg(idx);
1237 | if (oldPos is m_text) // the body left the value untouched: skip it on its behalf
1238 | skipValueImpl!(!isValidateAll)();
1239 |
1240 | PastValue:
1241 | if (*m_text == braces[1]) // closing brace or bracket reached
1242 | return true;
1243 |
1244 | static if (!isValidateAll) if (result) // early exit requested: jump straight to the end of the aggregate
1245 | {
1246 | seekAggregateEnd!braces();
1247 | return true;
1248 | }
1249 |
1250 | static if (!skipAllInter) if (oldPos !is m_text)
1251 | {
1252 | expect(',', missingCommaMsg);
1253 | skipWhitespace!false();
1254 | }
1255 | return false;
1256 | }
1257 |
1258 |
1259 | static if (!isValidateAll)
1260 | {
1261 | private void seekObjectEnd() // Moves m_text to the `}` that closes the current object.
1262 | {
1263 | seekAggregateEnd!"{}"();
1264 | }
1265 |
1266 |
1267 | private void seekArrayEnd() // Moves m_text to the `]` that closes the current array.
1268 | {
1269 | seekAggregateEnd!"[]"();
1270 | }
1271 |
1272 |
1273 | private void seekAggregateEnd(immutable char[2] parenthesis)() // Leaves m_text on the closing byte that matches the aggregate we are currently inside.
1274 | {
1275 |     size_t nesting = 1; // we start inside one open aggregate
1276 |     while (true)
1277 |     {
1278 |         m_text.seekToAnyOf!(parenthesis ~ "\"\0");
1279 |         final switch (*m_text)
1280 |         {
1281 |             case parenthesis[0]:
1282 |                 m_text++; nesting++; // nested aggregate of the same kind opens
1283 |                 break;
1284 |             case parenthesis[1]:
1285 |                 if (--nesting == 0) return; // found our own closing byte; m_text stays on it
1286 |                 m_text++;
1287 |                 break;
1288 |             case '"':
1289 |                 skipString!false(); // Could skip ':' or ',' here by passing `true`, but we skip it above anyways.
1290 |                 break;
1291 |             case '\0': // the scan also stops at the terminator, which means the aggregate was never closed
1292 |                 static if (isValidating) handleError("Unexpected end of JSON."); else assert(0, "Unexpected end of JSON.");
1293 |         }
1294 |     }
1295 | }
1296 | }
1297 |
1298 |
1299 | /// This also increments the JSON read pointer.
1300 | private void expect(char c, string msg)
1301 | {
1302 | static if (isValidating)
1303 | if (*m_text != c)
1304 | expectImpl(c, msg); // error path is kept in a separate @noinline function
1305 | m_text++; // advances in every mode, validated or not
1306 | }
1307 |
1308 |
1309 | private void expectNot(char c, string msg) // Raises an error when the current byte equals `c`; does not advance m_text.
1310 | {
1311 | static if (isValidating)
1312 | if (*m_text == c)
1313 | expectNot(msg);
1314 | }
1315 |
1316 |
1317 | static if (isValidating)
1318 | {
1319 | @noinline
1320 | private void expectNot(string msg) // Reports the current byte as unexpected; `msg` completes the sentence.
1321 | {
1322 | string tmpl = isPrintable(*m_text) // printable bytes are shown as characters, others as hex
1323 | ? "Character '%s' %s."
1324 | : "Byte 0x%02x %s.";
1325 | handleError(format(tmpl, *m_text, msg));
1326 | }
1327 |
1328 |
1329 | @noinline
1330 | private void expectImpl(char c, string msg) // Error path of expect(): reports that `c` was expected but something else was found.
1331 | {
1332 | string tmpl = isPrintable(*m_text) // printable bytes are shown as characters, others as hex
1333 | ? "Expected '%s', but found '%s' %s."
1334 | : "Expected '%s', but found byte 0x%02x %s.";
1335 | handleError(format(tmpl, c, *m_text, msg));
1336 | }
1337 |
1338 |
1339 | @noinline
1340 | private void handleError(string msg) // Throws a JSONException carrying `msg` plus the line and column of m_text.
1341 | {
1342 | import fast.unicode;
1343 |
1344 | size_t line;
1345 | const(char)* p = m_start;
1346 | const(char)* last = m_start; // start of the most recent line entered by the loop
1347 | while (p < m_text) // walk line by line from the start of the text up to the error position
1348 | {
1349 | last = p;
1350 | p.skipToNextLine();
1351 | line++;
1352 | }
1353 | line += p is m_text; // NOTE(review): when p lands exactly on m_text, `last` still points at the previous line, so the column below spans that whole line — confirm against skipToNextLine
1354 | size_t column = last[0 .. m_text - last].countGraphemes() + 1; // columns count graphemes, not bytes
1355 |
1356 | throw new JSONException(msg, line.to!int, column.to!int);
1357 | }
1358 | }
1359 |
1360 |
1361 | @forceinline @nogc pure nothrow
1362 | private void skipOnePlusWhitespace(bool skipInter)() // Steps over exactly one byte, then behaves like skipWhitespace.
1363 | {
1364 | m_text++;
1365 | skipWhitespace!skipInter();
1366 | }
1367 |
1368 |
1369 | @forceinline @nogc pure nothrow
1370 | private void skipWhitespace(bool skipInter)() // Skips whitespace; with skipInter also skips ',' and ':' separators.
1371 | {
1372 | static if (skipInter)
1373 | m_text.skipAllOf!"\t\n\r ,:";
1374 | else
1375 | m_text.skipAsciiWhitespace();
1376 | }
1377 |
1378 |
1379 | private static struct SingleKey // Proxy returned by singleKey: forwards to the parser and finishes the enclosing object when destroyed.
1380 | {
1381 | alias json this;
1382 |
1383 | private Json* m_pjson; // the parser this proxy forwards to
1384 | private const(char*) m_oldPos; // read position at construction, i.e. the start of the selected value
1385 |
1386 | @safe @nogc pure nothrow
1387 | @property ref Json json()
1388 | {
1389 | return *m_pjson;
1390 | }
1391 |
1392 | this(ref Json json)
1393 | {
1394 | m_pjson = &json;
1395 | m_oldPos = json.m_text;
1396 | }
1397 |
1398 | ~this()
1399 | {
1400 | static if (isValidateAll)
1401 | {
1402 | if (*json.m_text != '}') // more of the object follows
1403 | {
1404 | if (m_oldPos !is json.m_text) // a comma is only demanded once the read position has moved
1405 | {
1406 | json.expect(',', "after key-value pair");
1407 | json.skipWhitespace!false();
1408 | }
1409 | while (true) // step through every remaining key-value pair
1410 | {
1411 | json.skipString!false();
1412 | json.expect(':', "between key and value");
1413 | json.skipWhitespace!false();
1414 | json.skipValueImpl!false();
1415 |
1416 | if (*json.m_text == '}')
1417 | break;
1418 |
1419 | json.expect(',', "after key-value pair");
1420 | json.skipWhitespace!false();
1421 | }
1422 | }
1423 | }
1424 | else
1425 | {
1426 | json.seekObjectEnd(); // without full validation, jump straight to the closing brace
1427 | }
1428 | json.unnest();
1429 | }
1430 | }
1431 |
1432 |
1433 | private static struct File // A Json parser over a memory-mapped file; releases the mapping when destroyed.
1434 | {
1435 | alias m_json this;
1436 |
1437 | Json m_json;
1438 | private size_t m_len; // Posix: length passed to mmap and later to munmap
1439 | private bool m_isMapping; // Windows: true while the parser reads directly from the mapped view
1440 |
1441 | @disable this();
1442 | @disable this(this);
1443 |
1444 | this(const Filename fname)
1445 | {
1446 | version (Posix)
1447 | {
1448 | import core.sys.posix.fcntl;
1449 | import core.sys.posix.sys.mman;
1450 | import core.sys.posix.unistd;
1451 |
1452 | version (CRuntime_Glibc)
1453 | enum O_CLOEXEC = octal!2000000;
1454 | else version (OSX) // Requires at least OS X 10.7 Lion
1455 | enum O_CLOEXEC = 0x1000000;
1456 | else version(FreeBSD) // NOTE(review): FreeBSD's <fcntl.h> appears to define O_CLOEXEC as 0x00100000 — verify this value
1457 | enum O_CLOEXEC = octal!2000000;
1458 | else static assert(0, "Not implemented");
1459 |
1460 | int fd = { return open(charPtr!fname, O_RDONLY | O_NOCTTY | O_CLOEXEC); }();
1461 | if (fd == -1) // checked first: a later libc call on a bad descriptor would overwrite the errno that ErrnoException reports
1462 | throw new ErrnoException("Could not open JSON file for reading.");
1463 | scope(exit) close(fd);
1464 |
1465 | assert(fcntl(fd, F_GETFD) & FD_CLOEXEC, "Could not set O_CLOEXEC.");
1466 |
1467 | // Get the file size
1468 | stat_t info;
1469 | if (fstat(fd, &info) == -1)
1470 | throw new ErrnoException("Could not get JSON file size.");
1471 |
1472 | // Ensure we have 16 extra bytes
1473 | size_t pagesize = sysconf(_SC_PAGESIZE);
1474 | ulong fsize = ulong(info.st_size + pagesize - 1) / pagesize * pagesize; // file size rounded up to whole pages
1475 | bool zeroPage = fsize < info.st_size + 16; // fewer than 16 spare bytes in the last page: append one more page
1476 | if (zeroPage)
1477 | fsize += pagesize;
1478 | if (fsize > size_t.max)
1479 | throw new Exception("JSON file too large to be mapped in RAM.");
1480 | m_len = cast(size_t) fsize;
1481 |
1482 | // Map the file
1483 | void* mapping = mmap(null, m_len, PROT_READ, MAP_PRIVATE, fd, 0);
1484 | if (mapping == MAP_FAILED)
1485 | throw new ErrnoException("Could not map JSON file.");
1486 | scope(failure)
1487 | munmap(mapping, m_len);
1488 |
1489 | // Get a zero-page up behind the JSON text
1490 | if (zeroPage)
1491 | {
1492 | void* offs = mapping + m_len - pagesize; // last page of the reserved range
1493 | if (mmap(offs, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED)
1494 | throw new ErrnoException("Could not map zero-page behind JSON text.");
1495 | }
1496 |
1497 | // Initialize the parser on the JSON text
1498 | m_json = Json((cast(char*) mapping)[0 .. cast(size_t) info.st_size], No.simdPrep);
1499 | }
1500 | else version (Windows)
1501 | {
1502 | import core.sys.windows.winnt;
1503 | import core.sys.windows.winbase;
1504 |
1505 | HANDLE hnd = { return CreateFileW( wcharPtr!fname, GENERIC_READ, FILE_SHARE_READ, null,
1506 | OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, null ); }();
1507 |
1508 | if (hnd == INVALID_HANDLE_VALUE)
1509 | throw new FileException("Could not open JSON file for reading.");
1510 | scope(exit)
1511 | CloseHandle( hnd );
1512 |
1513 | // Get the file size
1514 | LARGE_INTEGER fileSize = void;
1515 | if (!GetFileSizeEx( hnd, &fileSize ))
1516 | throw new Exception("Could not get JSON file size.");
1517 |
1518 | // Map the file
1519 | HANDLE mapping = CreateFileMapping( hnd, null, PAGE_READONLY, fileSize.HighPart, fileSize.LowPart, null );
1520 | if (mapping is null) // CreateFileMapping reports failure with NULL, not INVALID_HANDLE_VALUE
1521 | throw new Exception("Could not create file mapping for JSON file.");
1522 | scope(exit) CloseHandle( mapping );
1523 |
1524 | // View the mapping
1525 | void* view = MapViewOfFile( mapping, FILE_MAP_READ, 0, 0, 0 );
1526 | if (view is null)
1527 | throw new Exception("Could not map view of JSON file.");
1528 | scope(failure)
1529 | UnmapViewOfFile( view );
1530 |
1531 | // Missing 64-bit version in druntime (2.071)
1532 | version (X86_64) struct MEMORY_BASIC_INFORMATION {
1533 | PVOID BaseAddress;
1534 | PVOID AllocationBase;
1535 | DWORD AllocationProtect;
1536 | DWORD __alignment1;
1537 | ULONGLONG RegionSize;
1538 | DWORD State;
1539 | DWORD Protect;
1540 | DWORD Type;
1541 | DWORD __alignment2;
1542 | }
1543 |
1544 | // Check if the view is 16 bytes larger than the file
1545 | MEMORY_BASIC_INFORMATION query = void;
1546 | if (!VirtualQuery( view, cast(PMEMORY_BASIC_INFORMATION)&query, query.sizeof ))
1547 | throw new Exception("VirtualQuery failed.");
1548 |
1549 | // Initialize the parser on the JSON text
1550 | char[] slice = (cast(char*) view)[0 .. cast(size_t)fileSize.QuadPart];
1551 | if (query.RegionSize >= fileSize.QuadPart + 16)
1552 | {
1553 | m_json = Json(slice, No.simdPrep); // enough slack behind the text: parse in place
1554 | m_isMapping = true;
1555 | }
1556 | else
1557 | {
1558 | m_json = Json(slice, Yes.simdPrep); // NOTE(review): presumably copies the text, since the view is released right after — confirm
1559 | UnmapViewOfFile( view );
1560 | }
1561 | }
1562 | else static assert(0, "Not implemented");
1563 | }
1564 |
1565 |
1566 | this(const(char)[] fname)
1567 | {
1568 | import std.string;
1569 |
1570 | version (Posix)
1571 | this( fname.representation );
1572 | else version (Windows)
1573 | {
1574 | import core.stdc.stdlib;
1575 | auto buf = cast(wchar*)alloca(string2wstringSize(fname)); // stack buffer for the UTF-16 file name
1576 | auto fnameW = string2wstring(fname, buf);
1577 | this( fnameW.representation );
1578 | }
1579 | else static assert(0, "Not implemented");
1580 | }
1581 |
1582 |
1583 | nothrow
1584 | ~this()
1585 | {
1586 | version (Posix)
1587 | {
1588 | import core.sys.posix.sys.mman;
1589 | munmap(cast(void*)m_json.m_start, m_len);
1590 | }
1591 | else version (Windows)
1592 | {
1593 | import core.sys.windows.winnt;
1594 | import core.sys.windows.winbase;
1595 | if (m_isMapping) // the view was already released in the constructor otherwise
1596 | UnmapViewOfFile( cast(LPCVOID)m_json.m_start );
1597 | }
1598 | else static assert(0, "Not implemented");
1599 | }
1600 | }
1601 | }
1602 |
1603 |
1604 | private template buildRemapTable(T) // Builds a compile-time list pairing each member of T with the name used for it in JSON.
1605 | {
1606 | import std.typetuple;
1607 | import fast.internal.helpers;
1608 |
1609 | static if (is(T == enum))
1610 | {
1611 | struct Remap { T d; string json; } // enums: `d` is the member value
1612 | enum members = EnumMembers!T;
1613 | }
1614 | else
1615 | {
1616 | struct Remap { string d; string json; } // other types: `d` is the field name
1617 | enum members = FieldNameTuple!T;
1618 | }
1619 | enum mapping = getUDA!(T, JsonMapping).map; // renames taken from the JsonMapping attribute on T
1620 |
1621 | template Impl(size_t a, size_t b) // builds the entries for members[a .. b] by halving the range
1622 | {
1623 | static if (b - a > 1)
1624 | {
1625 | alias Impl = TypeTuple!(Impl!(a, (b + a) / 2), Impl!((b + a) / 2, b));
1626 | }
1627 | else static if (b - a == 1)
1628 | {
1629 | static if (is(T == enum))
1630 | enum key = members[a].to!string;
1631 | else
1632 | alias key = members[a];
1633 | static if ((key in mapping) !is null) // a rename exists for this member
1634 | enum mapped = mapping[key];
1635 | else
1636 | alias mapped = key; // no rename: the JSON name equals the D name
1637 | alias Impl = TypeTuple!(Remap(members[a], mapped));
1638 | }
1639 | else alias Impl = TypeTuple!(); // empty range
1640 | }
1641 |
1642 | alias buildRemapTable = Impl!(0, members.length);
1643 | }
1644 |
1645 |
1646 | unittest
1647 | {
1648 | struct Counter // how many values of each kind a document contains
1649 | {
1650 | size_t array, object, key, string, number, boolean, null_;
1651 | }
1652 |
1653 | void valueHandler(ref Json!validateAll.File json, ref Counter ctr) // recursively walks one value and tallies what it sees
1654 | {
1655 | with (DataType) final switch (json.peek)
1656 | {
1657 | case array:
1658 | ctr.array++;
1659 | foreach (_; json)
1660 | valueHandler(json, ctr);
1661 | break;
1662 | case object:
1663 | ctr.object++;
1664 | foreach(key; json.byKey)
1665 | {
1666 | ctr.key++;
1667 | valueHandler(json, ctr);
1668 | }
1669 | break;
1670 | case string:
1671 | ctr.string++;
1672 | json.skipValue();
1673 | break;
1674 | case number:
1675 | ctr.number++;
1676 | json.skipValue();
1677 | break;
1678 | case boolean:
1679 | ctr.boolean++;
1680 | json.skipValue();
1681 | break;
1682 | case null_:
1683 | ctr.null_++;
1684 | json.skipValue();
1685 | break;
1686 | }
1687 | }
1688 |
1689 | void passFile(string fname, Counter valid) // the file must parse and produce exactly the expected tallies
1690 | {
1691 | auto json = parseJSONFile!validateAll(fname);
1692 | Counter ctr;
1693 | valueHandler(json, ctr);
1694 | assert(ctr == valid, fname);
1695 | }
1696 |
1697 | void failFile(string fname) // walking the file must raise a JSONException
1698 | {
1699 | auto json = parseJSONFile!validateAll(fname);
1700 | Counter ctr;
1701 | assertThrown!JSONException(valueHandler(json, ctr), fname);
1702 | }
1703 |
1704 | // Tests that need to pass according to RFC 7159
1705 | passFile("test/pass1.json", Counter( 6, 4, 33, 21, 32, 4, 2));
1706 | passFile("test/pass2.json", Counter(19, 0, 0, 1, 0, 0, 0));
1707 | passFile("test/pass3.json", Counter( 0, 2, 3, 2, 0, 0, 0));
1708 | passFile("test/fail1.json", Counter( 0, 0, 0, 1, 0, 0, 0)); // despite its name this file is expected to parse: it counts as a single string
1709 | passFile("test/fail18.json", Counter(20, 0, 0, 1, 0, 0, 0)); // likewise expected to parse: 20 arrays and one string
1710 |
1711 | // Tests that need to fail
1712 | foreach (i; chain(iota(2, 18), iota(19, 34))) // every fail file except 1 and 18, which are covered above
1713 | failFile("test/fail" ~ i.to!string ~ ".json");
1714 |
1715 | // Deserialization
1716 | struct Test
1717 | {
1718 | string text1;
1719 | string text2;
1720 | string text3;
1721 | double dbl = 0;
1722 | float flt = 0;
1723 | ulong ul;
1724 | uint ui;
1725 | ushort us;
1726 | ubyte ub;
1727 | long lm, lp;
1728 | int im, ip;
1729 | short sm, sp;
1730 | byte bm, bp;
1731 | bool t, f;
1732 | Test* tp1, tp2;
1733 | int[2] sa;
1734 | int[] da;
1735 | Test[string] aa;
1736 | SearchPolicy e;
1737 | }
1738 |
1739 | Test t1 = {
1740 | text1 : "abcde",
1741 | text2 : "",
1742 | text3 : null,
1743 | dbl : 1.1,
1744 | flt : -1.1,
1745 | ul : ulong.max,
1746 | ui : uint.max,
1747 | us : ushort.max,
1748 | ub : ubyte.max,
1749 | lm : long.min,
1750 | lp : long.max,
1751 | im : int.min,
1752 | ip : int.max,
1753 | sm : short.min,
1754 | sp : short.max,
1755 | bm : byte.min,
1756 | bp : byte.max,
1757 | t : true,
1758 | f : false,
1759 | tp1 : null,
1760 | tp2 : new Test("This is", "a", "test."),
1761 | sa : [ 33, 44 ],
1762 | da : [ 5, 6, 7 ],
1763 | aa : [ "hash" : Test("x", "y", "z") ],
1764 | e : SearchPolicy.linear
1765 | };
1766 | Test t2 = parseJSON(`{
1767 | "text1" : "abcde",
1768 | "text2" : "",
1769 | "text3" : null,
1770 | "dbl" : 1.1,
1771 | "flt" : -1.1,
1772 | "ul" : ` ~ ulong.max.to!string ~ `,
1773 | "ui" : ` ~ uint.max.to!string ~ `,
1774 | "us" : ` ~ ushort.max.to!string ~ `,
1775 | "ub" : ` ~ ubyte.max.to!string ~ `,
1776 | "lm" : ` ~ long.min.to!string ~ `,
1777 | "lp" : ` ~ long.max.to!string ~ `,
1778 | "im" : ` ~ int.min.to!string ~ `,
1779 | "ip" : ` ~ int.max.to!string ~ `,
1780 | "sm" : ` ~ short.min.to!string ~ `,
1781 | "sp" : ` ~ short.max.to!string ~ `,
1782 | "bm" : ` ~ byte.min.to!string ~ `,
1783 | "bp" : ` ~ byte.max.to!string ~ `,
1784 | "t" : true,
1785 | "f" : false,
1786 | "tp1" : null,
1787 | "tp2" : { "text1": "This is", "text2": "a", "text3": "test." },
1788 | "sa" : [ 33, 44 ],
1789 | "da" : [ 5, 6, 7 ],
1790 | "aa" : { "hash" : { "text1":"x", "text2":"y", "text3":"z" } },
1791 | "e" : "linear"
1792 | }`).read!Test;
1793 |
1794 | assert(t2.tp2 && *t1.tp2 == *t2.tp2); // pointers, arrays and AAs are compared by content first ...
1795 | assert(t1.da == t2.da);
1796 | assert(t1.aa == t2.aa);
1797 | t2.tp2 = t1.tp2; // ... then aligned with t1 before the whole-struct comparison below
1798 | t2.da = t1.da;
1799 | t2.aa = t1.aa;
1800 | assert(t1 == t2);
1801 | }
1802 |
// Regression test for Issue #4: a JSON `null` read as a string must come back as a null string.
unittest
{
    auto document = `{"initiator_carrier_code":null,"a":"b"}`;
    auto parser = parseTrustedJSON(document);
    foreach (name; parser.byKey)
    {
        if (name == "initiator_carrier_code")
        {
            auto value = parser.read!string;
            assert(value is null);
        }
    }
}
1817 |
// Regression test for Issue #5: a document holding multi-byte UTF-8 sequences of every length
// must pass both Phobos' UTF validation and the JSON validator.
unittest
{
    import std.utf;
    auto document = `{"a":"SΛNNO𐍈€한"}`;
    validate(document);
    validateJSON(document);
}
1826 |
--------------------------------------------------------------------------------
/source/fast/parsing.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Text parsing functionality.
4 | *
5 | * Authors:
6 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
7 | *
8 | * Copyright:
9 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 | *
11 | * License:
12 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 | *
14 | **************************************************************************************************/
15 | module fast.parsing;
16 |
17 | import std.traits;
18 | import fast.internal.sysdef;
19 |
20 |
21 | /+
22 | ╔══════════════════════════════════════════════════════════════════════════════
23 | ║ ⚑ Hexadecimal
24 | ╚══════════════════════════════════════════════════════════════════════════════
25 | +/
26 |
/*******************************************************************************
 * 
 * Converts one hexadecimal digit character into its numeric value.
 *
 * The input is not validated: for anything that is not `0-9`, `a-f` or `A-F`
 * the low four bits of the adjusted code unit are returned.
 *
 * Params:
 *   c = The hexadecimal digit.
 *
 * Returns:
 *   The value of `c` in the range 0 to 15.
 *
 **************************************/
@safe @nogc pure nothrow
uint hexDecode(char c)
{
    // ASCII letters have bit 6 set while digits do not. Adding 9 for letters makes
    // the low nibble of 'a'/'A' (1) come out as 10, 'b'/'B' as 11 and so on.
    immutable uint letterFlag = c >> 6;
    immutable uint adjusted = c + 9 * letterFlag;
    return adjusted & 15;
}
43 |
44 |
/*******************************************************************************
 * 
 * Decodes four consecutive hexadecimal digits without any validation and
 * advances the read pointer behind them.
 *
 * Params:
 *   hex = The read pointer. Four code units are read from it and it is then
 *         moved forward by four.
 *
 * Returns:
 *   The 16-bit value encoded by the four digits, first digit most significant.
 *
 **************************************/
@nogc pure nothrow
uint hexDecode4(ref const(char)* hex)
{
    // Load the four digit characters themselves. (Dereferencing `&hex` would read the bits of
    // the pointer variable instead.) A byte-wise copy keeps the load valid for unaligned input.
    uint x = void;
    (cast(char*) &x)[0 .. 4] = hex[0 .. 4];
    hex += 4;
    // Per byte: keep the low nibble and add 9 if bit 6 (set for ASCII letters) is present.
    x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101);
    version (LittleEndian)
    {
        // The first digit sits in the lowest byte; move each nibble to its final position.
        return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000;
    }
    else
    {
        // Pack neighbouring nibbles into bytes, then the two bytes into a 16-bit value.
        x = (x | x >> 4) & 0x00FF00FF;
        return (x | x >> 8) & 0x0000FFFF;
    }
}
61 |
62 |
/*******************************************************************************
 * 
 * Decodes four hexadecimal digits with validation.
 *
 * Params:
 *   hex = The read pointer. It is advanced by four only if all four code units
 *         are valid hexadecimal digits.
 *   result = Receives the decoded value. If decoding stops early it holds the
 *            digits accumulated so far, already shifted for the rejected digit.
 *
 * Returns:
 *   `null` on success, otherwise a pointer to the first code unit that is not
 *   a hexadecimal digit.
 *
 **************************************/
@nogc pure nothrow
inout(char)* hexDecode4(ref inout(char)* hex, out uint result)
{
    for (size_t idx = 0; idx < 4; idx++)
    {
        result <<= 4;
        // '0'..'9' map to 0..9; everything else wraps around to a value above 9.
        immutable char decimal = cast(char) (hex[idx] - '0');
        if (decimal <= 9)
        {
            result += decimal;
            continue;
        }
        // Fold upper case onto lower case and rebase so that 'a'..'f' become 0..5.
        immutable char letter = cast(char) ((decimal | 0x20) - 0x31);
        if (letter > 5)
            return hex + idx;
        result += letter + 10;
    }
    hex += 4;
    return null;
}
unittest
{
    string digits = "aF09";
    const(char)* cursor = digits.ptr;
    uint value;
    hexDecode4(cursor, value);
    assert(value == 0xAF09);
}
94 |
95 |
96 | /+
97 | ╔══════════════════════════════════════════════════════════════════════════════
98 | ║ ⚑ Numbers
99 | ╚══════════════════════════════════════════════════════════════════════════════
100 | +/
101 |
102 |
/// Compile-time options for `parseNumber`. An instance is passed as a template argument.
struct NumberOptions
{
    /// Allows the minus sign as the first character and thus negative numbers.
    /// When `false`, `parseNumber` does not look for a sign at all.
    bool minus;
}
109 |
110 |
/*******************************************************************************
 * 
 * Parse a number from a character read pointer.
 *
 * On success, the read pointer is set behind the number. On failure neither
 * the read pointer nor `n` is modified.
 *
 * The accepted syntax is an optional '-' (only with `opt.minus`), an integral
 * part (a single '0' or a digit sequence not starting with '0'), an optional
 * fraction introduced by '.', and an optional exponent introduced by 'e' or
 * 'E'. The text must be followed by a code unit that is not part of a number,
 * because no length is passed in.
 *
 * Params:
 *   opt = Selects features for the implementation. Less features make the
 *         parser faster.
 *   str = The read pointer.
 *   n = A reference to a number to be overwritten with the result.
 *
 * Returns:
 *   An indication of success. Typically the function fails when a number cannot
 *   be stored in an integer of the given size or invalid characters are
 *   encountered.
 *
 * Bugs:
 *   For floating-point targets, values that need neither the fast path nor the
 *   power-of-5 division path end in `assert(0, "Not implemented")`.
 *
 **************************************/
@nogc pure nothrow
bool parseNumber(NumberOptions opt, N)(ref const(char)* str, ref N n) if (isNumeric!N)
{
    import fast.internal.helpers;
    import std.range;

    // Integer types larger than the mantissa of N.
    static if (N.sizeof <= size_t.sizeof)
    {
        alias U = size_t;
        alias I = ptrdiff_t;
    }
    else
    {
        alias U = ulong;
        alias I = long;
    }

    // Largest value of type U that can be multiplied by 10 and have a digit added without overflow.
    enum canHoldOneMoreDigit = (U.max - 9) / 10;
    static if (isFloatingPoint!N)
    {
        enum significandRightShift = 8 * U.sizeof - N.mant_dig + 1;
        enum lastSignificandBit = U(2) << 8 * U.sizeof - N.mant_dig;
        enum firstFractionBit = U(1) << 8 * U.sizeof - N.mant_dig;
        enum remainderBits = U.max - N.mant_dig + 1;
        enum expShift = N.mant_dig - 1;
        enum expBias = N.max_exp - 1;

        // Unsigned integer with the storage size of N. The assembled bit pattern is stored through
        // this type, because U can be wider than N (e.g. `float` on a 64-bit target).
        static if (N.sizeof == uint.sizeof)
            alias Bits = uint;
        else
            alias Bits = ulong;
    }

    static if (isFloatingPoint!N)
    {
        alias pow5Max = PowData!(U, 5).powMax;
        alias pow5 = PowData!(U, 5).pows;

        // Largest power of 10 that fits into a float of type N. The factor 5 here is correct, as the 2s
        // go in as an increment in the exponent, which is negligible here.
        enum pow10MaxF = {
            U v = 1; uint exp;
            while (v <= ((U(1) << N.mant_dig) - 1) / 5) { v *= 5; exp++; }
            return exp;
        }();

        static immutable N[pow10MaxF] pow10F = N(10).recurrence!((a, n) => 10 * a[n-1]).take(pow10MaxF).array;
    }
    else
    {
        alias pow10Max = PowData!(U, 10).powMax;
        alias pow10 = PowData!(U, 10).pows;
    }

    const(char)* p = str;
    const(char)* point = null;
    U significand = 0;
    size_t exponent = 0;
    size_t expAdjust = void;
    // Must start out as `false`: it is read below for inputs that have a fraction but no exponent.
    bool expSign = false;
    static if (isFloatingPoint!N)
    {
        U exp2 = void;
        bool roundUp = false;
    }

    /////////////////// SIGN BIT HANDLING ///////////////////

    // Check for the sign.
    static if (opt.minus)
    {
        bool sign = (*p == '-');
        if (sign)
            p++;
    }

    /////////////////// INTEGRAL PART OF SIGNIFICAND ///////////////////

    uint digit = *p - '0';
    if (digit == 0)
    {
        // We have a single zero.
        p++;
    }
    else if (digit <= 9)
    {
        // Regular case of one or more digits.
        do
        {
            if (significand > canHoldOneMoreDigit)
                goto BigMantissa;
        BigMantissaNotSoMuch:
            significand = 10 * significand + digit;
            digit = *++p - '0';
        }
        while (digit <= 9);
    }
    else return false;

    /////////////////// FRACTIONAL PART OF SIGNIFICAND ///////////////////

    if (*p == '.')
    {
        point = ++p;
        digit = *p - '0';
        if (digit > 9)
            return false;
        do
        {
            if (significand > canHoldOneMoreDigit)
                goto BigMantissa;
            significand = 10 * significand + digit;
            digit = *++p - '0';
        }
        while (digit <= 9);
    }

    /////////////////// EXPONENT HANDLING ///////////////////

    // Number of fraction digits; they scale the significand down by that many powers of 10.
    expAdjust = (point is null) ? 0 : p - point;
    if ((*p | 0x20) == 'e')
    {
        p++;
        expSign = (*p == '-');
        if (expSign || *p == '+')
            p++;
        digit = *p - '0';
        if (digit > 9)
            return false;
        do
        {
            if (exponent > canHoldOneMoreDigit)
                goto BigExponent;
            exponent = 10 * exponent + digit;
            digit = *++p - '0';
        }
        while (digit <= 9);
    }

    if (expAdjust)
    {
        if (expSign)
        {
            if (exponent > size_t.max - expAdjust)
                goto BigExponentAdjustForDecimalPoint;
            exponent += expAdjust;
        }
        else if (exponent >= expAdjust)
        {
            exponent -= expAdjust;
        }
        else
        {
            // Amount of fraction digits turns exponent from positive to negative.
            expAdjust -= exponent;
            exponent = expAdjust;
            expSign = true;
        }
    }

    /////////////////// RESULT ASSEMBLY ///////////////////

    static if (isFloatingPoint!N)
    {
        if (significand == 0 || exponent == 0)
        {
            // The significand is the unsigned result; apply the sign afterwards so that
            // negative inputs (including "-0") keep it.
            n = N(significand);
            static if (opt.minus)
                if (sign)
                    n = -n;
            str = p;
            return true;
        }

        // Try the floating-point fast path: The significand's bits, as well as the 10^x exponent can be expressed
        // accurately as a float of type N. We just need to divide or multiply them based on the signedness of the
        // exponent.
        exp2 = bsr(significand);
        if (exp2 - bsf(significand) < N.mant_dig && exponent <= pow10MaxF)
        {
            N b = pow10F[exponent - 1];
            static if (opt.minus)
                if (sign)
                    b = -b;
            n = expSign ? significand / b : significand * b;
            str = p;
            return true;
        }
        else if (exponent <= pow5Max)
        {
            // Special case, mostly to handle the little bit of extra precision that comes from
            // converting a double to its string representation. The last base-10 digit doesn't quite
            // fit back into a double, but we don't need to resort to arbitrary precision math just yet.
            if (expSign)
            {
                U divisor = pow5[exponent - 1];
                static if (isAMD64 && (isLDC || isGDC))
                {
                    // AMD64 can divide 128-bit numbers by 64-bit numbers directly.
                    size_t expDivisor = clz(divisor);
                    divisor <<= expDivisor;
                    exp2 = expDivisor - exponent - bigDiv(significand, divisor);
                    significand <<= 1;
                }
                else
                {
                    // We perform an iterative division.
                    U dividend = significand << 8 * U.sizeof - 1 - exp2;
                    U quotient = dividend / divisor;
                    dividend %= divisor;

                    size_t lzs = clz(quotient);
                    exp2 -= exponent + lzs;
                    significand = quotient << ++lzs;
                    size_t accuracy = 8 * U.sizeof - lzs;
                    while (accuracy < N.mant_dig)
                    {
                        lzs = clz(dividend);
                        dividend <<= lzs;
                        quotient = dividend / divisor;
                        dividend %= divisor;
                        significand |= quotient << (8 * U.sizeof - lzs) >> accuracy;
                        accuracy += lzs;
                    }
                }

                // Assemble floating point value from bits.
                roundUp = (significand & firstFractionBit) != 0;
                significand >>= significandRightShift;
                if (roundUp)
                {
                    significand++;
                    significand &= ~(U(1) << N.mant_dig - 1);
                    if (significand == 0)
                        ++exp2;
                }

                // Store exactly N.sizeof bytes and place the sign in N's top bit. Writing through
                // a U* would overrun a `float` whenever U is 64 bits wide.
                Bits bits = cast(Bits) (((exp2 + expBias) << expShift) | significand);
                static if (opt.minus)
                    bits |= Bits(sign) << Bits.sizeof * 8 - 1;
                *cast(Bits*) &n = bits;
                str = p;
                return true;
            }
            else assert(0, "Not implemented");
        }
        else assert(0, "Not implemented");
    }
    else
    {
        import fast.intmath;

        if (exponent && significand)
        {
            // Powers of 10 beyond the table cannot yield a valid integer: a positive exponent
            // overflows U, a negative one leaves a fraction because significand < 10^(pow10Max+1).
            if (exponent > pow10Max)
                return false;

            // We need to account for the exponent.
            U pow = pow10[exponent - 1];
            if (expSign)
            {
                // Negative exponent, if we get a fractional result, abort.
                if (significand % pow)
                    return false;
                significand /= pow;
            }
            else static if (U.sizeof < ulong.sizeof)
            {
                // Multiply using a bigger result type
                ulong prod = ulong(significand) * pow;
                if (prod > U.max)
                    return false;
                significand = cast(U) prod;
            }
            else
            {
                // If the multiply will overflow, abort.
                bool overflowed;
                significand = mulu(significand, pow, overflowed);
                if (overflowed)
                    return false;
            }
        }

        // Range check first, so that `n` stays untouched when the value does not fit.
        static if (isSigned!N && opt.minus)
        {
            // A negative number may be one larger in magnitude than N.max.
            if (significand > U(N.max) + sign)
                return false;
            n = cast(N) significand;
            if (sign)
                n = cast(N)-n;
        }
        else
        {
            if (significand > N.max)
                return false;
            n = cast(N) significand;
        }
        str = p;
        return true;
    }

BigMantissa:
    // `canHoldOneMoreDigit` is conservative (assumes a 9); retry with the actual digit.
    if (significand <= (significand.max - digit) / 10)
        goto BigMantissaNotSoMuch;
    // assert(0, "Not implemented");

BigExponent:
    // assert(0, "Not implemented");

BigExponentAdjustForDecimalPoint:
    // assert(0, "Not implemented");
    return false;
}
434 |
435 |
/// Compile-time table of the powers of `base` that fit into the unsigned integer type `U`.
private template PowData(U, U base)
{
    import std.range;

    // Largest power of `base` that fits into an integer of type U.
    enum powMax = {
        uint steps = 0;
        for (U value = 1; value <= U.max / base; value *= base)
            steps++;
        return steps;
    }();

    // Table of powers of `base`. (We skip base^0, so `pows[i]` is base^(i+1).)
    static immutable U[powMax] pows =
        recurrence!((state, idx) => base * state[idx - 1])(base).take(powMax).array;
}
446 |
447 |
static if (isAMD64 && (isLDC || isGDC))
{
    /*
     * Divides `a`, normalized and placed in the upper half of a 128-bit dividend (RDX = `a`,
     * RAX = 0), by `b` using a single `divq`. The 64-bit quotient is stored back in `a`.
     *
     * Params:
     *   a = Dividend on entry, quotient bits on exit.
     *   b = Divisor. Its highest bit must be set (see the `in` contract).
     *
     * Returns:
     *   The left shift that was applied to `a` for normalization, plus one. After the shift the
     *   dividend is halved again (and the count reduced) if it is not smaller than `b`, so that
     *   the quotient fits into 64 bits.
     */
    @nogc pure nothrow
    private size_t bigDiv(ref size_t a, size_t b)
    in
    {
        assert(b > size_t.max / 2, "High bit of divisor must be set.");
    }
    body
    {
        // Make sure that the division will yield exactly 32 or 64 significant bits.
        import fast.internal.helpers;
        size_t lza = clz(a);
        version (LDC)
        {
            import ldc.llvmasm;
            a <<= lza;
            // Keep the high half of the dividend below the divisor, otherwise the quotient overflows.
            if (a >= b) { a >>= 1; lza--; }
            // Constraints: result in RAX, `a` passed in RDX, `b` as register or memory operand.
            a = __asm!ulong("
xor %rax, %rax
divq $2
", "={rax},{rdx},rm", a, b);
        }
        else version (GNU)
        {
            size_t dividend = a << lza;
            // Keep the high half of the dividend below the divisor, otherwise the quotient overflows.
            if (dividend >= b) { dividend >>= 1; lza--; }
            // Quotient is returned in RAX (`a`); RDX carries the dividend in and is overwritten.
            asm { "
xor %%rax, %%rax
divq %3
" : "=&a" a, "=d" dividend : "d" dividend, "rm" b; }
        }
        return ++lza;
    }

    unittest
    {
        size_t a = size_t.max / 11;
        size_t b = size_t.max / 5;
        version (X86_64)
        {
            import fast.internal.helpers;
            long exp = clz(b);   // Positive base-2 exponent
            b <<= exp;
            exp -= bigDiv(a, b);
            assert(a == 0xE8BA2E8BA2E8BA2AUL);
            assert(exp == -2);
        }
    }
}
498 |
499 |
500 | /+
501 | ╔══════════════════════════════════════════════════════════════════════════════
502 | ║ ⚑ String Scanning and Comparison
503 | ╚══════════════════════════════════════════════════════════════════════════════
504 | +/
505 |
/*******************************************************************************
 * 
 * Compares a string of unknown length against a statically known key.
 *
 * This function also handles escapes and requires one or more terminator chars.
 *
 * Params:
 *   C = Character type (code unit width) of the key and the string.
 *   key = The static key string.
 *   terminators = A list of code units that terminate the string.
 *   special = A list of code units that are handled by the user callback. Use
 *             this for escape string handling. Default is `null`.
 *   p_str = Pointer to the string for the comparison. After the function call
 *           it will be behind the last matching character.
 *   callback = User callback to handle special escape characters if `special`
 *              is non-empty. It receives the key and string read pointers by
 *              reference and returns `false` to report a mismatch.
 *
 * Returns:
 *   A code with following meanings: -1 = not equal, terminator character hit,
 *   0 = not equal, but string not exhausted, 1 = string equals key.
 *
 **************************************/
int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)
    (ref const(C)* p_str, scope bool delegate(ref immutable(C)*, ref const(C)*) callback = null)
in
{
    assert(special.length == 0 || callback !is null);
}
body
{
    import std.algorithm, std.range;

    // Classification of the first 256 code units: -1 = terminator, 1 = special, 0 = ordinary.
    static immutable byte[256] classify =
        iota(256).map!(c => terminators.canFind(c) ? byte(-1) : special.canFind(c) ? 1 : 0).array;

    immutable(C)* p_key = key.ptr;
    immutable C* e_key = p_key + key.length;

    while (p_key !is e_key)
    {
        // Code units beyond the table are always ordinary.
        immutable int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;

        if (clazz < 0)
        {
            // The string ended before the key did.
            return clazz;
        }
        else if (clazz == 0)
        {
            if (*p_str != *p_key)
                return 0;

            p_str++;
            p_key++;
        }
        else if (!callback(p_key, p_str))
        {
            return 0;
        }
    }

    // The key is exhausted; it is a match only if the string ends here as well. Use the same
    // guarded lookup as above, so that a wide code unit is never mistaken for a terminator.
    return *p_str <= 0xFF && classify[*p_str] < 0;
}
569 |
570 |
571 | /*
572 | @nogc nothrow
573 | void fixedStringCompareSSE4()
574 | {
575 | enum words = key.length / 16;
576 | enum remainder = key.length % 16;
577 | enum contains0 = key.canFind('\0'); // For SSE4.2 string search.
578 | static assert(!contains0, "Not implemented");
579 |
580 | size_t remaining = e - b;
581 | auto p = b;
582 |
583 | foreach (i; staticIota!(0, words))
584 | {
585 | auto backup = p;
586 | p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
587 | p = backup;
588 | p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
589 | }
590 | }
591 | */
592 |
593 |
/*******************************************************************************
 * 
 * Moves the read pointer forward to the first code unit that is one of `cs`.
 *
 * Params:
 *   cs = the code units to stop at; `sanitizeChars` requires '\0' to be among
 *        them
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToAnyOf(string cs)(ref const(char)* p)
{
    enum needles = sanitizeChars(cs);
    vpcmpistri!(char, needles, Operation.equalAnyElem)(p);
}
599 |
600 |
/*******************************************************************************
 * 
 * Moves the read pointer forward to the first code unit that falls into one of
 * the ranges given as start/end pairs in `cs`.
 *
 * Params:
 *   cs = the character ranges; `sanitizeRanges` requires one of them to start
 *        at '\0'
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToRanges(string cs)(ref const(char)* p)
{
    enum ranges = sanitizeRanges(cs);
    vpcmpistri!(char, ranges, Operation.inRanges)(p);
}
606 |
607 |
/*******************************************************************************
 * 
 * Searches for a specific character known to appear in the stream and skips the
 * read pointer over it.
 *
 * Params:
 *   c = the character
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekPast(char c)(ref const(char)* p)
{
    // The element-wise comparison needs `c` in all 16 positions of the needle. Build it with
    // plain array operations, so the function does not depend on `repeat` and `to`, which are
    // not imported at the visible module scope.
    enum string needle = { char[16] filled = c; return filled.idup; }();
    p.vpcmpistri!(char, needle, Operation.equalElem);
    p++;
}
624 |
625 |
/*******************************************************************************
 * 
 * Advances the read pointer past every code unit that lies inside one of the
 * given character ranges (at most 8). `cs` is a list of pairs: the first code
 * unit of each pair is the inclusive start of a range, the second its inclusive
 * end, so `*p` is inside a range `a`, `b` if `a <= *p <= b`.
 *
 * Params:
 *   cs = the character ranges
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipCharRanges(string cs)(ref const(char)* p)
{
    vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate)(p);
}
643 |
644 |
/*******************************************************************************
 * 
 * Advances the read pointer for as long as it points at one of the given
 * characters.
 *
 * Params:
 *   cs = the characters to skip over
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAllOf(string cs)(ref const(char)* p)
{
    vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate)(p);
}
659 |
660 |
/*******************************************************************************
 * 
 * Advances the read pointer past ASCII white-space, that is any of '\t', '\r',
 * '\n' and ' '.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAsciiWhitespace(ref const(char)* p)
{
    // Cheap handling of the common case: at most one blank in front of the next token.
    if (*p == ' ')
        p++;
    // Anything at or below ' ' may be more white-space; let the vectorized scan deal with it.
    if (*p <= ' ')
        skipAllOf!" \t\r\n"(p);
}
679 |
680 |
/*******************************************************************************
 * 
 * Moves the read pointer to the beginning of the following line.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipToNextLine(ref const(char)* p)
{
    // Everything except \r, \n and \0, expressed as three inclusive ranges.
    enum withinLine = "\x01\x09\x0B\x0C\x0E\xFF";
    vpcmpistri!(char, withinLine, Operation.inRanges, Polarity.negate)(p);
    // Consume the line terminator: "\r", "\n" or "\r\n".
    if (*p == '\r')
        p++;
    if (*p == '\n')
        p++;
}
697 |
698 |
/// Compile-time helper for `seekToAnyOf`: asserts that `cs` contains '\0' and returns the
/// remaining code units of `cs` sorted by value, with the '\0' itself removed.
private enum sanitizeChars(string cs)
{
    import std.exception;

    bool sawNul = false;
    for (size_t k = 0; k < cs.length && !sawNul; k++)
        sawNul = cs[k] == '\0';
    assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");

    // Collect in ascending code unit order; starting at 1 drops every '\0'.
    char[] ordered;
    for (int code = 1; code < 256; code++)
    {
        foreach (ch; cs)
        {
            if (ch == code)
                ordered ~= ch;
        }
    }
    return ordered.assumeUnique;
}
712 |
713 |
/// Compile-time helper for `seekToRanges`: asserts that one range in `cs` starts at '\0' and
/// returns the range list with such ranges changed to start at '\x01' (or dropped, if they
/// also end at '\0').
private enum sanitizeRanges(string cs)
{
    import std.exception;

    immutable pairs = cs.length / 2;

    bool sawNul = false;
    for (size_t k = 0; k < pairs && !sawNul; k++)
        sawNul = cs[2 * k] == '\0';
    assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");

    char[] cleaned;
    for (size_t k = 0; k < pairs; k++)
    {
        immutable lower = cs[2 * k];
        immutable upper = cs[2 * k + 1];
        if (lower != '\0')
            cleaned ~= cs[2 * k .. 2 * k + 2];
        else if (upper != '\0')
            cleaned ~= ['\x01', upper];
    }
    return cleaned.assumeUnique;
}
732 |
733 |
/// Comparison operation selector; `vpcmpistri` ORs it into the instruction's immediate operand,
/// where it occupies bits 2 and 3.
private enum Operation
{
    equalAnyElem = 0 << 2,
    inRanges     = 1 << 2,
    equalElem    = 2 << 2,
    substrPos    = 3 << 2,
}
741 |
742 |
/// Result polarity selector; `vpcmpistri` ORs it into the instruction's immediate operand,
/// where it occupies bits 4 and 5.
private enum Polarity
{
    keep        = 0 << 4,
    negate      = 1 << 4,
    negateValid = 3 << 4,
}
749 |
750 |
/*
 * Scanning primitive behind the seek and skip helpers of this module.
 *
 * Repeatedly applies the `pcmpistri` instruction to 16-byte blocks starting at `p`, with `cs`
 * loaded into an XMM register as the pattern. The loop continues while the instruction leaves
 * 16 in ECX and ends otherwise; `p` is then set to the start of the last block plus ECX.
 * There is no length parameter, so the data must contain something that stops the scan.
 *
 * Params:
 *   C = Element type; selects the element format bits of the immediate operand.
 *   cs = Pattern (characters or range pairs, depending on `op`).
 *   op = Comparison operation.
 *   pol = Polarity applied to the comparison result.
 *   lastIndex = Sets bit 6 of the immediate operand.
 *   p = The read pointer, updated in place.
 */
@forceinline @nogc nothrow pure
private void vpcmpistri(C, immutable(C[]) cs, Operation op, Polarity pol = Polarity.keep, bool lastIndex = false)
    (ref const(char)* p)
    if (is(C == char) || is(C == ubyte) || is(C == wchar) || is(C == ushort) || is(C == byte) || is(C == short))
{
    import fast.internal.helpers;

    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53712
    // Element format: unsigned bytes, unsigned words, signed bytes or signed words.
    static if (is(C == char) || is(C == ubyte))
        enum ct = 0b00;
    else static if (is(C == wchar) || is(C == ushort))
        enum ct = 0b01;
    else static if (is(C == byte))
        enum ct = 0b10;
    else
        enum ct = 0b11;

    // Immediate operand of the instruction, assembled from all compile-time options.
    enum mode = ct | op | pol | (!!lastIndex << 6);

    // Pointer-sized name of the count register, used when adding the index to the pointer.
    version (X86_64)
        enum creg = "rcx";
    else version (X86)
        enum creg = "ecx";
    else static assert(0, "Not implemented");

    version (LDC)
    {
        import ldc.llvmasm;

        // Operands: $0/$1 = p (in and out), $2 = mode, $3 = pattern register; ECX is clobbered.
        p = __asm!(const(char*))("
1:
pcmpistri $2, ($1), $3
add $$16, $1
cmp $$16, %ecx
je 1b
sub $$16, $1
add %" ~ creg ~ ", $1
", "=r,0,K,x,~{ecx}", p, mode, SIMDFromString!cs);
    }
    else version (GNU)
    {
        // Operands: %0/%1 = p (in and out), %2 = mode, %3 = pattern register; ECX is clobbered.
        asm { "
1:
pcmpistri %2, (%1), %3
add $16, %1
cmp $16, %%ecx
je 1b
sub $16, %1
add %%" ~ creg ~ ", %1
" : "=r" p : "0" p, "K" mode, "x" SIMDFromString!cs : "ecx"; }
    }
    else
    {
        alias csXMM = SIMDFromString!cs;
        version (D_InlineAsm_X86_64)
        {
            version (Posix)
            {
                // Naked function: the address of `p` is taken from RDI.
                version (D_PIC) asm @nogc pure nothrow
                {
                    naked;
                    lea RAX, csXMM;
                    mov RAX, [RAX];
                    movdqu XMM0, [RAX];
                    mov RAX, [RDI];
                L1:
                    vpcmpistri XMM0, [RAX], mode;
                    add RAX, 16;
                    cmp ECX, 16;
                    je L1;
                    sub RAX, 16;
                    add RAX, RCX;
                    mov [RDI], RAX;
                    ret;
                }
                else asm @nogc pure nothrow
                {
                    naked;
                    movdqa XMM0, csXMM;
                    mov RAX, [RDI];
                L1:
                    vpcmpistri XMM0, [RAX], mode;
                    add RAX, 16;
                    cmp ECX, 16;
                    je L1;
                    sub RAX, 16;
                    add RAX, RCX;
                    mov [RDI], RAX;
                    ret;
                }
            }
            else static assert(0, "Not implemented");
        }
        else version (D_InlineAsm_X86)
        {
            version (Posix)
            {
                // Naked function: the address of `p` is taken from EAX.
                version (D_PIC) asm @nogc pure nothrow
                {
                    naked;
                    mov EDX, CS:csXMM[EBX];
                    movdqu XMM0, [EDX];
                    mov EDX, [EAX];
                L1:
                    vpcmpistri XMM0, [EDX], mode;
                    add EDX, 16;
                    cmp ECX, 16;
                    je L1;
                    sub EDX, 16;
                    add EDX, ECX;
                    mov [EAX], EDX;
                    ret;
                }
                else asm @nogc pure nothrow
                {
                    naked;
                    movdqa XMM0, csXMM;
                    mov EDX, [EAX];
                L1:
                    vpcmpistri XMM0, [EDX], mode;
                    add EDX, 16;
                    cmp ECX, 16;
                    je L1;
                    sub EDX, 16;
                    add EDX, ECX;
                    mov [EAX], EDX;
                    ret;
                }
            }
            else static assert(0, "Not implemented");
        }
        else static assert(0, "Not implemented");
    }
}
885 |
--------------------------------------------------------------------------------
/source/fast/string.d:
--------------------------------------------------------------------------------
1 | /**
2 | * Fast, non-allocating string functions.
3 | *
4 | * Authors:
5 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
6 | *
7 | * Copyright:
8 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
9 | *
10 | * License:
11 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
12 | */
13 | module fast.string;
14 |
15 | import core.bitop;
16 | import core.simd;
17 | import core.stdc.stdlib;
18 |
19 | version (GNU) import gcc.attribute;
20 |
21 | import std.algorithm;
22 | import std.range;
23 | import std.stdio;
24 | import std.string;
25 | import std.traits;
26 |
27 | import fast.buffer;
28 |
29 |
/**
 * Cuts a string into the parts before and after the first occurrence of one of several
 * compile-time known code units.
 *
 * Params:
 *   match = An expression that matches all characters around which a split should occur.
 *   str = The string to scan.
 *   before = Receives the part before the split. If no character in $(D match) is found, it receives the original string.
 *   after = Receives the part after the split. If no character in $(D match) is found, it is set to $(D null).
 *   splitter = If not $(D null), the char that caused the split is written through this pointer.
 *
 * Returns:
 *   $(D true), iff a split occurred.
 */
bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null)
{
    auto begin = str.ptr;
    // Clamp the reported position, so "not found" always equals `str.length`.
    immutable hit = min(str.length, SimdMatcher!match.find(begin, begin + str.length));
    before = str[0 .. hit];

    if (hit >= str.length)
    {
        after = null;
        return false;
    }

    after = str[hit + 1 .. $];
    if (splitter !is null)
        *splitter = str[hit];
    return true;
}
55 |
/**
 * Similar to the overload for strings, this function works a little faster as it lacks boundary checks.
 * It assumes that one of the characters in $(D match) is actually contained in the string.
 *
 * Params:
 *   match = An expression that matches all characters around which a split should occur.
 *   ptr = The string to scan.
 *   before = The part before the split is stored here.
 *   after = The pointer to the part after the split is stored here.
 *
 * Returns:
 *   The char that caused the split. (From $(D match).)
 */
char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after)
{
    // The pointer based matcher yields the position as a pointer (see the pointer overload of
    // `find`, which returns it as such); convert it to an offset from `ptr`.
    immutable size_t pos = SimdMatcher!match.find(ptr) - ptr;
    before = ptr[0 .. pos];
    after = ptr + pos + 1;
    return ptr[pos];
}
76 |
77 |
/*******************************************************************************
 * 
 * Locates the first occurrence of any of a set of compile-time known code
 * units in a string. While the algorithm is `O(n)` in relation to the count of
 * given code units, the overhead when using it on short strings weighs more
 * for only 1 or 2 code units.
 *
 * Params:
 *   match = An expression that matches all characters that should be found.
 *   str = The string to search for a code unit.
 *
 * Returns:
 *   If a match is found, the index into the string is returned.
 *   Otherwise an invalid index is returned. Check with
 *   `if (result < str.length)`.
 *
 * See_Also:
 *   split,
 *   $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/,
 *   The Generic SSE2 Loop)
 *
 * Example:
 * ---
 * // Check if there is a '/' or '\' in the string
 * auto pos = str.find!(`or(=/,=\)`);
 * if (pos < str.length) { }
 * ---
 **************************************/
size_t find(string match)(in char[] str) pure nothrow
{
    auto first = str.ptr;
    return SimdMatcher!match.find(first, first + str.length);
}
111 |
/*******************************************************************************
 * 
 * Same as the overload for strings, but with only a char*, making it faster as
 * it cannot do a boundary check.
 *
 * Sometimes when looking for a character it is helpful to append it as a
 * sentinel to the char buffer and then use this function instead of the slower
 * one that checks the boundary constantly.
 *
 * Returns:
 *   A pointer into the string, as produced by the matcher. Presumably it points
 *   at the first matching code unit (confirm against `SimdMatcher`).
 *
 * Example:
 * ---
 * // Find a ']' in a buffer of 1024 bytes using an additional sentinel.
 * size_t length = 1024;
 * char[] buffer = new char[](length+1);
 * buffer[length] = ']';
 * auto hit = buffer.ptr.find!("=]");
 * if (hit < buffer.ptr + length) { // was an actual find before the sentinel }
 * ---
 **************************************/
inout(char)* find(string match)(inout(char*) ptr) pure nothrow
{
    return SimdMatcher!match.find(ptr);
}
135 |
136 |
/**
 * Checks whether a string starts with a compile-time known key, comparing one char at a time.
 *
 * A double quote in the string ends the comparison with a mismatch. A backslash in the string is
 * handed to $(D mismatcher), which may advance both read pointers.
 *
 * No boundary check is performed on $(D str): unless the key matches completely, the data must
 * contain a char that differs from the key (or a double quote) to stop the scan.
 *
 * Params:
 *   key = The key to compare against.
 *   str = The string to check. Only its start address is used.
 *   mismatcher = Called with the key and string read pointers whenever the string is at a
 *                backslash. Returning $(D false) reports a mismatch. If it is $(D null), any
 *                backslash counts as a mismatch.
 *
 * Returns:
 *   $(D true), iff all of $(D key) was matched.
 */
bool keyword1(string key)(in char[] str,
    scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
{
    const(char)* strPtr = str.ptr;
    immutable(char)* keyPtr = key.ptr;
    immutable(char)* keyEnd = keyPtr + key.length;

    while (keyPtr !is keyEnd)
    {
        while (*strPtr == '\\')
        {
            // Without a handler an escape sequence cannot be resolved; never call through null.
            if (mismatcher is null || !mismatcher(keyPtr, strPtr))
                return false;
        }

        if (*strPtr == '"' || *strPtr != *keyPtr)
            return false;

        strPtr++;
        keyPtr++;
    }
    return true;
}
158 |
159 |
/*******************************************************************************
 * 
 * SIMD variant of the keyword comparison: checks whether the text at `str`
 * equals the compile-time `key`, comparing one aligned `Word` per iteration.
 *
 * The word is loaded from the aligned address at or before the current string
 * position, i.e. bytes in front of `str` (and behind its end) may be read.
 *
 * Params:
 *   key = the expected text
 *   str = the text to compare; only `str.ptr` is used
 *   mismatcher = optional callback consulted when the comparison stops inside
 *     a word (mismatch, backslash or double-quote). It receives the key and
 *     string pointers by reference; when it is `null` or returns `false`, the
 *     function returns `false`.
 *
 * Returns:
 *   `true` once at least `key.length` bytes have been matched.
 *
 **************************************/
bool keyword2(string key)(in char[] str,
    scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
{
    // NOTE(review): only the LDC builtins are imported here, while keyword3 also imports gcc.builtins for GDC.
    // Confirm whether '__builtin_ia32_pmovmskb128' is in scope for other compilers.
    version (LDC) import ldc.gccbuiltins_x86;

    /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
     * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
     * register.
     * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
     * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
     * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
     * - It consists of as many SIMD words as the key is long.
     * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
     *   disjoint prime factors for the key length and the SIMD word size.
     * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
     * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
     * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
     */

    // 'key' length rounded up to next odd value is the number of SIMD words we need.
    enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
    // 'key' followed by space padding up to the odd length, repeated until 'keyLenOdd' words are filled.
    align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
        .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
    // Words with every byte set to '"' and '\' respectively, used to locate these characters in 'str'.
    align(16) static immutable char[Word.sizeof] dquote = '"';
    align(16) static immutable char[Word.sizeof] bslash = '\\';
    // Smallest positive factor for which (factor * Word.sizeof + 1) is a multiple of 'keyLenOdd'.
    enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();

    const(char)* strPtr = str.ptr;
    immutable(char)* keyPtr = keyData.ptr;
    auto bsWord = *cast(immutable Word*) &bslash;
    auto dqWord = *cast(immutable Word*) &dquote;

    do
    {
        // writeln("enter loop");
        // Calculate SSE word boundary before 'str'
        size_t strOff = cast(size_t) strPtr % Word.sizeof;
        Word strWord = *cast(Word*) (strPtr - strOff);
        // Number of key bytes consumed so far.
        size_t keyPos = keyPtr - keyData.ptr;
        // Pick the word of 'keyData' in which the key is shifted like the text in 'strWord'.
        size_t keyOff = (strOff - keyPos) % Word.sizeof;
        Word keyWord = (cast(Word*) keyData.ptr)[keyOff * mul % keyLenOdd + (keyOff + keyPos) / Word.sizeof];

        // Escape sequences have priority. 'key' may contain backslashes as part of the text, but in 'str' a backslash
        // at the same position is actually the beginning of the escape sequence "\\".
        Word bsMask = strWord.maskEqual(bsWord);
        // If after processing backslashes there is a double-quote in 'str' we must not match it with a double-quote in
        // 'key', since it is the delimiter of 'str'.
        Word dqMask = strWord.maskEqual(dqWord);
        // How many bytes of 'key' and 'str' match in our 'Word' ?
        Word missMask = strWord.maskNotEqual(keyWord);
        // Merge mismatch, backslash and double-quote masks and move them into a non-SSE register.
        Word allMasks = or(missMask, or(bsMask, dqMask));
        // The extra bit at position 'Word.sizeof' guarantees that 'bsf' finds a set bit; shifting by 'strOff'
        // discards the bytes in front of 'strPtr'. 'skip' is the number of bytes up to the first stop or word end.
        uint skip = bsf((__builtin_ia32_pmovmskb128(allMasks) | 1 << Word.sizeof) >> strOff);
        // writeln(keyPtr[0 .. 5]);
        // writeln(strPtr[0 .. 5]);
        // writeln(skip);
        strPtr += skip;
        keyPtr += skip;

        // Have we matched enough bytes to reach the end of 'key' ?
        if (keyPtr - keyData.ptr >= key.length)
            return true;

        // When we find a mismatch between 'key' and 'str', we try to call a provided helper function.
        // It may decode escape sequences in 'str' and recover from the state.
        // If that fails we accept the mismatch and return 'false'.
        // (A stop exactly at the word end is no mismatch; the loop just continues with the next word.)
        // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
        // const(char*) strPtrOld = strPtr;
        // immutable(char*) keyPtrOld = keyPtr;
        if (strOff + skip < Word.sizeof && !(mismatcher && mismatcher(keyPtr, strPtr)))
        {
            // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
            return false;
        }
        // writefln("Key: %s, Str %s", *keyPtr, *strPtr);
    }
    while (keyPtr - keyData.ptr < key.length);

    return true;
}
240 |
241 |
/*******************************************************************************
 * 
 * Variant of the keyword comparison built on the `pcmpistrm` string
 * instruction: checks whether the text at `str` equals the compile-time `key`.
 *
 * The first word is loaded from the aligned address at or before `str.ptr`,
 * i.e. bytes in front of `str` may be read.
 *
 * Params:
 *   key = the expected text
 *   str = the text to compare; only `str.ptr` is used
 *   mismatcher = callback invoked at the first special character or mismatch;
 *     it receives the key and string pointers by reference and may advance
 *     both past a decoded escape sequence.
 *     NOTE(review): the default is `null`, but the callback is invoked without
 *     a null check below - confirm that callers always pass one.
 *
 * Returns:
 *   `true` if the comparison reached the end of `key`, `false` if
 *   `mismatcher` reported a mismatch before that.
 *
 **************************************/
bool keyword3(string key)(in char[] str, bool function(ref immutable(char)*, ref const(char)*) mismatcher = null)
{
    version (LDC) import ldc.gccbuiltins_x86;
    version (GNU) import gcc.builtins;

    /* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
     * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
     * register.
     * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
     * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
     * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
     * - It consists of as many SIMD words as the key is long.
     * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
     *   disjoint prime factors for the key length and the SIMD word size.
     * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
     * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
     * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
     */

    // 'key' length rounded up to next odd value is the number of SIMD words we need.
    enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
    // 'key' followed by space padding up to the odd length, repeated until 'keyLenOdd' words are filled.
    align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
        .chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
    // Character set for the "special character" search: a backslash and the double-quote.
    align(16) static immutable char[Word.sizeof] dqbs = `\"""""""""""""""`;
    // Smallest positive factor for which (factor * Word.sizeof + 1) is a multiple of 'keyLenOdd'.
    enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();

    // Calculate SSE word boundary before 'str'
    uint off = cast(uint) str.ptr % Word.sizeof;
    // SSE aligned pointer <= 'str.ptr'.
    auto strPtr = cast(const(Word)*) (str.ptr - off);
    // Word of 'keyData' in which the key is shifted by 'off' bytes, and the address of the key's first byte in it.
    auto keyPtr = cast(immutable(Word)*) keyData.ptr + off * mul % keyLenOdd;
    auto keyStart = cast(immutable(char)*) keyPtr + off;
    Word strWord = *strPtr;

LoadKey:
    auto keyEnd = keyStart + key.length;

Compare:
    // Get bitmask of special characters in 'str'.
    uint escMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*cast(Word*) &dqbs, strWord, 0b_0_00_00_00));
    // writeln("Called a");
    // Get bitmask of characters from 'key' and 'str' that don't match.
    uint missMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*keyPtr, strWord, 0b_0_01_10_00));
    // writeln("Called b");
    // Merge both masks and clear the bits that belong to bytes in front of the current offset.
    uint mask = (escMask | missMask) & (uint.max << off);

    // No bit set means all 16 bytes are equal and there are no escape characters. That's as good as it gets.
    if (!mask)
    {
        // Jump forward by a word size and see if we successfully compared all bytes to the end of our 'key'.
        // NOTE(review): 'keyPtr' and 'strPtr' point to 'Word', so '+= 16' advances by 16 words rather than 16 bytes,
        // and 'strWord' is not reloaded before jumping back to 'Compare' - confirm this is intended.
        keyPtr += 16;
        if (cast(immutable(char)*) keyPtr >= keyEnd)
            return true;
        // Otherwise continue with the next set of 16 bytes.
        strPtr += 16;
        off = 0;
        goto Compare;
    }

    // One of two cases ...
    off = bsf(mask);

    // 1) Did the mismatch occur past the end of 'key' ? Then we compared successfully.
    if (cast(immutable(char)*) keyPtr + off >= keyEnd)
        return true;

    // 2) It must be a special character or actual mismatch, let 'mismatcher' decide.
    // writefln("Skipping: %s", (cast(const(char)*) strPtr)[0 .. off]);
    auto strChP = cast(const(char)*) strPtr + off;
    auto strChPOld = strChP;
    auto keyChP = cast(immutable(char)*) keyPtr + off;
    bool goodToGo = mismatcher(keyChP, strChP);

    // writefln("Mismatcher used %s key chars, %s str chars and returned: %s", keyAdd, strAdd, goodToGo);
    // Reaching the end of 'key' wins over the callback's verdict.
    if (keyChP >= keyEnd)
        return true;
    if (!goodToGo)
        return false;

    // Arriving here we just decoded an escape sequence and have to adjust our pointers.
    auto keyPos = keyChP - keyStart;
    off += strChP - strChPOld;
    if (off >= 16)
    {
        // The callback moved past the current word: load the word that now contains the string position.
        strPtr += off / 16;
        strWord = *strPtr;
        off %= 16;
    }
    // Select the key word whose shift lines up key byte 'keyPos' with byte 'off' of 'strWord'.
    auto baseOff = (off - keyPos) & 15;
    keyPtr = cast(immutable(Word)*) keyData.ptr + baseOff * mul % keyLenOdd;
    keyStart = cast(immutable(char)*) keyPtr + baseOff;
    keyPtr += (baseOff + keyPos) / 16;
    goto LoadKey;
}
337 |
338 |
size_t equalLength(scope inout(char[]) a, scope inout(char[]) b)
{
    // Placeholder implementation: both arguments are ignored and 0 is reported for every input.
    enum size_t placeholder = 0;
    return placeholder;
}
343 |
344 |
/*******************************************************************************
 * 
 * Joins a series of strings into one zero-terminated temporary string.
 *
 * The required size is the sum of all string lengths plus one byte for the
 * terminator. Unless a buffer is passed in, the default argument reserves
 * that many bytes with `alloca` as long as the size does not exceed
 * `allocaLimit`; otherwise the memory is obtained from `malloc`.
 *
 * Params:
 *   Strs = a series of string symbols or literals to be concatenated
 *   buffer = optional buffer, implicitly allocated; must provide room for all
 *     strings plus the terminating zero
 *
 * Returns:
 *   A $(D TempBuffer!char) containing the concatenated string (the terminating
 *   zero is stored right behind it). It is kept alive for as long as it is in
 *   scope.
 *
 **************************************/
nothrow @nogc
template concat(Strs...)
{
    import core.stdc.string : memcpy;
    import fast.internal.helpers;

    // Expression text that sums up the lengths of all strings plus the terminator.
    enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1";

    auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null)
    {
        immutable total = mixin(allocExpr);
        // No buffer means the size was above the stack limit: fall back to the C heap.
        immutable onHeap = buffer is null;
        char* storage = cast(char*) (onHeap ? malloc(total) : buffer);
        auto joined = TempBuffer!char(storage[0 .. total - 1], onHeap);

        // Copy the pieces back to back and terminate the result.
        char* dst = joined.ptr;
        foreach (const(char[]) piece; Strs)
        {
            memcpy (dst, piece.ptr, piece.length);
            dst += piece.length;
        }
        *dst = '\0';

        return joined;
    }
}
384 |
385 |
386 |
387 | private:
388 |
389 | template SimdMatcher(string match)
390 | {
391 | import core.simd;
392 | import std.string;
393 | import fast.internal.sysdef;
394 |
395 | static if (match != strip(match)) {
396 | // Reinstanciate the template with any whitespace stripped from the match string.
397 | alias SimdMatcher = SimdMatcher!(strip(match));
398 | } else {
399 | /* For SSE in DMD I am blocked by:
400 | * https://d.puremagic.com/issues/show_bug.cgi?id=8047
401 | * https://d.puremagic.com/issues/show_bug.cgi?id=11585
402 | */
403 | enum isUsingSSE = hasSSE2 && (isLDC || isGDC);
404 | enum isSingleChar = match.length == 2 && match[0] == '=';
405 | static if (isSingleChar) enum singleChar = match[1];
406 | static if (isUsingSSE) {
407 | // Using MOVMSKB we get one boolean per bit in a 16-bit value.
408 | alias Word = ubyte16;
409 | alias Mask = uint;
410 | enum sparseness = 1;
411 | } else {
412 | // The fallback is to work with machine words and tricky bit-twiddling algorithms.
413 | // As a result we get machine words where matching bytes have the high bit set.
414 | alias Word = size_t;
415 | alias Mask = size_t;
416 | enum sparseness = 8;
417 | }
418 | enum matchCode = genMatchCode!isUsingSSE("*wp");
419 | // Used in generic comparison code
420 | enum lows = size_t.max / 0xFF;
421 | enum highs = lows * 0x80;
422 |
423 | enum betterUseTables = (isDMD && matchCode.complexity >= 4)
424 | || (isGDC && matchCode.complexity >= 18)
425 | || (isLDC && matchCode.complexity >= 18);
426 |
427 | static if (betterUseTables)
428 | {
429 | immutable matchTable = genMatchTable();
430 |
// Table driven search in [b, e). Returns the offset of the first match relative to 'b'. Offsets at or behind
// 'e - b' (including size_t.max) mean that nothing was found inside the range.
size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc
{
    import core.stdc.string;
    import fast.internal.helpers;

    // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
    static if (isSingleChar) {
        return memchr(b, singleChar, e - b) - b;
    } else {
        if (b >= e) return 0;

        // The table is indexed with two bytes at once, so work on ushort units starting at or before 'b'.
        size_t misalign = cast(size_t) b % ushort.sizeof;
        ushort* wp = cast(ushort*) (b - misalign);
        ushort* wordEnd = cast(ushort*) alignPtrNext(e, ushort.sizeof);
        if (misalign) {
            // Throw away bytes from before start of the string
            auto head = matchTable[*wp] >> misalign;
            if (head)
                return bsf(head);
            if (++wp is wordEnd) return size_t.max;
        }

        for (;;) {
            if (auto hit = matchTable[*wp])
                return bsf(hit) + (cast(char*) wp - b);
            if (++wp is wordEnd)
                return size_t.max;
        }
    }
}
459 |
// Table driven search without an end pointer. The caller must guarantee that a match exists at or after 'b'.
// Returns a pointer to the first matching character.
inout(char)* find(scope inout(char*) b) pure nothrow @nogc
{
    import core.stdc.string;
    // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
    static if (isSingleChar && singleChar == '\0') {
        return strlen(b) + b;
    } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
        // There is no end pointer in this overload: search unbounded and return the match itself.
        return cast(inout(char*)) memchr(b, singleChar, size_t.max);
    } else {
        // The table is indexed with two bytes at once, so work on ushort units starting at or before 'b'.
        size_t off = cast(size_t) b % ushort.sizeof;
        ushort* wp = cast(ushort*) (b - off);
        if (off) {
            // Throw away bytes from before start of the string
            if (auto mask = matchTable[*wp] >> off)
                return b + bsf(mask);
            // Nothing in the partial unit, continue with the next one.
            ++wp;
        }

        do {
            if (auto mask = matchTable[*wp])
                return cast(inout(char)*) wp + bsf(mask);
            // Advance, otherwise the same two bytes would be tested forever.
            ++wp;
        } while (true);
    }
}
483 | }
484 | else
485 | {
486 | import core.stdc.string, core.simd;
487 | import std.simd;
488 | import fast.internal.helpers;
489 |
490 | version (LDC) {
491 | import ldc.gccbuiltins_x86;
492 | } else version (GNU) {
493 | import gcc.builtins;
494 | }
495 |
// Word-wise search in [b, e). Returns the offset of the first match relative to 'b'. Offsets at or behind
// 'e - b' (including size_t.max) mean that nothing was found inside the range.
size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow
{
    // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
    static if (isSingleChar) {
        return memchr(b, singleChar, e - b) - b;
    } else {
        if (b >= e) return 0;

        // The generated match code reads '*wp', so the word pointer must keep this name.
        size_t misalign = cast(size_t) b % Word.sizeof;
        Word* wp = cast(Word*) (b - misalign);
        Word* wordEnd = cast(Word*) alignPtrNext(e, Word.sizeof);
        if (misalign) {
            // Throw away bytes from before start of the string
            auto head = (mixin(matchCode.code)) >> (misalign * sparseness);
            if (head)
                return bsf(head) / sparseness;
            if (++wp is wordEnd) return size_t.max;
        }

        for (;;) {
            if (auto hit = mixin(matchCode.code))
                return bsf(hit) / sparseness + (cast(char*) wp - b);
            if (++wp is wordEnd)
                return size_t.max;
        }
    }
}
521 |
// Word-wise search without an end pointer. The caller must guarantee that a match exists at or after 'b'.
// Returns a pointer to the first matching character.
inout(char)* find(scope inout(char*) b) pure nothrow
{
    // catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
    static if (isSingleChar && singleChar == '\0') {
        return strlen(b) + b;
    } else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
        return cast(inout(char*)) memchr(b, singleChar, size_t.max);
    } else {
        // The generated match code reads '*wp', so the word pointer must keep this name.
        size_t misalign = cast(size_t) b % Word.sizeof;
        Word* wp = cast(Word*) (b - misalign);
        if (misalign) {
            // Throw away bytes from before start of the string
            auto head = (mixin(matchCode.code)) >> (misalign * sparseness);
            if (head)
                return b + bsf(head) / sparseness;
            ++wp;
        }

        for (;; ++wp) {
            if (auto hit = mixin(matchCode.code))
                return cast(inout(char)*) wp + bsf(hit) / sparseness;
        }
    }
}
546 | }
547 |
// Translates the 'match' expression of the enclosing template into the source code of a D expression that
// tests the word named by 'var'.
//
// Grammar: '=c', '!c', '<c' and '>c' compare each byte against character 'c'; 'or(...)' and 'and(...)' combine
// comma separated sub-expressions; white-space between tokens is skipped.
//
// Params:
//   sse = generate code using the SIMD helpers (true) or plain machine word bit tricks (false)
//   var = the expression text that yields the word to test, e.g. "*wp"
//
// Returns:
//   A struct with the generated 'code' and a 'complexity' estimate (1 + number of 'or'/'and' groups).
//
// Throws (during CTFE):
//   Exception on unexpected characters, a ',' outside of a group or unbalanced parentheses.
enum genMatchCode(bool sse)(string var)
{
    import std.ascii, std.exception;

    struct Code {
        string code;
        size_t complexity = 1;
    }
    Code result;
    // One separator per open 'or('/'and(' group, innermost last.
    string[] nesting;

    with (result) {
        for (size_t i = 0; i < match.length;) {
            // Returns the operand character following the operator at 'i' as a D character literal.
            string handleChar() {
                char c = match[i+1];
                switch (c) {
                    case 0:
                        return `'\0'`;
                    case '\\':
                        return `'\\'`;
                    case "'"[0]:
                        return `'\''`;
                    case '\t':
                        return `'\t'`;
                    case '\r':
                        return `'\r'`;
                    case '\n':
                        return `'\n'`;
                    default:
                        return `'` ~ c ~ `'`;
                }
            }

            if (match[i] == '=') {
                static if (sse) {
                    code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
                } else if (match[i+1] == 0) {
                    code ~= "" ~ var ~ " - lows & ~" ~ var;
                } else {
                    code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")";
                }
                i += 2;
            } else if (match[i] == '!') {
                static if (sse) {
                    code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
                } else if (match[i+1] == 0) {
                    code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")";
                } else {
                    code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))";
                }
                i += 2;
            } else if (match[i] == '<') {
                static if (sse)
                    code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")";
                else
                    code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
                i += 2;
            } else if (match[i] == '>') {
                static if (sse)
                    code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
                else
                    code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
                i += 2;
            } else if (match[i .. $].startsWith("or(")) {
                static if (sse) {
                    nesting ~= ", ";
                    code ~= "or(";
                } else {
                    nesting ~= " | ";
                    // Parenthesize the group: '&' binds tighter than '|', so a nested "or" inside an "and"
                    // would otherwise be evaluated in the wrong order.
                    code ~= "(";
                }
                complexity++;
                i += 3;
            } else if (match[i .. $].startsWith("and(")) {
                static if (sse) {
                    nesting ~= ", ";
                    code ~= "and(";
                } else {
                    nesting ~= " & ";
                    code ~= "(";
                }
                complexity++;
                i += 4;
            } else if (match[i] == ',') {
                enforce(nesting.length, "',' on top level");
                code ~= nesting[$-1];
                i++;
            } else if (match[i] == ')') {
                enforce(nesting.length, "Unbalanced closing parenthesis");
                nesting.length--;
                // Closes the function call (SSE) or the parenthesized group (generic).
                code ~= ")";
                i++;
            } else if (match[i].isWhite) {
                i++;
            } else {
                throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i]));
            }
        }
        // Every 'or('/'and(' must have been closed, otherwise the generated code would be incomplete.
        enforce(!nesting.length, "Unbalanced opening parenthesis");
        static if (sse) {
            code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")";
        } else {
            code = "(" ~ code ~ ") & highs";
        }
    }
    return result;
}
654 |
// Builds the lookup table for the table driven search: the index is a pair of bytes read as one 16-bit value,
// the entry has bit 0 set if the low byte matches and bit 1 set if the high byte matches.
enum genMatchTable()
{
    ubyte[1 << 16] pairs;
    ubyte[256] perByte;
    // Evaluate the generic match expression once for every possible byte value.
    foreach (uint octet; 0 .. 256) {
        perByte[octet] = (mixin(genMatchCode!false("octet").code) >> 7) & 1;
    }
    // Combine the per-byte results for every 16-bit value.
    foreach (idx; 0 .. 1 << 16) {
        pairs[idx] = cast(ubyte) (perByte[idx >> 8] << 1 | perByte[idx & 0xFF]);
    }
    return pairs;
}
667 | }
668 | }
669 |
/**
 * Searches a machine word for bytes equal to a fixed value using only integer arithmetic.
 *
 * Params:
 *   value = The byte value you are looking for.
 *   word = The data word to search for the value.
 *
 * Returns:
 *   non-zero, iff the value is contained in the data word. The lowest matching byte is reported as 0x80 and
 *   all bytes below it as 0x00. Bytes above the lowest match may be flagged as well when the subtraction
 *   borrows into them, so only the lowest set bit is a reliable position.
 *
 * See_Also:
 *   http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
 */
T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow
    if (isUnsigned!T)
{
    // 0x01 in every byte of a T value.
    enum ones = T.max / 0xFF;
    static if (value != 0) {
        // XOR-ing with the value in every byte turns matching bytes into zero bytes.
        return maskEqualGeneric!0(word ^ (ones * value));
    } else {
        // 0x80 in every byte of a T value.
        enum tops = ones * 0x80;
        return (word - ones) & ~word & tops;
    }
}
697 |
// Flags (with 0x80) the bytes of 'word' that are less than 'value', for values up to 128.
// The lowest flagged byte is exact; a borrow can also flag bytes above it.
T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow
    if (isUnsigned!T && value <= 128)
{
    enum ones = T.max / 0xFF;   // 0x01 in every byte
    enum tops = ones * 0x80;    // 0x80 in every byte
    auto lowered = word - ones * value;
    return lowered & ~word & tops;
}
705 |
// Flags (with 0x80) the bytes of 'word' that are greater than 'value', for values up to 127.
// Bytes that already have their high bit set are flagged through the OR with 'word'.
T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow
    if (isUnsigned!T && value <= 127)
{
    enum ones = T.max / 0xFF;   // 0x01 in every byte
    enum tops = ones * 0x80;    // 0x80 in every byte
    // Adding (127 - value) carries into the high bit of every byte above 'value'.
    auto raised = word + ones * (127 - value);
    return (raised | word) & tops;
}
713 |
// Combines two match masks; a byte is flagged in the result if it is flagged in either operand.
T orGeneric(T)(T a, T b) @safe pure nothrow
    if (isUnsigned!T)
{
    T merged = a;
    merged |= b;
    return merged;
}
719 |
--------------------------------------------------------------------------------
/source/fast/unicode.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Functions to work with the Unicode Transformation Format.
4 | *
5 | * Grapheme clusters:
6 | * A grapheme cluster is roughly speaking what the user would perceive as the smallest unit in a
7 | * writing system. Their count can be thought of as a caret position in a text editor. In
8 | * particular at grapheme cluster level, different normalization forms (NFC, NFD) become
9 | * transparent. The default definition used here is independent of the user's locale.
10 | *
11 | * Authors:
12 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
13 | *
14 | * Copyright:
15 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
16 | *
17 | * License:
18 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
19 | *
20 | **************************************************************************************************/
21 | module fast.unicode;
22 |
23 | import fast.internal.unicode_tables;
24 | import fast.internal.sysdef;
25 | import std.simd;
26 |
27 |
28 | /*******************************************************************************
29 | *
30 | * Enumeration for the Unicode "General Category" used to roughly classify
31 | * codepoints into letters, punctuation etc.
32 | *
33 | **************************************/
34 | alias GeneralCategory = DerivedGeneralCategory.Enum;
35 |
36 |
/*******************************************************************************
 * 
 * Result of a code point lookup: the byte `length` of the code point in UTF-8
 * together with a Unicode `property` value of a customizable `enum` type (e.g.
 * `GeneralCategory`). The struct implicitly converts to its `property`, so it
 * can be compared directly against enumeration members.
 *
 **************************************/
struct CodePointInfo(Enum)
{
    /// Number of UTF-8 code units (bytes) the code point occupies.
    size_t length;
    /// The looked up property value.
    Enum property;

    alias property this;
}
50 |
51 |
/*******************************************************************************
 * 
 * Counts the number of grapheme clusters (character count) in a UTF string.
 *
 * This function uses "extended grapheme clusters" as defined in Unicode:
 * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
 *
 * When invalid byte sequences are encountered, each byte that does not make up
 * a code point will be counted as one grapheme as visual representations of
 * such broken strings will often show a square with the hexadecimal byte value
 * in them.
 *
 * Implementation: a table indexed by the break properties of the previous and
 * the current code point yields "break", "no break" or -1 for pairs that need
 * the stateful `complexRules`. As long as at least 4 bytes remain, the string
 * is walked with a raw pointer; the tail is processed as a bounds checked
 * slice.
 *
 * Params:
 *   str = the UTF-8 string
 *
 * Returns:
 *   the number of grapheme clusters
 *
 **************************************/
@nogc @trusted pure nothrow size_t
countGraphemes(scope const(char)[] str)
{
    enum numValues = GraphemeBreakProperty.Enum.max + 1;
    // graphemeBreakRules[last][next]: true = boundary, false = no boundary, -1 = ask complexRules().
    // The rules are written from lowest to highest priority, so that later assignments override earlier ones.
    static immutable graphemeBreakRules =
    {
        // GB999
        byte[numValues][numValues] graphemeBreaks = true;
        with (GraphemeBreakProperty.Enum)
        {
            // GB12 + GB13 (special handling)
            foreach (i; 0 .. numValues)
                graphemeBreaks[i][Regional_Indicator] = -1;
            // GB11
            graphemeBreaks[ZWJ][Glue_After_Zwj] = false;
            graphemeBreaks[ZWJ][E_Base_GAZ] = false;
            // GB10 (special handling)
            graphemeBreaks[E_Base] [E_Modifier] = false;
            graphemeBreaks[E_Base_GAZ][E_Modifier] = false;
            graphemeBreaks[Extend] [E_Modifier] = -1;
            // GB9b
            foreach (i; 0 .. numValues)
                graphemeBreaks[Prepend][i] = false;
            // GB9a
            foreach (i; 0 .. numValues)
                graphemeBreaks[i][SpacingMark] = false;
            // GB9
            foreach (i; 0 .. numValues)
            {
                graphemeBreaks[i][Extend] = false;
                graphemeBreaks[i][ZWJ] = false;
            }
            graphemeBreaks[E_Base] [Extend] = -1;
            graphemeBreaks[E_Base_GAZ][Extend] = -1;
            // GB8
            graphemeBreaks[LVT][T] = false;
            graphemeBreaks[T] [T] = false;
            // GB7
            graphemeBreaks[LV][V] = false;
            graphemeBreaks[LV][T] = false;
            graphemeBreaks[V] [V] = false;
            graphemeBreaks[V] [T] = false;
            // GB6
            graphemeBreaks[L][L] = false;
            graphemeBreaks[L][V] = false;
            graphemeBreaks[L][LV] = false;
            graphemeBreaks[L][LVT] = false;
            // GB5
            foreach (i; 0 .. numValues)
            {
                graphemeBreaks[i][Control] = true;
                graphemeBreaks[i][CR] = true;
                graphemeBreaks[i][LF] = true;
            }
            // GB4
            foreach (i; 0 .. numValues)
            {
                graphemeBreaks[Control][i] = true;
                graphemeBreaks[CR] [i] = true;
                graphemeBreaks[LF] [i] = true;
            }
            // GB3
            graphemeBreaks[CR][LF] = false;
            // Additional homebrew top level rule to break before and after invalid characters
            foreach (i; 0 .. numValues)
            {
                graphemeBreaks[i][__] = true;
                graphemeBreaks[__][i] = true;
            }
        }
        return graphemeBreaks;
    }();

    size_t graphemeCount = 0;
    auto p = str.ptr;
    // NOTE(review): 'graphemeStart' is only ever written in this function, never read.
    auto graphemeStart = p;
    // Break properties of the previous and the current code point, shared with complexRules().
    GraphemeBreakProperty.Enum last, next;
    bool riEven, inEmojiBaseExtension;

    // Decides the table entries marked -1, based on 'last', 'next' and the two state flags.
    // Returns true if there is a boundary in front of the current code point.
    @noinline @safe @nogc pure nothrow bool
    complexRules()
    {
        pragma(inline, false);
        with (GraphemeBreakProperty.Enum)
        {
            if (next == Regional_Indicator)
            {
                // For GB12 + GB13 we need break only after a complete country code (2 indicators).
                if (last == Regional_Indicator)
                    return riEven = !riEven;
                riEven = true;
                return false;
            }
            else if (next == Extend)
            {
                // Remember that an emoji base is being extended (GB10) and keep the cluster together.
                inEmojiBaseExtension = true;
                return false;
            }
            else if (inEmojiBaseExtension)
            {
                // Clears the flag and reports "no boundary".
                return inEmojiBaseExtension = false;
            }
            return true;
        }
    }

    // Classifies the code point at the front of 'str' (a raw pointer or a slice), counts a new grapheme if a
    // boundary lies in front of it and advances 'str' past the code point.
    @forceinline void
    graphemeCountImpl(S)(ref S str)
    {
        version (LDC) pragma(inline, true);
        auto cpi = getProperty!GraphemeBreakProperty(str);
        // NOTE(review): this declares a new local that hides the outer 'next'. Nothing else in this function
        // assigns the outer variable, which complexRules() reads - confirm whether a plain assignment was intended.
        auto next = cpi.property;
        byte isBoundary = graphemeBreakRules[last][next];
        if (isBoundary < 0 ? complexRules() : isBoundary)
        {
            graphemeCount++;
            static if (is(S == const(char)*))
                graphemeStart = str;
            else
                graphemeStart = str.ptr;
            inEmojiBaseExtension = false;
        }
        static if (is(S == const(char)*))
            str += cpi.length;
        else
            str = str[cpi.length..$];
        last = next;
    }

    if (str.length >= 4)
    {
        // Fast path: while at least 4 bytes remain, use the pointer variant, which does no length checks.
        const e = str.ptr + str.length - 4;
        do
            graphemeCountImpl(p);
        while (p <= e);
        str = str[p - str.ptr..$];
    }
    // Remaining (at most 3) bytes: use the slice variant, which guards against truncated sequences.
    while (str.length)
        graphemeCountImpl(str);
    return graphemeCount;
}
212 |
213 |
/*******************************************************************************
 * 
 * Looks up the "General Category" of the first code point in a UTF-8 string.
 * For broken UTF-8, the property is set to `GeneralCategory.__` (`0`).
 *
 * Params:
 *   str = the UTF-8 encoded text, which must not be empty
 *
 * Returns:
 *   a code point information struct consisting of the field `property`,
 *   holding the `GeneralCategory` enumeration value, and the field `length`,
 *   holding the size of the code point in bytes.
 *
 **************************************/
@property @safe @nogc pure nothrow CodePointInfo!GeneralCategory
generalCategory(scope const(char)[] str)
{
    alias Table = DerivedGeneralCategory;
    return getProperty!Table(str);
}
233 | unittest
234 | {
235 | assert("क".generalCategory == GeneralCategory.Other_Letter);
236 | assert("̸".generalCategory == GeneralCategory.Nonspacing_Mark);
237 | assert("\xFF".generalCategory == GeneralCategory.__);
238 | }
239 |
240 |
241 |
242 | private:
243 |
/*
 * Looks up a Unicode property for the first code point of a UTF-8 text.
 *
 * Params:
 *   Property = lookup table set providing `Enum` and the tables `level0` .. `level3`
 *   str = the text, either as a slice (length checked) or as a raw pointer.
 *     With a raw pointer no length is known; up to 4 bytes may be read.
 *
 * Returns:
 *   A `CodePointInfo!(Property.Enum)` with the property and the byte length of the code point.
 *   Invalid or truncated sequences yield property 0 and length 1.
 */
@forceinline pure @nogc nothrow auto
getProperty(Property, S)(scope S str) if (is(S == const(char)*) || is(S == const(char)[]))
in
{
    static if (is(S == const(char)[]))
        assert(str.length != 0, "No code units passed in.");
}
out
{
    assert(__result <= Property.Enum.max);
}
body
{
    version (LDC) pragma(inline, true);
    import fast.internal.helpers;

    alias Enum = Property.Enum;
    alias CPI = CodePointInfo!Enum;
    // Fast path for ASCII.
    size_t idx = Property.level0[0][str[0]];
    if (byte(str[0]) >= 0) return CPI(1, cast(Enum)idx);
    // Number of leading 1-bits in the first byte, i.e. the sequence length announced by the lead byte
    // (assumes 'clz' counts the leading zero bits of a 32-bit value - TODO confirm).
    // On multi-byte sequences, set the length to 1 for invalid sequences (idx == 0).
    size_t length = clz(str[0] ^ 0xFFu) - 24;
    // Safely return invalid code point of 1 byte length if string exhausted.
    static if (is(S == const(char)[]))
        if (length > str.length)
            return CPI(1, cast(Enum)0);
    // Otherwise use lookup table hierarchy to determine if code units form a valid code point
    // Values up to Enum.max are final results; larger values select the table for the next code unit.
    if (idx > Enum.max) {
        idx = Property.level1[idx - Enum.max - 1][str[1]];
        if (idx > Enum.max) {
            idx = Property.level2[idx - Enum.max - 1][str[2]];
            if (idx > Enum.max)
                idx = Property.level3[idx - Enum.max - 1][str[3]];
        }
    }
    if (idx)
        return CPI(length, cast(Enum)idx);
    else
        return CPI(1, cast(Enum)0);
}
285 |
--------------------------------------------------------------------------------
/source/unicode/generator.d:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | *
3 | * Helper program to generate the lookup tables required for certain Unicode algorithms.
4 | * This code is conforming with Unicode 10.0.0.
5 | *
6 | * Authors:
7 | * $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
8 | *
9 | * Copyright:
10 | * © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
11 | *
12 | * License:
13 | * $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
14 | *
15 | **************************************************************************************************/
16 | module unicode.generator;
17 | import std.conv;
18 | import std.exception;
19 | import core.bitop;
20 | import std.stdio;
21 | import std.string;
22 | import std.algorithm;
23 | import std.meta;
24 | import std.path;
25 |
/// The kinds of properties a Unicode Character Database file can describe.
enum PropertyType
{
    catalog,
    enumeration,
    binary,
    string,
    numeric,
    miscellaneous
}

/// A single name/value pair of a property.
struct Property
{
    string name, value;
}

/// Per code point storage: whether data has been set and the properties collected for it.
struct Entry
{
    bool isSet;
    Property[] properties;
}

/// One parsed data line: a code point range and the property fields that follow it.
struct Line
{
    uint rangeStart, rangeEnd;
    string[] properties;
}
49 |
50 | struct UnicodeCharacterDatabase
51 | {
52 | PropertyType type;
53 | Entry[] entries;
54 | size_t[string] enumerationValues;
55 | string varName;
56 |
/**
 * Loads one Unicode Character Database text file and records a property value for
 * every code point from 0 to 0x10FFFF.
 *
 * Lines are handled as follows:
 * $(UL
 *   $(LI `# @missing: <range>; <value>` supplies a default for a code point range.)
 *   $(LI `# Property: <Name>` switches on override handling: later comment lines of
 *        the form `# <Name>=<Value>` set a value that replaces the value field of
 *        every data line that follows, until the next such line.)
 *   $(LI Everything else is split at the first `#`; the part in front of it is
 *        parsed as `<code point or range>; <value>`.)
 * )
 * Defaults are applied first and explicit definitions second, so explicit lines win.
 *
 * Params:
 *   filename = Path of the data file. Its base name without `.txt` becomes `varName`.
 *   type     = Property kind; only `PropertyType.enumeration` is implemented.
 *
 * Throws:
 *   `Exception` (via `enforce`) when a range is invalid, a data line lacks a value
 *   field, a definition does not carry exactly one value, or a code point is left
 *   without any value. File and number conversion errors propagate unchanged.
 */
this(string filename, PropertyType type)
{
    import std.algorithm;
    import std.stdio;
    import std.uni;

    this.type = type;
    this.entries = new Entry[](0x110000);
    // Index 0 is reserved for "no value" and is keyed by the null string.
    this.enumerationValues[null] = 0;
    this.varName = baseName(filename, ".txt");
    Line[] defaults;
    Line[] actuals;
    bool abbreviates = false;
    string enumOverridePrefix;
    string enumOverride;

    foreach (line; File(filename).byLine())
    {
        bool isDefault = false;
        char[] code;
        Line data;

        // Special @missing line syntax ?
        static immutable isMissingStr = "# @missing: ";
        static immutable propNameStr = "# Property: ";
        if (line.startsWith(isMissingStr))
        {
            isDefault = true;
            code = line[isMissingStr.length..$];
        }
        else if (line.startsWith(propNameStr))
        {
            abbreviates = true;
            enumOverridePrefix = "# "~line[propNameStr.length..$].idup~"=";
        }
        else if (abbreviates && line.startsWith(enumOverridePrefix))
        {
            // byLine() reuses its buffer, so keep an immutable copy.
            enumOverride = line[enumOverridePrefix.length..$].idup;
        }
        else
        {
            // Split between code and comment section
            auto commentSplit = findSplit(line, "#");
            code = commentSplit[0];
        }
        code = strip!isWhite(code);
        if (code.length == 0)
            continue;

        uint fieldIdx = 0;
        foreach (field; splitter(code, ';'))
        {
            field = strip!isWhite(field);
            switch (fieldIdx)
            {
                case 0: // Code point(s)
                    auto range = findSplit(field, "..");
                    data.rangeStart = to!uint(range[0], 16);
                    data.rangeEnd = range[1] == ".." ? to!uint(range[2], 16) : data.rangeStart;
                    enforce(data.rangeEnd <= 0x10FFFF);
                    enforce(data.rangeStart <= data.rangeEnd);
                    // Stored as an exclusive upper bound from here on.
                    data.rangeEnd++;
                    break;
                default:
                    string ifield = enumOverride ? enumOverride : field.idup;
                    data.properties ~= ifield;
                    if (type == PropertyType.enumeration && ifield !in enumerationValues)
                    {
                        // Read the length BEFORE indexing. Writing
                        // `aa[key] = aa.length` depends on whether the key is
                        // inserted before or after the right-hand side is
                        // evaluated, which must not be relied upon; an early
                        // insertion would shift every index by one.
                        immutable nextIndex = enumerationValues.length;
                        enumerationValues[ifield] = nextIndex;
                    }
            }
            fieldIdx++;
        }
        if (type == PropertyType.enumeration)
            enforce(fieldIdx >= 2);
        else assert(0, "Not implemented");

        if (isDefault)
            defaults ~= data;
        else
            actuals ~= data;
    }

    // Defaults first, explicit definitions afterwards so that they overwrite the defaults.
    foreach (set; [defaults, actuals])
    {
        foreach (ref definition; set)
        {
            foreach (cp; definition.rangeStart .. definition.rangeEnd)
            {
                final switch (type) with (PropertyType)
                {
                    case catalog:
                        assert(0, "Not implemented");
                    case enumeration:
                        enforce(definition.properties.length == 1);
                        entries[cp].properties = [Property(null, definition.properties[0])];
                        entries[cp].isSet = true;
                        break;
                    case binary:
                    case string:
                    case numeric:
                    case miscellaneous:
                        assert(0, "Not implemented");
                }
            }
        }
    }

    // Every code point must have received a value from either set.
    foreach (cp; 0 .. 0x110000)
        enforce(entries[cp].isSet);
}
169 |
/**
 * One of the 256 cells of a lookup `Table`.
 *
 * A cell is either a leaf holding an enumeration value together with the byte
 * sequences that ended up in it, or it refers to a sub-table for the next byte.
 */
struct TableEntry
{
    /// Byte sequences collected in this cell while it is a leaf.
    ubyte[][] byteSeqs;
    /// Enumeration value of the leaf; `null` while nothing was assigned.
    string enumerationValue;
    /// Sub-table for the following byte, or `null` for a leaf.
    Table* subEntries;

    /// Text form: the conversion of `subEntries` if present, otherwise the enumeration value.
    string toString()
    {
        return subEntries ? subEntries.to!string() : enumerationValue;
    }
}
184 |
/**
 * A 256-way lookup table for one byte position.
 *
 * Hashing and equality look only at `level` and, per cell, at either the index of
 * the referenced sub-table or the enumeration value. The table's own `idx` and the
 * cells' `byteSeqs` take no part, so structurally equal tables compare equal.
 */
struct Table
{
    uint level, idx;
    TableEntry[256] entries;

    /// Hash over all cells in order, finished off with `level`.
    size_t toHash() const nothrow
    {
        size_t acc;
        foreach (ref cell; entries)
        {
            acc = cell.subEntries
                ? hashOf(cell.subEntries.idx, acc)
                : hashOf(cell.enumerationValue, acc);
        }
        return hashOf(level, acc);
    }

    /// Equality consistent with `toHash`.
    bool opEquals(ref const Table key) const
    {
        if (this.level != key.level)
            return false;

        foreach (i, ref mine; this.entries)
        {
            const theirs = &key.entries[i];
            if (mine.subEntries && theirs.subEntries)
            {
                // Both cells branch: they must reference the same sub-table index.
                if (mine.subEntries.idx != theirs.subEntries.idx)
                    return false;
            }
            else if (mine.subEntries || theirs.subEntries)
            {
                // One cell branches while the other is a leaf.
                return false;
            }
            else if (mine.enumerationValue != theirs.enumerationValue)
            {
                return false;
            }
        }
        return true;
    }
}
222 |
/**
 * Generates D source for a struct named `varName` that maps UTF-8 byte sequences
 * to the loaded enumeration values.
 *
 * Every code point from 0 to 0x10FFFF is encoded into 1 to 4 bytes and inserted
 * into a trie of 256-way tables, one level per byte position. A cell stays a leaf
 * for as long as all sequences passing through it share one value and is split
 * into a sub-table as soon as a different value shows up. Structurally identical
 * sub-tables are merged, the remaining ones are numbered per level, and the result
 * is emitted as `level0` .. `level3` arrays of `ubyte[256]`.
 *
 * Cell encoding in the emitted arrays: values below `enumerationValues.length`
 * are enumeration indices; larger values are a sub-table index at the next level
 * plus `enumerationValues.length`.
 *
 * A summary line is also written to standard output.
 *
 * Returns: The generated source text.
 * Throws: `Exception` (via `enforce`) when table bookkeeping is inconsistent or when
 *         tables plus enumeration values of a level do not fit into a `ubyte`.
 */
string generateEnumerationCode()
{
    auto lookup = new Table;
    uint[4] levelAssignments;
    foreach (dchar cp; 0 .. 0x110000)
    {
        // Build the UTF-8 style byte sequence of the code point.
        ubyte[] byteSeq;
        if (cp < 128)
        {
            byteSeq ~= cast(char)cp;
        }
        else
        {
            uint topBit = 6;
            uint bits = cp;
            do
            {
                // Peel off 6 bits per continuation byte, lowest bits first.
                byteSeq = char(bits & 0x3F | 0x80) ~ byteSeq;
                bits >>= 6;
                topBit--;
            }
            while (bits && bsr(bits) >= topBit);
            // Lead byte: length marker bits followed by the remaining payload bits.
            byteSeq = cast(char)(0xFE << topBit | bits) ~ byteSeq;
        }
        auto table = lookup;
        foreach (uint i, cu; byteSeq)
        {
            auto entry = &table.entries[cu];
            if (entry.subEntries)
            {
                table = entry.subEntries;
            }
            else if (entry.enumerationValue is null)
            {
                // Unused cell: becomes a leaf for this sequence.
                entry.byteSeqs = [byteSeq];
                entry.enumerationValue = entries[cp].properties[0].value;
                break;
            }
            else if (entry.enumerationValue == entries[cp].properties[0].value)
            {
                // Same value as the existing leaf: just remember the sequence.
                entry.byteSeqs ~= byteSeq;
                break;
            }
            else
            {
                // Conflicting value: push the leaf's sequences one level down.
                auto subTable = new Table(i+1);
                foreach (byteSeq2; entry.byteSeqs)
                {
                    auto moved = &subTable.entries[byteSeq2[i+1]];
                    moved.enumerationValue = entry.enumerationValue;
                    // Append rather than overwrite: several sequences may share
                    // this next byte, and all of them are needed if the new
                    // leaf has to be split again later.
                    moved.byteSeqs ~= byteSeq2;
                }
                entry.byteSeqs = null;
                entry.enumerationValue = null;
                entry.subEntries = subTable;
            }
            table = entry.subEntries;
        }
    }

    Table*[Table] tableSet;
    Table*[uint][4] tableByIdx;
    tableByIdx[0][0] = lookup;

    // Depth-first: children get their index (or are merged) before the parent is
    // compared, because table equality depends on the children's indices.
    void assignIndices(Table* table, uint level = 0)
    {
        // Iterate by reference so that redirecting a cell to an already known,
        // identical table actually changes the parent table.
        foreach (ref entry; table.entries)
        {
            if (entry.subEntries)
            {
                assignIndices(entry.subEntries, level + 1);
                if (auto dup = *entry.subEntries in tableSet)
                {
                    entry.subEntries = *dup;
                }
                else
                {
                    entry.subEntries.idx = levelAssignments[level + 1]++;
                    tableByIdx[level + 1][entry.subEntries.idx] = entry.subEntries;
                    tableSet[*entry.subEntries] = entry.subEntries;
                }
            }
        }
    }
    assignIndices(lookup);
    levelAssignments[0] = 1;

    // Each table has 256 one-byte cells, i.e. a quarter KiB.
    writefln("%s: Using %s tables with a total size: %s KiB",
        varName, sum(levelAssignments[]), sum(levelAssignments[]) / 4f);
    stdout.flush(); // in case we are buffered

    auto level0 = new ubyte[256][](levelAssignments[0]);
    auto level1 = new ubyte[256][](levelAssignments[1]);
    auto level2 = new ubyte[256][](levelAssignments[2]);
    auto level3 = new ubyte[256][](levelAssignments[3]);

    foreach (level, bin; AliasSeq!(level0, level1, level2, level3))
    {
        foreach (idx; 0 .. levelAssignments[level])
        {
            Table* table = tableByIdx[level][idx];
            enforce(table.idx == idx);
            enforce(table.level == level);
            enforce(levelAssignments[level] + enumerationValues.length <= 256,
                format("Sum of tables and enumarations at level %s exceeds ubyte storage capacity", level));
            foreach (i, ref entry; table.entries)
            {
                if (entry.subEntries)
                    bin[idx][i] = cast(ubyte)(entry.subEntries.idx + enumerationValues.length);
                else
                    bin[idx][i] = cast(ubyte)enumerationValues[entry.enumerationValue];
            }
        }
    }

    // Write struct with enum
    string code = "struct " ~ varName ~ "\n{\n";
    auto sortedEnum = new string[](enumerationValues.length);
    foreach (key, value; enumerationValues)
        sortedEnum[value] = key;
    code ~= "\tenum Enum : size_t\n\t{\n\t\t";
    // The null key (index 0, "no value") is emitted under the name "__".
    foreach (key, value; sortedEnum)
        code ~= (value ? value : "__") ~ ", ";
    code ~= "\n\t}\n\n";
    foreach (k, bin; AliasSeq!(level0, level1, level2, level3))
    {
        code ~= "\tstatic immutable ubyte[256][" ~ to!string(bin.length) ~ "] level" ~ to!string(k) ~ " = [\n";
        foreach (i; 0 .. bin.length)
            code ~= "\t\t[" ~ format("%(%s,%)", bin[i]) ~ "],\n";
        code ~= "\t];\n";
    }
    code ~= "}\n\n";
    return code;
}
356 | }
357 |
/// Short name for the database type used by `main`.
alias UCD = UnicodeCharacterDatabase;

/**
 * Regenerates `fast.internal.unicode_tables` from three Unicode Character Database
 * files. All paths are relative to the current working directory.
 */
void main()
{
    string code = "module fast.internal.unicode_tables;\n\n";

    // Processed in this order; each file contributes one generated struct.
    foreach (path; [
        "../ucd/auxiliary/GraphemeBreakProperty.txt",
        "../ucd/extracted/DerivedGeneralCategory.txt",
        "../ucd/extracted/DerivedLineBreak.txt",
    ])
    {
        auto ucd = UCD(path, PropertyType.enumeration);
        code ~= ucd.generateEnumerationCode();
    }

    // The output file is only opened after every table was generated successfully.
    File("../source/fast/internal/unicode_tables.d", "w").write(code);
}
--------------------------------------------------------------------------------
/test/fail1.json:
--------------------------------------------------------------------------------
1 | "A JSON payload should be an object or array, not a string."
--------------------------------------------------------------------------------
/test/fail10.json:
--------------------------------------------------------------------------------
1 | {"Extra value after close": true} "misplaced quoted value"
--------------------------------------------------------------------------------
/test/fail11.json:
--------------------------------------------------------------------------------
1 | {"Illegal expression": 1 + 2}
--------------------------------------------------------------------------------
/test/fail12.json:
--------------------------------------------------------------------------------
1 | {"Illegal invocation": alert()}
--------------------------------------------------------------------------------
/test/fail13.json:
--------------------------------------------------------------------------------
1 | {"Numbers cannot have leading zeroes": 013}
--------------------------------------------------------------------------------
/test/fail14.json:
--------------------------------------------------------------------------------
1 | {"Numbers cannot be hex": 0x14}
--------------------------------------------------------------------------------
/test/fail15.json:
--------------------------------------------------------------------------------
1 | ["Illegal backslash escape: \x15"]
--------------------------------------------------------------------------------
/test/fail16.json:
--------------------------------------------------------------------------------
1 | [\naked]
--------------------------------------------------------------------------------
/test/fail17.json:
--------------------------------------------------------------------------------
1 | ["Illegal backslash escape: \017"]
--------------------------------------------------------------------------------
/test/fail18.json:
--------------------------------------------------------------------------------
1 | [[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]
--------------------------------------------------------------------------------
/test/fail19.json:
--------------------------------------------------------------------------------
1 | {"Missing colon" null}
--------------------------------------------------------------------------------
/test/fail2.json:
--------------------------------------------------------------------------------
1 | ["Unclosed array"
--------------------------------------------------------------------------------
/test/fail20.json:
--------------------------------------------------------------------------------
1 | {"Double colon":: null}
--------------------------------------------------------------------------------
/test/fail21.json:
--------------------------------------------------------------------------------
1 | {"Comma instead of colon", null}
--------------------------------------------------------------------------------
/test/fail22.json:
--------------------------------------------------------------------------------
1 | ["Colon instead of comma": false]
--------------------------------------------------------------------------------
/test/fail23.json:
--------------------------------------------------------------------------------
1 | ["Bad value", truth]
--------------------------------------------------------------------------------
/test/fail24.json:
--------------------------------------------------------------------------------
1 | ['single quote']
--------------------------------------------------------------------------------
/test/fail25.json:
--------------------------------------------------------------------------------
1 | [" tab character in string "]
--------------------------------------------------------------------------------
/test/fail26.json:
--------------------------------------------------------------------------------
1 | ["tab\ character\ in\ string\ "]
--------------------------------------------------------------------------------
/test/fail27.json:
--------------------------------------------------------------------------------
1 | ["line
2 | break"]
--------------------------------------------------------------------------------
/test/fail28.json:
--------------------------------------------------------------------------------
1 | ["line\
2 | break"]
--------------------------------------------------------------------------------
/test/fail29.json:
--------------------------------------------------------------------------------
1 | [0e]
--------------------------------------------------------------------------------
/test/fail3.json:
--------------------------------------------------------------------------------
1 | {unquoted_key: "keys must be quoted"}
--------------------------------------------------------------------------------
/test/fail30.json:
--------------------------------------------------------------------------------
1 | [0e+]
--------------------------------------------------------------------------------
/test/fail31.json:
--------------------------------------------------------------------------------
1 | [0e+-1]
--------------------------------------------------------------------------------
/test/fail32.json:
--------------------------------------------------------------------------------
1 | {"Comma instead if closing brace": true,
--------------------------------------------------------------------------------
/test/fail33.json:
--------------------------------------------------------------------------------
1 | ["mismatch"}
--------------------------------------------------------------------------------
/test/fail4.json:
--------------------------------------------------------------------------------
1 | ["extra comma",]
--------------------------------------------------------------------------------
/test/fail5.json:
--------------------------------------------------------------------------------
1 | ["double extra comma",,]
--------------------------------------------------------------------------------
/test/fail6.json:
--------------------------------------------------------------------------------
1 | [ , "<-- missing value"]
--------------------------------------------------------------------------------
/test/fail7.json:
--------------------------------------------------------------------------------
1 | ["Comma after the close"],
--------------------------------------------------------------------------------
/test/fail8.json:
--------------------------------------------------------------------------------
1 | ["Extra close"]]
--------------------------------------------------------------------------------
/test/fail9.json:
--------------------------------------------------------------------------------
1 | {"Extra comma": true,}
--------------------------------------------------------------------------------
/test/pass1.json:
--------------------------------------------------------------------------------
1 | [
2 | "JSON Test Pattern pass1",
3 | {"object with 1 member":["array with 1 element"]},
4 | {},
5 | [],
6 | -42,
7 | true,
8 | false,
9 | null,
10 | {
11 | "integer": 1234567890,
12 | "real": -9876.543210,
13 | "e": 0.123456789e-12,
14 | "E": 1.234567890E+34,
15 | "": 23456789012E66,
16 | "zero": 0,
17 | "one": 1,
18 | "space": " ",
19 | "quote": "\"",
20 | "backslash": "\\",
21 | "controls": "\b\f\n\r\t",
22 | "slash": "/ & \/",
23 | "alpha": "abcdefghijklmnopqrstuvwyz",
24 | "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
25 | "digit": "0123456789",
26 | "0123456789": "digit",
27 | "special": "`1~!@#$%^&*()_+-={':[,]}|;.>?",
28 | "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
29 | "true": true,
30 | "false": false,
31 | "null": null,
32 | "array":[ ],
33 | "object":{ },
34 | "address": "50 St. James Street",
35 | "url": "http://www.JSON.org/",
36 | "comment": "// /* <!-- --",
37 | "# -- --> */": " ",
38 | " s p a c e d " :[1,2 , 3
39 |
40 | ,
41 |
42 | 4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
43 | "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
44 | "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
45 | "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
46 | : "A key can be any string"
47 | },
48 | 0.5 ,98.6
49 | ,
50 | 99.44
51 | ,
52 |
53 | 1066,
54 | 1e1,
55 | 0.1e1,
56 | 1e-1,
57 | 1e00,2e+00,2e-00
58 | ,"rosebud"]
--------------------------------------------------------------------------------
/test/pass2.json:
--------------------------------------------------------------------------------
1 | [[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
--------------------------------------------------------------------------------
/test/pass3.json:
--------------------------------------------------------------------------------
1 | {
2 | "JSON Test Pattern pass3": {
3 | "The outermost value": "must be an object or array.",
4 | "In this test": "It is an object."
5 | }
6 | }
7 |
--------------------------------------------------------------------------------