├── .gitignore
├── README.md
├── dub.sdl
├── mono-d
    ├── docs.dproj
    ├── fast.dproj
    ├── fast.sln
    └── generate unicode tables.dproj
├── source
    ├── fast
    │   ├── buffer.d
    │   ├── cstring.d
    │   ├── format.d
    │   ├── internal
    │   │   ├── benchmarks.d
    │   │   ├── helpers.d
    │   │   ├── sysdef.di
    │   │   └── unicode_tables.d
    │   ├── intmath.d
    │   ├── json.d
    │   ├── parsing.d
    │   ├── string.d
    │   └── unicode.d
    ├── std
    │   └── simd.d
    └── unicode
    │   └── generator.d
└── test
    ├── fail1.json
    ├── fail10.json
    ├── fail11.json
    ├── fail12.json
    ├── fail13.json
    ├── fail14.json
    ├── fail15.json
    ├── fail16.json
    ├── fail17.json
    ├── fail18.json
    ├── fail19.json
    ├── fail2.json
    ├── fail20.json
    ├── fail21.json
    ├── fail22.json
    ├── fail23.json
    ├── fail24.json
    ├── fail25.json
    ├── fail26.json
    ├── fail27.json
    ├── fail28.json
    ├── fail29.json
    ├── fail3.json
    ├── fail30.json
    ├── fail31.json
    ├── fail32.json
    ├── fail33.json
    ├── fail4.json
    ├── fail5.json
    ├── fail6.json
    ├── fail7.json
    ├── fail8.json
    ├── fail9.json
    ├── pass1.json
    ├── pass2.json
    └── pass3.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Generated documentation
 2 | /docs/
 3 | 
 4 | # Dub cache
 5 | /.dub/
 6 | 
 7 | # Object and executable output directory
 8 | /generated/
 9 | 
10 | # Generated by OProfile (system wide profiler)
11 | /oprofile_data/
12 | 
13 | # Unicode Character Database files can be downloaded here when tables need to be regenerated
14 | /ucd/
15 | 
16 | # Mono-D user preferences
17 | /mono-d/fast.userprefs
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | fast
 2 | ====
 3 | 
 4 | This library aims to provide the fastest possible implementation of some everyday routines.
 5 | 
 6 | The contained functions avoid GC allocations and input validation. They may use SSE or stack allocations to reach a high throughput, so that in some cases a 20-fold speed increase can be achieved.
 7 | 
 8 | **[DMD](https://dlang.org/)**, **[GDC](https://gdcproject.org/)** and **[LDC2](https://wiki.dlang.org/LDC)** compilers are supported. Tested with front-end versions **2.068** through **2.079**.
 9 | 
10 | ### Benchmark
11 | A benchmark is included and can be run through dub, e.g.:
12 | 
13 |     dub --config=benchmark --build=release --compiler=gdc
14 | 
15 | ### Examples
16 | 
17 | ##### Read JSON file with coordinates.
18 | ```d
19 | struct Point3D { double x, y, z; }
20 | 
21 | void main()
22 | {
23 |     import fast.json;
24 |     auto points = json.coordinates.read!(Point3D[]);
25 | }
26 | ```
27 | 
28 | ##### SSE3 accelerated splitting around '/' and '\'
29 | ```d
30 | string rest = pathname;
31 | string element;
32 | 
33 | import fast.string;
34 | while (rest.split!`or(=\,=/)`(element, rest))
35 | {
36 |     // `element` is now the next directory.
37 |     // `rest` is what remains after the \ or /.
38 | }
39 | // `element` is now the file name part of the path.
40 | ```
41 | 
42 | ##### Calling Windows API functions.
43 | ```d
44 | void createHardlink(string from, string to)
45 | {
46 |     import fast.cstring : wcharPtr;
47 |     CreateHardLinkW(wcharPtr!to, wcharPtr!from, null);
48 | }
49 | ```
50 | 
51 | ##### Calling Linux API functions.
52 | ```d
53 | void createHardlink(string from, string to)
54 | {
55 |     import fast.cstring : charPtr;
56 |     link(charPtr!from, charPtr!to);
57 | }
58 | ```
59 | 


--------------------------------------------------------------------------------
/dub.sdl:
--------------------------------------------------------------------------------
 1 | name "fast"
 2 | description "A library that aims to provide the fastest possible implementation of some every day routines."
 3 | homepage "http://github.com/mleise/fast"
 4 | authors "Marco Leise"
 5 | copyright "Copyright © 2017, Marco Leise"
 6 | license "GPL-3.0"
 7 | 
 8 | excludedSourceFiles "source/docs/*.d" "source/unicode/*.d"
 9 | targetPath "generated"
10 | 
11 | configuration "library" {
12 | 	platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
13 | 	targetType "library"
14 | }
15 | 
16 | configuration "shared-library" {
17 | 	platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
18 | 	targetType "dynamicLibrary"
19 | 	libs "gdruntime" "gphobos" platform="gdc"  // Force linking with shared Phobos2, not the non-PIC static objects
20 | }
21 | 
22 | configuration "benchmark" {
23 | 	platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
24 | 	targetType "executable"
25 | 	versions "benchmark"
26 | }
27 | 
28 | configuration "benchmark-pic" {
29 | 	platforms "posix-dmd" "posix-x86_64-ldc" "posix-x86-gdc" "posix-x86_64-gdc"
30 | 	targetType "executable"
31 | 	versions "benchmark"
32 | 	dflags "-fPIC"
33 | }
34 | 


--------------------------------------------------------------------------------
/mono-d/docs.dproj:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup>
 4 |     <Configuration Condition=" '$(Configuration)' == '' ">Documentation</Configuration>
 5 |     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
 6 |     <ProductVersion>8.0.30703</ProductVersion>
 7 |     <SchemaVersion>2.0</SchemaVersion>
 8 |     <ProjectGuid>{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}</ProjectGuid>
 9 |     <BaseDirectory>..\source</BaseDirectory>
10 |     <Compiler>DMD</Compiler>
11 |     <PreferOneStepBuild>true</PreferOneStepBuild>
12 |     <UseDefaultCompiler>true</UseDefaultCompiler>
13 |     <IncrementalLinking>true</IncrementalLinking>
14 |     <ReleaseVersion>0.3.2</ReleaseVersion>
15 |   </PropertyGroup>
16 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Documentation|AnyCPU' ">
17 |     <OutputPath>..\generated</OutputPath>
18 |     <ObjectsDirectory>../generated/debug</ObjectsDirectory>
19 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
20 |     <UnittestMode>false</UnittestMode>
21 |     <OutputName>generate_docs</OutputName>
22 |     <Target>Executable</Target>
23 |     <Externalconsole>true</Externalconsole>
24 |     <DebugLevel>0</DebugLevel>
25 |     <DebugSymbols>true</DebugSymbols>
26 |   </PropertyGroup>
27 |   <ItemGroup>
28 |     <Folder Include="..\source\docs\" />
29 |   </ItemGroup>
30 |   <ItemGroup>
31 |     <Compile Include="..\source\docs\doc.d" />
32 |   </ItemGroup>
33 | </Project>


--------------------------------------------------------------------------------
/mono-d/fast.dproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <PropertyGroup>
  4 |     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
  5 |     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
  6 |     <ProductVersion>8.0.30703</ProductVersion>
  7 |     <SchemaVersion>2.0</SchemaVersion>
  8 |     <ProjectGuid>{DFEB5CCB-A636-4971-8302-89CDBF3B4503}</ProjectGuid>
  9 |     <PreferOneStepBuild>true</PreferOneStepBuild>
 10 |     <UseDefaultCompiler>false</UseDefaultCompiler>
 11 |     <IncrementalLinking>True</IncrementalLinking>
 12 |     <Compiler>DMD</Compiler>
 13 |     <ExtraCompilerArguments>
 14 |     </ExtraCompilerArguments>
 15 |     <ExtraLinkerArguments>
 16 |     </ExtraLinkerArguments>
 17 |     <BaseDirectory>..\source</BaseDirectory>
 18 |     <ReleaseVersion>0.3.2</ReleaseVersion>
 19 |   </PropertyGroup>
 20 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Benchmark|AnyCPU' ">
 21 |     <OutputPath>..\generated</OutputPath>
 22 |     <Target>Executable</Target>
 23 |     <OutputName>benchmark</OutputName>
 24 |     <UnittestMode>false</UnittestMode>
 25 |     <DebugLevel>0</DebugLevel>
 26 |     <ObjectsDirectory>../generated/release</ObjectsDirectory>
 27 |     <DDocDirectory>../docs</DDocDirectory>
 28 |     <VersionIds>
 29 |       <VersionIds>
 30 |         <String>benchmark</String>
 31 |       </VersionIds>
 32 |     </VersionIds>
 33 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
 34 |     <ConsolePause>false</ConsolePause>
 35 |   </PropertyGroup>
 36 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
 37 |     <DebugSymbols>true</DebugSymbols>
 38 |     <OutputPath>..\generated</OutputPath>
 39 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
 40 |     <ObjectsDirectory>../generated/debug</ObjectsDirectory>
 41 |     <DDocDirectory>../docs</DDocDirectory>
 42 |     <VersionIds>
 43 |       <VersionIds>
 44 |         <String>benchmark</String>
 45 |       </VersionIds>
 46 |     </VersionIds>
 47 |     <UnittestMode>false</UnittestMode>
 48 |     <OutputName>benchmark-debug</OutputName>
 49 |     <Target>Executable</Target>
 50 |     <ConsolePause>false</ConsolePause>
 51 |     <DebugLevel>0</DebugLevel>
 52 |     <Externalconsole>true</Externalconsole>
 53 |   </PropertyGroup>
 54 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Unittest|x86' ">
 55 |     <DebugSymbols>true</DebugSymbols>
 56 |     <OutputPath>..\generated</OutputPath>
 57 |     <ConsolePause>false</ConsolePause>
 58 |     <Target>Executable</Target>
 59 |     <OutputName>fast-unittest-x86</OutputName>
 60 |     <UnittestMode>true</UnittestMode>
 61 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
 62 |     <DebugLevel>0</DebugLevel>
 63 |     <ObjectsDirectory>../generated/unittest-x86</ObjectsDirectory>
 64 |     <DDocDirectory>../docs</DDocDirectory>
 65 |   </PropertyGroup>
 66 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Unittest|x64' ">
 67 |     <DebugSymbols>true</DebugSymbols>
 68 |     <OutputPath>..\generated</OutputPath>
 69 |     <ConsolePause>false</ConsolePause>
 70 |     <Target>Executable</Target>
 71 |     <OutputName>fast-unittest-x64</OutputName>
 72 |     <UnittestMode>true</UnittestMode>
 73 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
 74 |     <DebugLevel>0</DebugLevel>
 75 |     <ObjectsDirectory>../generated/unittest-x64</ObjectsDirectory>
 76 |     <DDocDirectory>../docs</DDocDirectory>
 77 |   </PropertyGroup>
 78 |   <ItemGroup>
 79 |     <Folder Include="source\fast\" />
 80 |     <Folder Include="source\std\" />
 81 |     <Folder Include="..\source\fast\" />
 82 |     <Folder Include="..\source\fast\internal\" />
 83 |   </ItemGroup>
 84 |   <ItemGroup>
 85 |     <Compile Include="..\source\std\simd.d" />
 86 |     <Compile Include="..\source\fast\internal\unicode_tables.d" />
 87 |     <Compile Include="..\source\fast\unicode.d" />
 88 |     <Compile Include="..\source\fast\internal\benchmarks.d" />
 89 |     <Compile Include="..\source\fast\parsing.d" />
 90 |     <Compile Include="..\source\fast\intmath.d" />
 91 |     <Compile Include="..\source\fast\internal\helpers.d" />
 92 |     <Compile Include="..\source\fast\format.d" />
 93 |     <Compile Include="..\source\fast\string.d" />
 94 |     <Compile Include="..\source\fast\json.d" />
 95 |     <Compile Include="..\source\fast\cstring.d" />
 96 |     <Compile Include="..\source\fast\buffer.d" />
 97 |   </ItemGroup>
 98 |   <ProjectExtensions>
 99 |     <MonoDevelop>
100 |       <Properties>
101 |         <Policies>
102 |           <ChangeLogPolicy UpdateMode="None" inheritsSet="Mono">
103 |             <MessageStyle LineAlign="0" />
104 |           </ChangeLogPolicy>
105 |         </Policies>
106 |       </Properties>
107 |     </MonoDevelop>
108 |   </ProjectExtensions>
109 |   <ItemGroup>
110 |     <None Include="..\source\fast\locale.d" />
111 |     <None Include="..\source\fast\internal\sysdef.di" />
112 |   </ItemGroup>
113 | </Project>
114 | 


--------------------------------------------------------------------------------
/mono-d/fast.sln:
--------------------------------------------------------------------------------
  1 | ﻿
  2 | Microsoft Visual Studio Solution File, Format Version 11.00
  3 | # Visual Studio 2010
  4 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "fast", "fast.dproj", "{DFEB5CCB-A636-4971-8302-89CDBF3B4503}"
  5 | EndProject
  6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{87AD35CC-088E-43A6-99E8-A216AABD25F0}"
  7 | 	ProjectSection(SolutionItems) = preProject
  8 | 		..\README.md = ..\README.md
  9 | 		..\.gitignore = ..\.gitignore
 10 | 		..\dub.sdl = ..\dub.sdl
 11 | 		..\test\fail1.json = ..\test\fail1.json
 12 | 		..\test\fail2.json = ..\test\fail2.json
 13 | 		..\test\fail3.json = ..\test\fail3.json
 14 | 		..\test\fail4.json = ..\test\fail4.json
 15 | 		..\test\fail5.json = ..\test\fail5.json
 16 | 		..\test\fail6.json = ..\test\fail6.json
 17 | 		..\test\fail7.json = ..\test\fail7.json
 18 | 		..\test\fail8.json = ..\test\fail8.json
 19 | 		..\test\fail9.json = ..\test\fail9.json
 20 | 		..\test\fail10.json = ..\test\fail10.json
 21 | 		..\test\fail11.json = ..\test\fail11.json
 22 | 		..\test\fail12.json = ..\test\fail12.json
 23 | 		..\test\fail13.json = ..\test\fail13.json
 24 | 		..\test\fail14.json = ..\test\fail14.json
 25 | 		..\test\fail15.json = ..\test\fail15.json
 26 | 		..\test\fail16.json = ..\test\fail16.json
 27 | 		..\test\fail17.json = ..\test\fail17.json
 28 | 		..\test\fail18.json = ..\test\fail18.json
 29 | 		..\test\fail19.json = ..\test\fail19.json
 30 | 		..\test\fail20.json = ..\test\fail20.json
 31 | 		..\test\fail21.json = ..\test\fail21.json
 32 | 		..\test\fail22.json = ..\test\fail22.json
 33 | 		..\test\fail23.json = ..\test\fail23.json
 34 | 		..\test\fail24.json = ..\test\fail24.json
 35 | 		..\test\fail25.json = ..\test\fail25.json
 36 | 		..\test\fail26.json = ..\test\fail26.json
 37 | 		..\test\fail27.json = ..\test\fail27.json
 38 | 		..\test\fail28.json = ..\test\fail28.json
 39 | 		..\test\fail29.json = ..\test\fail29.json
 40 | 		..\test\fail30.json = ..\test\fail30.json
 41 | 		..\test\fail31.json = ..\test\fail31.json
 42 | 		..\test\fail32.json = ..\test\fail32.json
 43 | 		..\test\fail33.json = ..\test\fail33.json
 44 | 		..\test\pass1.json = ..\test\pass1.json
 45 | 		..\test\pass2.json = ..\test\pass2.json
 46 | 		..\test\pass3.json = ..\test\pass3.json
 47 | 		..\benchall.sh = ..\benchall.sh
 48 | 	EndProjectSection
 49 | EndProject
 50 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "docs", "docs.dproj", "{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}"
 51 | EndProject
 52 | Project("{3947E667-4C90-4C3A-BEB9-7148D6FE0D7C}") = "generate unicode tables", "generate unicode tables.dproj", "{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}"
 53 | EndProject
 54 | Global
 55 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 56 | 		Benchmark|Any CPU = Benchmark|Any CPU
 57 | 		Debug|Any CPU = Debug|Any CPU
 58 | 		Release|Any CPU = Release|Any CPU
 59 | 		Unittest|x86 = Unittest|x86
 60 | 		Unittest|x64 = Unittest|x64
 61 | 		Unicode tables|Any CPU = Unicode tables|Any CPU
 62 | 		Documentation|Any CPU = Documentation|Any CPU
 63 | 	EndGlobalSection
 64 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 65 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU
 66 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU
 67 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Documentation|Any CPU.ActiveCfg = Unicode tables|Any CPU
 68 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU
 69 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.ActiveCfg = Unicode tables|Any CPU
 70 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unicode tables|Any CPU.Build.0 = Unicode tables|Any CPU
 71 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU
 72 | 		{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU
 73 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Benchmark|Any CPU.ActiveCfg = Unicode tables|Any CPU
 74 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Debug|Any CPU.ActiveCfg = Unicode tables|Any CPU
 75 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU
 76 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Documentation|Any CPU.Build.0 = Documentation|Any CPU
 77 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Release|Any CPU.ActiveCfg = Unicode tables|Any CPU
 78 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unicode tables|Any CPU.ActiveCfg = Documentation|Any CPU
 79 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x64.ActiveCfg = Unicode tables|Any CPU
 80 | 		{CD5AEB53-9D32-49D3-983A-F46CF7E7AC60}.Unittest|x86.ActiveCfg = Unicode tables|Any CPU
 81 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.ActiveCfg = Benchmark|Any CPU
 82 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Benchmark|Any CPU.Build.0 = Benchmark|Any CPU
 83 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 84 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Debug|Any CPU.Build.0 = Debug|Any CPU
 85 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Documentation|Any CPU.ActiveCfg = Documentation|Any CPU
 86 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.ActiveCfg = Benchmark|Any CPU
 87 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Release|Any CPU.Build.0 = Benchmark|Any CPU
 88 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unicode tables|Any CPU.ActiveCfg = Benchmark|Any CPU
 89 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.ActiveCfg = Unittest|x64
 90 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x64.Build.0 = Unittest|x64
 91 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.ActiveCfg = Unittest|x86
 92 | 		{DFEB5CCB-A636-4971-8302-89CDBF3B4503}.Unittest|x86.Build.0 = Unittest|x86
 93 | 	EndGlobalSection
 94 | 	GlobalSection(NestedProjects) = preSolution
 95 | 	EndGlobalSection
 96 | 	GlobalSection(MonoDevelopProperties) = preSolution
 97 | 		BaseDirectory = ..
 98 | 		Policies = $0
 99 | 		$0.TextStylePolicy = $1
100 | 		$1.inheritsSet = null
101 | 		$1.scope = application/json
102 | 		$0.TextStylePolicy = $2
103 | 		$2.inheritsSet = Mono
104 | 		$2.inheritsScope = text/plain
105 | 		$2.scope = text/plain
106 | 		$0.TextStylePolicy = $3
107 | 		$3.FileWidth = 120
108 | 		$3.TabsToSpaces = False
109 | 		$3.NoTabsAfterNonTabs = True
110 | 		$3.inheritsSet = VisualStudio
111 | 		$3.inheritsScope = text/plain
112 | 		$3.scope = text/x-d
113 | 		$0.DFormattingPolicy = $4
114 | 		$4.inheritsSet = Mono
115 | 		$4.inheritsScope = text/x-d
116 | 		$4.scope = text/x-d
117 | 		$0.StandardHeader = $5
118 | 		$5.Text = 
119 | 		$5.IncludeInNewFiles = True
120 | 		$0.NameConventionPolicy = $6
121 | 		$6.Rules = $7
122 | 		$7.NamingRule = $8
123 | 		$8.Name = Namespaces
124 | 		$8.AffectedEntity = Namespace
125 | 		$8.VisibilityMask = VisibilityMask
126 | 		$8.NamingStyle = PascalCase
127 | 		$8.IncludeInstanceMembers = True
128 | 		$8.IncludeStaticEntities = True
129 | 		$7.NamingRule = $9
130 | 		$9.Name = Types
131 | 		$9.AffectedEntity = Class, Struct, Enum, Delegate
132 | 		$9.VisibilityMask = Public
133 | 		$9.NamingStyle = PascalCase
134 | 		$9.IncludeInstanceMembers = True
135 | 		$9.IncludeStaticEntities = True
136 | 		$7.NamingRule = $10
137 | 		$10.Name = Interfaces
138 | 		$10.RequiredPrefixes = $11
139 | 		$11.String = I
140 | 		$10.AffectedEntity = Interface
141 | 		$10.VisibilityMask = Public
142 | 		$10.NamingStyle = PascalCase
143 | 		$10.IncludeInstanceMembers = True
144 | 		$10.IncludeStaticEntities = True
145 | 		$7.NamingRule = $12
146 | 		$12.Name = Attributes
147 | 		$12.RequiredSuffixes = $13
148 | 		$13.String = Attribute
149 | 		$12.AffectedEntity = CustomAttributes
150 | 		$12.VisibilityMask = Public
151 | 		$12.NamingStyle = PascalCase
152 | 		$12.IncludeInstanceMembers = True
153 | 		$12.IncludeStaticEntities = True
154 | 		$7.NamingRule = $14
155 | 		$14.Name = Event Arguments
156 | 		$14.RequiredSuffixes = $15
157 | 		$15.String = EventArgs
158 | 		$14.AffectedEntity = CustomEventArgs
159 | 		$14.VisibilityMask = Public
160 | 		$14.NamingStyle = PascalCase
161 | 		$14.IncludeInstanceMembers = True
162 | 		$14.IncludeStaticEntities = True
163 | 		$7.NamingRule = $16
164 | 		$16.Name = Exceptions
165 | 		$16.RequiredSuffixes = $17
166 | 		$17.String = Exception
167 | 		$16.AffectedEntity = CustomExceptions
168 | 		$16.VisibilityMask = VisibilityMask
169 | 		$16.NamingStyle = PascalCase
170 | 		$16.IncludeInstanceMembers = True
171 | 		$16.IncludeStaticEntities = True
172 | 		$7.NamingRule = $18
173 | 		$18.Name = Methods
174 | 		$18.AffectedEntity = Methods
175 | 		$18.VisibilityMask = Protected, Public
176 | 		$18.NamingStyle = PascalCase
177 | 		$18.IncludeInstanceMembers = True
178 | 		$18.IncludeStaticEntities = True
179 | 		$7.NamingRule = $19
180 | 		$19.Name = Static Readonly Fields
181 | 		$19.AffectedEntity = ReadonlyField
182 | 		$19.VisibilityMask = Protected, Public
183 | 		$19.NamingStyle = PascalCase
184 | 		$19.IncludeInstanceMembers = False
185 | 		$19.IncludeStaticEntities = True
186 | 		$7.NamingRule = $20
187 | 		$20.Name = Fields
188 | 		$20.AffectedEntity = Field
189 | 		$20.VisibilityMask = Protected, Public
190 | 		$20.NamingStyle = PascalCase
191 | 		$20.IncludeInstanceMembers = True
192 | 		$20.IncludeStaticEntities = True
193 | 		$7.NamingRule = $21
194 | 		$21.Name = ReadOnly Fields
195 | 		$21.AffectedEntity = ReadonlyField
196 | 		$21.VisibilityMask = Protected, Public
197 | 		$21.NamingStyle = PascalCase
198 | 		$21.IncludeInstanceMembers = True
199 | 		$21.IncludeStaticEntities = False
200 | 		$7.NamingRule = $22
201 | 		$22.Name = Constant Fields
202 | 		$22.AffectedEntity = ConstantField
203 | 		$22.VisibilityMask = Protected, Public
204 | 		$22.NamingStyle = PascalCase
205 | 		$22.IncludeInstanceMembers = True
206 | 		$22.IncludeStaticEntities = True
207 | 		$7.NamingRule = $23
208 | 		$23.Name = Properties
209 | 		$23.AffectedEntity = Property
210 | 		$23.VisibilityMask = Protected, Public
211 | 		$23.NamingStyle = PascalCase
212 | 		$23.IncludeInstanceMembers = True
213 | 		$23.IncludeStaticEntities = True
214 | 		$7.NamingRule = $24
215 | 		$24.Name = Events
216 | 		$24.AffectedEntity = Event
217 | 		$24.VisibilityMask = Protected, Public
218 | 		$24.NamingStyle = PascalCase
219 | 		$24.IncludeInstanceMembers = True
220 | 		$24.IncludeStaticEntities = True
221 | 		$7.NamingRule = $25
222 | 		$25.Name = Enum Members
223 | 		$25.AffectedEntity = EnumMember
224 | 		$25.VisibilityMask = VisibilityMask
225 | 		$25.NamingStyle = PascalCase
226 | 		$25.IncludeInstanceMembers = True
227 | 		$25.IncludeStaticEntities = True
228 | 		$7.NamingRule = $26
229 | 		$26.Name = Parameters
230 | 		$26.AffectedEntity = Parameter
231 | 		$26.VisibilityMask = VisibilityMask
232 | 		$26.NamingStyle = CamelCase
233 | 		$26.IncludeInstanceMembers = True
234 | 		$26.IncludeStaticEntities = True
235 | 		$7.NamingRule = $27
236 | 		$27.Name = Type Parameters
237 | 		$27.RequiredPrefixes = $28
238 | 		$28.String = T
239 | 		$27.AffectedEntity = TypeParameter
240 | 		$27.VisibilityMask = VisibilityMask
241 | 		$27.NamingStyle = PascalCase
242 | 		$27.IncludeInstanceMembers = True
243 | 		$27.IncludeStaticEntities = True
244 | 		$0.VersionControlPolicy = $29
245 | 		$29.CommitMessageStyle = $30
246 | 		$30.FileSeparator = ", "
247 | 		$30.IncludeDirectoryPaths = True
248 | 		$29.inheritsSet = Mono
249 | 		$0.ChangeLogPolicy = $31
250 | 		$31.UpdateMode = None
251 | 		$31.MessageStyle = $32
252 | 		$32.LineAlign = 0
253 | 		$31.inheritsSet = Mono
254 | 		description = A library for D that aims to provide the fastest possible implementation of some every day routines.
255 | 		version = 0.3.2
256 | 		outputpath = ..
257 | 	EndGlobalSection
258 | 	GlobalSection(SolutionProperties) = preSolution
259 | 		HideSolutionNode = FALSE
260 | 	EndGlobalSection
261 | EndGlobal
262 | 


--------------------------------------------------------------------------------
/mono-d/generate unicode tables.dproj:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <PropertyGroup>
 4 |     <Configuration Condition=" '$(Configuration)' == '' ">Unicode tables</Configuration>
 5 |     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
 6 |     <ProductVersion>8.0.30703</ProductVersion>
 7 |     <SchemaVersion>2.0</SchemaVersion>
 8 |     <ProjectGuid>{BD4CA9F1-8EFC-427F-BE62-FA2B7523DAE1}</ProjectGuid>
 9 |     <BaseDirectory>..\source</BaseDirectory>
10 |     <Compiler>DMD</Compiler>
11 |     <PreferOneStepBuild>true</PreferOneStepBuild>
12 |     <UseDefaultCompiler>true</UseDefaultCompiler>
13 |     <IncrementalLinking>true</IncrementalLinking>
14 |     <ReleaseVersion>0.3.2</ReleaseVersion>
15 |   </PropertyGroup>
16 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Unicode tables|AnyCPU' ">
17 |     <OutputPath>..\generated</OutputPath>
18 |     <DDocDirectory>../docs</DDocDirectory>
19 |     <ObjectsDirectory>../generated/debug</ObjectsDirectory>
20 |     <LinkinThirdPartyLibraries>false</LinkinThirdPartyLibraries>
21 |     <UnittestMode>false</UnittestMode>
22 |     <OutputName>generate_unicode_tables</OutputName>
23 |     <Target>Executable</Target>
24 |     <ConsolePause>false</ConsolePause>
25 |     <DebugLevel>0</DebugLevel>
26 |     <DebugSymbols>true</DebugSymbols>
27 |   </PropertyGroup>
28 |   <ItemGroup>
29 |     <Folder Include="..\source\unicode\" />
30 |   </ItemGroup>
31 |   <ItemGroup>
32 |     <Compile Include="..\source\unicode\generator.d" />
33 |   </ItemGroup>
34 | </Project>


--------------------------------------------------------------------------------
/source/fast/buffer.d:
--------------------------------------------------------------------------------
  1 | ﻿/**
  2 |  * Fast buffer implementation.
  3 |  *
  4 |  * Authors:
  5 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  6 |  *
  7 |  * Copyright:
  8 |  *   © 2015 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  9 |  *
 10 |  * License:
 11 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 12 |  */
 13 | module fast.buffer; nothrow
 14 | 
 15 | import core.stdc.stdint;
 16 | import core.stdc.stdlib;
 17 | import std.range;
 18 | import core.exception;
 19 | 
 20 | 
 21 | enum allocaLimit = 2048;
 22 | 
 23 | 
 24 | /*******************************************************************************
 25 |  * 
 26 |  * Dynamic array of `T` using `malloc`, `realloc` and `free` under the hood. The
 27 |  * memory is released by the destructor; `capacity` and `length` count elements.
 28 |  *
 29 |  **************************************/
 30 | struct RaiiArray(T)
 31 | {
 32 | private:
 33 | 
 34 | 	T*     m_ptr;
 35 | 	size_t m_capacity;
 36 | 	// NOTE(review): copying is not disabled, so a copy would free the same pointer again
 37 | 	// in its destructor; TempBuffer and StackBuffer use `@disable this(this)` against that.
 38 | public:
 39 | 	/// Allocates room for `capacity` elements of `T`; a capacity of zero allocates nothing.
 40 | 	nothrow
 41 | 	this(size_t capacity)
 42 | 	{
 43 | 		if (capacity)
 44 | 		{
 45 | 			// malloc() counts bytes, not elements; refuse sizes that would wrap around.
 46 | 			if (capacity > size_t.max / T.sizeof) onOutOfMemoryError();
 47 | 			m_ptr = cast(T*) malloc(capacity * T.sizeof);
 48 | 			if (m_ptr is null) onOutOfMemoryError();
 49 | 			m_capacity = capacity;
 50 | 		}
 51 | 	}
 52 | 
 53 | 	/// Releases the allocation, if there is one.
 54 | 	nothrow @nogc
 55 | 	~this()
 56 | 	{
 57 | 		if (m_ptr !is null)
 58 | 			free(m_ptr);
 59 | 	}
 60 | 
 61 | 	/// Pointer to the first element, or `null` while nothing is allocated.
 62 | 	@safe pure nothrow @nogc
 63 | 	@property inout(T)* ptr() inout
 64 | 	{
 65 | 		return m_ptr;
 66 | 	}
 67 | 
 68 | 	/// Number of elements the current allocation was requested for.
 69 | 	@safe pure nothrow @nogc
 70 | 	@property size_t capacity() const
 71 | 	{
 72 | 		return m_capacity;
 73 | 	}
 74 | 
 75 | 	/// Resizes the allocation to `value` elements via `realloc`; a value of zero frees it.
 76 | 	nothrow
 77 | 	@property void capacity(size_t value)
 78 | 	{
 79 | 		if (value != 0)
 80 | 		{
 81 | 			// realloc() counts bytes, not elements; refuse sizes that would wrap around.
 82 | 			if (value > size_t.max / T.sizeof) onOutOfMemoryError();
 83 | 			if (T* ptrNew = cast(T*) realloc(m_ptr, value * T.sizeof))
 84 | 				m_ptr = ptrNew;
 85 | 			else onOutOfMemoryError();
 86 | 		}
 87 | 		else if (m_ptr)
 88 | 		{
 89 | 			free(m_ptr);
 90 | 			m_ptr = null;
 91 | 		}
 92 | 		m_capacity = value;
 93 | 	}
 94 | 
 95 | 	alias length = capacity;  // the array is always "full": length and capacity are the same property
 96 | 	mixin Slicing;
 97 | 	mixin CapacityTools;
 98 | }
 99 | 
100 | 
101 | /*******************************************************************************
102 |  * 
103 |  * Buffer with inline storage for at most `n` items of type `T`. Only the first
104 |  * `length` items are exposed; set `length` back to 0 to reuse the buffer.
105 |  *
106 |  **************************************/
107 | struct LimitedScopeBuffer(T, size_t n)
108 | {
109 | private:
110 | 
111 | 	T[n]   m_data;  // inline storage, part of the struct itself
112 | 	size_t m_used;  // number of leading items currently in use
113 | 
114 | 
115 | public:
116 | 
117 | 	/// Pointer to the first item of the inline storage.
118 | 	@property inout(T)* ptr() inout @safe pure nothrow @nogc
119 | 	{
120 | 		return this.m_data.ptr;
121 | 	}
122 | 
123 | 
124 | 	/// Number of items currently in use.
125 | 	@property size_t length() const @safe pure nothrow @nogc
126 | 	{
127 | 		return this.m_used;
128 | 	}
129 | 
130 | 	/// Sets the number of items in use; the contract requires it not to exceed `n`.
131 | 	@property void length(size_t value) @safe pure nothrow @nogc
132 | 	in
133 | 	{
134 | 		assert( n >= value );
135 | 	}
136 | 	body
137 | 	{
138 | 		this.m_used = value;
139 | 	}
140 | 
141 | 
142 | 	/// Slice over the items currently in use.
143 | 	inout(T)[] opSlice() inout @safe pure nothrow @nogc
144 | 	{
145 | 		return this.m_data[0 .. this.m_used];
146 | 	}
147 | }
148 | 
149 | /// Non-copyable wrapper around a temporary `T[]`; the destructor passes `slice.ptr` to `free` when `callFree` is set.
150 | struct TempBuffer(T)
151 | {
152 | 	T[] slice;      // the wrapped memory
153 | 	bool callFree;  // whether the destructor releases `slice.ptr` with `free`
154 | 	
155 | 	@disable this(this);
156 | 
157 | 	nothrow ~this()
158 | 	{
159 | 		if (callFree)
160 | 			free(slice.ptr);
161 | 	}
162 | 
163 | 	@safe pure nothrow T[] opSlice() { return slice[]; }
164 | 	@safe pure nothrow T[] opSlice(size_t a, size_t b) { return slice[a .. b]; }
165 | 	@safe pure nothrow T[] opSliceAssign(const(T)[] value, size_t a, size_t b) { return slice[a .. b] = value; }
166 | 	@safe pure nothrow ref T opIndex(size_t idx) { return slice[idx]; }
167 | 	@safe pure nothrow @property size_t size() { return slice.length * T.sizeof; }
168 | 	@safe pure nothrow @property size_t length() { return slice.length; }
169 | 	alias opDollar = length;
170 | 	@trusted pure nothrow @property T* ptr() { return slice.ptr; } // must use .ptr here for zero length strings
171 | 	alias ptr this;
172 | 
173 | 	auto makeOutputRange()
174 | 	{
175 | 		struct OutputRange
176 | 		{
177 | 			T* ptr;
178 | 			size_t idx;
179 | 
180 | 			void put(U)(auto ref U item) { ptr[idx++] = item; }
181 | 			T[] opSlice() pure nothrow { return ptr[0 .. idx]; }
182 | 		}
183 | 		return OutputRange(slice.ptr, 0);
184 | 	}
185 | }
186 | /// Wraps `length` items of `T` in a `TempBuffer`. Uses `buffer` when it is non-null (by default an `alloca` block
187 | /// for sizes up to `allocaLimit` bytes); otherwise the memory comes from `malloc` and is freed by the `TempBuffer`.
188 | TempBuffer!T tempBuffer(T, alias length, size_t allocaLimit = .allocaLimit)
189 | 	(void* buffer = (T.sizeof * length <= allocaLimit) ? alloca(T.sizeof * length) : null)
190 | {
191 | 	immutable bool onHeap = buffer is null;
192 | 	void* mem = onHeap ? malloc(T.sizeof * length) : buffer;
193 | 	// Like RaiiArray, report a failed heap allocation instead of returning a null slice of non-zero length.
194 | 	if (mem is null && length != 0) onOutOfMemoryError();
195 | 	return TempBuffer!T((cast(T*) mem)[0 .. length], onHeap);
196 | }
197 | 
198 | 
199 | /*******************************************************************************
200 |  * 
201 |  * Returns a structure to your stack that contains a buffer of `bytes` bytes.
202 |  * Memory is allocated by calling `.alloc!T(count)` on it in order to get
203 |  * `count` elements of type `T`. The return value will be a RAII structure
204 |  * that releases the memory back to the stack buffer upon destruction, so it can
205 |  * be reused. The pointer within that RAII structure is aligned to
206 |  * `T.alignof`. If the internal buffer isn't enough to fulfill the request
207 |  * including padding from alignment, then `malloc()` is used instead.
208 |  * 
209 |  * Warning:
210 |  *   Always keep the return value of `.alloc()` around on your stack until
211 |  *   you are done with its contents. Never pass it directly into functions as
212 |  *   arguments!
213 |  *
214 |  * Params:
215 |  *   bytes = The size of the buffer on the stack, in bytes.
216 |  *
217 |  * Returns:
218 |  *   A `StackBuffer!bytes` whose storage is left uninitialized.
219 |  *
220 |  **************************************/
221 | auto stackBuffer(size_t bytes)() @trusted pure
222 | {
223 | 	// All that remains of this after inlining is a stack pointer decrement and
224 | 	// a mov instruction for the `null`.
225 | 	StackBuffer!bytes result = void; // `= void`: skip default initialization of the whole struct
226 | 	result.last = cast(StackBufferEntry!void*) &result.last; // NOTE(review): stores the address of `result.last` itself; presumably relies on `result` being built in place in the caller's frame - confirm
227 | 	result.sentinel = null;
228 | 	return result;
229 | }
230 | 
231 | 
/**
 * Wraps a raw pointer in an output range that writes consecutive elements
 * starting at `t`. No bounds checking is performed. Slicing the range
 * (`range[]`) returns everything written through it so far.
 */
auto asOutputRange(T)(T* t) @safe pure
{
	struct PointerRange
	{
	private:

		T* start;  // where writing began
		T* ptr;    // next element to be written

	public:

		/// Stores `t` at the current position and moves on to the next element.
		void put()(auto ref const(T) t) pure
		{
			*this.ptr++ = t;
		}

		/// The elements written so far.
		T[] opSlice() pure
		{
			auto written = this.ptr - this.start;
			return this.start[0 .. written];
		}
	}

	static assert(isOutputRange!(PointerRange, T));
	return PointerRange(t, t);
}
256 | 
257 | 
/**
 * Returns the source text of an expression that calls `alloca(size)` when
 * `size` is within `allocaLimit` and evaluates to `null` otherwise. The text
 * refers to the names `size` and `allocaLimit` literally.
 */
enum bufferArg(alias size)()
{
	enum expression = "((size <= allocaLimit) ? alloca(size) : null)";
	return expression;
}
262 | 
263 | 
264 | 
265 | package:
266 | 
/// Fixed size memory block of `bytes` bytes from which `alloc` hands out
/// element arrays, falling back to `malloc()` when a request does not fit.
/// The struct cannot be copied.
struct StackBuffer(size_t bytes)
{
private:
	
	// Raw storage, deliberately left uninitialized.
	void[bytes] space = void;
	// Most recently created entry; `alloc` places the next allocation
	// below the address stored in that entry's `ptr`.
	StackBufferEntry!void* last;
	// Set to `null` by `stackBuffer()`.
	void* sentinel;
	
public:
	
	@disable this(this);
	
	/// Returns an entry for `howMany` elements of type `T`.
	///
	/// The candidate address is `last.ptr` rounded down to `T.alignof`. If it
	/// lies above the start of `space` and at least `howMany` elements fit
	/// between the start of `space` and it, the entry points into `space` and
	/// is linked in as the new `last`. Otherwise the memory comes from
	/// `malloc()`, or the entry's pointer is `null` when `T.sizeof * howMany`
	/// would overflow `size_t`. The `malloc()` result is not checked.
	@trusted
	StackBufferEntry!T alloc(T)(size_t howMany)
	{
		// Largest element count whose byte size still fits into a size_t.
		enum max = size_t.max / T.sizeof;
		alias SBE = StackBufferEntry!T;
		// Round the current low-water mark down to a multiple of T.alignof.
		T* target = cast(T*) (cast(uintptr_t) this.last.ptr / T.alignof * T.alignof);
		if (target > this.space.ptr && cast(uintptr_t) (target - cast(T*) this.space.ptr) >= howMany)
			return SBE(target - howMany, this.last);
		else
			// TODO: Respect alignment here as well by padding. Optionally also embed a length in the heap block, so we can provide slicing of the whole thing.
			return SBE(howMany <= max ? cast(T*) malloc(T.sizeof * howMany) : null);
	}
}
292 | 
/**
 * Handle to one allocation made through `StackBuffer.alloc`.
 *
 * An entry either points into the stack buffer, in which case it is linked
 * into a chain of entries through `prev`, or it owns a `malloc()` block, in
 * which case `prev` is `null`. On destruction the entry unlinks itself from
 * the chain or frees the heap block respectively.
 *
 * Layout:
 *   `ptr` must stay the first field and `prev` the second. `stackBuffer()`
 *   overlays a `StackBufferEntry!void` on the `last` and `sentinel` fields of
 *   a `StackBuffer`, so that the root entry's `ptr` is the `last` field (whose
 *   initial value is its own address, i.e. the end of the storage area) and
 *   its `prev` is the `null` sentinel that terminates the chain.
 *
 * Warning:
 *   The constructor stores the address of the entry inside the stack buffer,
 *   so an entry must be constructed in its final location and never moved.
 */
struct StackBufferEntry(T)
{
public:

	/// Start of the allocated elements. Must remain the first field (see above).
	T* ptr;

private:

	// Entry that was `last` before this one was created, or `null` for heap
	// blocks and for the root entry. Must remain the second field.
	StackBufferEntry!void* prev;

	/// Creates an entry that owns the `malloc()` block `ptr`.
	this(T* ptr) pure { this.ptr = ptr; }

	/// Creates an entry for memory inside the stack buffer and makes it the
	/// new `last` entry of that buffer.
	this(T* ptr, ref StackBufferEntry!void* last) pure
	{
		this.ptr = ptr;
		this.prev = last;
		last = cast(StackBufferEntry!void*) &this;
	}


public:

	static if (!is(T == void))
	{
		@disable this(this);

		/// Releases the allocation: stack entries restore the buffer's `last`
		/// pointer to the previous entry, heap entries call `free()`.
		~this() @trusted
		{
			if (this.prev)
			{
				// Walk to the root entry; it is the one without predecessor.
				StackBufferEntry!void* root = this.prev;
				while (root.prev) root = root.prev;
				// The root entry's `ptr` field is the stack buffer's `last`
				// field. Writing through the immediate predecessor instead
				// would overwrite that entry's data pointer whenever more
				// than one allocation is alive.
				auto last = cast(StackBufferEntry!void**) &root.ptr;
				*last = this.prev;
			}
			else free(this.ptr);
		}

		/// Unchecked access to the element at index `idx`.
		@system pure nothrow @nogc
		ref inout(T) opIndex(size_t idx) inout
		{
			return ptr[idx];
		}

		/// Unchecked slice of the elements `a .. b`.
		@system pure nothrow @nogc
		inout(T)[] opSlice(size_t a, size_t b) inout
		{
			return ptr[a .. b];
		}

		/// An output range writing consecutive elements starting at `ptr`.
		@safe pure nothrow @nogc
		@property auto range()
		{
			return ptr.asOutputRange();
		}
	}
}
348 | 
349 | 
350 | 
351 | private:
352 | 
/// Adds index and slice operators to an aggregate that provides `ptr`,
/// `length` and an element type `T`. Bounds are verified by `in` contracts.
mixin template Slicing()
{
	public
	{
		/// Reference to the element at `idx`, which must be below `length`.
		@nogc pure nothrow
		ref inout(T) opIndex(size_t idx) inout
		in
		{
			assert(idx < length);
		}
		body
		{
			return ptr[idx];
		}


		/// All `length` elements as a slice.
		@nogc pure nothrow
		inout(T)[] opSlice() inout
		{
			return ptr[0 .. length];
		}


		/// The elements `a .. b`; requires `a <= b <= length`.
		@nogc pure nothrow
		inout(T)[] opSlice(size_t a, size_t b) inout
		in
		{
			assert(a <= b);
			assert(b <= length);
		}
		body
		{
			return ptr[a .. b];
		}
	}
}
388 | 
389 | 
/// Adds `capacityNeeded` to an aggregate that has an assignable `capacity`.
mixin template CapacityTools()
{
	public
	{
		/// Raises `capacity` to `c` if it is currently smaller; never lowers it.
		nothrow
		void capacityNeeded(size_t c)
		{
			if (c > capacity)
				capacity = c;
		}
	}
}
402 | 


--------------------------------------------------------------------------------
/source/fast/cstring.d:
--------------------------------------------------------------------------------
  1 | /*******************************************************************************
  2 |  * 
  3 |  * Converts between UTF-8 and UTF-16.
  4 |  * 
  5 |  * Authors:
  6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  7 |  * 
  8 |  * Copyright:
  9 |  *   © 2013 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * License:
 12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 13 |  * 
 14 |  **************************************/
 15 | module fast.cstring; @nogc nothrow:
 16 | 
 17 | import core.stdc.stdlib;
 18 | import core.stdc.string;
 19 | //import std.traits;
 20 | import fast.buffer;
 21 | 
 22 | 
 23 | /**
 24 |  * Converts a string to a wstring using a buffer provided by the user.
 25 |  * To get the buffer requirements call $(D wstringSize) on your source buffer.
 26 |  *
 27 |  * Params:
 28 |  *   src = The UTF-8 string to convert.
 29 |  *   dst = The destination buffer for the conversion.
 30 |  *
 31 |  * Returns:
 32 |  *   The part of the destination buffer used for the conversion as a $(D wchar[]).
 33 |  *   A terminating zero is appended, so the result.ptr can be passed into Windows APIs.
 34 |  */
 35 | pure
 36 | wchar[] string2wstring(in char[] src, wchar* dst)
 37 | {
 38 | 	const char* srcEnd = src.ptr + src.length;
 39 | 	const(char)* srcIt = src.ptr;
 40 | 	wchar* dstIt = dst;
 41 | 
 42 | 	while (srcIt !is srcEnd)
 43 | 	{
 44 | 		// how long is the byte sequence
 45 | 		int len = 0;
 46 | 		uint mask = 0b1000_0000;
 47 | 		while(*srcIt & mask)
 48 | 		{
 49 | 			mask >>= 1;
 50 | 			len++;
 51 | 		}
 52 | 
 53 | 		// get payload of first byte
 54 | 		dchar ch = *srcIt++ & (mask - 1);
 55 | 
 56 | 		while (--len > 0)
 57 | 		{
 58 | 			// make space for 6 more bits
 59 | 			ch <<= 6;
 60 | 			ch |= *srcIt++ & 0b0011_1111;
 61 | 		}
 62 | 
 63 | 		// do we need to store a surrogate pair ?
 64 | 		static if (is(wchar == dchar))
 65 | 		{
 66 | 			*dstIt++ = ch;
 67 | 		}
 68 | 		else if (ch > wchar.max)
 69 | 		{
 70 | 			*dstIt++ = (ch >> 10) | 0xD800;
 71 | 			*dstIt++ = (ch & 0b11_1111_1111) | 0xDC00;
 72 | 		}
 73 | 		else
 74 | 		{
 75 | 			*dstIt++ = cast(wchar) ch;
 76 | 		}
 77 | 	}
 78 | 	*dstIt = 0;
 79 | 
 80 | 	return dst[0 .. dstIt - dst];
 81 | }
 82 | pure
 83 | wchar[] string2wstring(in ushort[] src, wchar* dst)
 84 | {
 85 | 	memcpy(dst, cast(wchar*) src.ptr, src.length);
 86 | 	return dst[0 .. src.length];
 87 | }
 88 | /**
 89 |  * Calculates the required buffer size in bytes for a string to wchar[] conversion.
 90 |  * Room for a terminating '\0' is included.
 91 |  *
 92 |  * Params:
 93 |  *   src = The source string.
 94 |  *
 95 |  * Returns:
 96 |  *   The maximum byte count the source string could require, including the terminating '\0'.
 97 |  *
 98 |  * See_Also:
 99 |  *   string2wstring
100 |  *   
101 |  */
102 | @safe pure
103 | size_t string2wstringSize(in char[] src)
104 | {
105 | 	enum limit = size_t.max / wchar.sizeof - 1;
106 | 	return src.length <= limit ? wchar.sizeof * (src.length + 1) : size_t.max;
107 | }
/// Same computation for input given as UTF-16 code units: one $(D wchar) per
/// source element plus one for the terminating '\0', or $(D size_t.max) if
/// the byte count cannot be represented.
@safe pure
size_t string2wstringSize(in ushort[] src)
{
	enum limit = size_t.max / wchar.sizeof - 1;
	if (src.length > limit)
		return size_t.max;
	return wchar.sizeof * (src.length + 1);
}
114 | 
115 | 
/**
 * Converts a UTF-16 string to UTF-8 using a buffer provided by the user.
 * To get the buffer requirements call $(D wstring2stringSize) on your source buffer.
 *
 * The input is expected to be valid UTF-16; a high surrogate is assumed to be
 * followed by its low surrogate. A high surrogate at the very end of the
 * input is written as a three byte sequence instead of reading past the end.
 *
 * Params:
 *   src = The UTF-16 string to convert.
 *   dst = The destination buffer for the conversion.
 *
 * Returns:
 *   The part of the destination buffer used for the conversion as a $(D char[]).
 *   A terminating zero is appended, so the result.ptr can be passed into C APIs.
 */
pure
char[] wstring2string(in wchar[] src, char* dst)
{
	const wchar* srcEnd = src.ptr + src.length;
	const(wchar)* srcIt = src.ptr;
	char* dstIt = dst;

	while (srcIt !is srcEnd)
	{
		immutable uint unit = *srcIt++;

		if (unit < 0x80)
		{
			// ASCII: one byte
			*dstIt++ = cast(char) unit;
		}
		else if (unit < 0x800)
		{
			// two bytes: 110xxxxx 10xxxxxx
			*dstIt++ = cast(char) (0xC0 | (unit >> 6));
			*dstIt++ = cast(char) (0x80 | (unit & 0x3F));
		}
		else if (unit < 0xD800 || unit > 0xDBFF || srcIt is srcEnd)
		{
			// anything else within the BMP (<= 0xFFFF), but not a high surrogate
			// that has a successor: 1110xxxx 10xxxxxx 10xxxxxx
			*dstIt++ = cast(char) (0xE0 | (unit >> 12));
			*dstIt++ = cast(char) (0x80 | ((unit >> 6) & 0x3F));
			*dstIt++ = cast(char) (0x80 | (unit & 0x3F));
		}
		else
		{
			// high surrogate, assume correct encoding and that the next wchar is the low surrogate
			immutable uint low = *srcIt++;
			// The pair carries 20 bits that are relative to U+10000.
			immutable uint decoded = 0x1_0000 + (((unit & 0x3FF) << 10) | (low & 0x3FF));
			// four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			*dstIt++ = cast(char) (0xF0 | (decoded >> 18));
			*dstIt++ = cast(char) (0x80 | ((decoded >> 12) & 0x3F));
			*dstIt++ = cast(char) (0x80 | ((decoded >> 6) & 0x3F));
			*dstIt++ = cast(char) (0x80 | (decoded & 0x3F));
		}
	}
	*dstIt = 0;
	
	return dst[0 .. dstIt - dst];
}
169 | 
/**
 * Computes how many bytes a destination buffer needs for converting a UTF-16
 * string with $(D wstring2string). Three bytes are reserved per source
 * $(D wchar) plus one for the terminating '\0'.
 *
 * Params:
 *   src = The source string.
 *
 * Returns:
 *   The maximum byte count the conversion could require, including the
 *   terminating '\0', or $(D size_t.max) if that count cannot be represented.
 *
 * See_Also:
 *   wstring2string
 */
@safe pure
size_t wstring2stringSize(in wchar[] src)
{
	enum limit = (size_t.max / char.sizeof - 1) / 3;
	if (src.length > limit)
		return size_t.max;
	return char.sizeof * (3 * src.length + 1);
}
190 | 
191 | 
/**
 * Replaces $(D std.utf.toUTFz) with a version that converts into memory
 * obtained from $(D alloca) as long as the required byte count is within
 * $(D allocaLimit). Longer strings use $(D malloc) to create the buffer for
 * the conversion, and the returned buffer is told to free it.
 *
 * Params:
 *   str = The source string to convert.
 *   buffer = Memory for the conversion. Leave at its default, which is
 *            evaluated at the call site.
 *
 * Returns:
 *   A $(D TempBuffer!wchar) holding the zero terminated UTF-16 text.
 *
 * Example:
 * ---
 * string text = "Hello, world!";
 * WinApiW(wcharPtr!text);
 * ---
 */
auto wcharPtr(alias str)(void* buffer = string2wstringSize(str) <= allocaLimit ? alloca(string2wstringSize(str)) : null)
{
	// A TempBuffer is returned in either case; its flag decides whether the
	// memory is passed to free() later on.
	immutable bool onHeap = buffer is null;
	wchar* target = cast(wchar*) (onHeap ? malloc(string2wstringSize(str)) : buffer);
	return TempBuffer!wchar(string2wstring(str, target), onHeap);
}
216 | 
/// Overload for $(D wstring)s whose value is known at compile time: no
/// conversion or allocation is needed, the pointer to the data is returned.
immutable(wchar)* wcharPtr(alias wstr)()
	if (is(typeof(wstr) == wstring) && __traits(compiles, { enum wstring e = wstr; }))
{
	// D string literals (known at compile time) are always \0-terminated.
	immutable(wchar)* literal = wstr.ptr;
	return literal;
}
224 | 
/**
 * $(D char*) version of $(D wcharPtr). Basically it appends a \0 to the input.
 * The copy is placed in memory obtained from $(D alloca) as long as the string
 * including its terminator fits within $(D allocaLimit); longer strings use
 * $(D malloc), and the returned buffer is told to free that memory.
 * 
 * Params:
 *   str = The source string to convert to a C UTF-8 string
 *   buffer = Memory for the copy. Leave at its default, which is evaluated
 *            at the call site.
 * 
 * Note:
 *   Do not use this to call Windows ANSI functions! Always use wide-char
 *   functions on this operating system unless you want to deal with codepages.
 *
 * Example:
 * ---
 * string text = "Hello, world!";
 * linuxApi(charPtr!text);
 * ---
 */
auto charPtr(alias str)(void* buffer = (str.length < allocaLimit) ? alloca(str.length + 1) : null)
	if (is(typeof(str) : const(char)[]) || is(typeof(str) : const(ubyte)[]))
{
	// `str.length < allocaLimit` is `str.length + 1 <= allocaLimit` without
	// the possibility of overflow. A null buffer selects the heap.
	immutable bool onHeap = buffer is null;
	char* dst = cast(char*) memcpy(onHeap ? malloc(str.length + 1) : buffer, str.ptr, str.length);
	dst[str.length] = '\0';
	return TempBuffer!char(dst[0 .. str.length], onHeap);
}
249 | 
/// Overload for strings whose value is known at compile time: no copy is
/// made, the pointer to the data is returned.
immutable(char)* charPtr(alias str)()
	if (__traits(compiles, { enum string e = str; }))
{
	// D string literals (known at compile time) are always \0-terminated.
	immutable(char)* literal = str.ptr;
	return literal;
}
257 | 
/**
 * This overload allocates the required memory from an existing stack buffer.
 * It requests $(D str.length + 1) chars from $(D sb), copies the string into
 * them and appends a terminating '\0'.
 *
 * Params:
 *   str = The source string to convert to a C UTF-8 string
 *   sb = The stack buffer to allocate from
 *
 * Returns:
 *   The $(D StackBufferEntry!char) that holds the zero terminated copy.
 * 
 * Note:
 *   Always assign the result to an auto variable first for RAII to work correctly.
 */
StackBufferEntry!char charPtr(SB)(const(char)[] str, ref SB sb)
	if (is(SB == StackBuffer!bytes, bytes...))
{
	// One extra char for the terminating zero.
	// NOTE(review): the pointer inside the entry is used without a null check.
	auto buffer = sb.alloc!char(str.length + 1);
	memcpy(buffer.ptr, str.ptr, str.length);
	buffer[str.length] = '\0';
	// NOTE(review): the entry type disables copying, so this presumably
	// relies on `buffer` being constructed in the caller's variable - confirm.
	return buffer;
}
276 | 
/**
 * Views the zero terminated C string at $(D ptr) as a $(D char[]) that ends
 * right before the \0. A $(D null) pointer yields a $(D null) slice.
 */
inout(char)[] asString(inout(char*) ptr) @trusted pure
{
	return ptr is null ? null : ptr[0 .. strlen(ptr)];
}


--------------------------------------------------------------------------------
/source/fast/format.d:
--------------------------------------------------------------------------------
  1 | ﻿/*******************************************************************************
  2 |  * 
  3 |  * Functions for formatting data into strings and back.
  4 |  * 
  5 |  * Authors:
  6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  7 |  * 
  8 |  * Copyright:
  9 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * License:
 12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 13 |  * 
 14 |  **************************************/
 15 | module fast.format;
 16 | 
 17 | import core.stdc.stdlib;
 18 | import core.stdc.string;
 19 | import core.bitop;
 20 | import std.string;
 21 | import std.traits;
 22 | import std.typecons;
 23 | import std.typetuple;
 24 | import fast.internal.helpers;
 25 | 
 26 | 
 27 | /+
 28 |  ╔══════════════════════════════════════════════════════════════════════════════
 29 |  ║ ⚑ Hex String
 30 |  ╚══════════════════════════════════════════════════════════════════════════════
 31 |  +/
 32 | 
 33 | /**
 34 |  * Converts an unsigned type into a fixed width 8 digits hex string using lower-case letters.
 35 |  * 
 36 |  * Params:
 37 |  *   n = the number to convert
 38 |  * 
 39 |  * Returns:
 40 |  *   hexadecimal representation of $(D n), lower-case letters
 41 |  */
 42 | @safe pure nothrow @nogc
 43 | char[2 * U.sizeof] hexStrLower(U)(Unqual!U n) if (isUnsigned!U)
 44 | {
 45 | 	char[2 * U.sizeof] hex = void;
 46 | 	foreach_reverse (i; 0 .. 2 * U.sizeof)
 47 | 	{
 48 | 		U d = n & U(0xF);
 49 | 		hex[i] = cast(char) (d < 10 ? '0' + d : 'a' + d - 10);
 50 | 		n >>= 4;
 51 | 	}
 52 | 	return hex;
 53 | }
 54 | 
 55 | 
 56 | /**
 57 |  * Converts an unsigned type into a fixed width 8 digits hex string using upper-case letters.
 58 |  * 
 59 |  * Params:
 60 |  *   n = the number to convert
 61 |  * 
 62 |  * Returns:
 63 |  *   hexadecimal representation of $(D n), upper-case letters
 64 |  */
 65 | @safe pure nothrow @nogc
 66 | char[2 * U.sizeof] hexStrUpper(U)(U n) if (isUnsigned!U)
 67 | {
 68 | 	char[2 * U.sizeof] hex = void;
 69 | 	foreach_reverse (i; 0 .. 2 * U.sizeof)
 70 | 	{
 71 | 		U d = n & U(0xF);
 72 | 		hex[i] = cast(char) (d < 10 ? '0' + d : 'A' + d - 10);
 73 | 		n >>= 4;
 74 | 	}
 75 | 	return hex;
 76 | }
 77 | 
 78 | 
 79 | /+
 80 |  ╔══════════════════════════════════════════════════════════════════════════════
 81 |  ║ ⚑ Decimal String
 82 |  ╚══════════════════════════════════════════════════════════════════════════════
 83 |  +/
 84 | 
 85 | template decDigits(T) if (isIntegral!T)
 86 | {
 87 | 	static if (is(T == ulong))
 88 | 		enum decDigits = 20;
 89 | 	else static if (is(T == long))
 90 | 		enum decDigits = 19;
 91 | 	else static if (is(T == uint) || is(T == int))
 92 | 		enum decDigits = 10;
 93 | 	else static if (is(T == ushort) || is(T == short))
 94 | 		enum decDigits = 5;
 95 | 	else static if (is(T == ubyte) || is(T == byte))
 96 | 		enum decDigits = 3;
 97 | }
 98 | 
 99 | 
/// Maximum number of characters in the decimal text of a `T`: its digits
/// plus one more character for the sign if `T` is signed.
template decChars(T)
{
	enum decChars = decDigits!T + isSigned!T;
}
101 | 
102 | 
/**
 * Converts an integer into its decimal text.
 *
 * Params:
 *   i = the number to convert
 *
 * Returns:
 *   A $(D RevFillStr) with room for the longest possible text of type $(D I),
 *   holding the digits of $(D i), preceded by '-' for negative values.
 */
@safe pure nothrow @nogc
RevFillStr!(decChars!I) decStr(I)(I i) if (isIntegral!I)
{
	RevFillStr!(decChars!I) str;

	static if (isSigned!I)
	{
		bool signed = i < 0;
		// Work on the magnitude. The explicit cast covers the narrow types,
		// whose negation is computed as `int`, and maps the most negative
		// value onto its magnitude in the unsigned type.
		UnsignedOf!I u = cast(UnsignedOf!I) (i < 0 ? -i : i);
	}
	else alias u = i;

	// The string is filled back to front, so the digits are produced from
	// least to most significant. A do-while emits "0" for zero.
	do
	{
		str ~= cast(char) ('0' + u % 10);
		u /= 10;
	}
	while (u);

	static if (isSigned!I) if (signed)
		str ~= '-';

	return str;
}
128 | 
129 | 
130 | /+
131 |  ╔══════════════════════════════════════════════════════════════════════════════
132 |  ║ ⚑ Formatting
133 |  ╚══════════════════════════════════════════════════════════════════════════════
134 |  +/
135 | 
/// `true` for types whose formatted length has a compile-time upper bound
/// here: integral types and pointers.
template hasKnownSpaceRequirement(T)
{
	enum hasKnownSpaceRequirement = isIntegral!T || isPointer!T;
}
143 | 
144 | 
/**
 * Upper bound for the number of characters produced when a value of type `T`
 * is written with the format specifier `format`.
 *
 * Integral types accept "%s" and "%d" (decimal); unsigned ones additionally
 * "%x" and "%X" (two hex digits per byte). Pointers accept "%s" and "%p" (two
 * hex digits per byte). Any other combination is rejected at compile time.
 */
template spaceRequirement(string format, T) if (hasKnownSpaceRequirement!T)
{
	static if (isIntegral!T)
	{
		static if (format == "%d" || format == "%s")
			enum spaceRequirement = decChars!T;
		else static if ((format == "%x" || format == "%X") && isUnsigned!T)
			enum spaceRequirement = 2 * T.sizeof;
		else
			static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
	}
	else static if (isPointer!T)
	{
		static if (format == "%p" || format == "%s")
			enum spaceRequirement = 2 * T.sizeof;
		else
			static assert (0, "Don't know how to handle " ~ T.stringof ~ " as " ~ format);
	}
	else
		static assert (0, "Don't know how to handle " ~ T.stringof);
}
163 | 
164 | 
/// Upper bound for the number of characters needed to format `Args` with the
/// format string `format`: the lengths of all literal parts plus the space
/// requirement of every formatted argument.
enum spaceRequirements(string format, Args...)() if (allSatisfy!(hasKnownSpaceRequirement, Args))
{
	alias tokens = tokenizedFormatString!format;

	size_t total = 0;
	foreach (i; staticIota!(0, tokens.length))
	{
		// An argument index of size_t.max marks literal text.
		static if (tokens[i][1] != size_t.max)
			total += spaceRequirement!(tokens[i][0], Args[tokens[i][1]]);
		else
			total += tokens[i][0].length;
	}
	return total;
}
180 | 
181 | 
/**
 * Splits `format` at compile time into a static array of (text, index)
 * tuples. Literal text is stored with the index `size_t.max`. Every '%'
 * starts a specifier that is taken to be exactly two characters long and is
 * stored together with a running argument index that starts at 0.
 *
 * Limitations visible in the implementation: specifiers longer than two
 * characters are not recognized, "%%" is treated as a specifier that consumes
 * an argument, and a '%' as the very last character slices beyond the end of
 * the string.
 */
template tokenizedFormatString(string format)
{
	enum impl()
	{
		Tuple!(string, size_t)[] parts;
		// Index of the next argument to be consumed.
		size_t i = 0;
		// Part of the format string that has not been tokenized yet.
		string rest = format;

		while (1)
		{
			ptrdiff_t markerPos = rest.indexOf("%");
			// No further specifier: what is left, if anything, is literal text.
			if (markerPos < 0)
				return rest.length ? parts ~ tuple(rest, size_t.max) : parts;

			// Literal text in front of the specifier.
			if (markerPos)
			{
				parts ~= tuple(rest[0 .. markerPos], size_t.max);
				rest = rest[markerPos .. $];
			}

			// TODO: more complex formats
			parts ~= tuple(rest[0 .. 2], i++);
			rest = rest[2 .. $];
		}
	}

	// Evaluate once during compilation and expose the result as a static array.
	enum result = impl();
	static immutable Tuple!(string, size_t)[result.length] tokenizedFormatString = result;
}
211 | 
212 | 
/// Number of arguments the format string `format` consumes: one more than
/// the highest argument index among its tokens, or 0 if there is none.
enum formatStringArgCount(string format)()
{
	alias tokens = tokenizedFormatString!format;

	size_t needed = 0;
	foreach (i; staticIota!(0, tokens.length))
	{
		// Literal text carries the index size_t.max and is skipped.
		immutable size_t argIndex = tokens[i][1];
		if (argIndex != size_t.max && argIndex >= needed)
			needed = argIndex + 1;
	}
	return needed;
}
224 | 
225 | 
/**
 * Formats its arguments according to the compile-time format string `fmt`
 * and returns the result as a `string`.
 *
 * The function is generated as source text and mixed in. It has one template
 * type parameter and one function parameter per format argument, followed by
 * a `buffer` parameter that defaults to a newly allocated array of
 * `spaceRequirements!(fmt, ...)` chars. For example a `fmt` with two
 * arguments generates:
 * ---
 * pure nothrow string format(A0, A1)(A0 a0, A1 a1,
 *     char[] buffer = new char[](spaceRequirements!(fmt, A0, A1)))
 * { return assumeUnique(formattedWrite!fmt(buffer.ptr, a0, a1)); }
 * ---
 */
template format(string fmt)
{
	import std.exception;

	enum argCnt = formatStringArgCount!fmt;

	/// Builds the source code of the function described above.
	enum codeGen()
	{
		string typeParams;   // "A0, A1"
		string valueParams;  // "A0 a0, A1 a1, " including the separator before `buffer`
		string typeArgs;     // ", A0, A1"
		string valueArgs;    // ", a0, a1"
		foreach (i; staticIota!(0, argCnt))
		{
			if (i)
			{
				typeParams ~= `, `;
				valueParams ~= `, `;
			}
			typeParams ~= std.string.format("A%s", i);
			valueParams ~= std.string.format("A%s a%s", i, i);
			typeArgs ~= std.string.format(", A%s", i);
			valueArgs ~= std.string.format(", a%s", i);
		}
		// Only separate `buffer` from preceding parameters if there are any;
		// a format string without arguments would otherwise yield "(, char[] ...".
		if (argCnt)
			valueParams ~= `, `;

		return `pure nothrow string format(` ~ typeParams ~ `)(` ~ valueParams
			~ `char[] buffer = new char[](spaceRequirements!(fmt` ~ typeArgs
			~ `))) { return assumeUnique(formattedWrite!fmt(buffer.ptr` ~ valueArgs
			~ `)); }`;
	}

	mixin(codeGen());
}
258 | 
259 | 
/**
 * Formats its arguments according to the compile-time format string `fmt`
 * into memory obtained from `alloca()` and returns the written part as a
 * `char[]`.
 *
 * The function is generated as source text and mixed in. It has one template
 * type parameter and one function parameter per format argument, followed by
 * a `buffer` parameter whose default argument calls
 * `alloca(spaceRequirements!(fmt, ...))`. For example a `fmt` with one
 * argument generates:
 * ---
 * pure nothrow @nogc char[] formata(A0)(A0 a0,
 *     void* buffer = alloca(spaceRequirements!(fmt, A0)))
 * { return formattedWrite!fmt(cast(char*) buffer, a0); }
 * ---
 */
template formata(string fmt)
{
	enum argCnt = formatStringArgCount!fmt;

	/// Builds the source code of the function described above.
	enum codeGen()
	{
		string typeParams;   // "A0, A1"
		string valueParams;  // "A0 a0, A1 a1, " including the separator before `buffer`
		string typeArgs;     // ", A0, A1"
		string valueArgs;    // ", a0, a1"
		foreach (i; staticIota!(0, argCnt))
		{
			if (i)
			{
				typeParams ~= `, `;
				valueParams ~= `, `;
			}
			typeParams ~= std.string.format("A%s", i);
			valueParams ~= std.string.format("A%s a%s", i, i);
			typeArgs ~= std.string.format(", A%s", i);
			valueArgs ~= std.string.format(", a%s", i);
		}
		// Only separate `buffer` from preceding parameters if there are any;
		// a format string without arguments would otherwise yield "(, void* ...".
		if (argCnt)
			valueParams ~= `, `;

		return `pure nothrow @nogc char[] formata(` ~ typeParams ~ `)(` ~ valueParams
			~ `void* buffer = alloca(spaceRequirements!(fmt` ~ typeArgs
			~ `))) { return formattedWrite!fmt(cast(char*) buffer` ~ valueArgs
			~ `); }`;
	}

	mixin(codeGen());
}
290 | 
291 | 
/**
 * Formats its arguments according to the compile-time format string `fmt`
 * into a `LimitedScopeBuffer!(char, spaceRequirements!(fmt, ...))` and
 * returns that buffer with its length set to the number of chars written.
 *
 * The function is generated as source text and mixed in. For example a `fmt`
 * with one argument generates:
 * ---
 * @trusted pure nothrow @nogc auto formats(A0)(A0 a0)
 * {
 *     LimitedScopeBuffer!(char, spaceRequirements!(fmt, A0)) buffer;
 *     buffer.length = formattedWrite!fmt(buffer.ptr, a0).length;
 *     return buffer;
 * }
 * ---
 * The function is `@trusted` rather than `@safe` because it hands a raw
 * pointer to `formattedWrite`; the buffer is sized by the same
 * `spaceRequirements` computation that bounds what is written.
 *
 * NOTE(review): `LimitedScopeBuffer` has to be visible where the code is
 * mixed in - confirm that the module imports its definition.
 */
template formats(string fmt)
{
	enum argCnt = formatStringArgCount!fmt;
	
	/// Builds the source code of the function described above.
	enum codeGen()
	{
		string typeParams;   // "A0, A1"
		string valueParams;  // "A0 a0, A1 a1"
		string typeArgs;     // ", A0, A1"
		string valueArgs;    // ", a0, a1"
		foreach (i; staticIota!(0, argCnt))
		{
			if (i)
			{
				typeParams ~= `, `;
				valueParams ~= `, `;
			}
			typeParams ~= std.string.format("A%s", i);
			valueParams ~= std.string.format("A%s a%s", i, i);
			typeArgs ~= std.string.format(", A%s", i);
			valueArgs ~= std.string.format(", a%s", i);
		}

		// The parameter list is closed by exactly one parenthesis.
		return `@trusted pure nothrow @nogc auto formats(` ~ typeParams ~ `)(` ~ valueParams
			~ `) { LimitedScopeBuffer!(char, spaceRequirements!(fmt` ~ typeArgs
			~ `)) buffer; buffer.length = formattedWrite!fmt(buffer.ptr` ~ valueArgs
			~ `).length; return buffer; }`;
	}
	
	mixin(codeGen());
}
322 | 
323 | 
/**
 * Writes `args` into `buffer` as described by the compile-time format string
 * `format`. Literal parts are copied, every specifier is handed to the
 * matching `formattedWriteItem` overload. No bounds checks are performed.
 *
 * Params:
 *   buffer = Destination; must be large enough for the complete output.
 *   args = The values to format.
 *
 * Returns:
 *   The written part of `buffer`.
 */
char[] formattedWrite(string format, Args...)(char* buffer, Args args)
{
	alias tokens = tokenizedFormatString!format;

	char* cursor = buffer;
	foreach (i; staticIota!(0, tokens.length))
	{
		static if (tokens[i][1] != size_t.max)
		{
			// Formatted argument; the callee advances the cursor.
			formattedWriteItem!(tokens[i][0])( cursor, args[tokens[i][1]] );
		}
		else
		{
			// Direct string copy
			enum literalLength = tokens[i][0].length;
			memcpy( cursor, tokens[i][0].ptr, literalLength );
			cursor += literalLength;
		}
	}

	return buffer[0 .. cursor - buffer];
}
346 | 
347 | 
/// Writes the unsigned integer `t` as fixed width lower-case hex digits to
/// `buffer` and advances `buffer` past them.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
	if (isUnsigned!T && format == "%x")
{
	auto hex = hexStrLower!T(t);
	memcpy( buffer, hex.ptr, hex.length );
	buffer += hex.length;
}
356 | 
357 | 
/// Writes the unsigned integer `t` as fixed width upper-case hex digits to
/// `buffer` and advances `buffer` past them.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
	if (isUnsigned!T && format == "%X")
{
	auto hex = hexStrUpper!T(t);
	memcpy( buffer, hex.ptr, hex.length );
	buffer += hex.length;
}
366 | 
367 | 
/// Writes the integer `t` in decimal notation to `buffer` and advances
/// `buffer` past the written characters.
pure nothrow @nogc
void formattedWriteItem(string format, T)(ref char* buffer, T t)
	if (isIntegral!T && (format == "%s" || format == "%d"))
{
	auto decimal = decStr(t);
	immutable size_t count = decimal.length;
	memcpy( buffer, decimal.ptr, count );
	buffer += count;
}
376 | 
377 | 
/// Writes the address held by `p` as fixed width upper-case hex digits to
/// `buffer` and advances `buffer` past them.
pure nothrow @nogc
void formattedWriteItem(string format)(ref char* buffer, void* p)
	if (format == "%s" || format == "%p")
{
	immutable size_t address = cast(size_t) p;
	formattedWriteItem!"%X"( buffer, address );
}
384 | 
385 | 
386 | /+
387 |  ╔══════════════════════════════════════════════════════════════════════════════
388 |  ║ ⚑ Helper Structs
389 |  ╚══════════════════════════════════════════════════════════════════════════════
390 |  +/
391 | 
/**
 * Fixed capacity string of up to `n` chars that is filled from the back:
 * every appended char is placed in front of the ones already present. The
 * struct implicitly converts to a slice of its current content.
 */
struct RevFillStr(size_t n)
{
private:

	// Index of the first used char; equals `n` while the string is empty.
	size_t offset = n;
	char[n] buffer = '\0';


public:

	alias opSlice this;

	@safe pure nothrow @nogc
	{
		/// Places `ch` in front of the current content. There must be room left.
		void opOpAssign(string op : "~")(char ch)
		in
		{
			assert( offset > 0 );
		}
		body
		{
			--offset;
			buffer[offset] = ch;
		}


		/// The current content.
		@property inout(char)[] opSlice() inout
		{
			return buffer[offset .. $];
		}


		/// Pointer to the first char of the current content.
		@property inout(char)* ptr() inout
		{
			return &buffer[offset];
		}


		/// Number of chars currently stored.
		@property size_t length() const
		{
			return buffer.length - offset;
		}
	}
}


--------------------------------------------------------------------------------
/source/fast/internal/benchmarks.d:
--------------------------------------------------------------------------------
  1 | ﻿/***************************************************************************************************
  2 |  * 
  3 |  * Internal benchmark module.
  4 |  * 
  5 |  * Authors:
  6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  7 |  * 
  8 |  * Copyright:
  9 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * License:
 12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 13 |  * 
 14 |  **************************************************************************************************/
 15 | module fast.internal.benchmarks;
 16 | 
 17 | version (benchmark):
 18 | 
/// Entry point of the benchmark build. Sets up the shared input data, then
/// runs one group of competing implementations after the other through the
/// `run` and `benchmark` helpers.
/// NOTE(review): `run` and `benchmark` are defined outside of this function;
/// the second argument of `run` looks like the result every candidate is
/// expected to return - confirm.
void main()
{
	import std.stdio;
	import core.stdc.string, core.stdc.stddef, core.stdc.stdlib;
	import std.array, std.stdio, std.algorithm, std.regex, std.utf, std.conv, std.string, std.range;
	import fast.string, fast.cstring, fast.buffer, fast.format, fast.json;
	import std.format : formattedWrite;

	// Input data shared by the benchmarks below.
	// 256 numbers; element i is i * 2^56.
	static immutable nums = { ulong[1uL << 8] nums = void; foreach (i; 0 .. nums.length) nums[i] = (1uL << (64 - 8)) * i; return nums; }();
	static immutable part1 = "C:\\";
	static immutable part2 = "Documents and Settings\\User\\My Documents\\My Downloads\\";
	static immutable part3 = "Fast.zip";
	static immutable pathname = "hello/i_am_a/path_name\\with_several_different\\slashes";
	static immutable zeroterm = "wefwfnqwefnw(eknwoemkf)moorroijqwoijq&oqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwefnw(eknwoemkf)moorroijqwoihqioqo(vqwojkpjavnal(nvo(eirvn$wefwfnqwef\"w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0";
	static pathSepRegex = ctRegex!`[/\\]`;
	// Length of `pathname` in code units of the platform's wchar_t.
	enum pathnameWStringLength = to!(immutable(wchar_t)[])(pathname).length;

	unicode();

	jsonCoordinates!true();
	jsonCoordinates!false();

	// Each candidate formats all of `nums` and sums up the char at index 9 of every result.
	run ("Format strings for integers...", 13093,
		benchmark ("std.*.format", () { uint check; foreach (ulong num; nums) { string str = format("decimal: %s, hex: %x", num, num); check += str[9]; } return check; } ),
		benchmark ("fast.*.format", () { uint check; foreach (ulong num; nums) { string str = fast.format.format!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ),
		benchmark ("fast.*.formata", () { uint check; foreach (ulong num; nums) { char[] str = formata!"decimal: %s, hex: %x"(num, num); check += str[9]; } return check; } ),
		);

	// Each candidate converts all of `nums` to 16 hex digits and sums up the first char.
	run ("Convert 256 numbers to fixed width hex strings...", 0x20,
		benchmark ("std.*.formattedWrite", () { Appender!(char[]) app; app.reserve(16); char check = 0; foreach (ulong num; nums) { app.formattedWrite("%016X", num); check += app.data[0]; app.clear(); } return check; }),
		benchmark ("fast.*.hexStrUpper", () { char[16] str; char check = 0; foreach (ulong num; nums) { str = hexStrUpper(num); check += str[0]; } return check; }),
		);

	// Each candidate joins the three path parts and returns the resulting length.
	run ("Concatenate a known number of strings...", part1.length + part2.length + part3.length,
		benchmark ("std.array.appender", () { auto app = appender(part1); app ~= part2; app ~= part3; return app.data.length; }),
		benchmark ("~", () { string path = part1 ~ part2 ~ part3; return path.length; }),
		benchmark ("fast.string.concat", () { size_t length; { auto path = concat!(part1, part2, part3); length = path.length; } return length; }),
		);

	// Each candidate obtains `zeroterm.length` chars and returns the last one.
	// The "new" candidate does not fill its array explicitly.
	run ("Allocate a temporary char buffer and fill it with 0xFF...", '\xFF',
		benchmark ("new", () { auto str = new char[](zeroterm.length); return str[$-1]; }),
		benchmark ("malloc", () { auto ptr = cast(char*) malloc(zeroterm.length); scope(exit) free(ptr); memset(ptr, 0xFF, zeroterm.length); return ptr[zeroterm.length-1]; }),
		benchmark ("fast.buffer.tempBuffer", () { char result; { auto buf = tempBuffer!(char, zeroterm.length); memset(buf, 0xFF, zeroterm.length); result = buf[$-1]; } return result; }),
		);

	// Each candidate converts `pathname` and returns the wchar right behind the converted text.
	run("Convert a string to a wchar*...", wchar('\0'),
		benchmark ("toUTFz", () { return toUTFz!(wchar*)(pathname)[pathnameWStringLength]; }),
		benchmark ("cstring.wcharPtr", () { wchar result; { auto buf = wcharPtr!pathname; result = buf.ptr[pathnameWStringLength]; } return result; }),
		);

	// Each candidate converts `pathname` and returns the char right behind the text.
	run("Convert a string to a char*...", '\0',
		benchmark ("toUTFz", () { return toUTFz!(char*)(pathname)[pathname.length]; }),
		benchmark ("toStringz", () { return cast(char) toStringz(pathname)[pathname.length]; }),
		benchmark ("cstring.charPtr", () { return cast(char) charPtr!pathname[pathname.length]; }),
		);

	// Each candidate splits `zeroterm` repeatedly and returns the text behind the last separator found.
	run ("Split a string at each occurance of <, >, & and \"...", "w(eknwoemkf)moorroijqwoijqioqo(vqwojkpjavnal(nvo(eirvn$\0",
		benchmark (`while+if with 4 cond.`, () { string before; immutable(char*) stop = zeroterm.ptr + zeroterm.length; immutable(char)* iter = zeroterm.ptr; immutable(char)* done = zeroterm.ptr; if (iter !is stop) do { char c = *iter++; if (c == '<' || c == '>' || c == '&' || c == '"') { before = done[0 .. iter - done]; done = iter; }} while (iter !is stop); return done[0 .. stop - done]; }),
		benchmark ("fast.string.split", () { string before, after = zeroterm; while (fast.string.split!`or(or(=<,=>),or(=&,="))`(after, before, after)) {} return before; }),
		);

	// Each candidate splits `pathname` at both kinds of slashes and returns the last component.
	run ("Split a path by '/' or '\\'...", "slashes",
		benchmark ("std.regex.split", () { return split(pathname, pathSepRegex)[$-1]; }),
		benchmark ("std.regex.splitter", () { string last; auto range = splitter(pathname, pathSepRegex); while (!range.empty) { last = range.front; range.popFront(); } return last; }),
		benchmark ("fast.string.split", () { string before, after = pathname; while (fast.string.split!`or(=\,=/)`(after, before, after)) {} return before; }),
		);

	writeln("Benchmark done!");
}
 88 | 
 89 | 
 90 | 
 91 | private:
 92 | /// Benchmarks grapheme counting: `std.uni.byGrapheme` versus `countGraphemes` (presumably from `fast.unicode`).
 93 | void unicode()
 94 | {
 95 | 	import std.range, std.uni, std.string, std.meta;
 96 | 	import fast.unicode;
 97 | 	// Sample texts; each literal is repeated 10 times to form the benchmark input.
 98 | 	static immutable string devanagari = cast(string)"तदपि कही गुर बारंिह बारा। समुझि परी कछु मति अनुसारा।।
 99 | भाषाबद्ध करबि मैं सोई। मोरें मन प्रबोध जेंिह होई।।
100 | जस कछु बुधि बिबेक बल मेरें। तस कहिहउँ हियँ हरि के प्रेरें।।
101 | निज संदेह मोह भ्रम हरनी। करउँ कथा भव सरिता तरनी।।
102 | बुध बिश्राम सकल जन रंजनि। रामकथा कलि कलुष बिभंजनि।।
103 | रामकथा कलि पंनग भरनी। पुनि बिबेक पावक कहुँ अरनी।।
104 | रामकथा कलि कामद गाई। सुजन सजीवनि मूरि सुहाई।।
105 | सोइ बसुधातल सुधा तरंगिनि। भय भंजनि भ्रम भेक भुअंगिनि।।
106 | असुर सेन सम नरक निकंदिनि। साधु बिबुध कुल हित गिरिनंदिनि।।
107 | संत समाज पयोधि रमा सी। बिस्व भार भर अचल छमा सी।।
108 | जम गन मुहँ मसि जग जमुना सी। जीवन मुकुति हेतु जनु कासी।।
109 | रामहि प्रिय पावनि तुलसी सी। तुलसिदास हित हियँ हुलसी सी।।
110 | सिवप्रय मेकल सैल सुता सी। सकल सिद्धि सुख संपति रासी।।
111 | सदगुन सुरगन अंब अदिति सी। रघुबर भगति प्रेम परमिति सी।।
112 | ".representation.repeat(10).join.array();
113 | 	static immutable string latin = "A gory knife had been found close to the murdered man, and it had been
114 | recognized by somebody as belonging to Muff Potter--so the story ran.
115 | And it was said that a belated citizen had come upon Potter washing
116 | himself in the \"branch\" about one or two o'clock in the morning, and
117 | that Potter had at once sneaked off--suspicious circumstances,
118 | especially the washing which was not a habit with Potter. It was also
119 | said that the town had been ransacked for this \"murderer\" (the public
120 | are not slow in the matter of sifting evidence and arriving at a
121 | verdict), but that he could not be found. Horsemen had departed down
122 | all the roads in every direction, and the Sheriff \"was confident\" that
123 | he would be captured before night.
124 | ".repeat(10).join.array();
125 | 	// Runs both implementations on `text`; `count` is handed to `run` as the expected grapheme count.
126 | 	void benchCountGraphemes(alias text)(size_t count)
127 | 	{
128 | 		run ("Count graphemes in " ~ text.stringof ~ " text...", count,
129 | 			benchmark ("byGrapheme.walkLength", () { return text.byGrapheme.walkLength(); }),
130 | 			benchmark ("fast.graphemeCount", () { return text.countGraphemes(); }),
131 | 			);
132 | 	}
133 | 	benchCountGraphemes!devanagari(5430);
134 | 	benchCountGraphemes!latin(7210);
135 | }
136 | 
137 | /// Benchmarks `std.json` against `fast.json` on a generated coordinate list; `integral` selects integer or floating-point coordinates.
138 | void jsonCoordinates(bool integral)()
139 | {
140 | 	// A variant of https://github.com/kostya/benchmarks with less coordinate tuples,
141 | 	// since we repeat the test runs until a time span of one second passed.
142 | 	import core.memory;
143 | 	import std.algorithm;
144 | 	import std.ascii;
145 | 	import std.format;
146 | 	import std.random;
147 | 	import std.range;
148 | 	import std.typecons;
149 | 	import fast.internal.sysdef;
150 | 
151 | 	enum coordCount = 10_000;
152 | 	auto rng = Mt19937(0);  // fixed seed, so the generated document is the same on every run
153 | 	__gshared string text = "{\n  \"coordinates\": [\n";
154 | 	foreach (i; 0 .. coordCount)
155 | 	{
156 | 		static if (integral)
157 | 		{
158 | 			text ~= format("    {\n      \"x\": %s,\n      \"y\": %s,\n      \"z\": %s,\n" ~
159 | 				"      \"name\": \"%s %s\",\n      \"opts\": {\n        \"1\": [\n          1,\n          true\n" ~
160 | 				"        ]\n      }\n    }", uniform(0, 10_000, rng), uniform(0, 10_000, rng), uniform(0, 10_000, rng),
161 | 				iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng));
162 | 		}
163 | 		else
164 | 		{
165 | 			text ~= format("    {\n      \"x\": %.17g,\n      \"y\": %.17g,\n      \"z\": %.17g,\n" ~
166 | 				"      \"name\": \"%s %s\",\n      \"opts\": {\n        \"1\": [\n          1,\n          true\n" ~
167 | 				"        ]\n      }\n    }", uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng), uniform(0.0, 1.0, rng),
168 | 				iota(5).map!(_ => lowercase[uniform(0, $, rng)]), uniform(0, 10000, rng));
169 | 		}
170 | 		text ~= (i == coordCount - 1) ? "\n" : ",\n";
171 | 	}
172 | 	text ~= "  ],\n  \"info\": \"some info\"\n}\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
173 | 	text = text[0 .. $-16];
174 | 	// The 16 NUL bytes stay in memory right behind the slice (presumably padding for the parser - TODO confirm).
175 | 	GC.collect();
176 | 
177 | 	// Dlang on x86 with optimizations rounds up double additions.
178 | 	static if (integral)
179 | 	{
180 | 		version (X86)
181 | 			enum expect = tuple(4986L, 4997L, 4988L);
182 | 		else
183 | 			enum expect = tuple(5003L, 4979L, 4971L);
184 | 	}
185 | 	else static if (isDMD && isX86 && (!isRelease || isRelease && (__VERSION__ < 2069 || __VERSION__ > 2070)) ||
186 | 			isGDC && isX86)
187 | 		enum expect = tuple(0.49823454184104704, 0.50283215330409059, 0.49828840592580270);
188 | 	else static if (!isX86 || !isRelease)
189 | 		enum expect = tuple(0.49683911677479053, 0.50166077554665356, 0.49647639699603635);
190 | 	else
191 | 		enum expect = tuple(0.49823454184171062, 0.50283215330485886, 0.49828840592673407);
192 | 	// innerLoop = 1 and mul = coordCount: the printed rate counts coordinates, not whole documents.
193 | 	run!(1, coordCount)("JSON 3D coordinates (" ~ (integral ? "integers" : "floating-point") ~ ")", expect,
194 | 		benchmark("std.json", {
195 | 				import std.json;
196 | 				
197 | 				auto json = parseJSON(text);
198 | 				auto coordinates = json["coordinates"].array;
199 | 				size_t len = coordinates.length;
200 | 				static if (integral)
201 | 					long x, y, z;
202 | 				else
203 | 					double x = 0, y = 0, z = 0;
204 | 				foreach (i; 0 .. len)
205 | 				{
206 | 					auto coord = coordinates[i];
207 | 					static if (integral)
208 | 					{
209 | 						x += coord["x"].integer;
210 | 						y += coord["y"].integer;
211 | 						z += coord["z"].integer;
212 | 					}
213 | 					else
214 | 					{
215 | 						x += coord["x"].floating;
216 | 						y += coord["y"].floating;
217 | 						z += coord["z"].floating;
218 | 					}
219 | 				}
220 | 				// The result is the per-axis average, returned as a tuple.
221 | 				return tuple(x / long(len), y / long(len), z / long(len));
222 | 			}),
223 | //		benchmark("stdx.data.json", {
224 | //				import stdx.data.json.lexer;
225 | //				import stdx.data.json.parser;
226 | //
227 | //				auto json = parseJSONStream!(LexOptions.useBigInt)(text);
228 | //				json.skipToKey("coordinates");
229 | //				size_t len;
230 | //				double x = 0, y = 0, z = 0;
231 | //				json.readArray(delegate() @trusted {
232 | //						json.readObject!(typeof(json))(delegate(string key) @trusted {
233 | //							if (key == "x")
234 | //									x += json.readDouble();
235 | //								else if (key == "y")
236 | //									y += json.readDouble();
237 | //								else if (key == "z")
238 | //									z += json.readDouble();
239 | //								else
240 | //									json.skipValue();
241 | //							});
242 | //						len++;
243 | //					});
244 | //
245 | //				return tuple(x / len, y / len, z / len);
246 | //			}),
247 | 		benchmark("fast.json", {
248 | 				import fast.json;
249 | 
250 | 				auto json = Json!(validateAll, true)(text);
251 | 				long len;
252 | 
253 | 				static if (integral)
254 | 				{
255 | 					long x, y, z;
256 | 					foreach (i; json.coordinates)
257 | 					{
258 | 						json.keySwitch!("x", "y", "z")(
259 | 							{ x += json.read!long; },
260 | 							{ y += json.read!long; },
261 | 							{ z += json.read!long; }
262 | 							);
263 | 						len++;
264 | 					}
265 | 				}
266 | 				else
267 | 				{
268 | 					double x = 0, y = 0, z = 0;
269 | 					foreach (i; json.coordinates)
270 | 					{
271 | 						json.keySwitch!("x", "y", "z")(
272 | 							{ x += json.read!double; },
273 | 							{ y += json.read!double; },
274 | 							{ z += json.read!double; }
275 | 						);
276 | 						len++;
277 | 					}
278 | 				}
279 | 				// Same per-axis average as in the std.json variant.
280 | 				return tuple(x / len, y / len, z / len);
281 | 			}),
282 | 		);
283 | }
284 | 
285 | 
286 | /*******************************************************************************
287 |  * 
288 |  * Times every given `Benchmark` for at least one second each and prints how
289 |  * many calls per second it achieved, rated against the first entry.
290 |  *
291 |  * Params:
292 |  *   innerLoop = number of back-to-back calls made between two clock readings
293 |  *   mul = factor applied to the reported rate; leave at `1` unless a single
294 |  *         call already performs the measured action several times
295 |  *   title = heading printed in bold above the results
296 |  *   expectation = value every benchmarked function is supposed to return;
297 |  *                 NOTE: the comparison against it is currently disabled
298 |  *   benchmarks = the `Benchmark`s to compare; the first one is the reference
299 |  *                that all others are rated against
300 |  *
301 |  **************************************/
302 | void run(uint innerLoop = 1000, uint mul = 1, R)(in string title, in R expectation, in Benchmark!R[] benchmarks...)
303 | {
304 | 	import core.time, std.stdio, std.exception, std.string;
305 | 
306 | 	writeln("\x1b[1m", title, "\x1b[0m");
307 | 	writeln();
308 | 	ulong baseline;  // calls per second of the first benchmark
309 | 	foreach (idx, ref candidate; benchmarks) {
310 | 		// One untimed call up front. Its result is not compared with
311 | 		// `expectation` at the moment (that check is switched off).
312 | 		candidate.run();
313 | 
314 | 		// Repeat batches of `innerLoop` calls until a full second has elapsed.
315 | 		ulong batches = 0;
316 | 		immutable started = TickDuration.currSystemTick;
317 | 		TickDuration elapsed;
318 | 		for (;;) {
319 | 			foreach (_; 0 .. innerLoop)
320 | 				candidate.run();
321 | 			batches++;
322 | 			elapsed = TickDuration.currSystemTick - started;
323 | 			if (elapsed.seconds) break;
324 | 		}
325 | 		ulong rate = batches * innerLoop * mul * 1_000_000_000 / elapsed.nsecs;
326 | 		if (idx == 0) {
327 | 			baseline = rate;
328 | 			writefln("  %-22s: %10s per second", candidate.title, rate);
329 | 		} else if (rate < baseline)
330 | 			writefln("  %-22s: %10s per second (slower by factor %.1f)", candidate.title, rate, 1.0 * baseline / rate);
331 | 		else
332 | 			writefln("\x1b[1m  %-22s: %10s per second (done in %.0f%% of time !)\x1b[0m", candidate.title, rate, 100.0 * baseline / rate);
333 | 	}
334 | 	writeln();
335 | }
336 | 
337 | 
338 | /*******************************************************************************
339 |  * Convenience constructor for `Benchmark` values; `R` is inferred from `run`.
340 |  *
341 |  * Params:
342 |  *   title = label printed next to this entry in the statistics of `run`
343 |  *   run   = the function to be measured
344 |  *
345 |  * Returns: a `Benchmark!R` holding both arguments
346 |  **************************************/
347 | Benchmark!R benchmark(R)(string title, R function() run)
348 | {
349 | 	Benchmark!R entry;
350 | 	entry.title = title;
351 | 	entry.run   = run;
352 | 	return entry;
353 | }
354 | 
355 | 
356 | /*******************************************************************************
357 |  * 
358 |  * Information about a benchmarked function.
359 |  *
360 |  **************************************/
361 | struct Benchmark(R)
362 | {
363 | 	string title;      /// label shown in the output of `run`
364 | 	R function() run;  /// the function being timed; `R` is its return type
365 | }
366 | 


--------------------------------------------------------------------------------
/source/fast/internal/helpers.d:
--------------------------------------------------------------------------------
  1 | /***************************************************************************************************
  2 |  * 
  3 |  * Helper functions that serve general purposes.
  4 |  * 
  5 |  * Authors:
  6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  7 |  * 
  8 |  * Copyright:
  9 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * License:
 12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 13 |  * 
 14 |  **************************************************************************************************/
 15 | module fast.internal.helpers;
 16 | 
 17 | import std.traits;
 18 | import fast.internal.sysdef;
 19 | 
 20 | 
 21 | private enum 一META一PROGRAMMING一;
 22 | 
 23 | // From 2.071 on, visibility rules are enforced, so `staticIota` is defined here instead of being imported from Phobos.
 24 | static if (__VERSION__ < 2071)
 25 | {
 26 | 	import std.typecons : staticIota;
 27 | }
 28 | else
 29 | {
 30 | 	import std.meta : AliasSeq;
 31 | 
 32 | 	/// Yields the compile-time sequence of all `int`s in `[beg, end$(RPAREN)`.
 33 | 	template staticIota(int beg, int end)
 34 | 	{
 35 | 		static if (beg + 1 < end)
 36 | 		{
 37 | 			// Two or more values: halve the range and concatenate both parts.
 38 | 			enum mid = beg + (end - beg) / 2;
 39 | 			alias staticIota = AliasSeq!(staticIota!(beg, mid), staticIota!(mid, end));
 40 | 		}
 41 | 		else static if (beg < end)
 42 | 		{
 43 | 			// Exactly one value (unary plus kept as in the previous version).
 44 | 			alias staticIota = AliasSeq!(+beg);
 45 | 		}
 46 | 		else
 47 | 		{
 48 | 			alias staticIota = AliasSeq!();
 49 | 		}
 50 | 	}
 51 | }
 52 | 
 53 | 
 54 | /**
 55 |  * For any integral type, returns the unsigned type of the same bit-width.
 56 |  *
 57 |  * Unsigned types are passed through unchanged. Signed types are mapped with
 58 |  * `std.traits.Unsigned`, which keeps type qualifiers, so that for example
 59 |  * `UnsignedOf!(const int)` is `const(uint)`.
 60 |  *
 61 |  * Params:
 62 |  *   I = any integral type, optionally qualified
 63 |  */
 64 | template UnsignedOf(I) if (isIntegral!I)
 65 | {
 66 | 	static if (isUnsigned!I)
 67 | 		alias UnsignedOf = I;
 68 | 	else
 69 | 		alias UnsignedOf = Unsigned!I;
 70 | }
 71 | 
 72 | 
 73 | /**
 74 |  * Builds a mixin string out of repeated code, e.g. to unroll variadic arguments.
 75 |  * The format string is instantiated `length` times with an incrementing index
 76 |  * and the instances are concatenated, optionally separated by a joiner.
 77 |  *
 78 |  * Params:
 79 |  *   length = Number of instances to generate. The index handed to format() runs over [0 .. length$(RPAREN).
 80 |  *   fmt = The format string applied for each instance. Use %1$d to refer to the current index more than once.
 81 |  *   joiner = Optional string placed between instances, for example a space or an arithmetic operator.
 82 |  *
 83 |  * Returns:
 84 |  *   The concatenated instances as a mixin string.
 85 |  *
 86 |  * See_Also:
 87 |  *   $(LINK2 http://forum.dlang.org/thread/vqfvihyezbmwcjkmpzin@forum.dlang.org, A simple way to do compile time loop unrolling)
 88 |  */
 89 | enum ctfeJoin(size_t length)(in string fmt, in string joiner = null)
 90 | {
 91 | 	import std.string : format;
 92 | 
 93 | 	// A plain counting loop is used because join() could not be used here
 94 | 	// ("cannot access the nested function 'ctfeJoin'").
 95 | 	string joined;
 96 | 	foreach (size_t index; 0 .. length)
 97 | 	{
 98 | 		if (joined && joiner) joined ~= joiner;
 99 | 		joined ~= format(fmt, index);
100 | 	}
101 | 	return joined;
102 | }
103 | 
104 | /// Returns the first attribute of type `T` attached to `sym`, or `T.init` when there is none.
105 | enum getUDA(alias sym, T)()
106 | {
107 | 	foreach (attribute; __traits(getAttributes, sym)) {
108 | 		static if (is(typeof(attribute) == T)) return attribute;
109 | 	}
110 | 	return T.init;
111 | }
112 | 
113 | 
114 | private enum 一BIT一OPERATIONS一;
115 | 
116 | static import core.bitop;
117 | 
118 | alias bsr = core.bitop.bsr;
119 | alias bsf = core.bitop.bsf;
120 | 
121 | /*******************************************************************************
122 |  * Count leading zeroes. One implementation is picked per compiler: DMD derives
123 |  * it from `bsr`, GDC uses the `__builtin_clz` family and LDC calls `llvm_ctlz`.
124 |  * Where `isX86` is set, a separate overload handles `ulong` arguments.
125 |  *
126 |  * Params:
127 |  *   u = the unsigned value to scan
128 |  *
129 |  * Returns:
130 |  *   The number of leading zero bits before the first one bit. If `u` is `0`,
131 |  *   the result is undefined.
132 |  **************************************/
133 | version (DigitalMars)
134 | {
135 | 	@safe @nogc pure nothrow U
136 | 	clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t))
137 | 	{
138 | 		pragma(inline, true);
139 | 		enum U max = 8 * U.sizeof - 1;  // index of the most significant bit of U
140 | 		return max - bsr(u);
141 | 	}
142 | 
143 | 	static if (isX86)
144 | 	{
145 | 		@safe @nogc pure nothrow uint
146 | 		clz(U)(U u) if (is(Unqual!U == ulong))
147 | 		{
148 | 			pragma(inline, true);
149 | 			uint hi = u >> 32;
150 | 			return hi ? 31 - bsr(hi) : 63 - bsr(cast(uint)u);  // upper half first, else the lower half
151 | 		}
152 | 	}
153 | }
154 | else version (GNU)
155 | {
156 | 	import gcc.builtins;
157 | 	alias clz = __builtin_clz;
158 | 	static if (isX86)
159 | 	{
160 | 		@safe @nogc pure nothrow uint
161 | 		clz(ulong u)
162 | 		{
163 | 			uint hi = u >> 32;
164 | 			return hi ? __builtin_clz(hi) : 32 + __builtin_clz(cast(uint)u);  // same split into 32-bit halves
165 | 		}
166 | 	}
167 | 	else alias clz = __builtin_clzl;
168 | }
169 | else version (LDC)
170 | {
171 | 	@safe @nogc pure nothrow U
172 | 	clz(U)(U u) if (is(Unqual!U == uint) || is(Unqual!U == size_t))
173 | 	{
174 | 		pragma(inline, true);
175 | 		import ldc.intrinsics;
176 | 		return llvm_ctlz(u, false);
177 | 	}
178 | 
179 | 	static if (isX86)
180 | 	{
181 | 		@safe @nogc pure nothrow uint
182 | 		clz(U)(U u) if (is(Unqual!U == ulong))
183 | 		{
184 | 			pragma(inline, true);
185 | 			import ldc.intrinsics;
186 | 			return cast(uint)llvm_ctlz(u, false);  // result narrowed to uint like the other ulong overloads
187 | 		}
188 | 	}
189 | }
190 | static if (__VERSION__ < 2071)
191 | {
192 | 	// < 2.071 did not have 64-bit bsr/bsf on x86.
193 | 	@safe @nogc pure nothrow uint
194 | 	bsr(U)(U u) if (is(Unqual!U == ulong))
195 | 	{
196 | 		pragma(inline, true);
197 | 		uint hi = u >> 32;
198 | 		return hi ? bsr(hi) + 32 : bsr(cast(uint)u);  // a set bit in the upper half wins; its index is offset by 32
199 | 	}
200 | 
201 | 	@safe @nogc pure nothrow uint
202 | 	bsf(U)(U u) if (is(Unqual!U == ulong))
203 | 	{
204 | 		pragma(inline, true);
205 | 		uint lo = cast(uint)u;
206 | 		return lo ? bsf(lo) : 32 + bsf(u >> 32);  // a set bit in the lower half wins; otherwise scan the upper half
207 | 	}
208 | }
209 | unittest  // clz, bsr and bsf on 32-bit values and on 64-bit values with the relevant bit in either half
210 | {
211 | 	assert(clz(uint(0x01234567)) == 7);
212 | 	assert(clz(ulong(0x0123456701234567)) == 7);
213 | 	assert(clz(ulong(0x0000000001234567)) == 7+32);
214 | 	assert(bsr(uint(0x01234567)) == 24);
215 | 	assert(bsr(ulong(0x0123456701234567)) == 24+32);
216 | 	assert(bsr(ulong(0x0000000001234567)) == 24);
217 | 	assert(bsf(uint(0x76543210)) == 4);
218 | 	assert(bsf(ulong(0x7654321076543210)) == 4);
219 | 	assert(bsf(ulong(0x7654321000000000)) == 4+32);
220 | }
221 | 
222 | 
223 | private enum 一UNITTESTING一;
224 | 
225 | // Insert a dummy main when unittesting outside of dub.
226 | version (VibeCustomMain) {} else version (unittest) void main() {}
227 | 
228 | 
229 | private enum 一MISCELLANEOUS一;
230 | 
231 | pure nothrow @nogc
232 | {
233 | 	/**
234 | 	 * Rounds a pointer up to the nearest address that is a multiple of $(D pot)
235 | 	 * (a power of two); an already aligned $(D ptr) is returned unchanged.
236 | 	 */
237 | 	T* alignPtrNext(T)(scope T* ptr, in size_t pot)
238 | 	in { assert(pot > 0 && pot.isPowerOf2); }
239 | 	body { immutable mask = pot - 1; return cast(T*) ((cast(size_t) ptr + mask) & ~mask); }
240 | 	unittest { assert(alignPtrNext(cast(void*) 65, 64) == cast(void*) 128); }
241 | }
242 | 
243 | 
244 | @nogc @safe pure nothrow
245 | {
246 | 	/// Tells whether the (positive) argument has exactly one bit set, i.e. is a power of two.
247 | 	@property bool isPowerOf2(in size_t n)
248 | 	in { assert(n > 0); }
249 | 	body { immutable below = n - 1; return !(n & below); }
250 | 
251 | 	version (LDC) {  // LDC: `moveMask` is declared as the LLVM intrinsic named in the pragma below
252 | 		import core.simd;
253 | 		pragma(LDC_intrinsic, "llvm.x86.sse2.pmovmskb.128")
254 | 			uint moveMask(ubyte16);
255 | 	} else version (GNU) {  // GDC: `moveMask` is an alias of the matching GCC builtin
256 | 		import gcc.builtins;
257 | 		alias moveMask = __builtin_ia32_pmovmskb128;
258 | 	}  // no `moveMask` is provided for any other compiler
259 | 	
260 | 	template SIMDFromScalar(V, alias scalar)  // function returning `scalar` as a value of vector type `V`
261 | 	{
262 | 		// This wrapper is needed for optimal performance with LDC and
263 | 		// doesn't hurt GDC's inlining.
264 | 		V SIMDFromScalar() {
265 | 			enum V asVectorEnum = scalar;  // the conversion to `V` happens in a compile-time constant
266 | 			return asVectorEnum;
267 | 		}
268 | 	}
269 | 
270 | 
271 | 	template SIMDFromString(string str) if (str.length <= 16)  // `str` zero-padded to 16 bytes, in a compiler-dependent form
272 | 	{
273 | 		import core.simd, std.algorithm, std.range, std.string;
274 | 
275 | 		private enum data = chain(str.representation, 0.repeat(16 - str.length)).array;  // bytes of `str`, then zeros
276 | 
277 | 		static if (!isDMD)
278 | 			immutable ubyte16 SIMDFromString = data;
279 | 		else version (D_PIC)
280 | 		{
281 | 			import std.format;
282 | 			void SIMDFromString() @safe @nogc pure nothrow  // DMD with PIC: the bytes are emitted via `db` in a naked function
283 | 			{
284 | 				mixin(format("asm @trusted @nogc pure nothrow { naked; db %(%s,%); }", data));
285 | 			}
286 | 		}
287 | 		else static if (isX86)
288 | 			align(16) __gshared ubyte[16] SIMDFromString = data;  // DMD on 32-bit x86: a 16-byte aligned array
289 | 		else
290 | 			__gshared ubyte16 SIMDFromString = data;  // remaining DMD targets: a vector variable
291 | 	}
292 | }
293 | 


--------------------------------------------------------------------------------
/source/fast/internal/sysdef.di:
--------------------------------------------------------------------------------
  1 | ﻿/***************************************************************************************************
  2 |  * 
  3 |  * Definitions that abstract from the architecture or operating system.
  4 |  * 
  5 |  * As far as possible these will alias existing definitions from OS headers to facilitate integration
  6 |  * with other code.
  7 |  * 
  8 |  * Authors:
  9 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * Copyright:
 12 |  *   © 2016 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 13 |  * 
 14 |  * License:
 15 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 16 |  * 
 17 |  **************************************************************************************************/
 18 | module fast.internal.sysdef;
 19 | 
 20 | 
 21 | private enum 一ARCHITECTURE一;
 22 | 
 23 | // Both flags are defined on every target, so `static if (isX86)` also compiles outside the x86 family.
 24 | version (X86_64) enum isAMD64 = true;
 25 | else             enum isAMD64 = false;
 26 | 
 27 | // `isX86` means 32-bit x86 only; it is `false` on x86-64.
 28 | version (X86) enum isX86 = true;
 29 | else          enum isX86 = false;
 30 | 
 31 | version (X86_64)  // only x86-64 is flagged as having SSE2; every other target, 32-bit x86 included, gets `false`
 32 | 	enum hasSSE2 = true;
 33 | else
 34 | 	enum hasSSE2 = false;
 35 | 
 36 | 
 37 | private enum 一OPERATING一SYSTEM一;
 38 | 
 39 | version (Posix)  // `isPosix` mirrors the `Posix` version identifier as a compile-time bool
 40 | 	enum isPosix = true;
 41 | else
 42 | 	enum isPosix = false;
 43 | 
 44 | version (Windows)  // `isWindows` mirrors the `Windows` version identifier
 45 | 	enum isWindows = true;
 46 | else
 47 | 	enum isWindows = false;
 48 | 
 49 | /*******************************************************************************
 50 |  * 
 51 |  * Despite Phobos' use of `char[]` UTF-8 strings for file names, their internal
 52 |  * representation in the operating system is a sequence of 8- or 16-bit values.
 53 |  * On Windows this means that one could get invalid surrogate pairings and on
 54 |  * Linux, a file name can have any 8-bit encoding that keeps '/' at the same
 55 |  * code point as ASCII. That's why portable file names should only use a subset
 56 |  * of ASCII that is interpreted the same in all supported encodings.
 57 |  * 
 58 |  * MSDN mentions that file paths should be treated as a sequence of `WCHAR`:
 59 |  * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
 60 |  *
 61 |  **************************************/
 62 | static if (isPosix)
 63 | 	alias FileChar = ubyte;   // 8-bit units on Posix
 64 | else version (Windows)
 65 | 	alias FileChar = ushort;  // 16-bit units on Windows
 66 | else static assert(0, "Not implemented");
 67 | 
 68 | alias Filename = FileChar[];  /// a file name as a sequence of `FileChar` units
 69 | 
 70 | 
 71 | private enum 一COMPILER一UNIFICATION一;
 72 | 
 73 | // One flag per supported compiler, each defined independently so that all three always exist.
 74 | version (LDC)
 75 | 	enum isLDC = true;
 76 | else
 77 | 	enum isLDC = false;
 78 | version (GNU)
 79 | 	enum isGDC = true;
 80 | else
 81 | 	enum isGDC = false;
 82 | version (DigitalMars)
 83 | 	enum isDMD = true;
 84 | else
 85 | 	enum isDMD = false;
 86 | 
 87 | version (DigitalMars)
 88 | {
 89 | 	enum noinline;
 90 | 	enum forceinline;
 91 | 	enum sse4_2; enum sse4;  // placeholders: `sse4_2` is the name GDC and LDC define, `sse4` the former DMD-only name
 92 | }
 93 | else version (GNU)
 94 | {
 95 | 	import gcc.attribute;
 96 | 	enum noinline    = gcc.attribute.attribute("noinline");
 97 | 	enum forceinline = gcc.attribute.attribute("forceinline");
 98 | 	enum sse4_2      = gcc.attribute.attribute("target", "sse4.2");
 99 | }
100 | else version (LDC)
101 | {
102 | 	import ldc.attributes;
103 | 	enum noinline;
104 | 	enum forceinline;
105 | 	enum sse4_2      = ldc.attributes.target("+sse4.2");
106 | }
107 | 
108 | version (assert)  // `assert` is set while assertions are compiled in, which counts as a non-release build here
109 | 	enum isRelease = false;
110 | else
111 | 	enum isRelease = true;
112 | 
113 | version (D_PIC)  // `isPIC` mirrors the `D_PIC` version identifier
114 | 	enum isPIC = true;
115 | else
116 | 	enum isPIC = false;
117 | 


--------------------------------------------------------------------------------
/source/fast/intmath.d:
--------------------------------------------------------------------------------
 1 | ﻿/***************************************************************************************************
 2 |  * 
 3 |  * Supplementary integer math functions.
 4 |  * 
 5 |  * Authors:
 6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 7 |  * 
 8 |  * Copyright:
 9 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
10 |  * 
11 |  * License:
12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
13 |  * 
14 |  **************************************************************************************************/
15 | module fast.intmath;
16 | 
17 | import fast.internal.helpers;
18 | import fast.internal.sysdef;
19 | 
20 | 
21 | version (LDC)  // LDC: multiply through the LLVM overflow intrinsic
22 | {
23 | 	@safe @nogc pure nothrow
24 | 	ulong mulu(ulong x, ulong y, ref bool overflow)
25 | 	{
26 | 		import ldc.intrinsics;
27 | 		auto res = llvm_umul_with_overflow(x, y);
28 | 		overflow |= res.overflow;  // only ever set the flag, like core.checkedint.mulu and the asm versions below
29 | 		return res.result;
30 | 	}
31 | }
32 | else static if (isPosix && isGDC && (isAMD64 || isX86))  // GDC on Posix x86/x86-64: hand-written assembly
33 | {
34 | 	@nogc pure nothrow
35 | 	ulong mulu(ulong x, ulong y, ref bool overflow)
36 | 	{
37 | 		version (GNU)
38 | 		{
39 | 			ulong lo;  // NOTE(review): below, the high dword of x is addressed as `4+%2` but that of y as `4%3` - confirm the latter is intended
40 | 			version (X86) asm { "
41 | 					cmp $0, 4+%2
42 | 					je 1f
43 | 					cmp $0, 4%3
44 | 					je 1f
45 | 					movb $1, %1
46 | 					1:
47 | 					mov 4+%2, %%eax
48 | 					mull %3
49 | 					jno 2f
50 | 					movb $1, %1
51 | 					2:
52 | 					mov %%eax, %%ecx
53 | 					mov %2, %%eax
54 | 					mull 4%3
55 | 					jno 3f
56 | 					movb $1, %1
57 | 					3:
58 | 					add %%eax, %%ecx
59 | 					jno 4f
60 | 					movb $1, %1
61 | 					4:
62 | 					mov %2, %%eax
63 | 					mull %3
64 | 					add %%ecx, %%edx
65 | 					jnc 5f
66 | 					movb $1, %1
67 | 					5:
68 | 					" : "=&A" lo, "+*m" overflow : "m" x, "m" y : "ecx"; }
69 | 			else asm { "mul %3\njno 1f\nmovb $1, %1\n1:\n" : "=a" lo, "+*m" overflow : "a" x, "r" y : "rdx"; }  // x86-64: one `mul`; the flag is set on overflow, never cleared
70 | 			return lo;
71 | 		}
72 | 	}
73 | }
74 | else
75 | {
76 | 	// DMD is already faster than my ASM code above, no need to improve. Good job Walter et al.
77 | 	import core.checkedint;
78 | 	alias mulu = core.checkedint.mulu;
79 | }
80 | 


--------------------------------------------------------------------------------
/source/fast/json.d:
--------------------------------------------------------------------------------
   1 | /***************************************************************************************************
   2 |  * 
   3 |  * A fast JSON parser implementing RFC 7159.
   4 |  * 
   5 |  * The most prominent change compared to the initial revision is the allowance of all data types as
   6 |  * root values, not just objects and arrays.
   7 |  * 
   8 |  * Usage_Hints:
   9 |  *   $(UL
  10 |  *     $(LI This parser only supports UTF-8 without BOM.)
  11 |  *     $(LI When a JSON object has duplicate keys, the last one in the set will determine the value
  12 |  *          of associative-array entries or struct fields.)
  13 |  *     $(LI `BigInt` and large number parsing are not implemented currently, but all integral types
  14 |  *          as well as minimal exact representations of many `double` values are supported.)
  15 |  *   )
  16 |  * 
  17 |  * Authors:
  18 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  19 |  * 
  20 |  * Copyright:
  21 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  22 |  * 
  23 |  * License:
  24 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
  25 |  * 
  26 |  **************************************************************************************************/
  27 | module fast.json;
  28 | 
  29 | import core.stdc.string;
  30 | 
  31 | import std.ascii;
  32 | import std.conv;
  33 | import std.exception;
  34 | import std.file;
  35 | import std.json;
  36 | import std.range;
  37 | import std.string : representation, format;
  38 | import std.traits;
  39 | import std.uni;
  40 | 
  41 | import fast.buffer;
  42 | import fast.cstring;
  43 | import fast.internal.sysdef;
  44 | import fast.parsing;
  45 | 
  46 | 
  47 | /*******************************************************************************
  48 |  * 
  49 |  * Loads a file as JSON text and validates the used parts. This includes a UTF-8
  50 |  * validation on strings.
  51 |  *
  52 |  * Params:
  53 |  *   fname = The file name to load.
  54 |  *   vl = Validation level passed on to `Json`; defaults to `validateUsed`.
  55 |  * Returns:
  56 |  *   A JSON file object exposing the `Json` API.
  57 |  *
  58 |  **************************************/
  59 | auto parseJSONFile(uint vl = validateUsed)(in char[] fname)
  60 | { return Json!vl.File(fname); }
  61 | 
  62 | /// ditto
  63 | auto parseJSONFile(uint vl = validateUsed)(in Filename fname)
  64 | { return Json!vl.File(fname); }
  65 | 
  66 | 
  67 | /*******************************************************************************
  68 |  * 
  69 |  * Loads a JSON string and validates the used parts. Unlike the file loaders,
  70 |  * no UTF-8 validation is done here (`Json` is instantiated with `false`).
  71 |  *
  72 |  * Params:
  73 |  *   text = The string to load.
  74 |  *   vl = Validation level passed on to `Json`; defaults to `validateUsed`.
  75 |  * Returns:
  76 |  *   A `Json` struct.
  77 |  *
  78 |  **************************************/
  79 | auto parseJSON(uint vl = validateUsed, T : const(char)[])(T text) nothrow
  80 | { return Json!(vl, false)(text); }
  81 | 
  82 | 
  83 | /*******************************************************************************
  84 |  * 
  85 |  * Load a file as JSON text that is considered 100% correct. No checks will be
  86 |  * performed, not even if you try to read a number as a string.
  87 |  *
  88 |  * Params:
  89 |  *   fname = The file name to load.
  90 |  *
  91 |  * Returns:
  92 |  *   A JSON file object exposing the `Json` API.
  93 |  *
  94 |  **************************************/
  95 | Json!trustedSource.File parseTrustedJSONFile(in char[] fname)
  96 | { return Json!trustedSource.File(fname); }
  97 | 
  98 | /// ditto
  99 | version(Windows){}else  // the `Filename` overload is not compiled on Windows
 100 | Json!trustedSource.File parseTrustedJSONFile(in Filename fname)
 101 | { return Json!trustedSource.File(fname); }
 102 | 
 103 | 
 104 | /*******************************************************************************
 105 |  * 
 106 |  * Load a JSON string that is considered 100% correct. No checks will be
 107 |  * performed, not even if you try to read a number as a string.
 108 |  *
 109 |  * Params:
 110 |  *   text = The string to load; any type implicitly convertible to `const(char)[]`.
 111 |  *
 112 |  * Returns:
 113 |  *   A `Json` struct.
 114 |  *
 115 |  **************************************/
 116 | auto parseTrustedJSON(T : const(char)[])(T text) nothrow
 117 | { return Json!(trustedSource, false)(text); }
 118 | 
 119 | 
 120 | /*******************************************************************************
 121 |  *
 122 |  * Validates a JSON text file.
 123 |  *
 124 |  * Params:
 125 |  *   fname = The file name to load.
 126 |  *
 127 |  * Throws:
 128 |  *   JSONException on validation errors.
 129 |  *
 130 |  **************************************/
 131 | void validateJSONFile(in char[] fname)
 132 | { Json!(validateAll, true).File(fname).skipValue(); }
 133 | 
 134 | /// ditto
 135 | version(Windows){} else  // the `Filename` overload is not compiled on Windows
 136 | void validateJSONFile(in Filename fname)
 137 | { Json!(validateAll, true).File(fname).skipValue(); }
 138 | 
 139 | 
 140 | /*******************************************************************************
 141 |  *
 142 |  * Validates a JSON string, using `validateAll` with UTF-8 validation enabled.
 143 |  *
 144 |  * Params:
 145 |  *   text = The string to load.
 146 |  *
 147 |  * Throws:
 148 |  *   JSONException on validation errors.
 149 |  *
 150 |  **************************************/
 151 | void validateJSON(T : const(char)[])(T text)
 152 | { Json!(validateAll, true)(text).skipValue(); }
 153 | 
 154 | 
 155 | /// JSON data types returned by `peek`.
 156 | enum DataType : ubyte // stored in a single byte; `Json.peek` keeps a 256 entry table of these
 157 | {
 158 | 	string, number, object, array, boolean, null_ // `null_` has a trailing underscore because `null` is a D keyword
 159 | }
 160 | 
 161 | 
 162 | /// Validation strength of JSON parser
 163 | enum // ordered from least to most strict; `Json` compares these levels with `>` and `>=`
 164 | {
 165 | 	trustedSource,  /// Assume 100% correct JSON and speed up parsing; no checks are compiled in.
 166 | 	validateUsed,   /// Validate what is actually read, but ignore errors in skipped portions.
 167 | 	validateAll,    /// Do a complete validation of the JSON data, including skipped values.
 168 | }
 169 | 
 170 | 
 171 | /// A UDA used to remap enum members or struct field names to JSON strings.
 172 | struct JsonMapping { string[string] map; } // NOTE(review): which side is the D name and which the JSON name is decided by `buildRemapTable` (not visible here) - confirm
 173 | 
 174 | 
 175 | /// JSON parser state returned by the `state` property.
 176 | struct JsonParserState {
 177 | 	const(char)*    text;       // read position (`m_text`) at the time of the snapshot
 178 | 	size_t          nesting;    // nesting depth (`m_nesting`) at the time of the snapshot
 179 | }
 180 | 
 181 | 
 182 | /*******************************************************************************
 183 |  * 
 184 |  * This is a forward JSON parser for picking off items of interest on the go.
 185 |  * It neither produces a node structure, nor does it produce events. Instead you
 186 |  * can peek at the value type that lies ahead and/or directly consume a JSON
 187 |  * value from the parser. Objects and arrays can be iterated over via `foreach`,
 188 |  * while you can also directly ask for one or multiple keys of an object.
 189 |  * 
 190 |  * Params:
 191 |  *   vl = Validation level. Any of `trustedSource`, `validateUsed` or
 192 |  *        `validateAll`.
 193 |  *   validateUtf8 = If validation is enabled, this also checks UTF-8 encoding
 194 |  *                  of JSON strings.
 195 |  * 
 196 |  **************************************/
 197 | struct Json(uint vl = validateUsed, bool validateUtf8 = vl > trustedSource)
 198 | 	if (vl > trustedSource || !validateUtf8)
 199 | {
 200 | private:
 201 | 
 202 | 	enum isTrusted     = vl == trustedSource; // no validation at all
 203 | 	enum skipAllInter  = vl == trustedSource; // separators (':' and ',') are skipped in bulk instead of being checked one by one
 204 | 	enum isValidating  = vl >= validateUsed; // errors are reported for the parts that are actually read
 205 | 	enum isValidateAll = vl == validateAll; // skipped values are validated as well
 206 | 
 207 | 	const(char*)    m_start     = void; // start of the JSON text; set once by the constructor
 208 | 	const(char)*    m_text      = void; // current read position
 209 | 	size_t          m_nesting   = 0; // number of currently open objects/arrays, maintained by `nest` and `unnest`
 210 | 	RaiiArray!char  m_mem; // scratch buffer that receives decoded strings containing escape sequences
 211 | 	bool            m_isString  = false; // set when slices of the JSON text may be returned as `string` without copying
 212 | 
 213 | 
 214 | public:
 215 | 
 216 | 	@disable this(); // default construction is disabled; a text must be supplied
 217 | 	@disable this(this); // copying is disabled
 218 | 
 219 | 
 220 | 	/*******************************************************************************
 221 | 	 * 
 222 | 	 * Constructor taking a `string` for fast slicing.
 223 | 	 * 
 224 | 	 * JSON strings without escape sequences can be returned as slices.
 225 | 	 *
 226 | 	 * Params:
 227 | 	 *   text = The JSON text to parse.
 228 | 	 *   simdPrep = Set this to `No.simdPrep` to indicate that `text` is already
 229 | 	 *     suffixed by 16 zero bytes as required for SIMD processing.
 230 | 	 *
 231 | 	 **************************************/
 232 | 	nothrow
 233 | 	this(string text, Flag!"simdPrep" simdPrep = Yes.simdPrep)
 234 | 	{
 235 | 		import core.memory;
 236 | 		m_isString = GC.query(text.ptr) !is ReturnType!(GC.query).init; // true when the GC reports a memory block for `text`
 237 | 		this(cast(const(char)[]) text, simdPrep); // the remaining set-up is shared with the `const(char)[]` constructor
 238 | 	}
 239 | 
 240 | 
 241 | 	/*******************************************************************************
 242 | 	 * 
 243 | 	 * Constructor taking a `const char[]`.
 244 | 	 * 
 245 | 	 * JSON strings allocate on the GC heap when returned.
 246 | 	 *
 247 | 	 * Params:
 248 | 	 *   text = The JSON text to parse.
 249 | 	 *   simdPrep = Set this to `No.simdPrep` to indicate that `text` is already
 250 | 	 *     suffixed by 16 zero bytes as required for SIMD processing.
 251 | 	 *
 252 | 	 **************************************/
 253 | 	pure nothrow
 254 | 	this(const(char)[] text, Flag!"simdPrep" simdPrep = Yes.simdPrep)
 255 | 	{
 256 | 		if (simdPrep == Yes.simdPrep)
 257 | 		{
 258 | 			// SSE processing needs 16 zero bytes after the text. Should the append relocate the array,
 259 | 			// the copy is unique to this parser and JSON strings can be returned without allocating.
 260 | 			const before = text.ptr;
 261 | 			text ~= "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
 262 | 			if (before !is text.ptr) m_isString = true;
 263 | 		}
 264 | 		m_text = text.ptr; m_start = m_text;
 265 | 		skipWhitespace!false();
 266 | 	}
 267 | 
 268 | 
 269 | 	/+
 270 | 	 ╔══════════════════════════════════════════════════════════════════════════════
 271 | 	 ║ ⚑ String
 272 | 	 ╚══════════════════════════════════════════════════════════════════════════════
 273 | 	 +/
 274 | 
 275 | 	/*******************************************************************************
 276 | 	 * 
 277 | 	 * Reads a string off the JSON text.
 278 | 	 *
 279 | 	 * Params:
 280 | 	 *   allowNull = Allow `null` as a valid option for the string.
 281 | 	 *
 282 | 	 * Returns:
 283 | 	 *   A GC managed string.
 284 | 	 *
 285 | 	 **************************************/
 286 | 	string read(T)(bool allowNull = true) if (is(T == string))
 287 | 	{
 288 | 		if (allowNull && peek != DataType.string)
 289 | 			return readNull();
 290 | 		auto borrowed = borrowString();
 291 | 		// Strings with escapes are decoded into the reusable `m_mem` buffer; those must always be copied.
 292 | 		const isTextSlice = borrowed.ptr !is m_mem.ptr;
 293 | 		return (m_isString && isTextSlice) ? borrowed.assumeUnique() : borrowed.idup;
 294 | 	}
 295 | 
 296 | 
 297 | 	/*******************************************************************************
 298 | 	 * 
 299 | 	 * Reads an enumeration off the JSON text.
 300 | 	 *
 301 | 	 **************************************/
 302 | 	T read(T)() if (is(T == enum))
 303 | 	{
 304 | 		enum table = buildRemapTable!T;
 305 | 		const rewindTo = m_text;
 306 | 		const(char)[] name = borrowString();
 307 | 		foreach (entry; table)
 308 | 			if (name == entry.json)
 309 | 				return entry.d;
 310 | 		m_text = rewindTo; // unknown member: go back to where the string started
 311 | 		static if (isValidating)
 312 | 			handleError(format("Could not find enum member `%s` in `%s`", name, T.stringof));
 313 | 		assert(0);
 314 | 	}
 315 | 
 316 | 
 317 | 	/*******************************************************************************
 318 | 	 * 
 319 | 	 * Reads a string off the JSON text with limited lifetime.
 320 | 	 * 
 321 | 	 * The reference to this slice is not guaranteed to be valid after the JSON
 322 | 	 * parser has been destroyed or another object key or string value has been
 323 | 	 * parsed. So make a copy before you continue parsing.
 324 | 	 *
 325 | 	 * Returns:
 326 | 	 *   If the string had no escape sequences in it, the returned array is a
 327 | 	 *   slice of the JSON text buffer, otherwise a temporary copy.
 328 | 	 *
 329 | 	 **************************************/
 330 | 	const(char)[] borrowString()
 331 | 	{
 332 | 		expect('"', "at start of string"); // NOTE(review): presumably also steps over the opening quote - confirm in `expect`
 333 | 		auto escFreeStart = m_text; // start of the current run of bytes without escape sequences
 334 | 
 335 | 		if (scanString!validateUtf8()) // true: the closing quote was reached without meeting a backslash
 336 | 		{
 337 | 			// Fast path: the string contains no escapes, so a slice of the JSON text is returned.
 338 | 			size_t length = m_text - escFreeStart;
 339 | 			skipOnePlusWhitespace!skipAllInter();
 340 | 			return escFreeStart[0 .. length];
 341 | 		}
 342 | 		else
 343 | 		{
 344 | 			// Slow path: assemble the decoded string in `m_mem`, the buffer owned by this parser instance.
 345 | 			size_t length = 0; // number of bytes written to `m_mem` so far
 346 | 			bool eos = false; // set once `scanString` has reached the closing quote
 347 | 			goto CopyToBuffer; // first copy the escape free run that precedes the first backslash
 348 | 			do
 349 | 			{
 350 | 				do
 351 | 				{
 352 | 					m_mem.capacityNeeded( length + 4 ); // one escape decodes to at most 4 bytes of UTF-8
 353 | 					uint decoded = decodeEscape( &m_mem[length] );
 354 | 					length += decoded;
 355 | 				}
 356 | 				while (*m_text == '\\'); // decode escapes that follow each other directly
 357 | 
 358 | 				escFreeStart = m_text;
 359 | 				eos = scanString!validateUtf8();
 360 | 			CopyToBuffer:
 361 | 				size_t escFreeLength = m_text - escFreeStart;
 362 | 				m_mem.capacityNeeded( length + escFreeLength );
 363 | 				memcpy( m_mem.ptr + length, escFreeStart, escFreeLength ); // append the escape free run verbatim
 364 | 				length += escFreeLength;
 365 | 			}
 366 | 			while (!eos);
 367 | 			skipOnePlusWhitespace!skipAllInter();
 368 | 			return m_mem[0 .. length]; // only valid until `m_mem` is written to again
 369 | 		}
 370 | 	}
 371 | 
 372 | 
 373 | 	private bool scanString(bool validate)() // returns true when stopped at the closing quote, false otherwise
 374 | 	{
 375 | 		static if (validate)
 376 | 		{
 377 | 			import core.bitop;
 378 | 
 379 | 			while (true)
 380 | 			{
 381 | 				// Advance to the next control character (0x00-0x1F), `"`, `\` or byte from 0x7F upwards.
 382 | 				m_text.seekToRanges!"\0\x1F\"\"\\\\\x7F\xFF";
 383 | 				
 384 | 				// The two printable stop characters end the scan: closing quote or start of an escape.
 385 | 				if (*m_text == '"')
 386 | 					return true;
 387 | 				if (*m_text == '\\')
 388 | 					return false;
 389 | 				
 390 | 				// Anything else has to be a well-formed UTF-8 sequence; load 4 bytes to test it in one go.
 391 | 				uint u = *cast(uint*) m_text;
 392 | 				version (LittleEndian) u = bswap(u); // put the first byte of the sequence into the top 8 bits
 393 | 				
 394 | 				// 2 bytes (110xxxxx 10xxxxxx); lead bytes 0xC0 and 0xC1 would be overlong ASCII.
 395 | 				if (
 396 | 					(u & 0b111_00000_11_000000_00000000_00000000) == 0b110_00000_10_000000_00000000_00000000 &&
 397 | 					(u > 0b110_00001_10_111111_11111111_11111111))
 398 | 					m_text += 2;
 399 | 				// 3 bytes; rejects overlong forms (below E0 A0 80) and UTF-16 surrogates (ED A0 80 .. ED BF BF).
 400 | 				else if (
 401 | 					(u & 0b1111_0000_11_000000_11_000000_00000000) == 0b1110_0000_10_000000_10_000000_00000000 &&
 402 | 					(u & 0b0000_1111_00_100000_00_000000_00000000) != 0b0000_1101_00_100000_00_000000_00000000 &&
 403 | 					(u > 0b1110_0000_10_011111_10_111111_11111111))
 404 | 					m_text += 3;
 405 | 				// 4 bytes; rejects overlong forms (below F0 90 80 80) and code points above 0x10FFFF (from F4 90 80 80).
 406 | 				else if (
 407 | 					(u & 0b11111_000_11_000000_11_000000_11_000000) == 0b11110_000_10_000000_10_000000_10_000000 &&
 408 | 					(u > 0b11110_000_10_001111_10_111111_10_111111) && (u < 0b11110_100_10_010000_10_000000_10_000000))
 409 | 					m_text += 4;
 410 | 				// Everything else is an error; pick the message that fits the offending byte.
 411 | 				else if (*m_text < ' ' || *m_text == 0x7F)
 412 | 					expectNot("is a disallowed control character in strings");
 413 | 				else if (*m_text >= 0x80 && *m_text <= 0xBF)
 414 | 					expectNot("is a UTF-8 follow byte and cannot start a sequence");
 415 | 				else
 416 | 					expectNot("is not a valid UTF-8 sequence start");
 417 | 			}
 418 | 		}
 419 | 		else
 420 | 		{
 421 | 			m_text.seekToAnyOf!("\\\"\0"); // unchecked: stop at a backslash, a quote or the terminating zero byte
 422 | 			return *m_text == '"';
 423 | 		}
 424 | 	}
 425 | 
 426 | 
 427 | 	private int matchString(string key)() // compares the JSON text at `m_text` with the compile-time string `key`
 428 | 	{
 429 | 		return m_text.fixedTermStrCmp!(char, key, "\"\0", "\\")(&stringCompareCallback); // NOTE(review): the literals look like terminators (`"`, NUL) and the escape introducer - confirm in `fixedTermStrCmp`
 430 | 	}
 431 | 
 432 | 
 433 | 	private bool stringCompareCallback(ref immutable(char)* key, ref const(char)* str)
 434 | 	{
 435 | 		do
 436 | 		{
 437 | 			auto key4 = cast(char[4]*) key; // view of the next 4 key bytes; NOTE(review): may read past the end of a short key - confirm
 438 | 			char[4] buf = *key4; // start from a copy of the key so bytes the decoder does not write still compare equal
 439 | 			uint bytes = decodeEscape(buf.ptr); // decodes at `m_text`; NOTE(review): presumably `str` refers to `m_text` - confirm
 440 | 			if (buf != *key4) // the decoded escape differs from the key
 441 | 				return false;
 442 | 			key += bytes; // advance the key by the number of bytes the escape decoded to
 443 | 		}
 444 | 		while (str[0] == '\\'); // continue with escapes that follow directly
 445 | 		return true;
 446 | 	}
 447 | 
 448 | 
 449 | 	private static immutable escapes = {
 450 | 		// Maps the character after a backslash to its replacement; '\0' marks an invalid escape.
 451 | 		char[256] table = '\0';
 452 | 		// These three stand for themselves.
 453 | 		foreach (char self; `"\/`)
 454 | 			table[self] = self;
 455 | 		// Single letter codes for control characters.
 456 | 		table['b'] = '\b'; table['f'] = '\f';
 457 | 		table['n'] = '\n'; table['r'] = '\r';
 458 | 		table['t'] = '\t';
 459 | 		return table;
 460 | 	}();
 461 | 
 462 | 
 463 | 	private void skipEscape()
 464 | 	{
 465 | 		static if (!isValidateAll)
 466 | 		{
 467 | 			m_text += 2; // unchecked: step over the backslash and the character after it
 468 | 		}
 469 | 		else if (m_text[1] == 'u')
 470 | 		{
 471 | 			// "\u" escape: skip the prefix and let the decoder check the hex digits.
 472 | 			m_text += 2;
 473 | 			decodeUtf16HexToCodepoint();
 474 | 		}
 475 | 		else
 476 | 		{
 477 | 			// Two character escape: the second character needs an entry in `escapes`.
 478 | 			m_text++;
 479 | 			if (escapes[*m_text] == '\0') expectNot("in escape sequence");
 480 | 			m_text++;
 481 | 		}
 482 | 	}
 483 | 
 484 | 
 485 | 	private uint decodeEscape(scope char* dst)
 486 | 	{
 487 | 		// Decodes the escape sequence at `m_text` (pointing at the backslash) into `dst` as UTF-8,
 488 | 		// moves `m_text` past it and returns the number of bytes written (1 to 4).
 489 | 		if (m_text[1] != 'u')
 490 | 		{
 491 | 			// Two character escape: look up the replacement; '\0' marks an unknown escape.
 492 | 			dst[0] = escapes[m_text[1]];
 493 | 			static if (isValidating)
 494 | 				if (!dst[0])
 495 | 					handleError("Invalid escape sequence");
 496 | 			m_text += 2;
 497 | 			return 1;
 498 | 		}
 499 | 
 500 | 		// "\u" escape: the decoder returns a complete code point, surrogate pairs already combined.
 501 | 		m_text += 2;
 502 | 		const uint cp = decodeUtf16HexToCodepoint();
 503 | 		if (cp >= 0x10000)
 504 | 		{
 505 | 			// Supplementary planes need 4 bytes. (Testing for the surrogate range here never matched.)
 506 | 			dst[0] = cast(char)(0b11110_000 | (cp >> 18));
 507 | 			dst[1] = cast(char)(0b10_000000 | ((cp >> 12) & 0b00_111111));
 508 | 			dst[2] = cast(char)(0b10_000000 | ((cp >> 6)  & 0b00_111111));
 509 | 			dst[3] = cast(char)(0b10_000000 | (cp         & 0b00_111111));
 510 | 			return 4;
 511 | 		}
 512 | 		else if (cp >= 0x800)
 513 | 		{
 514 | 			dst[0] = cast(char)(0b1110_0000 | (cp >> 12));
 515 | 			dst[1] = cast(char)(0b10_000000 | ((cp >> 6)  & 0b00_111111));
 516 | 			dst[2] = cast(char)(0b10_000000 | (cp         & 0b00_111111));
 517 | 			return 3;
 518 | 		}
 519 | 		else if (cp >= 0x80)
 520 | 		{
 521 | 			dst[0] = cast(char)(0b110_00000 | (cp >> 6));
 522 | 			dst[1] = cast(char)(0b10_000000 | (cp         & 0b00_111111));
 523 | 			return 2;
 524 | 		}
 525 | 		else
 526 | 		{
 527 | 			dst[0] = cast(char)(cp);
 528 | 			return 1;
 529 | 		}
 530 | 	}
 531 | 
 532 | 
 533 | 	private dchar decodeUtf16HexToCodepoint()
 534 | 	{
 535 | 		import fast.internal.helpers;
 536 | 
 537 | 		// Reads one UTF-16 code unit as 4 hex digits at `m_text`. A high surrogate triggers a second
 538 | 		// read for the low surrogate and both are combined into one code point above 0xFFFF.
 539 | 		uint cp, hi;
 540 | 		foreach (i; staticIota!(0, 2))
 541 | 		{
 542 | 			static if (isValidating)
 543 | 			{
 544 | 				if (auto badByte = hexDecode4(m_text, cp))
 545 | 				{
 546 | 					m_text = badByte;
 547 | 					expectNot("is not a hex digit");
 548 | 				}
 549 | 			}
 550 | 			else cp = hexDecode4(m_text);
 551 | 
 552 | 			static if (i == 0)
 553 | 			{
 554 | 				// Anything but a high surrogate is already a complete code point.
 555 | 				if (cp < 0xD800 || cp > 0xDBFF)
 556 | 					break;
 557 | 				// 0x40 << 10 == 0x10000, the start of the supplementary planes.
 558 | 				hi = (cp - 0xD800 + 0x40) << 10;
 559 | 				// NOTE(review): nothing visible here skips the `\u` that introduces the low
 560 | 				// surrogate; confirm that `hexDecode4` copes with it.
 561 | 			}
 562 | 			else static if (i == 1)
 563 | 			{
 564 | 				static if (isValidating)
 565 | 					if (cp < 0xDC00 || cp > 0xDFFF)
 566 | 						handleError("The UTF-16 escape produced an invalid code point.");
 567 | 				// Keep only the 10 payload bits of the low surrogate. This has to happen in every
 568 | 				// validation mode, or the surrogate base 0xDC00 corrupts the combined code point.
 569 | 				cp = hi | (cp & 0x3FF);
 570 | 			}
 571 | 		}
 572 | 
 573 | 		static if (isValidating)
 574 | 			if (cp > 0x10FFFF || cp >= 0xD800 && cp <= 0xDFFF)
 575 | 				handleError("The UTF-16 escape produced an invalid code point.");
 576 | 		return cp;
 577 | 	}
 578 | 
 579 | 
 580 | 	private void skipString(bool skipInter)()
 581 | 	{
 582 | 		++m_text; // step over the opening quote
 583 | 		skipRestOfString!skipInter();
 584 | 	}
 585 | 
 586 | 
 587 | 	private void skipRestOfString(bool skipInter)()
 588 | 	{
 589 | 		// `scanString` returns true once it stopped at the closing quote; until then skip an escape.
 590 | 		for (; !scanString!isValidateAll(); skipEscape()) {}
 591 | 		skipOnePlusWhitespace!skipInter();
 592 | 	}
 593 | 
 594 | 
 595 | 	/+
 596 | 	 ╔══════════════════════════════════════════════════════════════════════════════
 597 | 	 ║ ⚑ Number
 598 | 	 ╚══════════════════════════════════════════════════════════════════════════════
 599 | 	 +/
 600 | 
 601 | 	/*******************************************************************************
 602 | 	 * 
 603 | 	 * Reads a number off the JSON text.
 604 | 	 * 
 605 | 	 * If you ask for an unsigned value, no minus sign will be accepted in the JSON,
 606 | 	 * otherwise all features of JSON numbers will be available. In particular large
 607 | 	 * integers can be given in scientific notation.
 608 | 	 *
 609 | 	 * Params:
 610 | 	 *   N = Built-in numerical type that should be returned.
 611 | 	 *
 612 | 	 * Returns:
 613 | 	 *   The parsed number.
 614 | 	 *
 615 | 	 * Throws:
 616 | 	 *   JSONException, on invalid JSON or integer overflow.
 617 | 	 *
 618 | 	 **************************************/
 619 | 	N read(N)() if (isNumeric!N && !is(N == enum))
 620 | 	{
 621 | 		static if (!isUnsigned!N)
 622 | 			enum NumberOptions opt = { minus:true };
 623 | 		else
 624 | 			enum NumberOptions opt = {};
 625 | 		N value = void; // filled in by `parseNumber`; stays uninitialized when the conversion fails
 626 | 		if (parseNumber!opt(m_text, value))
 627 | 			skipWhitespace!skipAllInter();
 628 | 		else static if (isValidating)
 629 | 			handleError(format("Could not convert JSON number to `%s`", N.stringof));
 630 | 		return value;
 631 | 	}
 632 | 
 633 | 
 634 | 	private void skipNumber(bool skipInter)()
 635 | 	{
 636 | 		static if (!isValidateAll)
 637 | 		{
 638 | 			// Unchecked: skip any run of characters that can occur in a number, plus white-space.
 639 | 			m_text.skipCharRanges!"\t\n\r\r  ++-.09EEee";
 640 | 			static if (skipInter)
 641 | 				m_text.skipAllOf!"\t\n\r ,";
 642 | 		}
 643 | 		else
 644 | 		{
 645 | 			// Checked: [-] (0 | digits) [. digits] [(e|E) [+|-] digits]
 646 | 			if (*m_text == '-') ++m_text;
 647 | 			if (*m_text != '0')
 648 | 				trySkipDigits();
 649 | 			else
 650 | 				++m_text;
 651 | 			if (*m_text == '.')
 652 | 			{
 653 | 				++m_text;
 654 | 				trySkipDigits();
 655 | 			}
 656 | 			if (*m_text == 'e' || *m_text == 'E')
 657 | 			{
 658 | 				++m_text;
 659 | 				if (*m_text == '+' || *m_text == '-') ++m_text;
 660 | 				trySkipDigits();
 661 | 			}
 662 | 			skipWhitespace!false();
 663 | 		}
 664 | 	}
 665 | 
 666 | 
 667 | 	static if (isValidateAll)
 668 | 	{
 669 | 		private void trySkipDigits()
 670 | 		{	// At least one digit is required. `char - char` is a signed `int` in D, so characters
 671 | 			// below '0' (e.g. '.', '-', ' ') have to be rejected with an explicit range check.
 672 | 			if (*m_text < '0' || *m_text > '9') expectNot("in number literal");
 673 | 			m_text.skipAllOf!"0123456789";
 674 | 		}
 675 | 	}
 676 | 
 677 | 
 678 | 	/+
 679 | 	 ╔══════════════════════════════════════════════════════════════════════════════
 680 | 	 ║ ⚑ Object
 681 | 	 ╚══════════════════════════════════════════════════════════════════════════════
 682 | 	 +/
 683 | 
 684 | 	/*******************************************************************************
 685 | 	 * 
 686 | 	 * Reads a plain old data struct off the JSON text.
 687 | 	 *
 688 | 	 * Params:
 689 | 	 *   T = Type of struct that should be returned.
 690 | 	 *
 691 | 	 * Returns:
 692 | 	 *   A struct of type `T`.
 693 | 	 *
 694 | 	 **************************************/
 695 | 	T read(T)() if (is(T == struct) && __traits(isPOD, T))
 696 | 	{
 697 | 		nest('{', "on start of object");
 698 | 
 699 | 		T t; // fields without a matching JSON key keep their initial value
 700 | 		if (*m_text != '}') while (true) // an empty object skips the loop entirely
 701 | 		{
 702 | 			auto key = borrowString();
 703 | 			static if (!skipAllInter)
 704 | 			{
 705 | 				expect(':', "between key and value");
 706 | 				skipWhitespace!false();
 707 | 			}
 708 | 
 709 | 			enum mapping = buildRemapTable!T; // pairs of JSON key (`json`) and D field name (`d`)
 710 | 			foreach (m; mapping)
 711 | 			{
 712 | 				if (key.length == m.json.length && memcmp(key.ptr, m.json.ptr, m.json.length) == 0)
 713 | 				{
 714 | 					mixin("alias keyT = typeof(T." ~ m.d ~ ");"); // type of the matching field
 715 | 					mixin("t." ~ m.d ~ " = read!keyT;"); // read the value straight into that field
 716 | 					goto Success;
 717 | 				}
 718 | 			}
 719 | 			skipValue(); // no field for this key
 720 | 
 721 | 		Success:
 722 | 			if (*m_text == '}')
 723 | 				break;
 724 | 
 725 | 			static if (!skipAllInter)
 726 | 			{
 727 | 				expect(',', "between key-value pairs");
 728 | 				skipWhitespace!false();
 729 | 			}
 730 | 		}
 731 | 		
 732 | 		unnest();
 733 | 		return t;
 734 | 	}
 735 | 
 736 | 
 737 | 	/*******************************************************************************
 738 | 	 * 
 739 | 	 * Reads a plain old data struct or `null` off the JSON text.
 740 | 	 * 
 741 | 	 * Params:
 742 | 	 *   T = Type of struct pointer that should be returned.
 743 | 	 *
 744 | 	 * Returns:
 745 | 	 *   A pointer to a newly filled struct of type `T` on the GC heap.
 746 | 	 *
 747 | 	 **************************************/
 748 | 	T read(T)() if (is(PointerTarget!T == struct) && __traits(isPOD, PointerTarget!T))
 749 | 	{
 750 | 		alias Target = PointerTarget!T;
 751 | 		if (peek == DataType.null_) return readNull(); // JSON `null` becomes a null pointer
 752 | 		T fresh = new Target;
 753 | 		*fresh = read!Target();
 754 | 		return fresh;
 755 | 	}
 756 | 
 757 | 
 758 | 	/*******************************************************************************
 759 | 	 * 
 760 | 	 * Reads an associative-array off a JSON text.
 761 | 	 * 
 762 | 	 * The key type must be `string`, the value type can be any type otherwise
 763 | 	 * supported by the parser.
 764 | 	 *
 765 | 	 * Params:
 766 | 	 *   T = The type of AA to return.
 767 | 	 *
 768 | 	 * Returns:
 769 | 	 *   A newly filled associative array.
 770 | 	 *
 771 | 	 **************************************/
 772 | 	T read(T)() if (is(KeyType!T == string))
 773 | 	{
 774 | 		T aa;
 775 | 		foreach (key; byKey) // keys decoded into the reusable `m_mem` buffer are copied; the key is owned before the value is read
 776 | 		{ string k = (m_isString && key.ptr !is m_mem.ptr) ? cast(immutable) key : key.idup; aa[k] = read!(ValueType!T)(); }
 777 | 		return aa;
 778 | 	}
 779 | 
 780 | 
 781 | 	/*******************************************************************************
 782 | 	 * 
 783 | 	 * An alias to the `singleKey` method. Instead of `json.singleKey!"something"`
 784 | 	 * you can write `json.something`. Read the notes on `singleKey`.
 785 | 	 *
 786 | 	 **************************************/
 787 | 	alias opDispatch = singleKey; // makes `json.name` resolve to `json.singleKey!"name"`
 788 | 
 789 | 
 790 | 	/*******************************************************************************
 791 | 	 * 
 792 | 	 * Skips all keys of an object except the first occurrence with the given key
 793 | 	 * name.
 794 | 	 *
 795 | 	 * Params:
 796 | 	 *   name = the key name of interest
 797 | 	 *
 798 | 	 * Returns:
 799 | 	 *   A temporary struct, a proxy to the parser, that will automatically seek to
 800 | 	 *   the end of the current JSON object on destruction.
 801 | 	 *
 802 | 	 * Throws:
 803 | 	 *   JSONException when the key is not found in the object or parsing errors
 804 | 	 *   occur.
 805 | 	 * 
 806 | 	 * Note:
 807 | 	 *   Since this is an on the fly parser, you can only get one key from an
 808 | 	 *   object with this method. Use `keySwitch` or `foreach(key; json)` to get
 809 | 	 *   values from multiple keys.
 810 | 	 * 
 811 | 	 * See_Also:
 812 | 	 *   keySwitch
 813 | 	 *
 814 | 	 **************************************/
 815 | 	@property SingleKey singleKey(string name)()
 816 | 	{
 817 | 		nest('{', "on start of object");
 818 | 
 819 | 		// `more` is false for an empty object and once the closing brace has been reached.
 820 | 		for (bool more = *m_text != '}'; more; )
 821 | 		{
 822 | 			const(char)[] candidate = borrowString();
 823 | 			static if (!skipAllInter)
 824 | 			{
 825 | 				expect(':', "between key and value");
 826 | 				skipWhitespace!false();
 827 | 			}
 828 | 
 829 | 			// A match returns with the parser positioned on the value of that key.
 830 | 			if (candidate == name)
 831 | 				return SingleKey(this);
 832 | 
 833 | 			skipValueImpl!skipAllInter();
 834 | 			more = *m_text != '}';
 835 | 
 836 | 			static if (!skipAllInter) if (more)
 837 | 			{
 838 | 				expect(',', "between key-value pairs");
 839 | 				skipWhitespace!false();
 840 | 			}
 841 | 		}
 842 | 
 843 | 		unnest();
 844 | 		static if (isValidating)
 845 | 			handleError("Key not found.");
 846 | 		assert(0);
 847 | 	}
 848 | 
 849 | 
 850 | 	/*******************************************************************************
 851 | 	 * 
 852 | 	 * Selects from a set of given keys in an object and calls the corresponding
 853 | 	 * delegate. The difference to `singleKey` when invoked with a single key is
 854 | 	 * that `keySwitch` will not error out if the key is non-existent and may
 855 | 	 * trigger the delegate multiple times, if the JSON object has duplicate keys.
 856 | 	 *
 857 | 	 * Params:
 858 | 	 *   Args = the names of the keys
 859 | 	 *   dlg = the delegates corresponding to the keys
 860 | 	 *
 861 | 	 * Throws:
 862 | 	 *   JSONException when parsing errors occur. A key that is missing from the
 863 | 	 *   object is not an error here.
 864 | 	 * 
 865 | 	 **************************************/
 866 | 	void keySwitch(Args...)(scope void delegate()[Args.length] dlg...)
 867 | 	{
 868 | 		nest('{', "on start of object");
 869 | 		
 870 | 		if (*m_text != '}') while (true) // an empty object skips the loop entirely
 871 | 		{
 872 | 			auto key = borrowString();
 873 | 			static if (!skipAllInter)
 874 | 			{
 875 | 				expect(':', "between key and value");
 876 | 				skipWhitespace!false();
 877 | 			}
 878 | 			
 879 | 			auto oldPos = m_text; // position of the value, used below to see whether anything was consumed
 880 | 			foreach (i, arg; Args)
 881 | 			{
 882 | 				if (key.length == arg.length && memcmp(key.ptr, arg.ptr, arg.length) == 0)
 883 | 				{
 884 | 					dlg[i](); // the delegate at the same index as the matching key name
 885 | 					goto Next;
 886 | 				}
 887 | 			}
 888 | 			skipValue(); // none of the requested keys
 889 | 			
 890 | 		Next:
 891 | 			if (*m_text == '}')
 892 | 				break;
 893 | 			
 894 | 			static if (!skipAllInter) if (oldPos !is m_text) // NOTE(review): a delegate that consumed nothing leaves the value ahead and no comma is expected - confirm delegates must read or skip their value
 895 | 			{
 896 | 				expect(',', "after key-value pair");
 897 | 				skipWhitespace!false();
 898 | 			}
 899 | 		}
 900 | 		
 901 | 		unnest();
 902 | 	}
 903 | 	
 904 | 	
 905 | 	private int byKeyImpl(scope int delegate(ref const char[]) foreachBody)
 906 | 	{
 907 | 		nest('{', "at start of foreach over object");
 908 | 
 909 | 		int status = 0;
 910 | 		const(char)[] key;
 911 | 		// An empty object has nothing to iterate.
 912 | 		if (*m_text != '}') do
 913 | 		{
 914 | 			key = borrowString();
 915 | 			static if (!skipAllInter)
 916 | 			{
 917 | 				expect(':', "between key and value");
 918 | 				skipWhitespace!false();
 919 | 			}
 920 | 		}	// `iterationGuts` is handed the loop body; a true result ends the iteration.
 921 | 		while (!iterationGuts!"{}"(status, key, foreachBody, "after key-value pair"));
 922 | 
 923 | 		unnest();
 924 | 		return status;
 925 | 	}
 926 | 
 927 | 
 928 | 	/*******************************************************************************
 929 | 	 * 
 930 | 	 * Iterate the keys of a JSON object with `foreach`.
 931 | 	 * 
 932 | 	 * Notes:
 933 | 	 *   $(UL
 934 | 	 *     $(LI If you want to store the key, you need to duplicate it.)
 935 | 	 *   )
 936 | 	 * 
 937 | 	 * Example:
 938 | 	 * ---
 939 | 	 * uint id;
 940 | 	 * foreach (key; json.byKey)
 941 | 	 *     if (key == "id")
 942 | 	 *         id = json.read!uint;
 943 | 	 * ---
 944 | 	 **************************************/
 945 | 	@safe @nogc pure nothrow
 946 | 	@property int delegate(scope int delegate(ref const char[])) byKey()
 947 | 	{
 948 | 		return &byKeyImpl; // delegate bound to this parser; `foreach` passes its loop body to it
 949 | 	}
 950 | 
 951 | 
 952 | 	/+
 953 | 	 ╔══════════════════════════════════════════════════════════════════════════════
 954 | 	 ║ ⚑ Array handling
 955 | 	 ╚══════════════════════════════════════════════════════════════════════════════
 956 | 	 +/
 957 | 
 958 | 	/*******************************************************************************
 959 | 	 * 
 960 | 	 * Reads a dynamic array off the JSON text.
 961 | 	 * 
 962 | 	 **************************************/
 963 | 	T read(T)() if (isDynamicArray!T && !isSomeString!T)
 964 | 	{
 965 | 		import std.array : Appender;
 966 | 		Appender!T sink;
 967 | 		foreach (_; this) // iterates the JSON array through `opApply`; the index is not needed
 968 | 			sink.put(read!(typeof(T.init[0]))());
 969 | 		return sink.data;
 970 | 	}
 971 | 
 972 | 
 973 | 	/*******************************************************************************
 974 | 	 * 
 975 | 	 * Reads a static array off the JSON text.
 976 | 	 * 
 977 | 	 * When validation is enabled, it is an error if the length of the JSON array
 978 | 	 * differs from the static array length. Otherwise unset elements receive their
 979 | 	 * initial value.
 980 | 	 *
 981 | 	 **************************************/
 982 | 	T read(T)() if (isStaticArray!T)
 983 | 	{
 984 | 		T sa = void;     // slots are filled from the JSON array, the rest is handled after the loop
 985 | 		size_t cnt;      // number of elements found in the JSON array
 986 | 		foreach (i; this)
 987 | 		{
 988 | 			if (i < T.length)
 989 | 				sa[i] = read!(typeof(T.init[0]));
 990 | 			cnt = i + 1;
 991 | 		}
 992 | 		static if (isValidating)
 993 | 		{
 994 | 			if (cnt != T.length)
 995 | 				handleError(format("Static array size mismatch. Expected %s, got %s", T.length, cnt));
 996 | 		}
 997 | 		else
 998 | 		{	// Pad with the element's initial value; `T.init` is the whole array, not one element.
 999 | 			foreach (i; cnt .. T.length)
1000 | 				sa[i] = typeof(T.init[0]).init;
1001 | 		}
1002 | 		return sa;
1003 | 	}
1004 | 
1005 | 
1006 | 	/*******************************************************************************
1007 | 	 * 
1008 | 	 * Iterate over a JSON array via `foreach`.
1009 | 	 *
1010 | 	 **************************************/
1011 | 	int opApply(scope int delegate(const size_t) foreachBody)
1012 | 	{
1013 | 		nest('[', "at start of foreach over array");
1014 | 
1015 | 		int status = 0;
1016 | 		// An empty array is never handed to `iterationGuts`; otherwise count up until it says stop.
1017 | 		if (*m_text != ']')
1018 | 			for (size_t idx = 0; !iterationGuts!"[]"(status, idx, foreachBody, "after array element"); )
1019 | 				idx++;
1020 | 		unnest();
1021 | 		return status;
1022 | 	}
1023 | 
1024 | 
1025 | 	/+
1026 | 	 ╔══════════════════════════════════════════════════════════════════════════════
1027 | 	 ║ ⚑ Boolean
1028 | 	 ╚══════════════════════════════════════════════════════════════════════════════
1029 | 	 +/
1030 | 
1031 | 	/*******************************************************************************
1032 | 	 * 
1033 | 	 * Reads a boolean value off the JSON text.
1034 | 	 *
1035 | 	 **************************************/
1036 | 	bool read(T)() if (is(T == bool))
1037 | 	{
1038 | 		return skipBoolean!(skipAllInter, isValidating)(); // the keyword is checked at every validating level, not only `validateAll`
1039 | 	}
1040 | 
1041 | 
1042 | 	private bool skipBoolean(bool skipInter, bool validate = isValidateAll)()
1043 | 	{
1044 | 		static immutable char[4][2] keywords = [ "true", "alse" ]; // "false" is compared one byte in
1045 | 		const size_t offset = *m_text == 'f';
1046 | 		static if (validate)
1047 | 			if (m_text[offset .. offset + 4] != keywords[offset])
1048 | 				handleError("`true` or `false` expected.");
1049 | 		m_text += 4 + offset;
1050 | 		skipWhitespace!skipInter();
1051 | 		return offset == 0;
1052 | 	}
1053 | 
1054 | 
1055 | 	/+
1056 | 	 ╔══════════════════════════════════════════════════════════════════════════════
1057 | 	 ║ ⚑ Null
1058 | 	 ╚══════════════════════════════════════════════════════════════════════════════
1059 | 	 +/
1060 | 
1061 | 	/*******************************************************************************
1062 | 	 * 
1063 | 	 * Reads `null` off the JSON text.
1064 | 	 *
1065 | 	 **************************************/
1066 | 	typeof(null) readNull()
1067 | 	{
1068 | 		skipNull!(skipAllInter, isValidating)(); // the keyword is checked at every validating level, not only `validateAll`
1069 | 		return null;
1070 | 	}
1071 | 
1072 | 
1073 | 	private void skipNull(bool skipInter, bool validate = isValidateAll)()
1074 | 	{
1075 | 		static if (validate)
1076 | 			if (m_text[0 .. 4] != "null") // the four bytes the keyword occupies
1077 | 				handleError("`null` expected.");
1078 | 		m_text += 4;
1079 | 		skipWhitespace!skipInter();
1080 | 	}
1081 | 
1082 | 
1083 | 	/+
1084 | 	 ╔══════════════════════════════════════════════════════════════════════════════
1085 | 	 ║ ⚑ Helpers and Error Handling
1086 | 	 ╚══════════════════════════════════════════════════════════════════════════════
1087 | 	 +/
1088 | 
1089 | 	/*******************************************************************************
1090 | 	 * 
1091 | 	 * Skips the next JSON value if you are not interested.
1092 | 	 *
1093 | 	 **************************************/
1094 | 	void skipValue()
1095 | 	{
1096 | 		skipValueImpl!skipAllInter(); // trusted parsers also skip the separators that follow the value
1097 | 	}
1098 | 
1099 | 
1100 | 	private void skipValueImpl(bool skipInter)()
1101 | 	{
1102 | 		// Dispatch on the kind of value ahead. With full validation, containers are walked by the
1103 | 		// regular iteration code with a loop body that exits at once; otherwise their end is
1104 | 		// located with a dedicated seek.
1105 | 		final switch (peek)
1106 | 		{
1107 | 			case DataType.string:
1108 | 				return skipString!skipInter();
1109 | 			case DataType.number:
1110 | 				return skipNumber!skipInter();
1111 | 			case DataType.boolean:
1112 | 				skipBoolean!skipInter();
1113 | 				return;
1114 | 			case DataType.null_:
1115 | 				return skipNull!skipInter();
1116 | 			case DataType.object:
1117 | 				static if (!isValidateAll)
1118 | 				{
1119 | 					m_text++;
1120 | 					seekObjectEnd();
1121 | 					skipOnePlusWhitespace!skipInter();
1122 | 				}
1123 | 				else
1124 | 				{
1125 | 					foreach (_; this.byKey)
1126 | 						break;
1127 | 				}
1128 | 				return;
1129 | 			case DataType.array:
1130 | 				static if (!isValidateAll)
1131 | 				{
1132 | 					m_text++;
1133 | 					seekArrayEnd();
1134 | 					skipOnePlusWhitespace!skipInter();
1135 | 				}
1136 | 				else
1137 | 				{
1138 | 					foreach (_; this)
1139 | 						break;
1140 | 				}
1141 | 				return;
1142 | 		}
1143 | 	}
1144 | 
1145 | 
1146 | 	/*******************************************************************************
1147 | 	 * 
1148 | 	 * Returns the type of data that is up next in the JSON text.
1149 | 	 *
1150 | 	 **************************************/
1151 | 	@property DataType peek()
1152 | 	{
1153 | 		// Marks bytes that cannot start a JSON value; it is not a member of `DataType`.
1154 | 		enum invalid = cast(DataType) ubyte.max;
1155 | 		// Look-up table from the first byte of a value to its type.
1156 | 		static immutable DataType[256] firstByte = {
1157 | 			DataType[256] table = invalid;
1158 | 			foreach (char c; "-0123456789")
1159 | 				table[c] = DataType.number;
1160 | 			foreach (char c; "tf")
1161 | 				table[c] = DataType.boolean;
1162 | 			table['"'] = DataType.string;
1163 | 			table['{'] = DataType.object;
1164 | 			table['['] = DataType.array;
1165 | 			table['n'] = DataType.null_;
1166 | 			return table;
1167 | 		}();
1168 | 		const DataType ahead = firstByte[*m_text];
1169 | 		static if (isValidating)
1170 | 			if (ahead == invalid) expectNot("while peeking at next value type");
1171 | 		return ahead;
1172 | 	}
1173 | 
1174 | 
	/*******************************************************************************
	 *
	 * Save or restore the parser's internal state.
	 *
	 * If you want to read only a certain object from the JSON, but exactly which
	 * depends on the value of some key, this is where saving and restoring the
	 * parser state helps.
	 *
	 * Before each candidate you save the parser state. Then you perform just the
	 * minimal work to test if the candidate matches some criteria. If it does,
	 * restore the parser state and read the elements in full. If it doesn't, just
	 * skip to the next.
	 *
	 * Returns:
	 *   A `JsonParserState` built from the current read pointer and nesting level.
	 *
	 **************************************/
	@property const(JsonParserState) state() const
	{
		return JsonParserState(m_text, m_nesting);
	}
1193 | 
	/// Restores the read pointer and nesting level from a previously saved `oldState`.
	@property void state(const JsonParserState oldState)
	{
		m_text    = oldState.text;
		m_nesting = oldState.nesting;
	}
1199 | 
1200 | 
	/*
	 * Enters an aggregate: consumes the expected character `c` via `expect`
	 * (using `msg` for the error text), skips the whitespace that follows and
	 * increments the nesting level.
	 */
	private void nest(char c, string msg)
	{
		expect(c, msg);
		skipWhitespace!false();
		m_nesting++;
	}
1207 | 
1208 | 
1209 | 	private void unnest()
1210 | 	in { assert(m_nesting > 0); }
1211 | 	body
1212 | 	{
1213 | 		if (--m_nesting == 0)
1214 | 		{
1215 | 			skipOnePlusWhitespace!false();
1216 | 			static if (isValidating)
1217 | 				if (*m_text != '\0')
1218 | 					handleError("Expected end of JSON.");
1219 | 		}
1220 | 		else skipOnePlusWhitespace!skipAllInter();
1221 | 	}
1222 | 
1223 | 
	/*
	 * One step of iterating over an aggregate: calls the delegate for the current
	 * element and positions the read pointer for the next one.
	 *
	 * Params:
	 *   braces = opening and closing character of the aggregate
	 *   result = in/out; receives the delegate's return value
	 *   idx = argument passed on to the delegate
	 *   dlg = delegate to call for the element
	 *   missingCommaMsg = error text used when the ',' between elements is missing
	 *
	 * Returns:
	 *   `true` when the read pointer is at the closing character `braces[1]`
	 *   (without `isValidateAll` also after seeking there because `result` is
	 *   non-zero), `false` otherwise.
	 */
	private bool iterationGuts(char[2] braces, T, D)(ref int result, T idx, scope D dlg,
		string missingCommaMsg)
	{
		// Remember where the element started to detect whether the delegate consumed it.
		auto oldPos = m_text;
		static if (isValidateAll)
		{
			// A non-zero result was set earlier: do not call the delegate again,
			// only skip over the value.
			if (result)
			{
				skipValueImpl!(!isValidateAll)();
				goto PastValue;
			}
		}
		result = dlg(idx);
		// The delegate left the value untouched, so skip it on its behalf.
		if (oldPos is m_text)
			skipValueImpl!(!isValidateAll)();
		
	PastValue:
		if (*m_text == braces[1])
			return true;
		
		// Without full validation a non-zero result lets us jump to the closing brace.
		static if (!isValidateAll) if (result)
		{
			seekAggregateEnd!braces();
			return true;
		}
		
		// Separators are not skipped implicitly in this mode, so consume the ',' here.
		static if (!skipAllInter) if (oldPos !is m_text)
		{
			expect(',', missingCommaMsg);
			skipWhitespace!false();
		}
		return false;
	}
1257 | 
1258 | 
1259 | 	static if (!isValidateAll)
1260 | 	{
		/// Seeks the end of the current object by calling `seekAggregateEnd` with "{}".
		private void seekObjectEnd()
		{
			seekAggregateEnd!"{}"();
		}
1265 | 
1266 | 
		/// Seeks the end of the current array by calling `seekAggregateEnd` with "[]".
		private void seekArrayEnd()
		{
			seekAggregateEnd!"[]"();
		}
1271 | 
1272 | 
1273 | 		private void seekAggregateEnd(immutable char[2] parenthesis)()
1274 | 		{
1275 | 			size_t nesting = 1;
1276 | 			while (true)
1277 | 			{
1278 | 				m_text.seekToAnyOf!(parenthesis ~ "\"\0");
1279 | 				final switch (*m_text)
1280 | 				{
1281 | 					case parenthesis[0]:
1282 | 						m_text++;
1283 | 						nesting++;
1284 | 						break;
1285 | 					case parenthesis[1]:
1286 | 						if (--nesting == 0)
1287 | 							return;
1288 | 						m_text++;
1289 | 						break;
1290 | 					case '"':
1291 | 						// Could skip ':' or ',' here by passing `true`, but we skip it above anyways.
1292 | 						skipString!false();
1293 | 				}
1294 | 			}
1295 | 		}
1296 | 	}
1297 | 
1298 | 
	/// Checks (only when validating) that the current character is `c` and reports
	/// a mismatch through `expectImpl` with `msg` as context.
	/// This also increments the JSON read pointer.
	private void expect(char c, string msg)
	{
		static if (isValidating)
			if (*m_text != c)
				expectImpl(c, msg);
		m_text++;
	}
1307 | 
1308 | 
	/// Reports an error (only when validating) if the current character equals `c`.
	/// The read pointer is not moved.
	private void expectNot(char c, string msg)
	{
		static if (isValidating)
			if (*m_text == c)
				expectNot(msg);
	}
1315 | 
1316 | 
1317 | 	static if (isValidating)
1318 | 	{
1319 | 		@noinline
1320 | 		private void expectNot(string msg)
1321 | 		{
1322 | 			string tmpl = isPrintable(*m_text)
1323 | 				? "Character '%s' %s."
1324 | 				: "Byte 0x%02x %s.";
1325 | 			handleError(format(tmpl, *m_text, msg));
1326 | 		}
1327 | 
1328 | 
1329 | 		@noinline
1330 | 		private void expectImpl(char c, string msg)
1331 | 		{
1332 | 			string tmpl = isPrintable(*m_text)
1333 | 				? "Expected '%s', but found '%s' %s."
1334 | 				: "Expected '%s', but found byte 0x%02x %s.";
1335 | 			handleError(format(tmpl, c, *m_text, msg));
1336 | 		}
1337 | 
1338 | 
1339 | 		@noinline
1340 | 		private void handleError(string msg)
1341 | 		{
1342 | 			import fast.unicode;
1343 | 
1344 | 			size_t line;
1345 | 			const(char)* p    = m_start;
1346 | 			const(char)* last = m_start;
1347 | 			while (p < m_text)
1348 | 			{
1349 | 				last = p;
1350 | 				p.skipToNextLine();
1351 | 				line++;
1352 | 			}
1353 | 			line += p is m_text;
1354 | 			size_t column = last[0 .. m_text - last].countGraphemes() + 1;
1355 | 			
1356 | 			throw new JSONException(msg, line.to!int, column.to!int);
1357 | 		}
1358 | 	}
1359 | 
1360 | 
	/// Advances the read pointer by one character and then skips whitespace
	/// as `skipWhitespace!skipInter` does.
	@forceinline @nogc pure nothrow
	private void skipOnePlusWhitespace(bool skipInter)()
	{
		m_text++;
		skipWhitespace!skipInter();
	}
1367 | 
1368 | 
	/// Skips whitespace at the read pointer. With `skipInter` set, the separators
	/// ',' and ':' are skipped along with tab, line feed, carriage return and space.
	@forceinline @nogc pure nothrow
	private void skipWhitespace(bool skipInter)()
	{
		static if (skipInter)
			m_text.skipAllOf!"\t\n\r ,:";
		else
			m_text.skipAsciiWhitespace();
	}
1377 | 
1378 | 
1379 | 	private static struct SingleKey
1380 | 	{
1381 | 		alias json this;
1382 | 
1383 | 		private Json* m_pjson;
1384 | 		private const(char*) m_oldPos;
1385 | 
1386 | 		@safe @nogc pure nothrow
1387 | 		@property ref Json json()
1388 | 		{
1389 | 			return *m_pjson;
1390 | 		}
1391 | 
1392 | 		this(ref Json json)
1393 | 		{
1394 | 			m_pjson = &json;
1395 | 			m_oldPos = json.m_text;
1396 | 		}
1397 | 
1398 | 		~this()
1399 | 		{
1400 | 			static if (isValidateAll)
1401 | 			{
1402 | 				if (*json.m_text != '}')
1403 | 				{
1404 | 					if (m_oldPos !is json.m_text)
1405 | 					{
1406 | 						json.expect(',', "after key-value pair");
1407 | 						json.skipWhitespace!false();
1408 | 					}
1409 | 					while (true)
1410 | 					{
1411 | 						json.skipString!false();
1412 | 						json.expect(':', "between key and value");
1413 | 						json.skipWhitespace!false();
1414 | 						json.skipValueImpl!false();
1415 | 
1416 | 						if (*json.m_text == '}')
1417 | 							break;
1418 | 
1419 | 						json.expect(',', "after key-value pair");
1420 | 						json.skipWhitespace!false();
1421 | 					}
1422 | 				}
1423 | 			}
1424 | 			else
1425 | 			{
1426 | 				json.seekObjectEnd();
1427 | 			}
1428 | 			json.unnest();
1429 | 		}
1430 | 	}
1431 | 
1432 | 
1433 | 	private static struct File
1434 | 	{
1435 | 		alias m_json this;
1436 | 		
1437 | 		Json m_json;
1438 | 		private size_t m_len;
1439 | 		private bool m_isMapping;
1440 | 		
1441 | 		@disable this();
1442 | 		@disable this(this);
1443 | 		
1444 | 		this(const Filename fname)
1445 | 		{
1446 | 			version (Posix)
1447 | 			{
1448 | 				import core.sys.posix.fcntl;
1449 | 				import core.sys.posix.sys.mman;
1450 | 				import core.sys.posix.unistd;
1451 | 
1452 | 				version (CRuntime_Glibc)
1453 | 					enum O_CLOEXEC = octal!2000000;
1454 | 				else version (OSX)  // Requires at least OS X 10.7 Lion
1455 | 					enum O_CLOEXEC = 0x1000000;
1456 | 				else version(FreeBSD) 
1457 | 					enum O_CLOEXEC = octal!2000000;
1458 | 				else static assert(0, "Not implemented");
1459 | 				
1460 | 				int fd = { return open(charPtr!fname, O_RDONLY | O_NOCTTY | O_CLOEXEC); }();
1461 | 				assert(fcntl(fd, F_GETFD) & FD_CLOEXEC, "Could not set O_CLOEXEC.");
1462 | 				
1463 | 				if (fd == -1)
1464 | 					throw new ErrnoException("Could not open JSON file for reading.");
1465 | 				scope(exit) close(fd);
1466 | 				
1467 | 				// Get the file size
1468 | 				stat_t info;
1469 | 				if (fstat(fd, &info) == -1)
1470 | 					throw new ErrnoException("Could not get JSON file size.");
1471 | 
1472 | 				// Ensure we have 16 extra bytes
1473 | 				size_t pagesize = sysconf(_SC_PAGESIZE);
1474 | 				ulong fsize = ulong(info.st_size + pagesize - 1) / pagesize * pagesize;
1475 | 				bool zeroPage = fsize < info.st_size + 16;
1476 | 				if (zeroPage)
1477 | 					fsize += pagesize;
1478 | 				if (fsize > size_t.max)
1479 | 					throw new Exception("JSON file too large to be mapped in RAM.");
1480 | 				m_len = cast(size_t) fsize;
1481 | 				
1482 | 				// Map the file
1483 | 				void* mapping = mmap(null, m_len, PROT_READ, MAP_PRIVATE, fd, 0);
1484 | 				if (mapping == MAP_FAILED)
1485 | 					throw new ErrnoException("Could not map JSON file.");
1486 | 				scope(failure)
1487 | 					munmap(mapping, m_len);
1488 | 
1489 | 				// Get a zero-page up behind the JSON text
1490 | 				if (zeroPage)
1491 | 				{
1492 | 					void* offs = mapping + m_len - pagesize;
1493 | 					if (mmap(offs, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0) == MAP_FAILED)
1494 | 						throw new ErrnoException("Could not map zero-page behind JSON text.");
1495 | 				}
1496 | 
1497 | 				// Initialize the parser on the JSON text
1498 | 				m_json = Json((cast(char*) mapping)[0 .. cast(size_t) info.st_size], No.simdPrep);
1499 | 			}
1500 | 			else version (Windows)
1501 | 			{
1502 | 				import core.sys.windows.winnt;
1503 | 				import core.sys.windows.winbase;
1504 | 
1505 | 				HANDLE hnd = { return CreateFileW( wcharPtr!fname, GENERIC_READ, FILE_SHARE_READ, null,
1506 | 						OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, null ); }();
1507 | 
1508 | 				if (hnd == INVALID_HANDLE_VALUE)
1509 | 					throw new FileException("Could not open JSON file for reading.");
1510 | 				scope(exit)
1511 | 					CloseHandle( hnd );
1512 | 
1513 | 				// Get the file size
1514 | 				LARGE_INTEGER fileSize = void;
1515 | 				if (!GetFileSizeEx( hnd, &fileSize ))
1516 | 					throw new Exception("Could not get JSON file size.");
1517 | 
1518 | 				// Map the file
1519 | 				HANDLE mapping = CreateFileMapping( hnd, null, PAGE_READONLY, fileSize.HighPart, fileSize.LowPart, null );
1520 | 				if (mapping == INVALID_HANDLE_VALUE)
1521 | 					throw new Exception("Could not create file mapping for JSON file.");
1522 | 				scope(exit) CloseHandle( mapping );
1523 | 
1524 | 				// View the mapping
1525 | 				void* view = MapViewOfFile( mapping, FILE_MAP_READ, 0, 0, 0 );
1526 | 				if (view is null)
1527 | 					throw new Exception("Could not map view of JSON file.");
1528 | 				scope(failure)
1529 | 					UnmapViewOfFile( view );
1530 | 				
1531 | 				// Missing 64-bit version in druntime (2.071)
1532 | 				version (X86_64) struct MEMORY_BASIC_INFORMATION {
1533 | 					PVOID     BaseAddress;
1534 | 					PVOID     AllocationBase;
1535 | 					DWORD     AllocationProtect;
1536 | 					DWORD     __alignment1;
1537 | 					ULONGLONG RegionSize;
1538 | 					DWORD     State;
1539 | 					DWORD     Protect;
1540 | 					DWORD     Type;
1541 | 					DWORD     __alignment2;
1542 | 				}
1543 | 				
1544 | 				// Check if the view is 16 bytes larger than the file
1545 | 				MEMORY_BASIC_INFORMATION query = void;
1546 | 				if (!VirtualQuery( view, cast(PMEMORY_BASIC_INFORMATION)&query, query.sizeof ))
1547 | 					throw new Exception("VirtualQuery failed.");
1548 | 				
1549 | 				// Initialize the parser on the JSON text
1550 | 				char[] slice = (cast(char*) view)[0 .. cast(size_t)fileSize.QuadPart];
1551 | 				if (query.RegionSize >= fileSize.QuadPart + 16)
1552 | 				{
1553 | 					m_json = Json(slice, No.simdPrep);
1554 | 					m_isMapping = true;
1555 | 				}
1556 | 				else
1557 | 				{
1558 | 					m_json = Json(slice, Yes.simdPrep);
1559 | 					UnmapViewOfFile( view );
1560 | 				}
1561 | 			}
1562 | 			else static assert(0, "Not implemented");
1563 | 		}
1564 | 
1565 | 
1566 | 		this(const(char)[] fname)
1567 | 		{
1568 | 			import std.string;
1569 | 
1570 | 			version (Posix)
1571 | 				this( fname.representation );
1572 | 			else version (Windows)
1573 | 			{
1574 | 				import core.stdc.stdlib;
1575 | 				auto buf = cast(wchar*)alloca(string2wstringSize(fname));
1576 | 				auto fnameW = string2wstring(fname, buf);
1577 | 				this( fnameW.representation );
1578 | 			}
1579 | 			else static assert(0, "Not implemented");
1580 | 		}
1581 | 
1582 | 
1583 | 		nothrow
1584 | 		~this()
1585 | 		{
1586 | 			version (Posix)
1587 | 			{
1588 | 				import core.sys.posix.sys.mman;
1589 | 				munmap(cast(void*)m_json.m_start, m_len);
1590 | 			}
1591 | 			else version (Windows)
1592 | 			{
1593 | 				import core.sys.windows.winnt;
1594 | 				import core.sys.windows.winbase;
1595 | 				if (m_isMapping)
1596 | 					UnmapViewOfFile( cast(LPCVOID)m_json.m_start );
1597 | 			}
1598 | 			else static assert(0, "Not implemented");
1599 | 		}
1600 | 	}
1601 | }
1602 | 
1603 | 
/*
 * Builds a compile-time tuple of `Remap` values for type `T`, pairing each enum
 * member (for enums) or field name (otherwise) with the name used for it in JSON.
 *
 * The JSON name is taken from the `map` of the `JsonMapping` attribute on `T`
 * when it contains the member's name as key; otherwise the member's own name
 * is used.
 */
private template buildRemapTable(T)
{
	import std.typetuple;
	import fast.internal.helpers;

	static if (is(T == enum))
	{
		// Enums map the enum value itself to its JSON string.
		struct Remap { T d; string json; }
		enum members = EnumMembers!T;
	}
	else
	{
		// Other types map the field name to its JSON key.
		struct Remap { string d; string json; }
		enum members = FieldNameTuple!T;
	}
	enum mapping = getUDA!(T, JsonMapping).map;

	// Produces the entries for members[a .. b] by splitting the range in halves
	// until single members remain.
	template Impl(size_t a, size_t b)
	{
		static if (b - a > 1)
		{
			alias Impl = TypeTuple!(Impl!(a, (b + a) / 2), Impl!((b + a) / 2, b));
		}
		else static if (b - a == 1)
		{
			// Lookup key: the member's name as a string.
			static if (is(T == enum))
				enum key = members[a].to!string;
			else
				alias key = members[a];
			// Use the remapped name if one is given, the plain name otherwise.
			static if ((key in mapping) !is null)
				enum mapped = mapping[key];
			else
				alias mapped = key;
			alias Impl = TypeTuple!(Remap(members[a], mapped));
		}
		else alias Impl = TypeTuple!();
	}

	alias buildRemapTable = Impl!(0, members.length);
}
1644 | 
1645 | 
// Runs the JSON files in `test/` through a fully validating parser and checks
// deserialization into a struct covering the supported field types.
unittest
{
	// Tally of how often each kind of JSON element was encountered.
	struct Counter
	{
		size_t array, object, key, string, number, boolean, null_;
	}

	// Recursively walks the value that is up next and counts what it finds.
	void valueHandler(ref Json!validateAll.File json, ref Counter ctr)
	{
		with (DataType) final switch (json.peek)
		{
			case array:
				ctr.array++;
				foreach (_; json)
					valueHandler(json, ctr);
				break;
			case object:
				ctr.object++;
				foreach(key; json.byKey)
				{
					ctr.key++;
					valueHandler(json, ctr);
				}
				break;
			case string:
				ctr.string++;
				json.skipValue();
				break;
			case number:
				ctr.number++;
				json.skipValue();
				break;
			case boolean:
				ctr.boolean++;
				json.skipValue();
				break;
			case null_:
				ctr.null_++;
				json.skipValue();
				break;
		}
	}

	// The file must parse and yield exactly the element counts in `valid`.
	void passFile(string fname, Counter valid)
	{
		auto json = parseJSONFile!validateAll(fname);
		Counter ctr;
		valueHandler(json, ctr);
		assert(ctr == valid, fname);
	}

	// Walking the file must throw a JSONException.
	void failFile(string fname)
	{
		auto json = parseJSONFile!validateAll(fname);
		Counter ctr;
		assertThrown!JSONException(valueHandler(json, ctr), fname);
	}

	// Tests that need to pass according to RFC 7159
	// (fail1.json and fail18.json are expected to parse here despite their names.)
	passFile("test/pass1.json",  Counter( 6,  4, 33, 21, 32,  4,  2));
	passFile("test/pass2.json",  Counter(19,  0,  0,  1,  0,  0,  0));
	passFile("test/pass3.json",  Counter( 0,  2,  3,  2,  0,  0,  0));
	passFile("test/fail1.json",  Counter( 0,  0,  0,  1,  0,  0,  0));
	passFile("test/fail18.json", Counter(20,  0,  0,  1,  0,  0,  0));

	// Tests that need to fail
	foreach (i; chain(iota(2, 18), iota(19, 34)))
		failFile("test/fail" ~ i.to!string ~ ".json");

	// Deserialization
	struct Test
	{
		string text1;
		string text2;
		string text3;
		double dbl = 0;
		float flt = 0;
		ulong ul;
		uint ui;
		ushort us;
		ubyte ub;
		long lm, lp;
		int im, ip;
		short sm, sp;
		byte bm, bp;
		bool t, f;
		Test* tp1, tp2;
		int[2] sa;
		int[] da;
		Test[string] aa;
		SearchPolicy e;
	}

	// Reference value built in D ...
	Test t1 = {
		text1 : "abcde",
		text2 : "",
		text3 : null,
		dbl   : 1.1,
		flt   : -1.1,
		ul    : ulong.max,
		ui    : uint.max,
		us    : ushort.max,
		ub    : ubyte.max,
		lm    : long.min,
		lp    : long.max,
		im    : int.min,
		ip    : int.max,
		sm    : short.min,
		sp    : short.max,
		bm    : byte.min,
		bp    : byte.max,
		t     : true,
		f     : false,
		tp1   : null,
		tp2   : new Test("This is", "a", "test."),
		sa    : [ 33, 44 ],
		da    : [ 5, 6, 7 ],
		aa    : [ "hash" : Test("x", "y", "z") ],
		e     : SearchPolicy.linear
	};
	// ... and the same value read from JSON text.
	Test t2 = parseJSON(`{
		"text1" : "abcde",
		"text2" : "",
		"text3" : null,
		"dbl"   : 1.1,
		"flt"   : -1.1,
		"ul"    : ` ~ ulong.max.to!string ~ `,
		"ui"    : ` ~ uint.max.to!string ~ `,
		"us"    : ` ~ ushort.max.to!string ~ `,
		"ub"    : ` ~ ubyte.max.to!string ~ `,
		"lm"    : ` ~ long.min.to!string ~ `,
		"lp"    : ` ~ long.max.to!string ~ `,
		"im"    : ` ~ int.min.to!string ~ `,
		"ip"    : ` ~ int.max.to!string ~ `,
		"sm"    : ` ~ short.min.to!string ~ `,
		"sp"    : ` ~ short.max.to!string ~ `,
		"bm"    : ` ~ byte.min.to!string ~ `,
		"bp"    : ` ~ byte.max.to!string ~ `,
		"t"     : true,
		"f"     : false,
		"tp1"   : null,
		"tp2"   : { "text1": "This is", "text2": "a", "text3": "test." },
		"sa"    : [ 33, 44 ],
		"da"    : [ 5, 6, 7 ],
		"aa"    : { "hash" : { "text1":"x", "text2":"y", "text3":"z" } },
		"e"     : "linear"
	}`).read!Test;

	// Compare the reference-typed members by content first, then make them
	// share the same references before comparing the structs as a whole.
	assert(t2.tp2 && *t1.tp2 == *t2.tp2);
	assert(t1.da == t2.da);
	assert(t1.aa == t2.aa);
	t2.tp2 = t1.tp2;
	t2.da = t1.da;
	t2.aa = t1.aa;
	assert(t1 == t2);
}
1802 | 
// Test case for Issue #4
// Reading a JSON `null` as a string must yield a null string.
unittest
{
	auto str = `{"initiator_carrier_code":null,"a":"b"}`;
	auto js = parseTrustedJSON(str);
	foreach(key; js.byKey)
	{
		if(key == "initiator_carrier_code")
		{
			auto t = js.read!string;
			assert(t is null);
		}
	}
}
1817 | 
// Test case for Issue #5
// A string with multi-byte UTF-8 sequences that passes std.utf's `validate`
// is run through `validateJSON` as well.
unittest
{
	import std.utf;
	auto str = `{"a":"SΛNNO𐍈€한"}`;
	str.validate;
	validateJSON(str);
}
1826 | 


--------------------------------------------------------------------------------
/source/fast/parsing.d:
--------------------------------------------------------------------------------
  1 | ﻿/***************************************************************************************************
  2 |  * 
  3 |  * Text parsing functionality.
  4 |  * 
  5 |  * Authors:
  6 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  7 |  * 
  8 |  * Copyright:
  9 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 10 |  * 
 11 |  * License:
 12 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 13 |  * 
 14 |  **************************************************************************************************/
 15 | module fast.parsing;
 16 | 
 17 | import std.traits;
 18 | import fast.internal.sysdef;
 19 | 
 20 | 
 21 | /+
 22 |  ╔══════════════════════════════════════════════════════════════════════════════
 23 |  ║ ⚑ Hexadecimal
 24 |  ╚══════════════════════════════════════════════════════════════════════════════
 25 |  +/
 26 | 
 27 | /*******************************************************************************
 28 |  * 
 29 |  * Decodes a single hexadecimal character.
 30 |  *
 31 |  * Params:
 32 |  *   c = The hexadecimal digit.
 33 |  *
 34 |  * Returns:
 35 |  *   `c` converted to an integer.
 36 |  *
 37 |  **************************************/
 38 | @safe @nogc pure nothrow
 39 | uint hexDecode(char c)
 40 | {
 41 | 	return c + 9 * (c >> 6) & 15;
 42 | }
 43 | 
 44 | 
 45 | @nogc pure nothrow
 46 | uint hexDecode4(ref const(char)* hex)
 47 | {
 48 | 	uint x = *cast(uint*) &hex;
 49 | 	hex += 4;
 50 | 	x = (x & 0x0F0F0F0F) + 9 * (x >> 6 & 0x01010101);
 51 | 	version (LittleEndian)
 52 | 	{
 53 | 		return x >> 24 | x >> 12 & 0xF0 | x & 0xF00 | x << 12 & 0xF000;
 54 | 	}
 55 | 	else
 56 | 	{
 57 | 		x = (x | x >> 4) & 0x00FF00FF;
 58 | 		return (x | x >> 8) & 0x0000FFFF;
 59 | 	}
 60 | }
 61 | 
 62 | 
 63 | @nogc pure nothrow
 64 | inout(char)* hexDecode4(ref inout(char)* hex, out uint result)
 65 | {
 66 | 	foreach (i; 0 .. 4)
 67 | 	{
 68 | 		result *= 16;
 69 | 		char ch = cast(char) (hex[i] - '0');
 70 | 		if (ch <= 9)
 71 | 		{
 72 | 			result += ch;
 73 | 		}
 74 | 		else
 75 | 		{
 76 | 			ch = cast(char) ((ch | 0x20) - 0x31);
 77 | 			if (ch <= 5)
 78 | 				result += ch + 10;
 79 | 			else
 80 | 				return hex + i;
 81 | 		}
 82 | 	}
 83 | 	hex += 4;
 84 | 	return null;
 85 | }
// Mixed-case digits must decode to the expected 16-bit value (validating overload).
unittest
{
	string x = "aF09";
	const(char)* p = x.ptr;
	uint result;
	hexDecode4(p, result);
	assert(result == 0xAF09);
}
 94 | 
 95 | 
 96 | /+
 97 |  ╔══════════════════════════════════════════════════════════════════════════════
 98 |  ║ ⚑ Numbers
 99 |  ╚══════════════════════════════════════════════════════════════════════════════
100 |  +/
101 | 
102 | 
/// Options for `parseNumber`. Passed as a template argument, so the choice is
/// made at compile time.
struct NumberOptions
{
	/// Allows the minus sign as the first character and thus negative numbers.
	bool minus;
}
109 | 
110 | 
/*******************************************************************************
 * 
 * Parse a number from a character read pointer.
 * 
 * On success, the read pointer is set behind the number. On failure both the
 * read pointer and `n` are left untouched.
 *
 * The accepted notation is an optional minus sign (if enabled), an integral
 * part, an optional fraction and an optional exponent introduced by 'e' or 'E'.
 *
 * Params:
 *   opt = Selects features for the implementation. Fewer features make the
 *         parser faster.
 *   str = The read pointer.
 *   n = A reference to a number to be overwritten with the result.
 *
 * Returns:
 *   An indication of success. Typically the function fails when a number cannot
 *   be stored in an integer of the given size or invalid characters are
 *   encountered. Negative values are rejected for unsigned `N`. Some inputs
 *   with very long significands or exponents are rejected as well, because the
 *   code paths for them are not implemented.
 *
 **************************************/
@nogc pure nothrow
bool parseNumber(NumberOptions opt, N)(ref const(char)* str, ref N n) if (isNumeric!N)
{
	import fast.internal.helpers;
	import std.range;

	// Integer types larger than the mantissa of N.
	static if (N.sizeof <= size_t.sizeof)
	{
		alias U = size_t;
		alias I = ptrdiff_t;
	}
	else
	{
		alias U = ulong;
		alias I = long;
	}
	
	// Largest value of type U that can be multiplied by 10 and have a digit added without overflow.
	enum canHoldOneMoreDigit = (U.max - 9) / 10;
	// The same limit for the exponent, which is accumulated in a size_t regardless of U.
	enum expCanHoldOneMoreDigit = (size_t.max - 9) / 10;
	static if (isFloatingPoint!N)
	{
		enum significandRightShift = 8 * U.sizeof - N.mant_dig + 1;
		enum lastSignificandBit = U(2) << 8 * U.sizeof - N.mant_dig;
		enum firstFractionBit   = U(1) << 8 * U.sizeof - N.mant_dig;
		enum remainderBits = U.max - N.mant_dig + 1;
		enum expShift = N.mant_dig - 1;
		enum expBias = N.max_exp - 1;

		// Integer type used to store an assembled bit pattern into `n`. It must not
		// be wider than N: writing a 64-bit U into a 32-bit float would overwrite
		// the memory behind it and misplace the sign bit.
		static if (N.sizeof == uint.sizeof)
			alias Bits = uint;
		else
			alias Bits = U;
	}
	
	static if (isFloatingPoint!N)
	{
		alias pow5Max = PowData!(U, 5).powMax;
		alias pow5    = PowData!(U, 5).pows;

		// Largest power of 10 that fits into a float of type N. The factor 5 here is correct, as the 2s
		// go in as an increment in the exponent, that is neglectable here.
		enum pow10MaxF = {
			U v = 1; uint exp;
			while (v <= ((U(1) << N.mant_dig) - 1) / 5) { v *= 5; exp++; }
			return exp;
		}();

		static immutable N[pow10MaxF] pow10F = N(10).recurrence!((a, n) => 10 * a[n-1]).take(pow10MaxF).array;
	}
	else
	{
		alias pow10Max = PowData!(U, 10).powMax;
		alias pow10    = PowData!(U, 10).pows;
	}

	const(char)* p = str;
	const(char)* point = null;
	U significand = 0;
	size_t exponent = 0;
	size_t expAdjust = void;
	// Initialized, because it is inspected below even when the text has no exponent part.
	bool expSign = false;
	static if (isFloatingPoint!N)
	{
		U exp2 = void;
		bool roundUp = false;
	}
	
	/////////////////// SIGN BIT HANDLING ///////////////////
	
	// Check for the sign.
	static if (opt.minus)
	{
		bool sign = (*p == '-');
		if (sign)
			p++;
	}
	
	/////////////////// INTEGRAL PART OF SIGNIFICAND ///////////////////
	
	uint digit = *p - '0';
	if (digit == 0)
	{
		// We have a single zero.
		p++;
	}
	else if (digit <= 9)
	{
		// Regular case of one or more digits.
		do
		{
			if (significand > canHoldOneMoreDigit)
				goto BigMantissa;
		BigMantissaNotSoMuch:
			significand = 10 * significand + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	else return false;
	
	/////////////////// FRACTIONAL PART OF SIGNIFICAND ///////////////////
	
	if (*p == '.')
	{
		point = ++p;
		digit = *p - '0';
		if (digit > 9)
			return false;
		do
		{
			// NOTE(review): BigMantissa may jump back into the integral loop above,
			// which then consumes the remaining fraction digits.
			if (significand > canHoldOneMoreDigit)
				goto BigMantissa;
			significand = 10 * significand + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	
	/////////////////// EXPONENT HANDLING ///////////////////

	// Every fraction digit was added to the significand as if it were an
	// integral digit, which has to be compensated in the exponent.
	expAdjust = (point is null) ? 0 : p - point;
	if ((*p | 0x20) == 'e')
	{
		p++;
		expSign = (*p == '-');
		if (expSign || *p == '+')
			p++;
		digit = *p - '0';
		if (digit > 9)
			return false;
		do
		{
			if (exponent > expCanHoldOneMoreDigit)
				goto BigExponent;
			exponent = 10 * exponent + digit;
			digit = *++p - '0';
		}
		while (digit <= 9);
	}
	
	if (expAdjust)
	{
		if (expSign)
		{
			if (exponent > size_t.max - expAdjust)
				goto BigExponentAdjustForDecimalPoint;
			exponent += expAdjust;
		}
		else if (exponent >= expAdjust)
		{
			exponent -= expAdjust;
		}
		else
		{
			// Amount of fraction digits turns exponent from positive to negative.
			expAdjust -= exponent;
			exponent = expAdjust;
			expSign = true;
		}
	}

	/////////////////// RESULT ASSEMBLY ///////////////////

	static if (isFloatingPoint!N)
	{
		if (significand == 0 || exponent == 0)
		{
			// The significand is the unsigned result. Negate it afterwards when a
			// minus sign was seen, which also produces -0.0.
			n = N(significand);
			static if (opt.minus)
				if (sign)
					n = -n;
			str = p;
			return true;
		}

		// Try the floating-point fast path: The significand's bits, as well as the 10^x exponent can be expressed
		// accurately as a float of type N. We just need to divide or multiply them based on the signedness of the
		// exponent.
		exp2 = bsr(significand);
		if (exp2 - bsf(significand) < N.mant_dig && exponent <= pow10MaxF)
		{
			N b = pow10F[exponent - 1];
			static if (opt.minus)
				if (sign)
					b = -b;
			n = expSign ? significand / b : significand * b;
			str = p;
			return true;
		}
		else if (exponent <= pow5Max)
		{
			// Special case, mostly to handle the little bit of extra precision that comes from
			// converting a double to its string representation. The last base-10 digit doesn't quite
			// fit back into a double, but we don't need to resort to arbitrary precision math just yet.
			if (expSign)
			{
				U divisor = pow5[exponent - 1];
				static if (isAMD64 && (isLDC || isGDC))
				{
					// AMD64 can divide 128-bit numbers by 64-bit numbers directly.
					size_t expDivisor = clz(divisor);
					divisor <<= expDivisor;
					exp2 = expDivisor - exponent - bigDiv(significand, divisor);
					significand <<= 1;
				}
				else
				{
					// We perform an iterative division.
					U dividend = significand << 8 * U.sizeof - 1 - exp2;
					U quotient = dividend / divisor;
					dividend %= divisor;

					size_t lzs = clz(quotient);
					exp2 -= exponent + lzs;
					significand = quotient << ++lzs;
					size_t accuracy = 8 * U.sizeof - lzs;
					while (accuracy < N.mant_dig)
					{
						lzs = clz(dividend);
						dividend <<= lzs;
						quotient = dividend / divisor;
						dividend %= divisor;
						significand |= quotient << (8 * U.sizeof - lzs) >> accuracy;
						accuracy += lzs;
					}
				}

				// Assemble floating point value from bits.
				roundUp = (significand & firstFractionBit) != 0;
				significand >>= significandRightShift;
				if (roundUp)
				{
					significand++;
					significand &= ~(U(1) << N.mant_dig - 1);
					if (significand == 0)
						++exp2;
				}

				// Store through an integer of N's own width (see `Bits` above).
				Bits bits = cast(Bits) (exp2 + expBias << expShift | significand);
				static if (opt.minus)
					bits |= Bits(sign) << Bits.sizeof * 8 - 1;
				*cast(Bits*) &n = bits;
				str = p;
				return true;
			}
			else assert(0, "Not implemented");
		}
		else assert(0, "Not implemented");
	}
	else
	{
		import fast.intmath;

		if (exponent && significand)
		{
			// 10^exponent does not fit into U. With a non-zero significand the
			// result is either a fraction (negative exponent) or too large
			// (positive exponent), and the table below has no entry for it.
			if (exponent > pow10Max)
				return false;

			// We need to account for the exponent.
			U pow = pow10[exponent - 1];
			if (expSign)
			{
				// Negative exponent, if we get a fractional result, abort.
				if (significand % pow)
					return false;
				significand /= pow;
			}
			else static if (U.sizeof < ulong.sizeof)
			{
				// Multiply using a bigger result type
				ulong prod = ulong(significand) * pow;
				if (prod > U.max)
					return false;
				significand = cast(U) prod;
			}
			else
			{
				// If the multiply will overflow, abort.
				bool overflowed;
				significand = mulu(significand, pow, overflowed);
				if (overflowed)
					return false;
			}
		}

		static if (isSigned!N && opt.minus)
		{
			// The negative range reaches one further than the positive range.
			if (significand > U(N.max) + sign)
				return false;
			n = cast(N) significand;
			if (sign)
				n = cast(N)-n;
		}
		else
		{
			if (significand > N.max)
				return false;
			// An unsigned target cannot hold a negative value; "-0" is still fine.
			static if (opt.minus)
				if (sign && significand != 0)
					return false;
			n = cast(N) significand;
		}
		str = p;
		return true;
	}

BigMantissa:
	// One more digit may still fit, depending on the digit's value.
	if (significand <= (significand.max - digit) / 10)
		goto BigMantissaNotSoMuch;
//	assert(0, "Not implemented");

BigExponent:
//	assert(0, "Not implemented");

BigExponentAdjustForDecimalPoint:
//	assert(0, "Not implemented");
	return false;
}
434 | 
435 | 
/// Compile-time data about the powers of `base` that fit into the integer type `U`.
private template PowData(U, U base)
{
	// Highest exponent `e` for which `base^e` is still representable in `U`.
	enum powMax = {
		uint count = 0;
		for (U power = 1; power <= U.max / base; power *= base)
			count++;
		return count;
	}();

	// Lookup table holding `base^1 .. base^powMax`: index `i` stores `base^(i+1)`, `base^0` is left out.
	static immutable U[powMax] pows = {
		U[powMax] table;
		U power = 1;
		foreach (ref entry; table)
		{
			power *= base;
			entry = power;
		}
		return table;
	}();
}
446 | 
447 | 
// The double-width division below only exists for x86-64 with the GCC-style inline assembler of LDC or GDC.
static if (isAMD64 && (isLDC || isGDC))
{
	/*
	 * Divides the double-width value `a:0` (that is `a` in RDX and zero in RAX) by
	 * `b` and stores the quotient back into `a`.
	 *
	 * Before dividing, `a` is shifted left until its highest bit is set. If the
	 * shifted value is not smaller than `b` it is shifted back right by one so
	 * that the upper half of the dividend is smaller than the divisor.
	 *
	 * Params:
	 *   a = on entry the dividend, on exit the quotient
	 *   b = the divisor; the in-contract requires its highest bit to be set
	 *
	 * Returns:
	 *   The net left shift that was applied to `a` before the division, plus one.
	 *
	 * NOTE(review): the result for `a == 0` depends on what `clz(0)` yields - confirm.
	 */
	@nogc pure nothrow
	private size_t bigDiv(ref size_t a, size_t b)
	in
	{
		assert(b > size_t.max / 2, "High bit of divisor must be set.");
	}
	body
	{
		// Make sure that the division will yield exactly 32 or 64 significant bits.
		import fast.internal.helpers;
		size_t lza = clz(a);
		version (LDC)
		{
			import ldc.llvmasm;
			a <<= lza;
			// Keep the high half of the dividend below the divisor.
			if (a >= b) { a >>= 1; lza--; }
			// Constraints: result in RAX, `a` is passed in RDX, `b` in a register or memory ($2).
			a = __asm!ulong("
				xor %rax, %rax
				divq $2
				", "={rax},{rdx},rm", a, b);
		}
		else version (GNU)
		{
			size_t dividend = a << lza;
			// Keep the high half of the dividend below the divisor.
			if (dividend >= b) { dividend >>= 1; lza--; }
			// RAX receives the quotient (written to `a`); RDX is both input and output and is stored to `dividend`.
			asm { "
				xor %%rax, %%rax
				divq %3
				" : "=&a" a, "=d" dividend : "d" dividend, "rm" b; }
		}
		// An `lza` that wrapped around in the decrement above is brought back to 0 here.
		return ++lza;
	}
	
	// Divides size_t.max / 11 by size_t.max / 5 and checks quotient bits and resulting base-2 exponent.
	unittest
	{
		size_t a = size_t.max / 11;
		size_t b = size_t.max / 5;
		version (X86_64)
		{
			import fast.internal.helpers;
			long exp = clz(b);   // Positive base-2 exponent
			// Normalize the divisor to satisfy the in-contract of bigDiv.
			b <<= exp;
			exp -= bigDiv(a, b);
			assert(a == 0xE8BA2E8BA2E8BA2AUL);
			assert(exp == -2);
		}
	}
}
498 | 
499 | 
500 | /+
501 |  ╔══════════════════════════════════════════════════════════════════════════════
502 |  ║ ⚑ String Scanning and Comparison
503 |  ╚══════════════════════════════════════════════════════════════════════════════
504 |  +/
505 | 
/*******************************************************************************
 * 
 * Compares a string of unknown length against a statically known key.
 * 
 * This function also handles escapes and requires one or more terminator chars.
 *
 * Params:
 *   C = Character type (code unit width) of key, terminators and string.
 *   key = The static key string.
 *   terminators = A list of code units that terminate the string.
 *   special = A list of code units that are handled by the user callback. Use
 *             this for escape string handling. Default is `null`.
 *   p_str = Pointer to the string for the comparison. After the function call
 *           it will be behind the last matching character.
 *   callback = User callback to handle special escape characters if `special`
 *              is non-empty. It receives the current key and string pointers
 *              by reference and returns `false` to reject the match. It is
 *              called again for as long as the string pointer rests on a
 *              special code unit, so it is expected to move the pointers
 *              forward.
 *
 * Returns:
 *   -1 if a terminator was hit before the key was exhausted, 0 if the string
 *   differs from the key (mismatching code unit, rejected by the callback, or
 *   the string continues after the key), 1 if the key matched and is directly
 *   followed by a terminator.
 *
 **************************************/
int fixedTermStrCmp(C, immutable C[] key, immutable C[] terminators, immutable C[] special = null)
	(ref const(C)* p_str, scope bool delegate(ref immutable(C)*, ref const(C)*) callback = null)
in
{
	assert(special.length == 0 || callback !is null);
}
body
{
	import std.algorithm, std.range;
	
	// Per code unit class: -1 = terminator, 1 = special (callback), 0 = ordinary.
	static immutable byte[256] classify =
		iota(256).map!(c => terminators.canFind(c) ? byte(-1) : special.canFind(c) ? 1 : 0).array;
	
	immutable(C)* p_key = key.ptr;
	immutable C* e_key = p_key + key.length;
	
	while (p_key !is e_key)
	{
		// Code units beyond the table are always ordinary.
		int clazz = *p_str <= 0xFF ? classify[*p_str] : 0;
		
		if (clazz < 0)
		{
			return clazz;
		}
		else if (clazz == 0)
		{
			if (*p_str != *p_key)
				return clazz;
			
			p_str++;
			p_key++;
		}
		else
		{
			if (!callback(p_key, p_str))
				return 0;
		}
	}
	
	// Only code units covered by the table can be terminators. Masking with 0xFF
	// would misread wide code units such as U+0122 as '"'.
	return *p_str <= 0xFF && classify[*p_str] < 0;
}
569 | 
570 | 
571 | /*
572 | @nogc nothrow
573 | void fixedStringCompareSSE4()
574 | {
575 | 	enum words     = key.length / 16;
576 | 	enum remainder = key.length % 16;
577 | 	enum contains0 = key.canFind('\0');     // For SSE4.2 string search.
578 | 	static assert(!contains0, "Not implemented");
579 | 
580 | 	size_t remaining = e - b;
581 | 	auto p = b;
582 | 
583 | 	foreach (i; staticIota!(0, words))
584 | 	{
585 | 		auto backup = p;
586 | 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
587 | 		p = backup;
588 | 		p.vpcmpistri!(char, key[16 * i .. 16 * i + 16], Operation.equalElem, Polarity.negateValid);
589 | 	}
590 | }
591 | */
592 | 
593 | 
/*******************************************************************************
 * 
 * Moves the read pointer forward to the next character that equals any of the
 * characters in `cs`.
 *
 * Params:
 *   cs = the characters to look for; must include `'\0'`, which is asserted and
 *        then removed by `sanitizeChars` at compile-time
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToAnyOf(string cs)(ref const(char)* p)
{
	vpcmpistri!(char, sanitizeChars(cs), Operation.equalAnyElem)(p);
}
599 | 
600 | 
/*******************************************************************************
 * 
 * Moves the read pointer forward to the next character that falls into any of
 * the character ranges given as start/end pairs in `cs`.
 *
 * Params:
 *   cs = the character ranges; one range must start at `'\0'`, which is
 *        asserted and then rewritten by `sanitizeRanges` at compile-time
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekToRanges(string cs)(ref const(char)* p)
{
	vpcmpistri!(char, sanitizeRanges(cs), Operation.inRanges)(p);
}
606 | 
607 | 
/*******************************************************************************
 * 
 * Looks for a character that the caller knows to be present in the stream and
 * leaves the read pointer one position behind it.
 *
 * Params:
 *   c = the character
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void seekPast(char c)(ref const(char)* p)
{
	// Compare every position of a 16 byte block against `c`.
	vpcmpistri!(char, c.repeat(16).to!string, Operation.equalElem)(p);
	// Step over the character itself.
	++p;
}
624 | 
625 | 
/*******************************************************************************
 * 
 * Advances the read pointer past all characters that lie within any of up to 8
 * character ranges. `cs` is read as pairs: the first character of a pair is
 * the start of a range, the second one its end. A character is inside a range
 * from `a` to `b` if `a <= *p <= b`.
 *
 * Params:
 *   cs = the character ranges
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipCharRanges(string cs)(ref const(char)* p)
{
	vpcmpistri!(char, cs, Operation.inRanges, Polarity.negate)(p);
}
643 | 
644 | 
/*******************************************************************************
 * 
 * Advances the read pointer past every character that is contained in `cs`.
 *
 * Params:
 *   cs = the characters to skip over
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAllOf(string cs)(ref const(char)* p)
{
	vpcmpistri!(char, cs, Operation.equalAnyElem, Polarity.negate)(p);
}
659 | 
660 | 
/*******************************************************************************
 * 
 * Advances the read pointer past ASCII white-space, namely '\t', '\r', '\n'
 * and ' '.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipAsciiWhitespace(ref const(char)* p)
{
	// Cheap path: swallow a single blank first.
	if (*p == ' ')
		++p;
	// Only characters up to ' ' can be white-space and need the full scan.
	if (*p <= ' ')
		skipAllOf!" \t\r\n"(p);
}
679 | 
680 | 
/*******************************************************************************
 * 
 * Moves the read pointer to the beginning of the following line.
 *
 * Params:
 *   p = the read pointer
 *
 **************************************/
@forceinline @nogc nothrow pure
void skipToNextLine(ref const(char)* p)
{
	// Skip everything in the ranges 0x01-0x09, 0x0B-0x0C and 0x0E-0xFF, i.e. halt at \r, \n or \0.
	vpcmpistri!(char, "\x01\x09\x0B\x0C\x0E\xFF", Operation.inRanges, Polarity.negate)(p);
	// Consume one line break: "\r", "\n" or "\r\n".
	if (*p == '\r')
		++p;
	if (*p == '\n')
		++p;
}
697 | 
698 | 
/// Compile-time helper: asserts that `cs` contains '\0' and returns the
/// remaining characters of `cs` ordered by ascending code unit value.
private enum sanitizeChars(string cs)
{
	import std.exception;

	bool sawNul = false;
	foreach (c; cs)
	{
		if (c == '\0')
		{
			sawNul = true;
			break;
		}
	}
	assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");
	
	// Collect by ascending value; starting at 1 drops the NUL characters.
	char[] ordered;
	foreach (code; 1 .. 256)
	{
		foreach (c; cs)
		{
			if (c == code)
				ordered ~= c;
		}
	}
	return ordered.assumeUnique;
}
712 | 
713 | 
/// Compile-time helper for range lists made of start/end pairs: asserts that
/// one range starts at '\0', raises such a start to '\x01' and drops ranges
/// that consist of '\0' only.
private enum sanitizeRanges(string cs)
{
	import std.exception;

	bool sawNul = false;
	char[] cleaned;
	// Walk over complete pairs; a trailing single character is ignored.
	for (size_t i = 0; i + 1 < cs.length; i += 2)
	{
		immutable lo = cs[i];
		immutable hi = cs[i + 1];
		if (lo != '\0')
		{
			cleaned ~= cs[i .. i + 2];
			continue;
		}
		sawNul = true;
		if (hi != '\0')
			cleaned ~= ['\x01', hi];
	}
	assert(sawNul, "Parsers are required to also check for \0 when looking for specific chars.");
	
	return cleaned.assumeUnique;
}
732 | 
733 | 
/// Comparison operation for `vpcmpistri`; occupies bits 2 and 3 of the `mode`
/// immediate that is handed to the `pcmpistri` instruction.
private enum Operation
{
	equalAnyElem = 0 << 2,  /// used to find characters out of a set
	inRanges     = 1 << 2,  /// used to find characters inside start/end ranges
	equalElem    = 2 << 2,  /// used for position-wise comparison
	substrPos    = 3 << 2,
}
741 | 
742 | 
/// Result polarity for `vpcmpistri`; occupies bits 4 and 5 of the `mode`
/// immediate that is handed to the `pcmpistri` instruction.
private enum Polarity
{
	keep        = 0 << 4,  /// use the comparison result as is
	negate      = 1 << 4,  /// invert the comparison result
	negateValid = 3 << 4,
}
749 | 
750 | 
/*******************************************************************************
 * 
 * Scans memory in 16 byte steps using the `pcmpistri` instruction and moves the
 * read pointer to the position reported by it.
 *
 * Every variant below runs the same loop: execute the instruction on the 16
 * bytes at the pointer, advance the pointer by 16 and repeat for as long as the
 * index returned in ECX is 16. Afterwards the last advance is undone and the
 * returned index is added to the pointer.
 *
 * Params:
 *   C = element type; selects the two lowest bits of the instruction's immediate
 *   cs = compile-time operand, made available as a SIMD value by `SIMDFromString`
 *   op = comparison operation bits of the immediate
 *   pol = polarity bits of the immediate
 *   lastIndex = if set, bit 6 of the immediate is set
 *   p = the read pointer, updated in place
 *
 **************************************/
@forceinline @nogc nothrow pure
private void vpcmpistri(C, immutable(C[]) cs, Operation op, Polarity pol = Polarity.keep, bool lastIndex = false)
	(ref const(char)* p)
		if (is(C == char) || is(C == ubyte) || is(C == wchar) || is(C == ushort) || is(C == byte) || is(C == short))
{
	import fast.internal.helpers;

	// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53712
	// Element format: 0b00 for char/ubyte, 0b01 for wchar/ushort, 0b10 for byte, 0b11 for short.
	static if (is(C == char) || is(C == ubyte))
		enum ct = 0b00;
	else static if (is(C == wchar) || is(C == ushort))
		enum ct = 0b01;
	else static if (is(C == byte))
		enum ct = 0b10;
	else
		enum ct = 0b11;
	
	// Complete immediate operand of the instruction.
	enum mode = ct | op | pol | (!!lastIndex << 6);
	
	// Pointer-sized name of the count register, needed to add the index to the pointer.
	version (X86_64)
		enum creg = "rcx";
	else version (X86)
		enum creg = "ecx";
	else static assert(0, "Not implemented");
	
	version (LDC)
	{
		import ldc.llvmasm;
		
		// Operands: $1 = pointer (same register as the result), $2 = mode, $3 = `cs` in an XMM register.
		p = __asm!(const(char*))("
			1:
			pcmpistri $2, ($1), $3
			add       $$16, $1
			cmp       $$16, %ecx
			je        1b
			sub       $$16, $1
			add       %" ~ creg ~ ", $1
			", "=r,0,K,x,~{ecx}", p, mode, SIMDFromString!cs);
	}
	else version (GNU)
	{
		// Operands: %1 = pointer (tied to output %0), %2 = mode, %3 = `cs` in an XMM register.
		asm { "
			1:
			pcmpistri %2, (%1), %3
			add       $16, %1
			cmp       $16, %%ecx
			je        1b
			sub       $16, %1
			add       %%" ~ creg ~ ", %1
			" : "=r" p : "0" p, "K" mode, "x" SIMDFromString!cs : "ecx"; }
	}
	else
	{
		// Remaining compilers: naked functions written in D's built-in inline assembler.
		alias csXMM = SIMDFromString!cs;
		version (D_InlineAsm_X86_64)
		{
			version (Posix)
			{
				// The address of the read pointer arrives in RDI.
				version (D_PIC) asm @nogc pure nothrow
				{
					naked;
					lea         RAX, csXMM;
					mov         RAX, [RAX];
					movdqu      XMM0, [RAX];
					mov         RAX, [RDI];
				L1:
					vpcmpistri  XMM0, [RAX], mode;
					add         RAX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         RAX, 16;
					add         RAX, RCX;
					mov         [RDI], RAX;
					ret;
				}
				else asm @nogc pure nothrow
				{
					naked;
					movdqa      XMM0, csXMM;
					mov         RAX, [RDI];
				L1:
					vpcmpistri  XMM0, [RAX], mode;
					add         RAX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         RAX, 16;
					add         RAX, RCX;
					mov         [RDI], RAX;
					ret;
				}
			}
			else static assert(0, "Not implemented");
		}
		else version (D_InlineAsm_X86)
		{
			version (Posix)
			{
				// The address of the read pointer arrives in EAX.
				version (D_PIC) asm @nogc pure nothrow
				{
					naked;
					mov         EDX, CS:csXMM[EBX];
					movdqu      XMM0, [EDX];
					mov         EDX, [EAX];
				L1:
					vpcmpistri  XMM0, [EDX], mode;
					add         EDX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         EDX, 16;
					add         EDX, ECX;
					mov         [EAX], EDX;
					ret;
				}
				else asm @nogc pure nothrow
				{
					naked;
					movdqa      XMM0, csXMM;
					mov         EDX, [EAX];
				L1:
					vpcmpistri  XMM0, [EDX], mode;
					add         EDX, 16;
					cmp         ECX, 16;
					je          L1;
					sub         EDX, 16;
					add         EDX, ECX;
					mov         [EAX], EDX;
					ret;
				}
			}
			else static assert(0, "Not implemented");
		}
		else static assert(0, "Not implemented");
	}
}
885 | 


--------------------------------------------------------------------------------
/source/fast/string.d:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Fast, non-allocating string functions.
  3 |  *
  4 |  * Authors:
  5 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  6 |  *
  7 |  * Copyright:
  8 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  9 |  *
 10 |  * License:
 11 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 12 |  */
 13 | module fast.string;
 14 | 
 15 | import core.bitop;
 16 | import core.simd;
 17 | import core.stdc.stdlib;
 18 | 
 19 | version (GNU) import gcc.attribute;
 20 | 
 21 | import std.algorithm;
 22 | import std.range;
 23 | import std.stdio;
 24 | import std.string;
 25 | import std.traits;
 26 | 
 27 | import fast.buffer;
 28 | 
 29 | 
 30 | /**
 31 |  * Splits a string in two around one or more compile-time known code units.
 32 |  *
 33 |  * Params:
 34 |  *   match = An expression that matches all characters around which a split should occur.
 35 |  *   str = The string to scan.
 36 |  *   before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
 37 |  *   after = The part after the split is stored here. If no character in $(D match) is found, $(D null) is returned here.
 38 |  *   splitter = If not $(D null), this pointer will receive a copy of the splitting char.
 39 |  *
 40 |  * Returns:
 41 |  *   $(D true), iff a split occured.
 42 |  */
 43 | bool split(string match)(scope inout(char[]) str, ref inout(char)[] before, ref inout(char)[] after, char* splitter = null)
 44 | {
 45 | 	immutable pos = min(str.length, SimdMatcher!match.find(str.ptr, str.ptr + str.length));
 46 | 	before = str[0 .. pos];
 47 | 	if (pos < str.length) {
 48 | 		after = str[pos+1 .. $];
 49 | 		if (splitter) *splitter = str[pos];
 50 | 		return true;
 51 | 	}
 52 | 	after = null;
 53 | 	return false;
 54 | }
 55 | 
 56 | /**
 57 |  * Similar to the overload for strings, this function works a little faster as it lacks boundary checks.
 58 |  * It assumes that one of the characters in $(D match) is actually contained in the string.
 59 |  *
 60 |  * Params:
 61 |  *   match = An expression that matches all characters around which a split should occur.
 62 |  *   ptr = The string to scan.
 63 |  *   before = The part before the split is stored here. If no character in $(D match) is found, the original string is returned here.
 64 |  *   after = The pointer to the part after the split is stored here.
 65 |  * 
 66 |  * Returns:
 67 |  *   The char that caused the split. (From $(D match).)
 68 |  */
 69 | char split(string match)(scope inout(char*) ptr, ref inout(char)[] before, ref inout(char)* after)
 70 | {
 71 | 	immutable pos = SimdMatcher!match.find(str.ptr);
 72 | 	before = ptr[0 .. pos];
 73 | 	after = ptr + pos + 1;
 74 | 	return ptr[pos];
 75 | }
 76 | 
 77 | 
 78 | /*******************************************************************************
 79 |  * 
 80 |  * Finds the first occurrence of a set of compile-time known code units in a
 81 |  * string. While the algorithm is `O(n)` in relation to the count of given code
 82 |  * units, the overhead when using it on short strings weights more for only 1 or
 83 |  * 2 code units.
 84 |  *
 85 |  * Params:
 86 |  *   match = An expression that matches all characters around which a split
 87 |  *           should occur.
 88 |  *   str = The string to search for a code unit.
 89 |  *
 90 |  * Returns:
 91 |  *   If a match is found, the index into the string is returned.
 92 |  *   Otherwise an invalid index is returned. Check with
 93 |  *   `if (result &lt; str.length)`.
 94 |  *
 95 |  * See_Also:
 96 |  *   split,
 97 |  *   $(LINK2 http://mischasan.wordpress.com/2011/11/09/the-generic-sse2-loop/,
 98 |  *           The Generic SSE2 Loop)
 99 |  *
100 |  * Example:
101 |  * ---
102 |  * // Check if there is a '/' or '\' in the string
103 |  * auto pos = str.find!(`or(=/,=\)`);
104 |  * if (pos < str.length) { }
105 |  * ---
106 |  **************************************/
107 | size_t find(string match)(in char[] str) pure nothrow
108 | {
109 | 	return SimdMatcher!match.find(str.ptr, str.ptr + str.length);
110 | }
111 | 
/*******************************************************************************
 * 
 * Same as the overload for strings, but with only a char*, making it faster as
 * it cannot do a boundary check. The result is a pointer to the match instead
 * of an index.
 *
 * Sometimes when looking for a character it is helpful to append it as a
 * sentinel to the char buffer and then use this function instead of the slower
 * one that checks the boundary constantly.
 *
 * Example:
 * ---
 * // Find a ']' in a buffer of 1024 bytes using an additional sentinel.
 * size_t length = 1024;
 * char[] buffer = new char[](length+1);
 * buffer[length] = ']';
 * auto pos = buffer.ptr.find!("=]") - buffer.ptr;
 * if (pos < length) { } // was an actual find before the sentinel
 * ---
 **************************************/
inout(char)* find(string match)(inout(char*) ptr) pure nothrow
{
	alias Matcher = SimdMatcher!match;
	return Matcher.find(ptr);
}
135 | 
136 | 
/**
 * Scalar reference implementation of the keyword comparison: checks whether `str` starts with the compile-time
 * known `key`, where `str` is text delimited by a double-quote that may contain backslash escape sequences.
 *
 * No boundary check against `str.length` is made. The comparison ends at the end of `key`, at the first mismatch or
 * at a double-quote in `str`.
 *
 * Params:
 *   key = The text to compare against.
 *   str = The text to check.
 *   mismatcher = Called with the current key and string pointers whenever a backslash is found in `str`. It is
 *     expected to decode the escape sequence, advance both pointers on success and return $(D false) if the
 *     decoded text does not match the key. If it is $(D null), any backslash in `str` counts as a mismatch.
 *
 * Returns:
 *   $(D true), iff all of `key` was matched.
 */
bool keyword1(string key)(in char[] str,
	scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
{
	auto strPtr = str.ptr;
	auto keyPtr = key.ptr;
	auto keyEnd = keyPtr + key.length;

	while (keyPtr < keyEnd)
	{
		if (*strPtr == '\\')
		{
			// Without a handler an escape sequence cannot be decoded; never call through a null pointer.
			if (mismatcher is null || !mismatcher(keyPtr, strPtr))
				return false;
			// The handler may have consumed the rest of 'key', so check the loop condition before comparing again.
			continue;
		}

		if (*strPtr == '"' || *strPtr != *keyPtr)
			return false;

		strPtr++;
		keyPtr++;
	}
	return true;
}
158 | 
159 | 
/**
 * SIMD variant of $(D keyword1): checks whether `str` starts with the compile-time known `key`, comparing one
 * aligned `Word` of `str` per loop iteration.
 *
 * Backslashes and double-quotes in `str` as well as mismatches against `key` stop the fast comparison. If that
 * happens before the end of the current `Word`, `mismatcher` gets a chance to recover; without a `mismatcher` or if
 * it returns $(D false), the result is $(D false).
 *
 * Params:
 *   key = The text to compare against.
 *   str = The text to check. Only `str.ptr` is used; the length is not checked.
 *   mismatcher = Optional handler that receives the current key and string pointers by reference.
 *
 * Returns:
 *   $(D true), iff at least `key.length` characters were matched.
 *
 * NOTE(review): `Word`, `maskEqual`, `maskNotEqual` and `or` are not declared in the visible part of this module and
 * `__builtin_ia32_pmovmskb128` is only imported for LDC - confirm where they are supposed to come from.
 * NOTE(review): the aligned load reads bytes in front of `str.ptr` and possibly behind the end of `str` - confirm
 * that callers guarantee readable memory there.
 */
bool keyword2(string key)(in char[] str,
	scope bool function(ref immutable(char)* key, ref const(char)* str) mismatcher = null)
{
	version (LDC) import ldc.gccbuiltins_x86;
	
	/* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
	 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
	 * register.
	 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
	 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
	 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
	 * - It consists of as many SIMD words as the key is long.
	 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
	 *   disjunct prime factors for the key length and the SIMD word size.
	 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
	 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
	 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
	 */
	
	// 'key' length rounded up to next odd value is the number of SIMD words we need.
	enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
	// 'key', padded with spaces (0x20) to the odd length and repeated until 'keyLenOdd' words are filled.
	align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
		.chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
	// One word filled with double-quotes and one filled with backslashes for the mask comparisons.
	align(16) static immutable char[Word.sizeof] dquote = '"';
	align(16) static immutable char[Word.sizeof] bslash = '\\';
	// Smallest factor for which 'mul * Word.sizeof + 1' is a multiple of 'keyLenOdd'.
	enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();
	
	const(char)* strPtr = str.ptr;
	immutable(char)* keyPtr = keyData.ptr;
	auto bsWord = *cast(immutable Word*) &bslash;
	auto dqWord = *cast(immutable Word*) &dquote;
	
	do
	{
		//		writeln("enter loop");
		// Calculate SSE word boundary before 'str'
		size_t strOff = cast(size_t) strPtr % Word.sizeof;
		Word strWord = *cast(Word*) (strPtr - strOff);
		// Number of 'key' characters matched so far.
		size_t keyPos = keyPtr - keyData.ptr;
		// Shift of the key start inside a word that lines 'key' up with the current 'str' word.
		size_t keyOff = (strOff - keyPos) % Word.sizeof;
		Word keyWord = (cast(Word*) keyData.ptr)[keyOff * mul % keyLenOdd + (keyOff + keyPos) / Word.sizeof];
		
		// Escape seqences have priority. 'key' may contain backslashes as part of the text, but in 'str' a backslash
		// at the same position is actually the begin of the escape sequence "\\".
		Word bsMask = strWord.maskEqual(bsWord);
		// If after processing backslashes there is a double-quote in 'str' we must not match it with a double-quote in
		// 'key', since it is the delimiter of 'str'.
		Word dqMask = strWord.maskEqual(dqWord);
		// How many bytes of 'key' and 'str' match in our 'Word' ?
		Word missMask = strWord.maskNotEqual(keyWord);
		// Merge mismatch, backslash and double-quote masks and move them into a non-SSE register.
		Word allMasks = or(missMask, or(bsMask, dqMask));
		// The bit at position 'Word.sizeof' is a stopper for 'bsf'; bits of bytes in front of 'str' are shifted out.
		uint skip = bsf((__builtin_ia32_pmovmskb128(allMasks) | 1 << Word.sizeof) >> strOff);
		//		writeln(keyPtr[0 .. 5]);
		//		writeln(strPtr[0 .. 5]);
		//		writeln(skip);
		strPtr += skip;
		keyPtr += skip;
		
		// Have we matched enough bytes to reach the end of 'key' ?
		if (keyPtr - keyData.ptr >= key.length)
			return true;
		
		// When we find a mismatch between 'key' and 'str', we try to call a provided helper function.
		// It may decode escape sequences in 'str' and recover from the state.
		// If that fails we accept the mismatch and return 'false'.
		// ('strOff + skip == Word.sizeof' means the whole rest of the word matched, so there is nothing to recover.)
		//		writefln("Key: %s, Str %s", *keyPtr, *strPtr);
		//		const(char*) strPtrOld = strPtr;
		//		immutable(char*) keyPtrOld = keyPtr;
		if (strOff + skip < Word.sizeof && !(mismatcher && mismatcher(keyPtr, strPtr)))
		{
			//			writefln("Key: %s, Str %s", *keyPtr, *strPtr);
			return false;
		}
		//		writefln("Key: %s, Str %s", *keyPtr, *strPtr);
	}
	while (keyPtr - keyData.ptr < key.length);
	
	return true;
}
240 | 
241 | 
/**
 * Variant of $(D keyword2) that uses the `pcmpistrm` string comparison builtin: checks whether `str` starts with the
 * compile-time known `key`.
 *
 * Params:
 *   key = The text to compare against.
 *   str = The text to check. Only `str.ptr` is used; the length is not checked.
 *   mismatcher = Handler that is called with the current key and string pointers at the first backslash,
 *     double-quote or mismatch inside of `key`. It may advance both pointers and return $(D true) to continue the
 *     comparison. If it is $(D null), any such position counts as a mismatch.
 *
 * Returns:
 *   $(D true), iff the end of `key` was reached.
 *
 * NOTE(review): `Word` and `getScalar` are not declared in the visible part of this module - confirm where they are
 * supposed to come from.
 * NOTE(review): the aligned loads read bytes in front of `str.ptr` and possibly behind the end of `str` - confirm
 * that callers guarantee readable memory there.
 */
bool keyword3(string key)(in char[] str, bool function(ref immutable(char)*, ref const(char)*) mismatcher = null)
{
	version (LDC) import ldc.gccbuiltins_x86;
	version (GNU) import gcc.builtins;

	/* Since SIMD typically works with word aligned data, we duplicate 'key' for every possible start of 'str' when
	 * loaded from an aligned memory address where the first character appears 0 to Word.sizeof bytes into the SIMD
	 * register.
	 * For 16-byte SIMD we could just create an array of 16 strings with 0 to 15 padding bytes in front and some after,
	 * but we can be more compact with at most 16 wasted padding bytes. Since machine registers are powers of 2, if we
	 * pad all keys to an odd length and repeat them 16 times we get a sequence with the following properties:
	 * - It consists of as many SIMD words as the key is long.
	 * - All 16 shift offsets of the key are contained in the SIMD words due to the periodicity introduced by using
	 *   disjunct prime factors for the key length and the SIMD word size.
	 * Interpreted as an array of SIMD words, it can be indexed with the desired shift multiplied by a constant factor
	 * and taken modulo the SIMD array length to use the periodicity. The constant factor is the smallest value that
	 * when multiplied with the key length ends up at a SIMD word boundary + 1 (the first shift).
	 */
	
	// 'key' length rounded up to next odd value is the number of SIMD words we need.
	enum keyLenOdd = uint(key.length | 1); // TODO: uint or implicit type ?
	// 'key', padded with spaces (0x20) to the odd length and repeated until 'keyLenOdd' words are filled.
	align(16) static immutable char[keyLenOdd * Word.sizeof] keyData = key.representation
		.chain(ubyte(0x20).repeat(keyLenOdd - key.length)).cycle.take(keyLenOdd * Word.sizeof).array;
	// Set of special characters in 'str': one backslash, the remaining bytes are double-quotes.
	align(16) static immutable char[Word.sizeof] dqbs = `\"""""""""""""""`;
	// Smallest factor for which 'mul * Word.sizeof + 1' is a multiple of 'keyLenOdd'.
	enum mul = { uint result = 0; while ((++result * Word.sizeof + 1) % keyLenOdd) {} return result; }();

	// Calculate SSE word boundary before 'str'
	uint off = cast(uint) str.ptr % Word.sizeof;
	// SSE aligned pointer <= 'str.ptr'.
	auto strPtr = cast(const(Word)*) (str.ptr - off);
	auto keyPtr = cast(immutable(Word)*) keyData.ptr + off * mul % keyLenOdd;
	auto keyStart = cast(immutable(char)*) keyPtr + off;
	Word strWord = *strPtr;

LoadKey:
	auto keyEnd = keyStart + key.length;

Compare:
	// Get bitmask of special characters in 'str'.
	uint escMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*cast(Word*) &dqbs, strWord, 0b_0_00_00_00));
//	writeln("Called a");
	// Get bitmask of characters from 'key' and 'str' that don't match.
	uint missMask = getScalar(cast(int4) __builtin_ia32_pcmpistrm128(*keyPtr, strWord, 0b_0_01_10_00));
//	writeln("Called b");
	// Create a merged mask for both and clear the bits of all bytes in front of the current offset.
	uint mask = (escMask | missMask) & (uint.max << off);

	// No bit set means all 16 bytes are equal and there are no escape characters. That's as good as it gets.
	if (!mask)
	{
		// Jump forward by a word size and see if we successfully compared all bytes to the end of our 'key'.
		// NOTE(review): 'keyPtr' and 'strPtr' point to 'Word', so adding 16 advances by 16 words rather than
		// 16 bytes if 'Word' is a 16-byte vector. 'strWord' is also not reloaded before the jump - confirm.
		keyPtr += 16;
		if (cast(immutable(char)*) keyPtr >= keyEnd)
			return true;
		// Otherwise continue with the next set of 16 bytes.
		strPtr += 16;
		off = 0;
		goto Compare;
	}

	// One of two cases ...
	off = bsf(mask);

	// 1) Did the mismatch occur past the end of 'key' ? Then we compared succesfully.
	if (cast(immutable(char)*) keyPtr + off >= keyEnd)
		return true;

	// 2) It must be a special character or actual mismatch, let 'mismatcher' decide.
//	writefln("Skipping: %s", (cast(const(char)*) strPtr)[0 .. off]);
	auto strChP = cast(const(char)*) strPtr + off;
	auto strChPOld = strChP;
	auto keyChP = cast(immutable(char)*) keyPtr + off;
	// 'mismatcher' is optional. Without it nobody can recover from this position, so never call through null.
	bool goodToGo = mismatcher !is null && mismatcher(keyChP, strChP);

//	writefln("Mismatcher used %s key chars, %s str chars and returned: %s", keyAdd, strAdd, goodToGo);
	if (keyChP >= keyEnd)
		return true;
	if (!goodToGo)
		return false;

	// Arriving here we just decoded an escape sequence and have to adjust our pointers.
	auto keyPos = keyChP - keyStart;
	off += strChP - strChPOld;
	if (off >= 16)
	{
		strPtr += off / 16;
		strWord = *strPtr;
		off %= 16;
	}
	// Pick the copy of 'key' in 'keyData' whose shift lines up with the new offset into 'str'.
	auto baseOff = (off - keyPos) & 15;
	keyPtr = cast(immutable(Word)*) keyData.ptr + baseOff * mul % keyLenOdd;
	keyStart = cast(immutable(char)*) keyPtr + baseOff;
	keyPtr += (baseOff + keyPos) / 16;
	goto LoadKey;
}
337 | 
338 | 
/**
 * Placeholder that ignores both arguments and always returns 0.
 *
 * NOTE(review): going by the name this is presumably meant to return the length of the common prefix of `a` and
 * `b`, but no such logic is implemented - confirm the intended behavior before using it.
 */
size_t equalLength(scope inout(char[]) a, scope inout(char[]) b)
{
	return 0;
}
343 | 
344 | 
/*******************************************************************************
 * 
 * Joins a series of strings into one zero-terminated string.
 *
 * Params:
 *   Strs = a series of string symbols or literals to be concatenated
 *   buffer = optional memory for the result. The default argument obtains it
 *            via `alloca` if the required size is at most `allocaLimit`;
 *            otherwise it is `null` and the memory comes from `malloc`.
 *
 * Returns:
 *   A $(D TempBuffer!char) containing the concatenated string. It is kept alive
 *   for as long as it is in scope.
 *
 **************************************/
nothrow @nogc
template concat(Strs...)
{
	import core.stdc.string : memcpy;
	import fast.internal.helpers;

	// Source code of an expression that sums up all string lengths plus the terminating zero.
	enum allocExpr = ctfeJoin!(Strs.length)("Strs[%s].length", "+") ~ "+1";

	auto concat(void* buffer = (mixin(allocExpr) <= allocaLimit) ? alloca(mixin(allocExpr)) : null)
	{
		immutable total = mixin(allocExpr);
		immutable onHeap = buffer is null;
		char* storage = cast(char*) (onHeap ? malloc(total) : buffer);
		// The slice covers the text only; the terminating zero is stored right behind it.
		auto result = TempBuffer!char(storage[0 .. total - 1], onHeap);

		char* dst = result.ptr;
		foreach (const(char[]) part; Strs)
		{
			memcpy(dst, part.ptr, part.length);
			dst += part.length;
		}
		*dst = '\0';

		return result;
	}
}
384 | 
385 | 
386 | 
387 | private:
388 | 
389 | template SimdMatcher(string match)
390 | {
391 | 	import core.simd;
392 | 	import std.string;
393 | 	import fast.internal.sysdef;
394 | 	
395 | 	static if (match != strip(match)) {
396 | 		// Reinstanciate the template with any whitespace stripped from the match string.
397 | 		alias SimdMatcher = SimdMatcher!(strip(match));
398 | 	} else {
399 | 		/* For SSE in DMD I am blocked by:
400 | 		 * https://d.puremagic.com/issues/show_bug.cgi?id=8047
401 | 		 * https://d.puremagic.com/issues/show_bug.cgi?id=11585
402 | 		 */
403 | 		enum isUsingSSE = hasSSE2 && (isLDC || isGDC);
404 | 		enum isSingleChar = match.length == 2 && match[0] == '=';
405 | 		static if (isSingleChar) enum singleChar = match[1];
406 | 		static if (isUsingSSE) {
407 | 			// Using MOVMSKB we get one boolean per bit in a 16-bit value.
408 | 			alias Word = ubyte16;
409 | 			alias Mask = uint;
410 | 			enum sparseness = 1;
411 | 		} else {
412 | 			// The fallback is to work with machine words and tricky bit-twiddling algorithms.
413 | 			// As a result we get machine words where matching bytes have the high bit set.
414 | 			alias Word = size_t;
415 | 			alias Mask = size_t;
416 | 			enum sparseness = 8;
417 | 		}
418 | 		enum matchCode = genMatchCode!isUsingSSE("*wp");
419 | 		// Used in generic comparison code
420 | 		enum lows = size_t.max / 0xFF;
421 | 		enum highs = lows * 0x80;
422 | 		
423 | 		enum betterUseTables = (isDMD && matchCode.complexity >= 4)
424 | 			|| (isGDC && matchCode.complexity >= 18)
425 | 			|| (isLDC && matchCode.complexity >= 18);
426 | 
427 | 		static if (betterUseTables)
428 | 		{
429 | 			immutable matchTable = genMatchTable();
430 | 			
431 | 			size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow @nogc
432 | 			{
433 | 				import core.stdc.string;
434 | 				import fast.internal.helpers;
435 | 				// Returns the offset from `b` of the first matching byte, or size_t.max if there is none.
436 | 				static if (isSingleChar) {
437 | 					auto hit = cast(inout(char)*) memchr(b, singleChar, e - b); // optimized C library / built-in
438 | 					return hit is null ? size_t.max : hit - b; // a miss is null, not an offset
439 | 				} else {
440 | 					if (b >= e) return 0; // NOTE(review): an empty range yields 0, same as a match at offset 0
441 | 					// Scan aligned 16-bit words; matchTable has bit 0/1 set for a match in byte 0/1.
442 | 					size_t off = cast(size_t) b % ushort.sizeof;
443 | 					ushort* wp = cast(ushort*) (b - off);
444 | 					ushort* we = cast(ushort*) alignPtrNext(e, ushort.sizeof);
445 | 					if (off) {
446 | 						// Throw away bytes from before start of the string
447 | 						if (auto mask = matchTable[*wp] >> off)
448 | 							return bsf(mask);
449 | 						if (++wp is we) return size_t.max;
450 | 					}
451 | 					
452 | 					do {
453 | 						if (auto mask = matchTable[*wp])
454 | 							return bsf(mask) + (cast(char*) wp - b);
455 | 					} while (++wp !is we);
456 | 					return size_t.max;
457 | 				}
458 | 			}
459 | 			
460 | 			inout(char)* find(scope inout(char*) b) pure nothrow @nogc
461 | 			{
462 | 				import core.stdc.string;
463 | 				// Unbounded search: there is no end pointer, so the scan only stops at a match.
464 | 				static if (isSingleChar && singleChar == '\0') {
465 | 					return strlen(b) + b;
466 | 				} else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
467 | 					return cast(inout(char*)) memchr(b, singleChar, size_t.max); // pointer result, no end pointer exists here
468 | 				} else {
469 | 					size_t off = cast(size_t) b % ushort.sizeof;
470 | 					ushort* wp = cast(ushort*) (b - off);
471 | 					if (off) {
472 | 						// Throw away bytes from before start of the string
473 | 						if (auto mask = matchTable[*wp] >> off)
474 | 							return b + bsf(mask);
475 | 						++wp; // the partial word is done; do not test its leading byte again
476 | 					}
477 | 					for (;; ++wp) { // advance one aligned word per iteration
478 | 						if (auto mask = matchTable[*wp])
479 | 							return cast(inout(char)*) wp + bsf(mask);
480 | 					}
481 | 				}
482 | 			}
483 | 		}
484 | 		else
485 | 		{
486 | 			import core.stdc.string, core.simd;
487 | 			import std.simd;
488 | 			import fast.internal.helpers;
489 | 			
490 | 			version (LDC) {
491 | 				import ldc.gccbuiltins_x86;
492 | 			} else version (GNU) {
493 | 				import gcc.builtins;
494 | 			}
495 | 			
496 | 			size_t find(scope inout(char*) b, scope inout(char*) e) pure nothrow
497 | 			{
498 | 				// Returns the offset from `b` of the first matching byte, or size_t.max if there is none.
499 | 				static if (isSingleChar) {
500 | 					auto hit = cast(inout(char)*) memchr(b, singleChar, e - b); // optimized C library / built-in
501 | 					return hit is null ? size_t.max : hit - b; // a miss is null, not an offset
502 | 				} else {
503 | 					if (b >= e) return 0; // NOTE(review): an empty range yields 0, same as a match at offset 0
504 | 					size_t off = cast(size_t) b % Word.sizeof;
505 | 					Word* wp = cast(Word*) (b - off);
506 | 					Word* we = cast(Word*) alignPtrNext(e, Word.sizeof);
507 | 					if (off) {
508 | 						// Throw away bytes from before start of the string
509 | 						if (auto mask = (mixin(matchCode.code)) >> (off * sparseness))
510 | 							return bsf(mask) / sparseness;
511 | 						if (++wp is we) return size_t.max;
512 | 					}
513 | 					
514 | 					do {
515 | 						if (auto mask = mixin(matchCode.code)) // the generated code reads `*wp`
516 | 							return bsf(mask) / sparseness + (cast(char*) wp - b);
517 | 					} while (++wp !is we);
518 | 					return size_t.max;
519 | 				}
520 | 			}
521 | 			
522 | 			inout(char)* find(scope inout(char*) b) pure nothrow // unbounded search: no end pointer, the scan only stops at a match
523 | 			{
524 | 				// catch "strlen" and "memchr" like calls, that are highly optimized compiler built-ins.
525 | 				static if (isSingleChar && singleChar == '\0') {
526 | 					return strlen(b) + b;
527 | 				} else static if (isSingleChar && isDMD) { // DMD is better off using optimized C library code.
528 | 					return cast(inout(char*)) memchr(b, singleChar, size_t.max);
529 | 				} else {
530 | 					size_t off = cast(size_t) b % Word.sizeof; // distance of b from the preceding Word boundary
531 | 					Word* wp = cast(Word*) (b - off); // start reading at that aligned boundary
532 | 					if (off) {
533 | 						// Throw away bytes from before start of the string
534 | 						if (auto mask = (mixin(matchCode.code)) >> (off * sparseness)) // the generated code reads `*wp`
535 | 							return b + bsf(mask) / sparseness; // `sparseness` is the number of mask bits per input byte
536 | 						++wp;
537 | 					}
538 | 					
539 | 					do {
540 | 						if (auto mask = mixin(matchCode.code))
541 | 							return cast(inout(char)*) wp + bsf(mask) / sparseness;
542 | 						++wp;
543 | 					} while (true);
544 | 				}
545 | 			}
546 | 		}
547 | 		
548 | 		enum genMatchCode(bool sse)(string var)
549 | 		{
550 | 			import std.ascii, std.exception;
551 | 			// Translates `match` (=c !c <c >c or(..,..) and(..,..)) into a D expression string over `var`.
552 | 			struct Code {
553 | 				string code;
554 | 				size_t complexity = 1; // 1 plus the number of or(/and( groups
555 | 			}
556 | 			Code result;
557 | 			string[] nesting; // separator to emit for ',' in each currently open group
558 | 			
559 | 			with (result) {
560 | 				for (size_t i = 0; i < match.length;) {
561 | 					string handleChar() { // the operand character after the operator, as a D char literal
562 | 						char c = match[i+1];
563 | 						switch (c) {
564 | 							case 0:
565 | 								return `'\0'`;
566 | 							case '\\':
567 | 								return `'\\'`;
568 | 							case "'"[0]:
569 | 								return `'\''`;
570 | 							case '\t':
571 | 								return `'\t'`;
572 | 							case '\r':
573 | 								return `'\r'`;
574 | 							case '\n':
575 | 								return `'\n'`;
576 | 							default:
577 | 								return `'` ~ c ~ `'`;
578 | 						}
579 | 					}
580 | 					
581 | 					if (match[i] == '=') {
582 | 						static if (sse) {
583 | 							code ~= "maskEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
584 | 						} else if (match[i+1] == 0) {
585 | 							code ~= "" ~ var ~ " - lows & ~" ~ var;
586 | 						} else {
587 | 							code ~= "(" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows & ~(" ~ var ~ " ^ lows * " ~ handleChar() ~ ")";
588 | 						}
589 | 						i += 2;
590 | 					} else if (match[i] == '!') {
591 | 						static if (sse) {
592 | 							code ~= "maskNotEqual(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
593 | 						} else if (match[i+1] == 0) {
594 | 							code ~= "(~(" ~ var ~ " - lows) | " ~ var ~ ")";
595 | 						} else {
596 | 							code ~= "(~((" ~ var ~ " ^ lows * " ~ handleChar() ~ ") - lows) | (" ~ var ~ " ^ lows * " ~ handleChar() ~ "))";
597 | 						}
598 | 						i += 2;
599 | 					} else if (match[i] == '<') {
600 | 						static if (sse)
601 | 							code ~= "maskGreater(SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "), " ~ var ~ ")";
602 | 						else
603 | 							code ~= "maskLessGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
604 | 						i += 2;
605 | 					} else if (match[i] == '>') {
606 | 						static if (sse)
607 | 							code ~= "maskGreater(" ~ var ~ ", SIMDFromScalar!(ubyte16, " ~ handleChar() ~ "))";
608 | 						else
609 | 							code ~= "maskGreaterGeneric!" ~ handleChar() ~ "(" ~ var ~ ")";
610 | 						i += 2;
611 | 					} else if (match[i .. $].startsWith("or(")) {
612 | 						// Open a group. SSE emits a call `or(a, b)`; the generic path joins the
613 | 						// operands with an infix operator and therefore needs explicit parentheses:
614 | 						// `&` binds tighter than `|`, so and(or(a,b),c) would otherwise be evaluated
615 | 						// as a | (b & c).
616 | 						nesting ~= sse ? ", " : " | ";
617 | 						code ~= sse ? "or(" : "(";
618 | 						complexity++;
619 | 						i += 3;
620 | 					} else if (match[i .. $].startsWith("and(")) {
621 | 						// Same as for or(...): a call in the SSE code, a parenthesized infix chain
622 | 						// in the generic code. The parentheses keep the group intact no matter
623 | 						// which operators surround it (an enclosing or(...) does not need them,
624 | 						// but emitting them unconditionally keeps both branches symmetric).
625 | 						nesting ~= sse ? ", " : " & ";
626 | 						code ~= sse ? "and(" : "(";
627 | 						complexity++;
628 | 						i += 4;
629 | 					} else if (match[i] == ',') {
630 | 						enforce(nesting.length, "',' on top level");
631 | 						code ~= nesting[$-1];
632 | 						i++;
633 | 					} else if (match[i] == ')') {
634 | 						enforce(nesting.length, "Unbalanced closing parenthesis");
635 | 						nesting.length--;
636 | 						// Closes the call in SSE code and the parenthesized group in generic code;
637 | 						// every group opener above emits exactly one matching '('.
638 | 						code ~= ")";
639 | 						i++;
640 | 					} else if (match[i].isWhite) {
641 | 						i++;
642 | 					} else {
643 | 						throw new Exception(format("Unexpected character at index %s: 0x%02x", i, match[i]));
644 | 					}
645 | 				}
646 | 				enforce(nesting.length == 0, "Missing closing parenthesis"); // reject "or(=a,=b" early
647 | 				static if (sse)
648 | 					code = "__builtin_ia32_pmovmskb128(" ~ code ~ ")"; // one mask bit per input byte
649 | 				else
650 | 					code = "(" ~ code ~ ") & highs"; // keep only the per-byte high bits
651 | 			}
652 | 			return result;
653 | 		}
654 | 		
655 | 		enum genMatchTable() // builds the 64 Ki entry lookup table that maps a 16-bit word to a 2-bit match mask
656 | 		{
657 | 			ubyte[1 << 16] table;
658 | 			ubyte[256] lut;
659 | 			foreach (uint i; 0 .. 256) {
660 | 				lut[i] = (mixin(genMatchCode!false("i").code) >> 7) & 1; // run the generic match code on a single byte value; bit 7 is that byte's flag
661 | 			}
662 | 			foreach (i; 0 .. 256) foreach (k; 0 .. 256) {
663 | 				table[i * 256 + k] = cast(ubyte) (lut[i] << 1 | lut[k]); // bit 0: low byte `k` matches, bit 1: high byte `i` matches
664 | 			}
665 | 			return table;
666 | 		}
667 | 	}
668 | }
669 | 
670 | /**
671 |  * Tests every byte of an unsigned data word (1, 2, 4 or 8 bytes) against one fixed value.
672 |  *
673 |  * Params:
674 |  *   value = The byte value to look for.
675 |  *   word = The data word that is searched for the value.
676 |  *
677 |  * Returns:
678 |  *   Zero iff no byte of the word equals the value. Otherwise the lowest set 0x80 flag marks the
679 |  *   least significant matching byte; flags above it can be false positives caused by a borrow.
680 |  *
681 |  * See_Also:
682 |  *   http://graphics.stanford.edu/~seander/bithacks.html#ValueInWord
683 |  */
684 | T maskEqualGeneric(ubyte value, T)(T word) @safe pure nothrow
685 | 	if (isUnsigned!T)
686 | {
687 | 	// 0x01 replicated into every byte of T.
688 | 	enum ones = T.max / 0xFF;
689 | 	static if (value != 0) {
690 | 		// XOR turns bytes equal to `value` into zero bytes, which the base case then detects.
691 | 		return maskEqualGeneric!0(word ^ (ones * value));
692 | 	} else {
693 | 		enum tops = ones * 0x80;
694 | 		return (word - ones) & ~word & tops;
695 | 	}
696 | }
697 | 
698 | T maskLessGeneric(ubyte value, T)(T word) @safe pure nothrow
699 | 	if (isUnsigned!T && value <= 128)
700 | {
701 | 	// Non-zero iff some byte of `word` is less than `value`; flags sit in the bytes' high bits.
702 | 	enum ones = T.max / 0xFF, tops = ones * 0x80;
703 | 	return (word - ones * value) & ~word & tops;
704 | }
705 | 
706 | T maskGreaterGeneric(ubyte value, T)(T word) @safe pure nothrow
707 | 	if (isUnsigned!T && value <= 127)
708 | {
709 | 	// Non-zero iff some byte of `word` is greater than `value`; flags sit in the bytes' high bits.
710 | 	enum ones = T.max / 0xFF, tops = ones * 0x80;
711 | 	return ((word + ones * (127 - value)) | word) & tops;
712 | }
713 | 
714 | T orGeneric(T)(T a, T b) @safe pure nothrow // combines two match masks; a byte is flagged if it is flagged in either
715 | 	if (isUnsigned!T)
716 | {
717 | 	return a | b;
718 | }
719 | 


--------------------------------------------------------------------------------
/source/fast/unicode.d:
--------------------------------------------------------------------------------
  1 | ﻿/***************************************************************************************************
  2 |  * 
  3 |  * Functions to work with the Unicode Transformation Format.
  4 |  * 
  5 |  * Grapheme clusters:
  6 |  *   A grapheme cluster is roughly speaking what the user would perceive as the smallest unit in a
  7 |  *   writing system. Their count can be thought of as a caret position in a text editor. In
  8 |  *   particular at grapheme cluster level, different normalization forms (NFC, NFD) become
  9 |  *   transparent. The default definition used here is independent of the user's locale.
 10 |  * 
 11 |  * Authors:
 12 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 13 |  * 
 14 |  * Copyright:
 15 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 16 |  * 
 17 |  * License:
 18 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 19 |  * 
 20 |  **************************************************************************************************/
 21 | module fast.unicode;
 22 | 
 23 | import fast.internal.unicode_tables;
 24 | import fast.internal.sysdef;
 25 | import std.simd;
 26 | 
 27 | 
 28 | /*******************************************************************************
 29 |  * 
 30 |  * Enumeration for the Unicode "General Category" used to roughly classify
 31 |  * codepoints into letters, punctuation etc.
 32 |  *
 33 |  **************************************/
 34 | alias GeneralCategory = DerivedGeneralCategory.Enum;
 35 | 
 36 | 
 37 | /*******************************************************************************
 38 |  * 
 39 |  * A customizable structure providing information on a code point. It consists
 40 |  * of a Unicode `property` in the form of an `enum` (e.g. `GeneralCategory`) and
 41 |  * a `length` in bytes of the code point in UTF-8.
 42 |  *
 43 |  **************************************/
 44 | struct CodePointInfo(Enum)
 45 | {
 46 | 	size_t length;        // number of UTF-8 code units (bytes) the code point occupies
 47 | 	Enum   property;      // the Unicode property value that was looked up
 48 | 	alias property this;  // the struct can be used wherever the plain enum value is expected
 49 | }
 50 | 
 51 | 
 52 | /*******************************************************************************
 53 |  * 
 54 |  * Counts the number of grapheme clusters (character count) in a UTF string.
 55 |  * 
 56 |  * This function uses "extended grapheme clusters" as defined in Unicode:
 57 |  * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
 58 |  * 
 59 |  * When invalid byte sequences are encountered, each byte that does not make up
 60 |  * a code point will be counted as one grapheme as visual representations of
 61 |  * such broken strings will often show a square with the hexadecimal byte value
 62 |  * in them.
 63 |  *
 64 |  * Params:
 65 |  *   str = the UTF-8 string
 66 |  *
 67 |  * Returns:
 68 |  *   the number of grapheme clusters
 69 |  *
 70 |  **************************************/
 71 | @nogc @trusted pure nothrow size_t
 72 | countGraphemes(scope const(char)[] str)
 73 | {
 74 | 	enum numValues = GraphemeBreakProperty.Enum.max + 1;
 75 | 	static immutable graphemeBreakRules =
 76 | 	{
 77 | 		// GB999
 78 | 		byte[numValues][numValues] graphemeBreaks = true;
 79 | 		with (GraphemeBreakProperty.Enum)
 80 | 		{
 81 | 			// GB12 + GB13 (special handling)
 82 | 			foreach (i; 0 .. numValues)
 83 | 				graphemeBreaks[i][Regional_Indicator] = -1;
 84 | 			// GB11
 85 | 			graphemeBreaks[ZWJ][Glue_After_Zwj] = false;
 86 | 			graphemeBreaks[ZWJ][E_Base_GAZ] = false;
 87 | 			// GB10 (special handling)
 88 | 			graphemeBreaks[E_Base]    [E_Modifier] = false;
 89 | 			graphemeBreaks[E_Base_GAZ][E_Modifier] = false;
 90 | 			graphemeBreaks[Extend]    [E_Modifier] = -1;
 91 | 			// GB9b
 92 | 			foreach (i; 0 .. numValues)
 93 | 				graphemeBreaks[Prepend][i] = false;
 94 | 			// GB9a
 95 | 			foreach (i; 0 .. numValues)
 96 | 				graphemeBreaks[i][SpacingMark] = false;
 97 | 			// GB9
 98 | 			foreach (i; 0 .. numValues)
 99 | 			{
100 | 				graphemeBreaks[i][Extend] = false;
101 | 				graphemeBreaks[i][ZWJ] = false;
102 | 			}
103 | 			graphemeBreaks[E_Base]    [Extend] = -1;
104 | 			graphemeBreaks[E_Base_GAZ][Extend] = -1;
105 | 			// GB8
106 | 			graphemeBreaks[LVT][T] = false;
107 | 			graphemeBreaks[T]  [T] = false;
108 | 			// GB7
109 | 			graphemeBreaks[LV][V] = false;
110 | 			graphemeBreaks[LV][T] = false;
111 | 			graphemeBreaks[V] [V] = false;
112 | 			graphemeBreaks[V] [T] = false;
113 | 			// GB6
114 | 			graphemeBreaks[L][L] = false;
115 | 			graphemeBreaks[L][V] = false;
116 | 			graphemeBreaks[L][LV] = false;
117 | 			graphemeBreaks[L][LVT] = false;
118 | 			// GB5
119 | 			foreach (i; 0 .. numValues)
120 | 			{
121 | 				graphemeBreaks[i][Control] = true;
122 | 				graphemeBreaks[i][CR] = true;
123 | 				graphemeBreaks[i][LF] = true;
124 | 			}
125 | 			// GB4
126 | 			foreach (i; 0 .. numValues)
127 | 			{
128 | 				graphemeBreaks[Control][i] = true;
129 | 				graphemeBreaks[CR]     [i] = true;
130 | 				graphemeBreaks[LF]     [i] = true;
131 | 			}
132 | 			// GB3
133 | 			graphemeBreaks[CR][LF] = false;
134 | 			// Additional homebrew top level rule to break before and after invalid characters
135 | 			foreach (i; 0 .. numValues)
136 | 			{
137 | 				graphemeBreaks[i][__] = true;
138 | 				graphemeBreaks[__][i] = true;
139 | 			}
140 | 		}
141 | 		return graphemeBreaks;
142 | 	}();
143 | 	// State shared by the nested functions below: `last`/`next` are the properties on both sides
144 | 	size_t graphemeCount = 0; // of the position under test; the flags drive complexRules().
145 | 	auto p = str.ptr;
146 | 	auto graphemeStart = p;
147 | 	GraphemeBreakProperty.Enum last, next;
148 | 	bool riEven, inEmojiBaseExtension;
149 | 
150 | 	@noinline @safe @nogc pure nothrow bool
151 | 	complexRules()
152 | 	{
153 | 		pragma(inline, false);
154 | 		with (GraphemeBreakProperty.Enum)
155 | 		{
156 | 			if (next == Regional_Indicator)
157 | 			{
158 | 				// GB12 + GB13: inside a run of indicators, break only after a complete pair.
159 | 				// A run always starts a new grapheme (GB999), where `riEven` was reset to false.
160 | 				if (last != Regional_Indicator)
161 | 					return true;
162 | 				return !(riEven = !riEven); // toggle the parity and report the previous state
163 | 			}
164 | 			else if (next == Extend)
165 | 			{
166 | 				inEmojiBaseExtension = true;
167 | 				return false;
168 | 			}
169 | 			else if (inEmojiBaseExtension)
170 | 			{
171 | 				return inEmojiBaseExtension = false;
172 | 			}
173 | 			return true;
174 | 		}
175 | 	}
176 | 
177 | 	@forceinline void
178 | 	graphemeCountImpl(S)(ref S str)
179 | 	{
180 | 		version (LDC) pragma(inline, true);
181 | 		auto cpi = getProperty!GraphemeBreakProperty(str);
182 | 		next = cpi.property; // must update the outer `next`: complexRules() reads it
183 | 		byte isBoundary = graphemeBreakRules[last][next]; // true, false or -1 for "ask complexRules()"
184 | 		if (isBoundary < 0 ? complexRules() : isBoundary)
185 | 		{
186 | 			graphemeCount++;
187 | 			static if (is(S == const(char)*))
188 | 				graphemeStart = str;
189 | 			else
190 | 				graphemeStart = str.ptr;
191 | 			riEven = inEmojiBaseExtension = false; // every grapheme starts with fresh rule state
192 | 		}
193 | 		static if (is(S == const(char)*))
194 | 			str += cpi.length;
195 | 		else
196 | 			str = str[cpi.length..$];
197 | 		last = next;
198 | 	}
199 | 	// While at least 4 bytes remain, use the pointer variant that needs no length checks.
200 | 	if (str.length >= 4) 
201 | 	{
202 | 		const e = str.ptr + str.length - 4;
203 | 		do
204 | 			graphemeCountImpl(p);
205 | 		while (p <= e);
206 | 		str = str[p - str.ptr..$];
207 | 	}
208 | 	while (str.length)
209 | 		graphemeCountImpl(str);
210 | 	return graphemeCount;
211 | }
212 | 
213 | 
214 | /*******************************************************************************
215 |  * 
216 |  * Retrieves the "General Category" of the first code point in some UTF-8
217 |  * string. For broken UTF-8, the property is set to `GeneralCategory.__` (`0`).
218 |  *
219 |  * Params:
220 |  *   str = the UTF-8 encoded text, which must not be empty
221 |  *
222 |  * Returns:
223 |  *   a code point information struct consisting of the fields `property`,
224 |  *   containing the `GeneralCategory` enumeration and the `length` of the code
225 |  *   point in bytes.
226 |  * 
227 |  **************************************/
228 | @property @safe @nogc pure nothrow CodePointInfo!GeneralCategory
229 | generalCategory(scope const(char)[] str)
230 | {
231 | 	return getProperty!DerivedGeneralCategory(str); // table lookup for the first code point of `str`
232 | }
233 | unittest
234 | {
235 | 	assert("क".generalCategory == GeneralCategory.Other_Letter);
236 | 	assert("̸".generalCategory == GeneralCategory.Nonspacing_Mark);
237 | 	assert("\xFF".generalCategory == GeneralCategory.__);
238 | }
239 | 
240 | 
241 | 
242 | private:
243 | 
244 | @forceinline pure @nogc nothrow auto
245 | getProperty(Property, S)(scope S str) if (is(S == const(char)*) || is(S == const(char)[])) // looks up `Property` for the first code point of `str`
246 | in
247 | {
248 | 	static if (is(S == const(char)[]))
249 | 		assert(str.length != 0, "No code units passed in.");
250 | }
251 | out
252 | {
253 | 	assert(__result <= Property.Enum.max);
254 | }
255 | body
256 | {
257 | 	version (LDC) pragma(inline, true);
258 | 	import fast.internal.helpers;
259 | 
260 | 	alias Enum = Property.Enum;
261 | 	alias CPI = CodePointInfo!Enum;
262 | 	// Fast path for ASCII.
263 | 	size_t idx = Property.level0[0][str[0]]; // values <= Enum.max are results, larger ones select a table on the next level
264 | 	if (byte(str[0]) >= 0) return CPI(1, cast(Enum)idx);
265 | 	// On multi-byte sequences, set the length to 1 for invalid sequences (idx == 0).
266 | 	size_t length = clz(str[0] ^ 0xFFu) - 24; // NOTE(review): presumably the count of leading 1 bits of the lead byte - confirm clz() semantics
267 | 	// Safely return invalid code point of 1 byte length if string exhausted.
268 | 	static if (is(S == const(char)[])) // the pointer variant performs no such check
269 | 		if (length > str.length)
270 | 			return CPI(1, cast(Enum)0);
271 | 	// Otherwise use lookup table hierarchy to determine if code units form a valid code point
272 | 	if (idx > Enum.max) {
273 | 		idx = Property.level1[idx - Enum.max - 1][str[1]]; // table number is the excess over Enum.max, minus one
274 | 		if (idx > Enum.max) {
275 | 			idx = Property.level2[idx - Enum.max - 1][str[2]];
276 | 			if (idx > Enum.max)
277 | 				idx = Property.level3[idx - Enum.max - 1][str[3]];
278 | 		}
279 | 	}
280 | 	if (idx)
281 | 		return CPI(length, cast(Enum)idx);
282 | 	else
283 | 		return CPI(1, cast(Enum)0); // invalid sequence: consume a single byte
284 | }
285 | 


--------------------------------------------------------------------------------
/source/unicode/generator.d:
--------------------------------------------------------------------------------
  1 | ﻿/***************************************************************************************************
  2 |  * 
  3 |  * Helper program to generate the lookup tables required for certain Unicode algorithms.
  4 |  * This code is conforming with Unicode 10.0.0.
  5 |  * 
  6 |  * Authors:
  7 |  *   $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
  8 |  * 
  9 |  * Copyright:
 10 |  *   © 2017 $(LINK2 mailto:Marco.Leise@gmx.de, Marco Leise)
 11 |  * 
 12 |  * License:
 13 |  *   $(LINK2 http://www.gnu.org/licenses/gpl-3.0, GNU General Public License 3.0)
 14 |  * 
 15 |  **************************************************************************************************/
 16 | module unicode.generator;
 17 | import std.conv;
 18 | import std.exception;
 19 | import core.bitop;
 20 | import std.stdio;
 21 | import std.string;
 22 | import std.algorithm;
 23 | import std.meta;
 24 | import std.path;
 25 | 
 26 | enum PropertyType // kind of property stored in a data file; this generator only implements `enumeration`
 27 | {
 28 | 	catalog, enumeration, binary, string, numeric, miscellaneous
 29 | }
 30 | 
 31 | struct Property // one property of a code point
 32 | {
 33 | 	string name;  // left null for enumeration properties
 34 | 	string value; // the enumeration value name
 35 | }
 36 | 
 37 | struct Entry // what is known about a single code point
 38 | {
 39 | 	bool isSet = false; // becomes true once a default or an explicit definition covered the code point
 40 | 	Property[] properties;
 41 | }
 42 | 
 43 | struct Line // one parsed data line of a Unicode Character Database file
 44 | {
 45 | 	uint rangeStart; // first code point of the range
 46 | 	uint rangeEnd;   // one past the last code point (the parser increments the inclusive end)
 47 | 	string[] properties; // all fields that follow the code point field
 48 | }
 49 | 
 50 | struct UnicodeCharacterDatabase
 51 | {
 52 | 	PropertyType type;
 53 | 	Entry[] entries;
 54 | 	size_t[string] enumerationValues;
 55 | 	string varName;
 56 | 
 57 | 	this(string filename, PropertyType type) // parses one UCD data file into `entries` and `enumerationValues`
 58 | 	{
 59 | 		import std.algorithm;
 60 | 		import std.stdio;
 61 | 		import std.uni;
 62 | 
 63 | 		this.type = type;
 64 | 		this.entries = new Entry[](0x110000);
 65 | 		this.enumerationValues[null] = 0; // value 0 is reserved for "no value" and printed as `__`
 66 | 		this.varName = baseName(filename, ".txt");
 67 | 		Line[] defaults;
 68 | 		Line[] actuals;
 69 | 		bool abbreviates = false;
 70 | 		string enumOverridePrefix;
 71 | 		string enumOverride; // once set, replaces the value field of all following data lines
 72 | 
 73 | 		foreach (line; File(filename).byLine())
 74 | 		{
 75 | 			bool isDefault = false;
 76 | 			char[] code;
 77 | 			Line data;
 78 | 
 79 | 			// Special @missing line syntax ?
 80 | 			static immutable isMissingStr = "# @missing: ";
 81 | 			static immutable propNameStr = "# Property:	";
 82 | 			if (line.startsWith(isMissingStr))
 83 | 			{
 84 | 				isDefault = true;
 85 | 				code = line[isMissingStr.length..$];
 86 | 			}
 87 | 			else if (line.startsWith(propNameStr))
 88 | 			{
 89 | 				abbreviates = true;
 90 | 				enumOverridePrefix = "# "~line[propNameStr.length..$].idup~"=";
 91 | 			}
 92 | 			else if (abbreviates && line.startsWith(enumOverridePrefix))
 93 | 			{
 94 | 				enumOverride = line[enumOverridePrefix.length..$].idup;
 95 | 			}
 96 | 			else
 97 | 			{
 98 | 				// Split between code and comment section
 99 | 				auto commentSplit = findSplit(line, "#");
100 | 				code = commentSplit[0];
101 | 			}
102 | 			code = strip!isWhite(code);
103 | 			if (code.length == 0)
104 | 				continue;
105 | 
106 | 			uint fieldIdx = 0;
107 | 			foreach (field; splitter(code, ';'))
108 | 			{
109 | 				field = strip!isWhite(field);
110 | 				switch (fieldIdx)
111 | 				{
112 | 					case 0: // Code point(s)
113 | 						auto range = findSplit(field, "..");
114 | 						data.rangeStart = to!uint(range[0], 16);
115 | 						data.rangeEnd = range[1] == ".." ? to!uint(range[2], 16) : data.rangeStart;
116 | 						enforce(data.rangeEnd <= 0x10FFFF);
117 | 						enforce(data.rangeStart <= data.rangeEnd);
118 | 						data.rangeEnd++; // stored as an exclusive end
119 | 						break;
120 | 					default:
121 | 						string ifield = enumOverride ? enumOverride : field.idup;
122 | 						data.properties ~= ifield;
123 | 						if (type == PropertyType.enumeration && ifield !in enumerationValues)
124 | 						{
125 | 							immutable nextValue = enumerationValues.length; // read first: the order of `aa[k] = aa.length` is implementation defined
126 | 							enumerationValues[ifield] = nextValue;
127 | 						}
128 | 				}
129 | 				fieldIdx++;
130 | 			}
131 | 			if (type == PropertyType.enumeration)
132 | 				enforce(fieldIdx >= 2);
133 | 			else assert(0, "Not implemented");
134 | 
135 | 			if (isDefault)
136 | 				defaults ~= data;
137 | 			else
138 | 				actuals ~= data;
139 | 		}
140 | 		// Apply the @missing defaults first, so that explicit definitions overwrite them.
141 | 		foreach (set; [defaults, actuals])
142 | 		{
143 | 			foreach (ref definition; set)
144 | 			{
145 | 				foreach (cp; definition.rangeStart .. definition.rangeEnd)
146 | 				{
147 | 					final switch (type) with (PropertyType)
148 | 					{
149 | 						case catalog:
150 | 							assert(0, "Not implemented");
151 | 						case enumeration:
152 | 							enforce(definition.properties.length == 1);
153 | 							entries[cp].properties = [Property(null, definition.properties[0])];
154 | 							entries[cp].isSet = true;
155 | 							break;
156 | 						case binary:
157 | 						case string:
158 | 						case numeric:
159 | 						case miscellaneous:
160 | 							assert(0, "Not implemented");
161 | 					}
162 | 				}
163 | 			}
164 | 		}
165 | 		// Every code point must have received a value from either source.
166 | 		foreach (cp; 0 .. 0x110000)
167 | 			enforce(entries[cp].isSet);
168 | 	}
169 | 
170 | 	struct TableEntry // one slot of a lookup table: either a leaf value or a link to a sub-table
171 | 	{
172 | 		ubyte[][] byteSeqs; // UTF-8 sequences collected in this leaf while the tables are built
173 | 		string enumerationValue; // leaf value; null while the slot is unused or links to a sub-table
174 | 		Table* subEntries; // table for the next code unit, or null for a leaf
175 | 		
176 | 		string toString()
177 | 		{
178 | 			if (subEntries)
179 | 				return subEntries.to!string();
180 | 			else
181 | 				return enumerationValue;
182 | 		}
183 | 	}
184 | 	
185 | 	struct Table
186 | 	{
187 | 		uint level, idx;
188 | 		TableEntry[256] entries;
189 | 		
190 | 		size_t toHash() const nothrow
191 | 		{
192 | 			// Chain the hashes of all 256 slots, then mix in the level; `idx` itself is left out.
193 | 			size_t seed;
194 | 			foreach (ref slot; entries)
195 | 			{
196 | 				seed = slot.subEntries
197 | 					? hashOf(slot.subEntries.idx, seed)
198 | 					: hashOf(slot.enumerationValue, seed);
199 | 			}
200 | 			return hashOf(level, seed);
201 | 		}
202 | 		
203 | 		bool opEquals(ref const Table key) const
204 | 		{
205 | 			// Equal means: same level, and each slot links to the same sub-table index or value.
206 | 			if (this.level != key.level)
207 | 				return false;
208 | 			foreach (i, ref mine; this.entries)
209 | 			{
210 | 				const theirs = &key.entries[i];
211 | 				if ((mine.subEntries is null) != (theirs.subEntries is null))
212 | 					return false;
213 | 				const same = mine.subEntries
214 | 					? mine.subEntries.idx == theirs.subEntries.idx
215 | 					: mine.enumerationValue == theirs.enumerationValue;
216 | 				if (!same)
217 | 					return false;
218 | 			}
219 | 			return true;
220 | 		}
221 | 	}
222 | 
223 | 	string generateEnumerationCode() // returns D source for a struct holding the enum and its UTF-8 lookup tables
224 | 	{
225 | 		auto lookup = new Table;
226 | 		uint[4] levelAssignments; // number of distinct tables per level
227 | 		foreach (dchar cp; 0 .. 0x110000)
228 | 		{
229 | 			ubyte[] byteSeq; // UTF-8 encoding of `cp`
230 | 			if (cp < 128)
231 | 			{
232 | 				byteSeq ~= cast(char)cp;
233 | 			}
234 | 			else
235 | 			{
236 | 				uint topBit = 6;
237 | 				uint bits = cp;
238 | 				do
239 | 				{
240 | 					byteSeq = char(bits & 0x3F | 0x80) ~ byteSeq;
241 | 					bits >>= 6;
242 | 					topBit--;
243 | 				}
244 | 				while (bits && bsr(bits) >= topBit);
245 | 				byteSeq = cast(char)(0xFE << topBit | bits) ~ byteSeq;
246 | 			}
247 | 			auto table = lookup;
248 | 			foreach (uint i, cu; byteSeq)
249 | 			{
250 | 				auto entry = &table.entries[cu];
251 | 				if (entry.subEntries)
252 | 				{
253 | 					table = entry.subEntries;
254 | 				}
255 | 				else if (entry.enumerationValue is null)
256 | 				{
257 | 					entry.byteSeqs = [byteSeq];
258 | 					entry.enumerationValue = entries[cp].properties[0].value;
259 | 					break;
260 | 				}
261 | 				else if (entry.enumerationValue == entries[cp].properties[0].value)
262 | 				{
263 | 					entry.byteSeqs ~= byteSeq;
264 | 					break;
265 | 				}
266 | 				else
267 | 				{
268 | 					auto subTable = new Table(i+1); // the leaf holds mixed values now: split it one level down
269 | 					foreach (byteSeq2; entry.byteSeqs)
270 | 					{
271 | 						subTable.entries[byteSeq2[i+1]].enumerationValue = entry.enumerationValue;
272 | 						subTable.entries[byteSeq2[i+1]].byteSeqs ~= byteSeq2; // append: several sequences can share the next byte and must survive a later split
273 | 					}
274 | 					entry.byteSeqs = null;
275 | 					entry.enumerationValue = null;
276 | 					entry.subEntries = subTable;
277 | 				}
278 | 				table = entry.subEntries;
279 | 			}
280 | 		}
281 | 
282 | 		Table*[Table] tableSet; // canonical instance for each distinct table content
283 | 		Table*[uint][4] tableByIdx;
284 | 		tableByIdx[0][0] = lookup;
285 | 
286 | 		void assignIndices(Table* table, uint level = 0)
287 | 		{
288 | 			foreach (i, ref entry; table.entries) // by reference: the deduplicated link below must be stored in the table
289 | 			{
290 | 				if (entry.subEntries)
291 | 				{
292 | 					assignIndices(entry.subEntries, level + 1);
293 | 					if (auto dup = *entry.subEntries in tableSet)
294 | 					{
295 | 						entry.subEntries = *dup;
296 | 					}
297 | 					else
298 | 					{
299 | 						entry.subEntries.idx = levelAssignments[level + 1]++;
300 | 						tableByIdx[level + 1][entry.subEntries.idx] = entry.subEntries;
301 | 						tableSet[*entry.subEntries] = entry.subEntries;
302 | 					}
303 | 				}
304 | 			}
305 | 		}
306 | 		assignIndices(lookup);
307 | 		levelAssignments[0] = 1;
308 | 
309 | 		writefln("%s: Using %s tables with a total size: %s KiB",
310 | 			varName, sum(levelAssignments[]), sum(levelAssignments[]) / 4f);
311 | 		stdout.flush(); // in case we are buffered
312 | 
313 | 		auto level0 = new ubyte[256][](levelAssignments[0]);
314 | 		auto level1 = new ubyte[256][](levelAssignments[1]);
315 | 		auto level2 = new ubyte[256][](levelAssignments[2]);
316 | 		auto level3 = new ubyte[256][](levelAssignments[3]);
317 | 		// Cell encoding: values below enumerationValues.length are results, larger ones link to a table.
318 | 		foreach (level, bin; AliasSeq!(level0, level1, level2, level3))
319 | 		{
320 | 			foreach (idx; 0 .. levelAssignments[level])
321 | 			{
322 | 				Table* table = tableByIdx[level][idx];
323 | 				enforce(table.idx   == idx);
324 | 				enforce(table.level == level);
325 | 				enforce(levelAssignments[level] + enumerationValues.length <= 256,
326 | 					format("Sum of tables and enumarations at level %s exceeds ubyte storage capacity", level));
327 | 				foreach (i, ref entry; table.entries)
328 | 				{
329 | 					if (entry.subEntries)
330 | 						bin[idx][i] = cast(ubyte)(entry.subEntries.idx + enumerationValues.length);
331 | 					else
332 | 						bin[idx][i] = cast(ubyte)enumerationValues[entry.enumerationValue];
333 | 				}
334 | 			}
335 | 		}
336 | 
337 | 		// Write struct with enum
338 | 		string code = "struct " ~ varName ~ "\n{\n";
339 | 		auto sortedEnum = new string[](enumerationValues.length);
340 | 		foreach (key, value; enumerationValues)
341 | 			sortedEnum[value] = key;
342 | 		code ~= "\tenum Enum : size_t\n\t{\n\t\t";
343 | 		foreach (key, value; sortedEnum)
344 | 			code ~= (value ? value : "__") ~ ", ";
345 | 		code ~= "\n\t}\n\n";
346 | 		foreach (k, bin; AliasSeq!(level0, level1, level2, level3))
347 | 		{
348 | 			code ~= "\tstatic immutable ubyte[256][" ~ to!string(bin.length) ~ "] level" ~ to!string(k) ~ " = [\n";
349 | 			foreach (i; 0 .. bin.length)
350 | 				code ~= "\t\t[" ~ format("%(%s,%)", bin[i]) ~ "],\n";
351 | 			code ~= "\t];\n";
352 | 		}
353 | 		code ~= "}\n\n";
354 | 		return code;
355 | 	}
356 | }
357 | 
358 | alias UCD = UnicodeCharacterDatabase;
359 | 
360 | void main()
361 | {
362 | 	// Unicode Character Database files that get a lookup table, in output order.
363 | 	static immutable sources = [
364 | 		"../ucd/auxiliary/GraphemeBreakProperty.txt",
365 | 		"../ucd/extracted/DerivedGeneralCategory.txt",
366 | 		"../ucd/extracted/DerivedLineBreak.txt",
367 | 	];
368 | 
369 | 	string code = "module fast.internal.unicode_tables;\n\n";
370 | 	foreach (source; sources)
371 | 		code ~= UCD(source, PropertyType.enumeration).generateEnumerationCode();
372 | 
373 | 	File("../source/fast/internal/unicode_tables.d", "w").write(code);
374 | }


--------------------------------------------------------------------------------
/test/fail1.json:
--------------------------------------------------------------------------------
1 | "A JSON payload should be an object or array, not a string."


--------------------------------------------------------------------------------
/test/fail10.json:
--------------------------------------------------------------------------------
1 | {"Extra value after close": true} "misplaced quoted value"


--------------------------------------------------------------------------------
/test/fail11.json:
--------------------------------------------------------------------------------
1 | {"Illegal expression": 1 + 2}


--------------------------------------------------------------------------------
/test/fail12.json:
--------------------------------------------------------------------------------
1 | {"Illegal invocation": alert()}


--------------------------------------------------------------------------------
/test/fail13.json:
--------------------------------------------------------------------------------
1 | {"Numbers cannot have leading zeroes": 013}


--------------------------------------------------------------------------------
/test/fail14.json:
--------------------------------------------------------------------------------
1 | {"Numbers cannot be hex": 0x14}


--------------------------------------------------------------------------------
/test/fail15.json:
--------------------------------------------------------------------------------
1 | ["Illegal backslash escape: \x15"]


--------------------------------------------------------------------------------
/test/fail16.json:
--------------------------------------------------------------------------------
1 | [\naked]


--------------------------------------------------------------------------------
/test/fail17.json:
--------------------------------------------------------------------------------
1 | ["Illegal backslash escape: \017"]


--------------------------------------------------------------------------------
/test/fail18.json:
--------------------------------------------------------------------------------
1 | [[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]


--------------------------------------------------------------------------------
/test/fail19.json:
--------------------------------------------------------------------------------
1 | {"Missing colon" null}


--------------------------------------------------------------------------------
/test/fail2.json:
--------------------------------------------------------------------------------
1 | ["Unclosed array"


--------------------------------------------------------------------------------
/test/fail20.json:
--------------------------------------------------------------------------------
1 | {"Double colon":: null}


--------------------------------------------------------------------------------
/test/fail21.json:
--------------------------------------------------------------------------------
1 | {"Comma instead of colon", null}


--------------------------------------------------------------------------------
/test/fail22.json:
--------------------------------------------------------------------------------
1 | ["Colon instead of comma": false]


--------------------------------------------------------------------------------
/test/fail23.json:
--------------------------------------------------------------------------------
1 | ["Bad value", truth]


--------------------------------------------------------------------------------
/test/fail24.json:
--------------------------------------------------------------------------------
1 | ['single quote']


--------------------------------------------------------------------------------
/test/fail25.json:
--------------------------------------------------------------------------------
1 | ["	tab	character	in	string	"]


--------------------------------------------------------------------------------
/test/fail26.json:
--------------------------------------------------------------------------------
1 | ["tab\   character\   in\  string\  "]


--------------------------------------------------------------------------------
/test/fail27.json:
--------------------------------------------------------------------------------
1 | ["line
2 | break"]


--------------------------------------------------------------------------------
/test/fail28.json:
--------------------------------------------------------------------------------
1 | ["line\
2 | break"]


--------------------------------------------------------------------------------
/test/fail29.json:
--------------------------------------------------------------------------------
1 | [0e]


--------------------------------------------------------------------------------
/test/fail3.json:
--------------------------------------------------------------------------------
1 | {unquoted_key: "keys must be quoted"}


--------------------------------------------------------------------------------
/test/fail30.json:
--------------------------------------------------------------------------------
1 | [0e+]


--------------------------------------------------------------------------------
/test/fail31.json:
--------------------------------------------------------------------------------
1 | [0e+-1]


--------------------------------------------------------------------------------
/test/fail32.json:
--------------------------------------------------------------------------------
1 | {"Comma instead if closing brace": true,


--------------------------------------------------------------------------------
/test/fail33.json:
--------------------------------------------------------------------------------
1 | ["mismatch"}


--------------------------------------------------------------------------------
/test/fail4.json:
--------------------------------------------------------------------------------
1 | ["extra comma",]


--------------------------------------------------------------------------------
/test/fail5.json:
--------------------------------------------------------------------------------
1 | ["double extra comma",,]


--------------------------------------------------------------------------------
/test/fail6.json:
--------------------------------------------------------------------------------
1 | [   , "<-- missing value"]


--------------------------------------------------------------------------------
/test/fail7.json:
--------------------------------------------------------------------------------
1 | ["Comma after the close"],


--------------------------------------------------------------------------------
/test/fail8.json:
--------------------------------------------------------------------------------
1 | ["Extra close"]]


--------------------------------------------------------------------------------
/test/fail9.json:
--------------------------------------------------------------------------------
1 | {"Extra comma": true,}


--------------------------------------------------------------------------------
/test/pass1.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     "JSON Test Pattern pass1",
 3 |     {"object with 1 member":["array with 1 element"]},
 4 |     {},
 5 |     [],
 6 |     -42,
 7 |     true,
 8 |     false,
 9 |     null,
10 |     {
11 |         "integer": 1234567890,
12 |         "real": -9876.543210,
13 |         "e": 0.123456789e-12,
14 |         "E": 1.234567890E+34,
15 |         "":  23456789012E66,
16 |         "zero": 0,
17 |         "one": 1,
18 |         "space": " ",
19 |         "quote": "\"",
20 |         "backslash": "\\",
21 |         "controls": "\b\f\n\r\t",
22 |         "slash": "/ & \/",
23 |         "alpha": "abcdefghijklmnopqrstuvwyz",
24 |         "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
25 |         "digit": "0123456789",
26 |         "0123456789": "digit",
27 |         "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
28 |         "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
29 |         "true": true,
30 |         "false": false,
31 |         "null": null,
32 |         "array":[  ],
33 |         "object":{  },
34 |         "address": "50 St. James Street",
35 |         "url": "http://www.JSON.org/",
36 |         "comment": "// /* <!-- --",
37 |         "# -- --> */": " ",
38 |         " s p a c e d " :[1,2 , 3
39 | 
40 | ,
41 | 
42 | 4 , 5        ,          6           ,7        ],"compact":[1,2,3,4,5,6,7],
43 |         "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
44 |         "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
45 |         "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
46 | : "A key can be any string"
47 |     },
48 |     0.5 ,98.6
49 | ,
50 | 99.44
51 | ,
52 | 
53 | 1066,
54 | 1e1,
55 | 0.1e1,
56 | 1e-1,
57 | 1e00,2e+00,2e-00
58 | ,"rosebud"]


--------------------------------------------------------------------------------
/test/pass2.json:
--------------------------------------------------------------------------------
1 | [[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]


--------------------------------------------------------------------------------
/test/pass3.json:
--------------------------------------------------------------------------------
1 | {
2 |     "JSON Test Pattern pass3": {
3 |         "The outermost value": "must be an object or array.",
4 |         "In this test": "It is an object."
5 |     }
6 | }
7 | 


--------------------------------------------------------------------------------