├── .github
│   └── workflows
│       ├── dotnet.yml
│       └── manual.yml
├── .gitignore
├── CardinalityEstimation.Benchmark
│   ├── BenchmarkDotNet.Artifacts
│   │   └── results
│   │       ├── DifferentHashes-report-github.md
│   │       ├── DifferentHashes-report.csv
│   │       └── DifferentHashes-report.html
│   ├── CardinalityEstimation.Benchmark.csproj
│   ├── Program.cs
│   └── cardinalityestimation.snk
├── CardinalityEstimation.Test
│   ├── BiasCorrectionTests.cs
│   ├── CardinalityEstimation.Test.csproj
│   ├── CardinalityEstimatorSerializerTests.cs
│   ├── CardinalityEstimatorTests.cs
│   ├── DictionaryUtils.cs
│   ├── Hash
│   │   ├── Fnv1ATests.cs
│   │   ├── HashFunctionFactoryTests.cs
│   │   └── Murmur3Tests.cs
│   ├── Resources.Designer.cs
│   ├── Resources.resx
│   ├── cardinalityestimation.snk
│   └── resx
│       ├── serializeddense_v1.0.bin
│       ├── serializeddense_v2_0.bin
│       ├── serializeddense_v2_1.bin
│       ├── serializeddirect_v1.0.bin
│       ├── serializeddirect_v2_0.bin
│       ├── serializeddirect_v2_1.bin
│       ├── serializedsparse_v1.0.bin
│       ├── serializedsparse_v2_0.bin
│       └── serializedsparse_v2_1.bin
├── CardinalityEstimation.sln
├── CardinalityEstimation.sln.DotSettings
├── CardinalityEstimation
│   ├── BiasCorrection.cs
│   ├── CardinalityEstimation.csproj
│   ├── CardinalityEstimator.cs
│   ├── CardinalityEstimatorSerializer.cs
│   ├── CardinalityEstimatorState.cs
│   ├── Hash
│   │   ├── Fnv1A.cs
│   │   └── Murmur3.cs
│   ├── ICardinalityEstimator.cs
│   ├── InternalsVisible.cs
│   └── cardinalityestimation.snk
├── LICENSE.md
└── README.md
/.github/workflows/dotnet.yml:
--------------------------------------------------------------------------------
1 | name: .NET
2 |
3 | on: # triggers: pushes to master and pull requests targeting master
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | jobs:
10 | build: # single job: restore, build, then test
11 |
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v2
16 | - name: Setup .NET
17 | uses: actions/setup-dotnet@v3
18 | with:
19 | dotnet-version: 8.x # NOTE(review): the Test and Benchmark project files in this dump list net9.0;net8.0 - confirm an SDK able to build net9.0 is available on the runner
20 | dotnet-quality: 'preview'
21 | - name: Restore dependencies
22 | run: dotnet restore
23 | - name: Build
24 | run: dotnet build --no-restore # relies on the packages restored by the previous step
25 | - name: Test
26 | run: dotnet test --no-build --verbosity normal # tests the binaries produced by the Build step
27 |
--------------------------------------------------------------------------------
/.github/workflows/manual.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow that is manually triggered
2 |
3 | name: Manual workflow
4 |
5 | # Controls when the action will run. Workflow runs when manually triggered using the UI
6 | # or API.
7 | on:
8 | workflow_dispatch:
9 |
10 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
11 | jobs:
12 | # This workflow contains a single job called "publish"
13 | publish:
14 | # The type of runner that the job will run on
15 | runs-on: ubuntu-latest
16 |
17 | # Steps represent a sequence of tasks that will be executed as part of the job
18 | steps:
19 | - uses: actions/checkout@v2
20 | - name: Setup .NET
21 | uses: actions/setup-dotnet@v3
22 | with:
23 | dotnet-version: 8.x
24 | dotnet-quality: 'preview'
25 | - name: Restore dependencies
26 | run: dotnet restore
27 | - name: Build
28 | run: dotnet build -c Release --no-restore
29 | - name: Build Signed
30 | run: dotnet build -c Release-Signed --no-restore
31 | - name: Push to NuGet
32 | run: |
33 | dotnet pack -c Release -o $PWD/Release/nuget
34 | for file in Release/nuget/*.nupkg; do dotnet nuget push -s https://api.nuget.org/v3/index.json -k ${{secrets.NUGET_API_KEY}} $file; done
35 | - name: Push Signed to NuGet
36 | run: |
37 | dotnet pack -c Release-Signed -o $PWD/Release-Signed/nuget
38 | for file in Release-Signed/nuget/*.nupkg; do dotnet nuget push -s https://api.nuget.org/v3/index.json -k ${{secrets.NUGET_API_KEY}} $file; done
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 |
4 | # User-specific files
5 | *.suo
6 | *.user
7 | *.userosscache
8 | *.sln.docstates
9 |
10 | # User-specific files (MonoDevelop/Xamarin Studio)
11 | *.userprefs
12 |
13 | # Build results
14 | [Dd]ebug/
15 | [Dd]ebugPublic/
16 | [Rr]elease/
17 | [Rr]eleases/
18 | x64/
19 | x86/
20 | build/
21 | bld/
22 | [Bb]in/
23 | [Oo]bj/
24 |
25 | # Roslyn cache directories
26 | *.ide/
27 |
28 | # MSTest test Results
29 | [Tt]est[Rr]esult*/
30 | [Bb]uild[Ll]og.*
31 |
32 | #NUNIT
33 | *.VisualState.xml
34 | TestResult.xml
35 |
36 | # Build Results of an ATL Project
37 | [Dd]ebugPS/
38 | [Rr]eleasePS/
39 | dlldata.c
40 |
41 | *_i.c
42 | *_p.c
43 | *_i.h
44 | *.ilk
45 | *.meta
46 | *.obj
47 | *.pch
48 | *.pdb
49 | *.pgc
50 | *.pgd
51 | *.rsp
52 | *.sbr
53 | *.tlb
54 | *.tli
55 | *.tlh
56 | *.tmp
57 | *.tmp_proj
58 | *.log
59 | *.vspscc
60 | *.vssscc
61 | .builds
62 | *.pidb
63 | *.svclog
64 | *.scc
65 |
66 | # Chutzpah Test files
67 | _Chutzpah*
68 |
69 | # Visual C++ cache files
70 | ipch/
71 | *.aps
72 | *.ncb
73 | *.opensdf
74 | *.sdf
75 | *.cachefile
76 |
77 | # Visual Studio files
78 | .vs/
79 |
80 | # Visual Studio profiler
81 | *.psess
82 | *.vsp
83 | *.vspx
84 |
85 | # TFS 2012 Local Workspace
86 | $tf/
87 |
88 | # Guidance Automation Toolkit
89 | *.gpState
90 |
91 | # ReSharper is a .NET coding add-in
92 | _ReSharper*/
93 | *.[Rr]e[Ss]harper
94 | *.DotSettings.user
95 |
96 | # JustCode is a .NET coding add-in
97 | .JustCode
98 |
99 | # TeamCity is a build add-in
100 | _TeamCity*
101 |
102 | # DotCover is a Code Coverage Tool
103 | *.dotCover
104 |
105 | # NCrunch
106 | _NCrunch_*
107 | .*crunch*.local.xml
108 |
109 | # MightyMoose
110 | *.mm.*
111 | AutoTest.Net/
112 |
113 | # Web workbench (sass)
114 | .sass-cache/
115 |
116 | # Installshield output folder
117 | [Ee]xpress/
118 |
119 | # DocProject is a documentation generator add-in
120 | DocProject/buildhelp/
121 | DocProject/Help/*.HxT
122 | DocProject/Help/*.HxC
123 | DocProject/Help/*.hhc
124 | DocProject/Help/*.hhk
125 | DocProject/Help/*.hhp
126 | DocProject/Help/Html2
127 | DocProject/Help/html
128 |
129 | # Click-Once directory
130 | publish/
131 |
132 | # Publish Web Output
133 | *.[Pp]ublish.xml
134 | *.azurePubxml
135 | # TODO: Comment the next line if you want to checkin your web deploy settings
136 | # but database connection strings (with potential passwords) will be unencrypted
137 | *.pubxml
138 | *.publishproj
139 |
140 | # NuGet Packages
141 | *.nupkg
142 | # The packages folder can be ignored because of Package Restore
143 | **/packages/*
144 | # except build/, which is used as an MSBuild target.
145 | !**/packages/build/
146 | # Uncomment if necessary however generally it will be regenerated when needed
147 | #!**/packages/repositories.config
148 |
149 | # Windows Azure Build Output
150 | csx/
151 | *.build.csdef
152 |
153 | # Windows Store app package directory
154 | AppPackages/
155 |
156 | # Others
157 | *.[Cc]ache
158 | ClientBin/
159 | [Ss]tyle[Cc]op.*
160 | ~$*
161 | *~
162 | *.dbmdl
163 | *.dbproj.schemaview
164 | *.pfx
165 | *.publishsettings
166 | node_modules/
167 | bower_components/
168 |
169 | # RIA/Silverlight projects
170 | Generated_Code/
171 |
172 | # Backup & report files from converting an old project file
173 | # to a newer Visual Studio version. Backup files are not needed,
174 | # because we have git ;-)
175 | _UpgradeReport_Files/
176 | Backup*/
177 | UpgradeLog*.XML
178 | UpgradeLog*.htm
179 |
180 | # SQL Server files
181 | *.mdf
182 | *.ldf
183 |
184 | # Business Intelligence projects
185 | *.rdl.data
186 | *.bim.layout
187 | *.bim_*.settings
188 |
189 | # Microsoft Fakes
190 | FakesAssemblies/
191 |
192 | # Node.js Tools for Visual Studio
193 | .ntvs_analysis.dat
194 |
195 | # Visual Studio 6 build log
196 | *.plg
197 |
198 | # Visual Studio 6 workspace options file
199 | *.opt
200 |
201 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/BenchmarkDotNet.Artifacts/results/DifferentHashes-report-github.md:
--------------------------------------------------------------------------------
1 | ```
2 |
3 | BenchmarkDotNet v0.13.8, Windows 11 (10.0.22621.2283/22H2/2022Update/SunValley2)
4 | Intel Core i7-8700 CPU 3.20GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
5 | .NET SDK 8.0.100-rc.1.23463.5
6 | [Host] : .NET 7.0.11 (7.0.1123.42427), X64 RyuJIT AVX2
7 | Core70 : .NET 7.0.11 (7.0.1123.42427), X64 RyuJIT AVX2
8 | Core80 : .NET 8.0.0 (8.0.23.41904), X64 RyuJIT AVX2
9 |
10 |
11 | ```
12 | | Method | Job | Runtime | Bits | Mean | Error | StdDev | Gen0 | Allocated |
13 | |---------- |------- |--------- |----- |-----------:|---------:|---------:|-----------:|-----------:|
14 | | **Murmur3** | **Core70** | **.NET 7.0** | **4** | **2,145.8 ms** | **28.16 ms** | **24.96 ms** | **34000.0000** | **2441.41 MB** |
15 | | Fnv1A | Core70 | .NET 7.0 | 4 | 1,062.2 ms | 10.78 ms | 9.55 ms | 10000.0000 | 762.95 MB |
16 | | XxHash64 | Core70 | .NET 7.0 | 4 | 861.5 ms | 7.19 ms | 6.38 ms | 13000.0000 | 1068.12 MB |
17 | | XxHash128 | Core70 | .NET 7.0 | 4 | 782.3 ms | 11.13 ms | 9.29 ms | 15000.0000 | 1144.42 MB |
18 | | Murmur3 | Core80 | .NET 8.0 | 4 | 2,007.7 ms | 38.95 ms | 52.00 ms | 27000.0000 | 2441.41 MB |
19 | | Fnv1A | Core80 | .NET 8.0 | 4 | 1,013.4 ms | 6.26 ms | 5.85 ms | 8000.0000 | 762.95 MB |
20 | | XxHash64 | Core80 | .NET 8.0 | 4 | 869.7 ms | 6.63 ms | 5.88 ms | 12000.0000 | 1068.12 MB |
21 | | XxHash128 | Core80 | .NET 8.0 | 4 | 779.6 ms | 7.87 ms | 7.36 ms | 13000.0000 | 1144.42 MB |
22 | | **Murmur3** | **Core70** | **.NET 7.0** | **16** | **2,177.4 ms** | **42.68 ms** | **70.12 ms** | **34000.0000** | **2441.72 MB** |
23 | | Fnv1A | Core70 | .NET 7.0 | 16 | 1,121.2 ms | 10.51 ms | 9.32 ms | 10000.0000 | 763.26 MB |
24 | | XxHash64 | Core70 | .NET 7.0 | 16 | 909.6 ms | 17.82 ms | 28.27 ms | 14000.0000 | 1068.43 MB |
25 | | XxHash128 | Core70 | .NET 7.0 | 16 | 825.1 ms | 16.01 ms | 22.96 ms | 15000.0000 | 1144.73 MB |
26 | | Murmur3 | Core80 | .NET 8.0 | 16 | 2,094.1 ms | 31.54 ms | 27.96 ms | 27000.0000 | 2441.72 MB |
27 | | Fnv1A | Core80 | .NET 8.0 | 16 | 1,098.3 ms | 21.94 ms | 23.47 ms | 8000.0000 | 763.26 MB |
28 | | XxHash64 | Core80 | .NET 8.0 | 16 | 903.3 ms | 16.75 ms | 17.20 ms | 12000.0000 | 1068.43 MB |
29 | | XxHash128 | Core80 | .NET 8.0 | 16 | 835.4 ms | 15.32 ms | 12.79 ms | 13000.0000 | 1144.73 MB |
30 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/BenchmarkDotNet.Artifacts/results/DifferentHashes-report.csv:
--------------------------------------------------------------------------------
1 | Method,Job,AnalyzeLaunchVariance,EvaluateOverhead,MaxAbsoluteError,MaxRelativeError,MinInvokeCount,MinIterationTime,OutlierMode,Affinity,EnvironmentVariables,Jit,LargeAddressAware,Platform,PowerPlanMode,Runtime,AllowVeryLargeObjects,Concurrent,CpuGroups,Force,HeapAffinitizeMask,HeapCount,NoAffinitize,RetainVm,Server,Arguments,BuildConfiguration,Clock,EngineFactory,NuGetReferences,IsMutator,InvocationCount,IterationCount,IterationTime,LaunchCount,MaxIterationCount,MaxWarmupIterationCount,MemoryRandomization,MinIterationCount,MinWarmupIterationCount,RunStrategy,UnrollFactor,WarmupCount,Bits,Mean,Error,StdDev,Gen0,Allocated
2 | Murmur3,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,"2,145.8 ms",28.16 ms,24.96 ms,34000.0000,2441.41 MB
3 | Fnv1A,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,"1,062.2 ms",10.78 ms,9.55 ms,10000.0000,762.95 MB
4 | XxHash64,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,861.5 ms,7.19 ms,6.38 ms,13000.0000,1068.12 MB
5 | XxHash128,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,782.3 ms,11.13 ms,9.29 ms,15000.0000,1144.42 MB
6 | Murmur3,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,"2,007.7 ms",38.95 ms,52.00 ms,27000.0000,2441.41 MB
7 | Fnv1A,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,"1,013.4 ms",6.26 ms,5.85 ms,8000.0000,762.95 MB
8 | XxHash64,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,869.7 ms,6.63 ms,5.88 ms,12000.0000,1068.12 MB
9 | XxHash128,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,4,779.6 ms,7.87 ms,7.36 ms,13000.0000,1144.42 MB
10 | Murmur3,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,"2,177.4 ms",42.68 ms,70.12 ms,34000.0000,2441.72 MB
11 | Fnv1A,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,"1,121.2 ms",10.51 ms,9.32 ms,10000.0000,763.26 MB
12 | XxHash64,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,909.6 ms,17.82 ms,28.27 ms,14000.0000,1068.43 MB
13 | XxHash128,Core70,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,825.1 ms,16.01 ms,22.96 ms,15000.0000,1144.73 MB
14 | Murmur3,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,"2,094.1 ms",31.54 ms,27.96 ms,27000.0000,2441.72 MB
15 | Fnv1A,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,"1,098.3 ms",21.94 ms,23.47 ms,8000.0000,763.26 MB
16 | XxHash64,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,903.3 ms,16.75 ms,17.20 ms,12000.0000,1068.43 MB
17 | XxHash128,Core80,False,Default,Default,Default,Default,Default,Default,111111111111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,True,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,16,835.4 ms,15.32 ms,12.79 ms,13000.0000,1144.73 MB
18 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/BenchmarkDotNet.Artifacts/results/DifferentHashes-report.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | DifferentHashes-20230916-162923
6 |
7 |
13 |
14 |
15 |
16 | BenchmarkDotNet v0.13.8, Windows 11 (10.0.22621.2283/22H2/2022Update/SunValley2)
17 | Intel Core i7-8700 CPU 3.20GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
18 | .NET SDK 8.0.100-rc.1.23463.5
19 | [Host] : .NET 7.0.11 (7.0.1123.42427), X64 RyuJIT AVX2
20 | Core70 : .NET 7.0.11 (7.0.1123.42427), X64 RyuJIT AVX2
21 | Core80 : .NET 8.0.0 (8.0.23.41904), X64 RyuJIT AVX2
22 |
23 |
24 |
25 |
26 | Method | Job | Runtime | Bits | Mean | Error | StdDev | Gen0 | Allocated |
27 |
28 | Murmur3 | Core70 | .NET 7.0 | 4 | 2,145.8 ms | 28.16 ms | 24.96 ms | 34000.0000 | 2441.41 MB |
29 |
Fnv1A | Core70 | .NET 7.0 | 4 | 1,062.2 ms | 10.78 ms | 9.55 ms | 10000.0000 | 762.95 MB |
30 |
XxHash64 | Core70 | .NET 7.0 | 4 | 861.5 ms | 7.19 ms | 6.38 ms | 13000.0000 | 1068.12 MB |
31 |
XxHash128 | Core70 | .NET 7.0 | 4 | 782.3 ms | 11.13 ms | 9.29 ms | 15000.0000 | 1144.42 MB |
32 |
Murmur3 | Core80 | .NET 8.0 | 4 | 2,007.7 ms | 38.95 ms | 52.00 ms | 27000.0000 | 2441.41 MB |
33 |
Fnv1A | Core80 | .NET 8.0 | 4 | 1,013.4 ms | 6.26 ms | 5.85 ms | 8000.0000 | 762.95 MB |
34 |
XxHash64 | Core80 | .NET 8.0 | 4 | 869.7 ms | 6.63 ms | 5.88 ms | 12000.0000 | 1068.12 MB |
35 |
XxHash128 | Core80 | .NET 8.0 | 4 | 779.6 ms | 7.87 ms | 7.36 ms | 13000.0000 | 1144.42 MB |
36 |
Murmur3 | Core70 | .NET 7.0 | 16 | 2,177.4 ms | 42.68 ms | 70.12 ms | 34000.0000 | 2441.72 MB |
37 |
Fnv1A | Core70 | .NET 7.0 | 16 | 1,121.2 ms | 10.51 ms | 9.32 ms | 10000.0000 | 763.26 MB |
38 |
XxHash64 | Core70 | .NET 7.0 | 16 | 909.6 ms | 17.82 ms | 28.27 ms | 14000.0000 | 1068.43 MB |
39 |
XxHash128 | Core70 | .NET 7.0 | 16 | 825.1 ms | 16.01 ms | 22.96 ms | 15000.0000 | 1144.73 MB |
40 |
Murmur3 | Core80 | .NET 8.0 | 16 | 2,094.1 ms | 31.54 ms | 27.96 ms | 27000.0000 | 2441.72 MB |
41 |
Fnv1A | Core80 | .NET 8.0 | 16 | 1,098.3 ms | 21.94 ms | 23.47 ms | 8000.0000 | 763.26 MB |
42 |
XxHash64 | Core80 | .NET 8.0 | 16 | 903.3 ms | 16.75 ms | 17.20 ms | 12000.0000 | 1068.43 MB |
43 |
XxHash128 | Core80 | .NET 8.0 | 16 | 835.4 ms | 15.32 ms | 12.79 ms | 13000.0000 | 1144.73 MB |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/CardinalityEstimation.Benchmark.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net9.0;net8.0
6 | enable
7 | enable
8 | true
9 | cardinalityestimation.snk
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/Program.cs:
--------------------------------------------------------------------------------
1 | using BenchmarkDotNet.Attributes;
2 | using BenchmarkDotNet.Configs;
3 | using BenchmarkDotNet.Environments;
4 | using BenchmarkDotNet.Jobs;
5 | using BenchmarkDotNet.Running;
6 | using CardinalityEstimation;
7 |
8 | var config = DefaultConfig.Instance // start from BenchmarkDotNet's default configuration and add one job per runtime
9 | .AddJob(Job.Default.WithId("Core70").WithRuntime(CoreRuntime.Core70)) // NOTE(review): the project file in this dump lists net9.0;net8.0 - confirm the Core70 job is still intended
10 | .AddJob(Job.Default.WithId("Core80").WithRuntime(CoreRuntime.Core80));
11 |
12 | BenchmarkRunner.Run(config); // NOTE(review): no benchmark type is named here; a generic argument (e.g. DifferentHashes or GetBytesTests) was probably lost in extraction - confirm against the repository
13 |
14 | [MemoryDiagnoser]
15 | public class DifferentHashes // Benchmarks adding N random strings to a CardinalityEstimator, once per hash function.
16 | {
17 | public static readonly Random Rand = new Random(); // RNG used to build dataStrings
18 |
19 | private const int N = 10000000; // number of strings generated, and added per benchmark invocation
20 |
21 | private string[] dataStrings = Enumerable.Range(0, N).Select(_ => Rand.Next().ToString() + Guid.NewGuid().ToString() + Rand.Next().ToString()).ToArray(); // each entry is "<random int><new GUID><random int>"; built by the field initializer
22 |
23 | [Params(4, 16)]
24 | public int Bits { get; set; } // forwarded to the CardinalityEstimator constructor by Run
25 |
26 | [Benchmark]
27 | public void Murmur3() => Run(Bits, CardinalityEstimation.Hash.Murmur3.GetHashCode); // library Murmur3 hash
28 | [Benchmark]
29 | public void Fnv1A() => Run(Bits, CardinalityEstimation.Hash.Fnv1A.GetHashCode); // library FNV-1a hash
30 | [Benchmark]
31 | public void XxHash128() => Run(Bits, (x) => BitConverter.ToUInt64(System.IO.Hashing.XxHash128.Hash(x))); // only the first 8 bytes of the 128-bit digest are used as the hash
32 |
33 |
34 | private void Run(int bits, GetHashCodeDelegate hashFunction) // builds a fresh estimator with the given hash and bit count, then adds every entry of dataStrings
35 | {
36 | var hll = new CardinalityEstimator(hashFunction, bits);
37 | for (var i = 0; i < N; i++)
38 | {
39 | hll.Add(dataStrings[i]);
40 | }
41 | }
42 | }
43 |
44 | [MemoryDiagnoser]
45 | public class GetBytesTests // Benchmarks two ways of feeding N random ints to a CardinalityEstimator that hashes with XxHash64.
46 | {
47 | public static readonly Random Rand = new Random(); // RNG used to build dataInts
48 |
49 | private const int N = 10000000; // number of ints generated, and added per benchmark invocation
50 |
51 | private int[] dataInts = Enumerable.Range(0, N).Select(_ => Rand.Next()).ToArray(); // input values; built by the field initializer
52 |
53 | [Params(4, 16)]
54 | public int Bits { get; set; } // forwarded to the CardinalityEstimator constructor
55 |
56 | [Benchmark(Baseline = true)]
57 | public void GetBytes() // baseline: passes each int directly to hll.Add
58 | {
59 | GetHashCodeDelegate hashFunction = (x) => BitConverter.ToUInt64(System.IO.Hashing.XxHash64.Hash(x)); // reads the XxHash64 digest as a ulong
60 | var hll = new CardinalityEstimator(hashFunction, Bits);
61 | for (var i = 0; i < N; i++)
62 | {
63 | hll.Add(dataInts[i]); // NOTE(review): presumably the int overload converts to bytes internally (hence the benchmark name) - verify in CardinalityEstimator
64 | }
65 | }
66 |
67 | [Benchmark]
68 | public void WriteToBytes() // variant: writes each int into one reused buffer and adds the buffer
69 | {
70 | GetHashCodeDelegate hashFunction = (x) => BitConverter.ToUInt64(System.IO.Hashing.XxHash64.Hash(x)); // same hash as the baseline
71 | var hll = new CardinalityEstimator(hashFunction, Bits);
72 | var bytes = new byte[sizeof(int)]; // allocated once, overwritten on every iteration
73 | for (var i = 0; i < N; i++)
74 | {
75 | BitConverter.TryWriteBytes(bytes, dataInts[i]); // return value ignored; the buffer is exactly sizeof(int) bytes
76 | hll.Add(bytes);
77 | }
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Benchmark/cardinalityestimation.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Benchmark/cardinalityestimation.snk
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/BiasCorrectionTests.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Test
27 | {
28 | using Xunit;
29 |
30 | public class BiasCorrectionTests // Unit tests for BiasCorrection.CorrectBias and its lookup tables; every CorrectBias call here uses 4 as the second argument.
31 | {
32 | [Fact]
33 | public void WhenRawEstimateIsInArrayCorrectBiasIsUsed() // expects a bias of exactly 9.207 to be subtracted from 12.207
34 | {
35 | double corrected = BiasCorrection.CorrectBias(12.207, 4);
36 | Assert.Equal(12.207 - 9.207, corrected);
37 | }
38 |
39 | [Fact]
40 | public void WhenRawEstimateIsBetweenArrayValuesCorrectBiasIsUsed() // expects a bias that falls between two table entries
41 | {
42 | double corrected = BiasCorrection.CorrectBias(11.1, 4);
43 | // The bias should be between 10 and 9.717, but much closer to 10
44 | Assert.Equal(1.1394700139470011, corrected);
45 | }
46 |
47 | [Fact]
48 | public void WhenRawEstimateIsLargerThanAllArrayValuesCorrectBiasIsUsed() // expects the bias -1.7606 to be applied to 78.0
49 | {
50 | // The bias of the last array element should be used
51 | double corrected = BiasCorrection.CorrectBias(78.0, 4);
52 | Assert.Equal(78.0 - -1.7606, corrected);
53 | }
54 |
55 | [Fact]
56 | public void WhenRawEstimateIsSmallerThanAllArrayValuesCorrectBiasIsUsed() // expects the bias 10 to be applied to 10.5
57 | {
58 | // The bias of the first array element should be used
59 | double corrected = BiasCorrection.CorrectBias(10.5, 4);
60 | Assert.Equal(10.5 - 10, corrected);
61 | }
62 |
63 | [Fact]
64 | public void WhenCorrectedEstimateIsBelowZeroZeroIsReturned() // expects 0 for a raw estimate of 5
65 | {
66 | double corrected = BiasCorrection.CorrectBias(5, 4);
67 | Assert.Equal(0, corrected);
68 | }
69 |
70 | [Fact]
71 | public void RawEstimateArraysAndBiasDataArraysHaveSameLengths() // checks RawEstimate and BiasData agree in outer length and in every inner length
72 | {
73 | Assert.True(BiasCorrection.RawEstimate.Length >= 14); // at least 14 inner tables are required
74 | Assert.Equal(BiasCorrection.RawEstimate.Length, BiasCorrection.BiasData.Length);
75 |
76 | for (var bits = 0; bits < BiasCorrection.RawEstimate.Length; bits++) // "bits" is used here only as the outer array index
77 | {
78 | Assert.Equal(BiasCorrection.RawEstimate[bits].Length, BiasCorrection.BiasData[bits].Length);
79 | }
80 | }
81 | }
82 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/CardinalityEstimation.Test.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net9.0;net8.0
5 | false
6 | true
7 | cardinalityestimation.snk
8 |
9 |
10 |
11 |
12 |
13 |
14 | all
15 | runtime; build; native; contentfiles; analyzers; buildtransitive
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/CardinalityEstimatorSerializerTests.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Test
27 | {
28 | using System;
29 | using System.Collections.Generic;
30 | using System.Diagnostics;
31 | using System.IO;
32 | using System.Linq;
33 | using System.Runtime.Serialization.Formatters.Binary;
34 | using CardinalityEstimation.Hash;
35 | using Xunit;
36 | using Xunit.Abstractions;
37 | using static CardinalityEstimation.CardinalityEstimator;
38 |
39 | public class CardinalityEstimatorSerializerTests : IDisposable
40 | {
41 | private const int ElementSizeInBytes = 20;
42 | public static readonly Random Rand = new Random();
43 |
44 | private readonly ITestOutputHelper output;
45 | private readonly Stopwatch stopwatch;
46 |
47 | public CardinalityEstimatorSerializerTests(ITestOutputHelper outputHelper) // keeps the output helper and starts timing; the elapsed time is reported in Dispose
48 | {
49 | output = outputHelper;
50 | stopwatch = new Stopwatch();
51 | stopwatch.Start();
52 | }
53 |
54 | public void Dispose() // stops the stopwatch started in the constructor and writes the elapsed time to the test output
55 | {
56 | stopwatch.Stop();
57 | output.WriteLine("Total test time: {0}", stopwatch.Elapsed);
58 | }
59 |
60 | [Fact]
61 | public void TestSerializerCardinality10() // serializes an estimator built by CreateAndFillCardinalityEstimator(10) and checks the exact byte layout
62 | {
63 | CardinalityEstimator hll = CreateAndFillCardinalityEstimator(10);
64 |
65 | var serializer = new CardinalityEstimatorSerializer();
66 |
67 | byte[] results;
68 | using (var memoryStream = new MemoryStream())
69 | {
70 | serializer.Serialize(memoryStream, hll, false); // NOTE(review): meaning of the third argument is not visible here (presumably leaveOpen) - confirm
71 |
72 | results = memoryStream.ToArray();
73 | }
74 |
75 | // Expected length is 101 (4 + 4 + 1 + 4 + 10 * 8 + 8):
76 | // 4 bytes for the major and minor versions
77 | // 4 bytes for the Bits in Index
78 | // 1 byte for the IsSparse and IsDirectCount flags
79 | // 4 bytes for the number of elements in DirectCount
80 | // 8 bytes for each element (ulong) in DirectCount (10 elements here)
81 | // 8 bytes for CountAdded
82 | Assert.Equal(101, results.Length);
83 |
84 | Assert.Equal(14, BitConverter.ToInt32(results[4..8])); // Bits in Index = 14
85 | Assert.Equal(3, results[8]); // IsSparse = true AND IsDirectCount = true
86 | Assert.Equal(10, BitConverter.ToInt32(results[9..13])); // Count = 10
87 | Assert.Equal(10UL, BitConverter.ToUInt64(results[^8..])); // CountAdditions = 10
88 | }
89 |
90 | [Fact]
91 | public void TestSerializerCardinality1000() // serializes an estimator built by CreateAndFillCardinalityEstimator(1000) and checks the sparse-format byte layout
92 | {
93 | CardinalityEstimator hll = CreateAndFillCardinalityEstimator(1000);
94 |
95 | var serializer = new CardinalityEstimatorSerializer();
96 |
97 | byte[] results;
98 | using (var memoryStream = new MemoryStream())
99 | {
100 | serializer.Serialize(memoryStream, hll, false);
101 |
102 | results = memoryStream.ToArray();
103 | }
104 |
105 | CardinalityEstimatorState data = hll.GetState(); // state is read back only to learn how many sparse entries were written
106 |
107 | // Expected length is 21 + 3 * LookupSparse.Count:
108 | // 4 bytes for the major and minor versions
109 | // 4 bytes for the Bits in Index
110 | // 1 byte for the IsSparse and IsDirectCount flags
111 | // 4 bytes for the number of elements in lookupSparse
112 | // 2+1 bytes for each element (2-byte key + 1-byte value) in lookupSparse
113 | // 8 bytes for CountAdded
114 | Assert.Equal(21 + (3 *data.LookupSparse.Count), results.Length);
115 |
116 | Assert.Equal(14, BitConverter.ToInt32(results[4..8], 0)); // Bits in Index = 14
117 | Assert.Equal(2, results[8]); // IsSparse = true AND IsDirectCount = false
118 | Assert.Equal(data.LookupSparse.Count, BitConverter.ToInt32(results[9..13])); // number of sparse entries
119 | Assert.Equal(1000UL, BitConverter.ToUInt64(results[(13 + (3 *data.LookupSparse.Count)) ..])); // CountAdditions = 1000
120 | }
121 |
/// <summary>
/// Runs the 100000-member layout check through the stream-based Serialize call.
/// </summary>
[Fact]
public void TestSerializerCardinality100000() =>
    TestSerializerCardinality100000Parameterized(useBinWriter: false);
127 |
/// <summary>
/// Repeats all serializer layout checks 100 times. The estimators are refilled with
/// random members on every call, so each round works on different data.
/// </summary>
[Fact]
public void TestSerializer()
{
    const int rounds = 100;
    for (int round = 0; round < rounds; round++)
    {
        TestSerializerCardinality10();
        TestSerializerCardinality1000();
        TestSerializerCardinality100000Parameterized(useBinWriter: false);
        TestSerializerCardinality100000Parameterized(useBinWriter: true);
    }
}
139 |
/// <summary>
/// Repeats both round-trip checks (stream based and BinaryWriter/BinaryReader based)
/// 100 times for cardinalities 10, 1000 and 100000.
/// </summary>
[Fact]
public void TestDeserializer2()
{
    int[] cardinalities = { 10, 1000, 100000 };

    for (int round = 0; round < 100; round++)
    {
        foreach (int cardinality in cardinalities)
        {
            TestDeserializerWithCardinality(cardinality);
        }

        foreach (int cardinality in cardinalities)
        {
            TestDeserializer2WithCardinality(cardinality);
        }
    }
}
154 |
/// <summary>
/// Reports the average serialized size (over 10 runs) for cardinalities 1, 2, 4, ... below 10240.
/// Nothing is asserted; the figures are only written to the test output.
/// </summary>
[Fact]
public void TestSerializerSizes()
{
    const int runsPerCardinality = 10;

    int cardinality = 1;
    while (cardinality < 10240)
    {
        long totalBytes = 0;
        for (int run = 0; run < runsPerCardinality; run++)
        {
            totalBytes += TestSerializerCreatesSmallerData(cardinality);
        }

        // integer division, exactly as the size is reported
        long averageBytes = totalBytes / runsPerCardinality;
        output.WriteLine("{0} | {1}", cardinality, averageBytes);

        cardinality *= 2;
    }
}
172 |
/// <summary>
/// Reads the stored version 1.0 payloads (direct, sparse and dense) and checks their counts.
/// If this method fails, it's possible that the serialization format has changed and
/// the serializer's data format version should be incremented.
/// </summary>
[Fact]
public void SerializerCanDeserializeVersion1Point0()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Deserializes one stored payload using Murmur3 as the hash function.
    CardinalityEstimator Load(byte[] payload) =>
        serializer.Deserialize(new MemoryStream(payload), Murmur3.GetHashCode);

    CardinalityEstimator direct = Load(Resources.serializedDirect_v1_0);
    CardinalityEstimator sparse = Load(Resources.serializedSparse_v1_0);
    CardinalityEstimator dense = Load(Resources.serializedDense_v1_0);

    Assert.Equal(50UL, direct.Count());
    Assert.Equal(151UL, sparse.Count());
    Assert.Equal(5005UL, dense.Count());
}
190 |
/// <summary>
/// For each hash function (Murmur3 and Fnv1A): fills an estimator, round-trips it through the
/// serializer with the same hash function, re-adds the same elements to the deserialized copy and
/// checks that its count still equals the original's.
/// </summary>
[Fact]
public void DeserializedEstimatorUsesSameHashAsOriginal()
{
    // Prepare some elements
    IList<int> elements = new List<int>();
    for (int i = 0; i < 150; i++)
    {
        elements.Add(Rand.Next());
    }

    var hashFunctions = new GetHashCodeDelegate[] { Murmur3.GetHashCode, Fnv1A.GetHashCode };
    foreach (GetHashCodeDelegate hashFunction in hashFunctions)
    {
        // Add elements to an estimator using the given hash function
        CardinalityEstimator original = new CardinalityEstimator(hashFunction: hashFunction);
        foreach (int element in elements)
        {
            original.Add(element);
        }

        // Serialize
        var serializer = new CardinalityEstimatorSerializer();
        byte[] results;

        using (var memoryStream = new MemoryStream())
        {
            serializer.Serialize(memoryStream, original, false);
            results = memoryStream.ToArray();
        }

        // Deserialize
        CardinalityEstimator deserialized;
        using (var memoryStream = new MemoryStream(results))
        {
            deserialized = serializer.Deserialize(memoryStream, hashFunction, false);
        }

        // Add the elements again, should have no effect on state
        foreach (int element in elements)
        {
            deserialized.Add(element);
        }

        Assert.Equal(original.Count(), deserialized.Count());
    }
}
236 |
/// <summary>
/// Reads the stored version 2.0 payloads (direct, sparse and dense) and checks their counts.
/// If this method fails, it's possible that the serialization format has changed and
/// the serializer's data format version should be incremented.
/// </summary>
[Fact]
public void SerializerCanDeserializeVersion2Point0()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Deserializes one stored payload using Murmur3 as the hash function.
    CardinalityEstimator Load(byte[] payload) =>
        serializer.Deserialize(new MemoryStream(payload), Murmur3.GetHashCode);

    CardinalityEstimator direct = Load(Resources.serializedDirect_v2_0);
    CardinalityEstimator sparse = Load(Resources.serializedSparse_v2_0);
    CardinalityEstimator dense = Load(Resources.serializedDense_v2_0);

    Assert.Equal(50UL, direct.Count());
    Assert.Equal(151UL, sparse.Count());
    Assert.Equal(5009UL, dense.Count());
}
254 |
/// <summary>
/// Reads the stored version 2.1 payloads (direct, sparse and dense) and checks both their
/// counts and their CountAdditions values.
/// If this method fails, it's possible that the serialization format has changed and
/// the serializer's data format version should be incremented.
/// </summary>
[Fact]
public void SerializerCanDeserializeVersion2Point1()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Deserializes one stored payload using Murmur3 as the hash function.
    CardinalityEstimator Load(byte[] payload) =>
        serializer.Deserialize(new MemoryStream(payload), Murmur3.GetHashCode);

    CardinalityEstimator direct = Load(Resources.serializedDirect_v2_1);
    CardinalityEstimator sparse = Load(Resources.serializedSparse_v2_1);
    CardinalityEstimator dense = Load(Resources.serializedDense_v2_1);

    Assert.Equal(50UL, direct.Count());
    Assert.Equal(50UL, direct.CountAdditions);

    Assert.Equal(151UL, sparse.Count());
    Assert.Equal(150UL, sparse.CountAdditions);

    Assert.Equal(5009UL, dense.Count());
    Assert.Equal(5000UL, dense.CountAdditions);
}
277 |
/// <summary>
/// Round-trips estimators for every combination of 4..16 index bits and 1..1000 members and
/// checks that the count is the same before and after serialization.
/// </summary>
[Fact]
public void TestSerializerMultipleCardinalityAndBitsCombinations()
{
    for (int bits = 4; bits <= 16; bits++)
    {
        for (int cardinality = 1; cardinality <= 1000; cardinality++)
        {
            CardinalityEstimator source = CreateAndFillCardinalityEstimator(cardinality, bits);
            var serializer = new CardinalityEstimatorSerializer();

            using var stream = new MemoryStream();
            // the stream is left open so it can be rewound and read back
            serializer.Serialize(stream, source, true);
            stream.Position = 0;
            CardinalityEstimator roundTripped = serializer.Deserialize(stream, Murmur3.GetHashCode);

            bool countsMatch = roundTripped.Count() == source.Count();
            Assert.True(countsMatch, "Estimators should have same count before and after serialization");
        }
    }
}
297 |
/// <summary>
/// Creates a Murmur3-based estimator with <paramref name="bits" /> index bits and adds
/// <paramref name="cardinality" /> randomly generated byte arrays to it.
/// </summary>
/// <param name="cardinality">Number of random members to add</param>
/// <param name="bits">Value passed as the estimator's <c>b</c> argument</param>
/// <returns>The filled estimator</returns>
private CardinalityEstimator CreateAndFillCardinalityEstimator(int cardinality = 1000000, int bits = 14)
{
    var estimator = new CardinalityEstimator(Murmur3.GetHashCode, b: bits);

    // a single buffer is refilled with fresh random bytes for every member
    byte[] member = new byte[ElementSizeInBytes];
    for (int remaining = cardinality; remaining > 0; remaining--)
    {
        Rand.NextBytes(member);
        estimator.Add(member);
    }

    return estimator;
}
311 |
/// <summary>
/// Fills an estimator with <paramref name="cardinality" /> random members and serializes it.
/// </summary>
/// <param name="cardinality">Number of random members to add</param>
/// <returns>Length in bytes of the serialized estimator</returns>
private int TestSerializerCreatesSmallerData(int cardinality)
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    using var buffer = new MemoryStream();
    serializer.Serialize(buffer, estimator, false);

    // measured through ToArray(), exactly like the other serializer tests read the result
    return buffer.ToArray().Length;
}
326 |
/// <summary>
/// Round-trips an estimator with <paramref name="cardinality" /> random members through the
/// stream-based Serialize/Deserialize calls and compares the states of both estimators.
/// </summary>
/// <param name="cardinality">Number of random members to add</param>
private void TestDeserializerWithCardinality(int cardinality)
{
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var writeStream = new MemoryStream())
    {
        serializer.Serialize(writeStream, original, false);
        payload = writeStream.ToArray();
    }

    using var readStream = new MemoryStream(payload);
    CardinalityEstimator restored = serializer.Deserialize(readStream, Murmur3.GetHashCode, false);

    CompareHLL(original, restored);
}
349 |
/// <summary>
/// Round-trips an estimator with <paramref name="cardinality" /> random members through the
/// BinaryWriter/BinaryReader based Write/Read calls and compares the states of both estimators.
/// </summary>
/// <param name="cardinality">Number of random members to add</param>
private void TestDeserializer2WithCardinality(int cardinality)
{
    CardinalityEstimator original = CreateAndFillCardinalityEstimator(cardinality);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var writeStream = new MemoryStream())
    {
        // the writer is disposed before the bytes are captured
        using (var writer = new BinaryWriter(writeStream))
        {
            serializer.Write(writer, original);
        }

        payload = writeStream.ToArray();
    }

    CardinalityEstimator restored;
    using (var reader = new BinaryReader(new MemoryStream(payload)))
    {
        restored = serializer.Read(reader, Murmur3.GetHashCode);
    }

    CompareHLL(original, restored);
}
376 |
/// <summary>
/// Serializes an estimator filled with 100000 random members and checks the byte layout
/// of the result against the estimator's dense lookup.
/// </summary>
/// <param name="useBinWriter">
/// When true the estimator is written through a BinaryWriter with Write;
/// when false it is written to the stream with Serialize.
/// </param>
private void TestSerializerCardinality100000Parameterized(bool useBinWriter)
{
    CardinalityEstimator estimator = CreateAndFillCardinalityEstimator(100000);
    var serializer = new CardinalityEstimatorSerializer();

    byte[] payload;
    using (var buffer = new MemoryStream())
    {
        if (!useBinWriter)
        {
            serializer.Serialize(buffer, estimator, false);
        }
        else
        {
            // disposed at the end of this branch, i.e. before the bytes are captured
            using var writer = new BinaryWriter(buffer);
            serializer.Write(writer, estimator);
        }

        payload = buffer.ToArray();
    }

    CardinalityEstimatorState state = estimator.GetState();
    int denseLength = state.LookupDense.Length;

    // Expected length is:
    //   4 bytes for the major and minor versions
    //   4 bytes for the Bits in Index
    //   1 byte for the IsSparse and IsDirectCount flags
    //   4 bytes for the number of elements in lookupDense
    //   1 byte for each element in lookupDense
    //   8 bytes for CountAdded
    Assert.Equal(21 + denseLength, payload.Length);

    // Bits in Index = 14
    Assert.Equal(14, BitConverter.ToInt32(payload, 4));
    // IsSparse = false AND IsDirectCount = false
    Assert.Equal(0, payload[8]);
    Assert.Equal(denseLength, BitConverter.ToInt32(payload, 9));
    // CountAdditions = 100000, stored right after the dense lookup
    Assert.Equal(100000UL, BitConverter.ToUInt64(payload, 13 + denseLength));
}
417 |
/// <summary>
/// Asserts that two estimators expose equivalent state: same BitsPerIndex and IsSparse, the same
/// representations present, and equal contents for each representation that is present.
/// </summary>
/// <param name="hll1">Estimator whose state is the reference</param>
/// <param name="hll2">Estimator whose state is compared against the reference</param>
private void CompareHLL(CardinalityEstimator hll1, CardinalityEstimator hll2)
{
    CardinalityEstimatorState expected = hll1.GetState();
    CardinalityEstimatorState actual = hll2.GetState();

    Assert.Equal(expected.BitsPerIndex, actual.BitsPerIndex);
    Assert.Equal(expected.IsSparse, actual.IsSparse);

    // each representation must be present in both states or absent in both
    Assert.Equal(expected.DirectCount == null, actual.DirectCount == null);
    Assert.Equal(expected.LookupSparse == null, actual.LookupSparse == null);
    Assert.Equal(expected.LookupDense == null, actual.LookupDense == null);

    if (expected.DirectCount != null)
    {
        // DirectCount are subsets of each-other => they are the same set
        Assert.True(expected.DirectCount.IsSubsetOf(actual.DirectCount));
        Assert.True(actual.DirectCount.IsSubsetOf(expected.DirectCount));
    }

    if (expected.LookupSparse != null)
    {
        Assert.True(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
    }

    if (expected.LookupDense != null)
    {
        Assert.True(expected.LookupDense.SequenceEqual(actual.LookupDense));
    }
}
445 | }
446 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/CardinalityEstimatorTests.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Test
27 | {
28 | using System;
29 | using System.Collections.Generic;
30 | using System.Diagnostics;
31 | using System.IO;
32 | using System.Linq;
33 |
34 | using CardinalityEstimation.Hash;
35 |
36 | using Xunit;
37 | using Xunit.Abstractions;
38 |
39 | public class CardinalityEstimatorTests : IDisposable
40 | {
41 | private const int ElementSizeInBytes = 20;
42 | public static readonly Random Rand = new Random();
43 |
44 | private readonly ITestOutputHelper output;
45 | private readonly Stopwatch stopwatch;
46 |
/// <summary>
/// Stores the test output helper and starts the stopwatch that <see cref="Dispose" /> reports.
/// </summary>
/// <param name="outputHelper">Output helper used to write test messages</param>
public CardinalityEstimatorTests(ITestOutputHelper outputHelper)
{
    stopwatch = Stopwatch.StartNew();
    output = outputHelper;
}
53 |
/// <summary>
/// Stops the stopwatch started by the constructor and writes the elapsed time to the test output.
/// </summary>
public void Dispose()
{
    stopwatch.Stop();

    TimeSpan elapsed = stopwatch.Elapsed;
    output.WriteLine("Total test time: {0}", elapsed);
}
59 |
/// <summary>
/// Checks CardinalityEstimator.GetSigma against hand-picked hash values.
/// </summary>
[Fact]
public void TestGetSigma()
{
    // simulate a 64 bit hash and 14 bits for indexing
    const int bitsToCount = 64 - 14;

    // 2^bitsToCount - 1: every one of the counted bits is set
    const ulong allCountedBitsSet = (1UL << bitsToCount) - 1;
    // 2^(bitsToCount + 1): the only set bit lies above the counted bits
    const ulong onlyHigherBitSet = 1UL << (bitsToCount + 1);

    Assert.Equal(51, CardinalityEstimator.GetSigma(0, bitsToCount));
    Assert.Equal(50, CardinalityEstimator.GetSigma(1, bitsToCount));
    Assert.Equal(47, CardinalityEstimator.GetSigma(8, bitsToCount));
    Assert.Equal(1, CardinalityEstimator.GetSigma(allCountedBitsSet, bitsToCount));
    Assert.Equal(51, CardinalityEstimator.GetSigma(onlyHigherBitSet, bitsToCount));
}
71 |
/// <summary>
/// Checks that CountAdditions counts every Add call (duplicates included) and that Merge
/// adds the merged estimator's additions on top.
/// </summary>
[Fact]
public void TestCountAdditions()
{
    var target = new CardinalityEstimator();
    Assert.Equal(0UL, target.CountAdditions);

    // the same element twice still counts as two additions
    for (int repeat = 0; repeat < 2; repeat++)
    {
        target.Add(0);
    }

    Assert.Equal(2UL, target.CountAdditions);

    var other = new CardinalityEstimator();
    other.Add(0);
    target.Merge(other);

    Assert.Equal(3UL, target.CountAdditions);
}
90 |
/// <summary>
/// Checks the boolean returned by Add: true when the call changed the estimator's state,
/// false when it did not.
/// </summary>
[Fact]
public void TestChanged()
{
    var estimator = new CardinalityEstimator(Murmur3.GetHashCode);

    Assert.Equal(0UL, estimator.CountAdditions);

    // a new element changes the state, a repeated one does not
    Assert.True(estimator.Add(0));
    Assert.False(estimator.Add(0));

    for (int value = 1; value < 100; value++)
    {
        Assert.True(estimator.Add(value));
    }

    Assert.True(estimator.Add(100)); //First change from direct count
    Assert.False(estimator.Add(100));

    Assert.True(estimator.Add(101));
    Assert.True(estimator.Add(102));

    Assert.False(estimator.Add(0));

    //element doesn't exist but the estimator internal state doesn't change
    Assert.False(estimator.Add(116));
}
127 |
/// <summary>
/// Runs the accuracy check with one million members for four different standard errors.
/// </summary>
[Fact]
public void TestDifferentAccuracies()
{
    const long memberCount = 1000000;

    double[] stdErrors =
    {
        0.26,      // 4 bits
        0.01625,   // 12 bits
        0.008125,  // 14 bits
        0.0040625, // 16 bits
    };

    foreach (double stdError in stdErrors)
    {
        RunTest(stdError, memberCount);
    }
}
143 |
/// <summary>
/// For every member count from 1 to 99, requires the estimate to be exact (accepted error of 0).
/// </summary>
[Fact]
public void AccuracyIsPerfectUnder100Members()
{
    foreach (int memberCount in Enumerable.Range(1, 99))
    {
        RunTest(0.1, memberCount, maxAcceptedError: 0);
    }
}
152 |
/// <summary>
/// For every member count from 1 to 99, runs the accuracy check with three standard errors
/// while passing <c>disableDirectCount: true</c> to RunTest.
/// </summary>
[Fact]
public void AccuracyIsWithinMarginForDirectCountingDisabledUnder100Members()
{
    double[] stdErrors = { 0.1, 0.03, 0.005 };

    foreach (int memberCount in Enumerable.Range(1, 99))
    {
        foreach (double stdError in stdErrors)
        {
            RunTest(stdError, memberCount, disableDirectCount: true);
        }
    }
}
163 |
/// <summary>
/// Runs the accuracy check for cardinalities 1, 2, 4, ... below 10000, each with three
/// standard errors and an explicit maximum accepted error.
/// </summary>
[Fact]
public void TestAccuracySmallCardinality()
{
    var settings = new (double StdError, double MaxAcceptedError)[]
    {
        (0.26, 1.5),
        (0.008125, 0.05),
        (0.0040625, 0.05),
    };

    for (int cardinality = 1; cardinality < 10000; cardinality *= 2)
    {
        foreach (var (stdError, maxAcceptedError) in settings)
        {
            RunTest(stdError, cardinality, maxAcceptedError);
        }
    }
}
174 |
/// <summary>
/// Spreads 99 members over 60 estimators and requires the merged estimate to be exact.
/// </summary>
[Fact]
public void TestMergeCardinalityUnder100()
{
    RunTest(stdError: 0.008125, expectedCount: 99, maxAcceptedError: 0, numHllInstances: 60);
}
182 |
/// <summary>
/// Spreads one million members over 60 estimators and checks the merged estimate
/// against the default error margin of RunTest.
/// </summary>
[Fact]
public void TestMergeLargeCardinality()
{
    RunTest(stdError: 0.008125, expectedCount: 1000000, numHllInstances: 60);
}
190 |
/// <summary>
/// Runs the recreate-from-state check for cardinalities 10, 100, ... up to 1000000.
/// </summary>
[Fact]
public void TestRecreationFromData()
{
    for (int cardinality = 10; cardinality <= 1000000; cardinality *= 10)
    {
        RunRecreationFromData(cardinality);
    }
}
201 |
/// <summary>
/// Merges ten estimators, each holding one random integer, through the static Merge method and
/// checks the merged count and BitsPerIndex.
/// </summary>
[Fact]
public void StaticMergeTest()
{
    const int expectedBitsPerIndex = 11;

    CardinalityEstimator[] estimators = Enumerable.Range(0, 10)
        .Select(_ =>
        {
            var estimator = new CardinalityEstimator(b: expectedBitsPerIndex);
            estimator.Add(Rand.Next());
            return estimator;
        })
        .ToArray();

    CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

    Assert.Equal(10UL, merged.Count());
    Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
}
218 |
/// <summary>
/// Checks that the static Merge method returns null when it is given null.
/// </summary>
[Fact]
public void StaticMergeHandlesNullParameter()
{
    Assert.Null(CardinalityEstimator.Merge(null));
}
225 |
/// <summary>
/// Checks that the static Merge method tolerates null entries in its input: merging a list of
/// null, one Fnv1A-based estimator, and null must yield a non-null result with the estimator's
/// BitsPerIndex.
/// </summary>
[Fact]
public void StaticMergeHandlesNullElements()
{
    const int expectedBitsPerIndex = 11;
    var estimators = new List<CardinalityEstimator>
    {
        null,
        new CardinalityEstimator(Fnv1A.GetHashCode, expectedBitsPerIndex),
        null,
    };

    CardinalityEstimator result = CardinalityEstimator.Merge(estimators);

    Assert.NotNull(result);
    Assert.Equal(expectedBitsPerIndex, result.GetState().BitsPerIndex);
}
235 |
/// <summary>
/// Fills an estimator with random members, builds a second estimator from the first one's state
/// and asserts that both expose equivalent state.
/// </summary>
/// <param name="cardinality">Number of random members to add</param>
private void RunRecreationFromData(int cardinality = 1000000)
{
    var source = new CardinalityEstimator();

    byte[] member = new byte[ElementSizeInBytes];
    for (int added = 0; added < cardinality; added++)
    {
        Rand.NextBytes(member);
        source.Add(member);
    }

    CardinalityEstimatorState expected = source.GetState();

    var recreated = new CardinalityEstimator(Murmur3.GetHashCode, expected);
    CardinalityEstimatorState actual = recreated.GetState();

    Assert.Equal(expected.BitsPerIndex, actual.BitsPerIndex);
    Assert.Equal(expected.IsSparse, actual.IsSparse);

    // each representation must be present in both states or absent in both
    Assert.Equal(expected.DirectCount == null, actual.DirectCount == null);
    Assert.Equal(expected.LookupSparse == null, actual.LookupSparse == null);
    Assert.Equal(expected.LookupDense == null, actual.LookupDense == null);

    if (expected.DirectCount != null)
    {
        // DirectCount are subsets of each-other => they are the same set
        Assert.True(expected.DirectCount.IsSubsetOf(actual.DirectCount));
        Assert.True(actual.DirectCount.IsSubsetOf(expected.DirectCount));
    }

    if (expected.LookupSparse != null)
    {
        Assert.True(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
    }

    if (expected.LookupDense != null)
    {
        Assert.True(expected.LookupDense.SequenceEqual(actual.LookupDense));
    }
}
274 |
/// <summary>
/// Runs the accuracy check with more members than fit in 32 bits. Skipped because it runs long.
/// </summary>
[Fact(Skip = "runtime is long")]
public void TestPast32BitLimit()
{
    // just some big number beyond 32 bits: 2^32 + 1703
    const long cardinality = (1L << 32) + 1703;

    RunTest(0.008125, cardinality);
}
282 |
/// <summary>
/// Runs the accuracy check with random members for cardinalities 10007, 20014, ... below
/// ten million with three standard errors, then once more with one hundred million members.
/// </summary>
[Fact]
public void TestAccuracyLargeCardinality()
{
    double[] stdErrors = { 0.26, 0.008125, 0.0040625 };

    for (int cardinality = 10007; cardinality < 10000000; cardinality *= 2)
    {
        foreach (double stdError in stdErrors)
        {
            RunTest(stdError, cardinality);
        }
    }

    RunTest(0.008125, 100000000);
}
295 |
/// <summary>
/// Runs the accuracy check with sequential members (0, 1, 2, ...) for cardinalities 10007,
/// 20014, ... below ten million with three standard errors, then once more with one hundred
/// million sequential members.
/// </summary>
[Fact]
public void TestSequentialAccuracy()
{
    for (var i = 10007; i < 10000000; i *= 2)
    {
        RunTest(0.26, i, sequential: true);
        RunTest(0.008125, i, sequential: true);
        RunTest(0.0040625, i, sequential: true);
    }

    // The final large run must be sequential too; without the flag it would only repeat
    // the random-input run that TestAccuracyLargeCardinality already performs.
    RunTest(0.008125, 100000000, sequential: true);
}
308 |
/// <summary>
/// Adds ten million random members to one estimator, samples the relative error every 1007
/// additions and writes the largest absolute error (and the member count at which it occurred)
/// to the test output. This is a report only: nothing about the error is asserted.
/// </summary>
[Fact]
public void ReportAccuracy()
{
    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];
    for (var i = 0; i < 10000000; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);

        if (i % 1007 == 0) // just some interval to sample error at, can be any number
        {
            double actualCount = i + 1;

            // Absolute value so that under-estimates are tracked as well as over-estimates.
            double error = Math.Abs(hll.Count() - actualCount) / actualCount;
            if (error > maxError)
            {
                maxError = error;
                worstMember = i + 1;
            }
        }
    }

    output.WriteLine("Worst: {0}", worstMember);
    output.WriteLine("Max error: {0}", maxError);

    // placeholder assertion: the test only produces a report
    Assert.True(true);
}
337 |
/// <summary>
/// Feeds the same 10000 GUID strings into two estimators, one through Add and one by merging
/// single-element estimators, and checks that both serialize to the same number of bytes.
/// </summary>
[Fact]
public void DirectCountingIsResetWhenMergingAlmostFullEstimators()
{
    var addedEstimator = new CardinalityEstimator();
    var mergedEstimator = new CardinalityEstimator();

    for (int i = 0; i < 10_000; i++)
    {
        var guid = Guid.NewGuid().ToString();

        addedEstimator.Add(guid);

        // Simulate some intermediate estimators being merged together
        var temporaryEstimator = new CardinalityEstimator();
        temporaryEstimator.Add(guid);
        mergedEstimator.Merge(temporaryEstimator);
    }

    var serializer = new CardinalityEstimatorSerializer();

    // The serializer is told to leave the streams open (their Length is read below),
    // so the test owns them and disposes them when it ends.
    using var stream1 = new MemoryStream();
    serializer.Serialize(stream1, addedEstimator, true);

    using var stream2 = new MemoryStream();
    serializer.Serialize(stream2, mergedEstimator, true);

    Assert.Equal(stream1.Length, stream2.Length);
}
366 |
/// <summary>
/// For b in 4..15 and cardinalities 1, 2, 4, ... below 10000, fills an estimator with random
/// members, copies it with the copy constructor and asserts that the copy equals the source.
/// </summary>
[Fact]
public void CopyConstructorCorrectlyCopiesValues()
{
    // Two identical passes over the same parameter grid; the members are random,
    // so the second pass works on fresh data.
    for (int pass = 0; pass < 2; pass++)
    {
        for (int b = 4; b < 16; b++)
        {
            for (int cardinality = 1; cardinality < 10_000; cardinality *= 2)
            {
                var source = new CardinalityEstimator(b: b);

                byte[] member = new byte[ElementSizeInBytes];
                for (int added = 0; added < cardinality; added++)
                {
                    Rand.NextBytes(member);
                    source.Add(member);
                }

                var copy = new CardinalityEstimator(source);

                Assert.Equal(source, copy);
            }
        }
    }
}
408 |
/// <summary>
/// Generates random (or sequential) elements and adds them to CardinalityEstimators, then asserts that
/// the observed error rate is no more than <paramref name="maxAcceptedError" />
/// </summary>
/// <param name="stdError">Expected standard error of the estimators (upper bound)</param>
/// <param name="expectedCount">number of elements to generate in total</param>
/// <param name="maxAcceptedError">Maximum allowed error rate. Default is 10 times <paramref name="stdError" /></param>
/// <param name="numHllInstances">Number of estimators to create. Generated elements will be assigned to one of the estimators at random</param>
/// <param name="sequential">When false, elements will be generated at random. When true, elements will be 0,1,2...</param>
/// <param name="disableDirectCount">
/// Meant to disable direct counting for estimators with less than 100 elements.
/// NOTE(review): this method never reads the flag, so it currently has no effect - confirm how the
/// estimator is supposed to be configured and forward the flag when creating the estimators.
/// </param>
private void RunTest(double stdError, long expectedCount, double? maxAcceptedError = null, int numHllInstances = 1,
    bool sequential = false, bool disableDirectCount = false)
{
    maxAcceptedError ??= 10 * stdError; // should fail once in A LOT of runs
    int b = GetAccuracyInBits(stdError);

    var runStopwatch = new Stopwatch();
    long gcMemoryAtStart = GetGcMemory();

    // init HLLs
    var hlls = new CardinalityEstimator[numHllInstances];
    for (var i = 0; i < numHllInstances; i++)
    {
        hlls[i] = new CardinalityEstimator(b: b);
    }

    var nextMember = new byte[ElementSizeInBytes];
    runStopwatch.Start();
    for (long i = 0; i < expectedCount; i++)
    {
        // pick random hll, add member
        int chosenHll = Rand.Next(numHllInstances);
        if (sequential)
        {
            hlls[chosenHll].Add(i);
        }
        else
        {
            Rand.NextBytes(nextMember);
            hlls[chosenHll].Add(nextMember);
        }
    }

    runStopwatch.Stop();
    ReportMemoryCost(gcMemoryAtStart, output); // done here so references can't be GC'ed yet

    // Merge
    CardinalityEstimator mergedHll = CardinalityEstimator.Merge(hlls);

    // evaluate the estimate once and reuse it for both the report and the check
    var observedCount = mergedHll.Count();
    output.WriteLine("Run time: {0}", runStopwatch.Elapsed);
    output.WriteLine("Expected {0}, got {1}", expectedCount, observedCount);

    double obsError = Math.Abs(observedCount / (double)expectedCount - 1.0);
    output.WriteLine("StdErr: {0}. Observed error: {1}", stdError, obsError);
    Assert.True(obsError <= maxAcceptedError, string.Format("Observed error was {0}, over {1}, when adding {2} items", obsError, maxAcceptedError, expectedCount));
    output.WriteLine(string.Empty);
}
465 |
/// <summary>
/// Gets the number of indexing bits required to produce a given standard error
/// </summary>
/// <param name="stdError">
/// Standard error, which determines accuracy and memory consumption. For large cardinalities, the observed error is usually less than
/// 3 * <paramref name="stdError" />.
/// </param>
/// <returns>ceil(log2((1.04 / <paramref name="stdError" />)^2))</returns>
private static int GetAccuracyInBits(double stdError)
{
    double sqrtOfM = 1.04 / stdError;
    double m = sqrtOfM * sqrtOfM;

    return (int)Math.Ceiling(Log2(m));
}
479 |
/// <summary>
/// Triggers a garbage collection and returns the value of GC.GetTotalMemory, which is asked
/// to force a full collection first.
/// </summary>
/// <returns>The number of bytes reported by GC.GetTotalMemory</returns>
private static long GetGcMemory()
{
    GC.Collect();

    long bytesInUse = GC.GetTotalMemory(forceFullCollection: true);
    return bytesInUse;
}
485 |
/// <summary>
/// Writes the difference between the current GC memory reading and
/// <paramref name="gcMemoryAtStart" /> to the test output.
/// </summary>
/// <param name="gcMemoryAtStart">GC memory reading taken before the measured work</param>
/// <param name="outputHelper">Output helper the message is written to</param>
private static void ReportMemoryCost(long gcMemoryAtStart, ITestOutputHelper outputHelper)
{
    long gcMemoryNow = GetGcMemory();
    outputHelper.WriteLine("Appx. memory cost: {0} bytes", gcMemoryNow - gcMemoryAtStart);
}
491 |
/// <summary>
/// Returns the base-2 logarithm of <paramref name="x" />, computed as ln(x) / ln(2)
/// with ln(2) hard-coded.
/// The original note describes this as faster than the framework alternative because it
/// avoids input checks (presumably the two-argument Math.Log; not measured here).
/// </summary>
/// <param name="x">Value whose logarithm is taken</param>
/// <returns>The base-2 logarithm of <paramref name="x" /></returns>
private static double Log2(double x)
{
    const double naturalLogOfTwo = 0.693147180559945309417232121458;

    double naturalLogOfX = Math.Log(x);
    return naturalLogOfX / naturalLogOfTwo;
}
503 | }
504 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/DictionaryUtils.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Test
27 | {
28 | using System.Collections.Generic;
29 |
/// <summary>
/// Helpers for comparing dictionaries in tests.
/// </summary>
public static class DictionaryUtils
{
    /// <summary>
    /// Determines whether two dictionaries hold the same keys mapped to equal values.
    /// </summary>
    /// <typeparam name="TKey">Type of the dictionary keys</typeparam>
    /// <typeparam name="TValue">Type of the dictionary values</typeparam>
    /// <param name="first">First dictionary; may be null</param>
    /// <param name="second">Second dictionary; may be null</param>
    /// <param name="valueComparer">
    /// Comparer used for the values. When null, <see cref="EqualityComparer{T}.Default" /> is used.
    /// </param>
    /// <returns>
    /// True when both arguments are the same reference (including both null), or when both have the
    /// same number of entries and every key of <paramref name="first" /> exists in
    /// <paramref name="second" /> with an equal value; otherwise false.
    /// </returns>
    public static bool DictionaryEqual<TKey, TValue>(this IDictionary<TKey, TValue> first, IDictionary<TKey, TValue> second,
        IEqualityComparer<TValue> valueComparer = null)
    {
        if (ReferenceEquals(first, second))
        {
            return true;
        }
        if ((first == null) || (second == null))
        {
            return false;
        }
        if (first.Count != second.Count)
        {
            return false;
        }

        valueComparer ??= EqualityComparer<TValue>.Default;

        // Counts are equal, so checking every entry of "first" against "second" is sufficient.
        foreach (KeyValuePair<TKey, TValue> kvp in first)
        {
            if (!second.TryGetValue(kvp.Key, out TValue secondValue))
            {
                return false;
            }
            if (!valueComparer.Equals(kvp.Value, secondValue))
            {
                return false;
            }
        }
        return true;
    }
}
65 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/Hash/Fnv1ATests.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Test.Hash
27 | {
28 | using CardinalityEstimation.Hash;
29 | using Xunit;
30 |
/// <summary>
/// Tests for the Fnv1A hash function.
/// </summary>
public class Fnv1ATests
{
    /// <summary>
    /// Compares Fnv1A.GetHashCode against precomputed hash values.
    /// </summary>
    [Fact]
    public void Fnv1AProducesRightValues()
    {
        // Check some precomputed values of FNV1A
        var precomputed = new (ulong Hash, byte[] Input)[]
        {
            (14695981039346656037, new byte[0]),
            (1109817072422714760UL, new byte[] { 1, 2, 3, 4, 5 }),
            (11047178588169845073UL, new byte[] { 255, 255, 255, 255 }),
        };

        foreach (var (hash, input) in precomputed)
        {
            Assert.Equal(hash, Fnv1A.GetHashCode(input));
        }
    }
}
42 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/Hash/HashFunctionFactoryTests.cs:
--------------------------------------------------------------------------------
1 | //// /*
2 | //// See https://github.com/saguiitay/CardinalityEstimation.
3 | //// The MIT License (MIT)
4 | ////
5 | //// Copyright (c) 2015 Microsoft
6 | ////
7 | //// Permission is hereby granted, free of charge, to any person obtaining a copy
8 | //// of this software and associated documentation files (the "Software"), to deal
9 | //// in the Software without restriction, including without limitation the rights
10 | //// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | //// copies of the Software, and to permit persons to whom the Software is
12 | //// furnished to do so, subject to the following conditions:
13 | ////
14 | //// The above copyright notice and this permission notice shall be included in all
15 | //// copies or substantial portions of the Software.
16 | ////
17 | //// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | //// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | //// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | //// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | //// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | //// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | //// SOFTWARE.
24 | //// */
25 |
26 | //namespace CardinalityEstimation.Test.Hash
27 | //{
28 | // using System;
29 | // using System.Collections.Generic;
30 | // using System.Linq;
31 | // using CardinalityEstimation.Hash;
32 | // using Xunit;
33 |
34 | // public class HashFunctionFactoryTests
35 | // {
36 | // [Fact]
37 | // public void FactoryCanProduceAllHashFunctionTypes()
38 | // {
39 | // // Make sure factory can produce each HashFunctionId
40 | // foreach (HashFunctionId hashFunctionId in Enum.GetValues(typeof (HashFunctionId)))
41 | // {
42 | // IHashFunction hashFunction = HashFunctionFactory.GetHashFunction(hashFunctionId);
43 | // Assert.True(hashFunction != null, "Factory created a null hash function with ID" + hashFunctionId);
44 | // }
45 | // }
46 | // }
47 | //}
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/Hash/Murmur3Tests.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | using CardinalityEstimation.Hash;
27 | using Xunit;
28 |
29 | namespace CardinalityEstimation.Test.Hash
30 | {
31 |     /// <summary>
32 |     /// Regression tests pinning the output of Murmur3.GetHashCode to precomputed values.
33 |     /// </summary>
34 |     public class Murmur3Tests
35 |     {
36 |         [Fact]
37 |         public void Murmur3ProducesRightValues()
38 |         {
39 |             // Inputs under test
40 |             byte[] noBytes = new byte[0];
41 |             byte[] ascending = new byte[] { 1, 2, 3, 4, 5 };
42 |             byte[] saturated = new byte[] { 255, 255, 255, 255 };
43 |
44 |             // Each hash must equal its precomputed Murmur3 value
45 |             Assert.Equal(0UL, Murmur3.GetHashCode(noBytes));
46 |             Assert.Equal(18344466521425217038UL, Murmur3.GetHashCode(ascending));
47 |             Assert.Equal(4889297221962843713UL, Murmur3.GetHashCode(saturated));
48 |         }
49 |     }
50 | }
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/Resources.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.42000
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
11 | namespace CardinalityEstimation.Test {
12 | using System;
13 |
14 |
15 | /// <summary>
16 | /// A strongly-typed resource class, for looking up localized strings, etc.
17 | /// </summary>
18 | // This class was auto-generated by the StronglyTypedResourceBuilder
19 | // class via a tool like ResGen or Visual Studio.
20 | // To add or remove a member, edit your .ResX file then rerun ResGen
21 | // with the /str option, or rebuild your VS project.
22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "15.0.0.0")]
23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
25 | internal class Resources {
26 |
27 | private static global::System.Resources.ResourceManager resourceMan;
28 |
29 | private static global::System.Globalization.CultureInfo resourceCulture;
30 |
31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
32 | internal Resources() {
33 | }
34 |
35 | /// <summary>
36 | /// Returns the cached ResourceManager instance used by this class.
37 | /// </summary>
38 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
39 | internal static global::System.Resources.ResourceManager ResourceManager {
40 | get {
41 | if (object.ReferenceEquals(resourceMan, null)) {
42 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("CardinalityEstimation.Test.Resources", typeof(Resources).Assembly);
43 | resourceMan = temp;
44 | }
45 | return resourceMan;
46 | }
47 | }
48 |
49 | /// <summary>
50 | /// Overrides the current thread's CurrentUICulture property for all
51 | /// resource lookups using this strongly typed resource class.
52 | /// </summary>
53 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
54 | internal static global::System.Globalization.CultureInfo Culture {
55 | get {
56 | return resourceCulture;
57 | }
58 | set {
59 | resourceCulture = value;
60 | }
61 | }
62 |
63 | /// <summary>
64 | /// Looks up a localized resource of type System.Byte[].
65 | /// </summary>
66 | internal static byte[] serializedDense_v1_0 {
67 | get {
68 | object obj = ResourceManager.GetObject("serializedDense_v1_0", resourceCulture);
69 | return ((byte[])(obj));
70 | }
71 | }
72 |
73 | /// <summary>
74 | /// Looks up a localized resource of type System.Byte[].
75 | /// </summary>
76 | internal static byte[] serializedDense_v2_0 {
77 | get {
78 | object obj = ResourceManager.GetObject("serializedDense_v2_0", resourceCulture);
79 | return ((byte[])(obj));
80 | }
81 | }
82 |
83 | /// <summary>
84 | /// Looks up a localized resource of type System.Byte[].
85 | /// </summary>
86 | internal static byte[] serializedDense_v2_1 {
87 | get {
88 | object obj = ResourceManager.GetObject("serializedDense_v2_1", resourceCulture);
89 | return ((byte[])(obj));
90 | }
91 | }
92 |
93 | /// <summary>
94 | /// Looks up a localized resource of type System.Byte[].
95 | /// </summary>
96 | internal static byte[] serializedDirect_v1_0 {
97 | get {
98 | object obj = ResourceManager.GetObject("serializedDirect_v1_0", resourceCulture);
99 | return ((byte[])(obj));
100 | }
101 | }
102 |
103 | /// <summary>
104 | /// Looks up a localized resource of type System.Byte[].
105 | /// </summary>
106 | internal static byte[] serializedDirect_v2_0 {
107 | get {
108 | object obj = ResourceManager.GetObject("serializedDirect_v2_0", resourceCulture);
109 | return ((byte[])(obj));
110 | }
111 | }
112 |
113 | /// <summary>
114 | /// Looks up a localized resource of type System.Byte[].
115 | /// </summary>
116 | internal static byte[] serializedDirect_v2_1 {
117 | get {
118 | object obj = ResourceManager.GetObject("serializedDirect_v2_1", resourceCulture);
119 | return ((byte[])(obj));
120 | }
121 | }
122 |
123 | /// <summary>
124 | /// Looks up a localized resource of type System.Byte[].
125 | /// </summary>
126 | internal static byte[] serializedSparse_v1_0 {
127 | get {
128 | object obj = ResourceManager.GetObject("serializedSparse_v1_0", resourceCulture);
129 | return ((byte[])(obj));
130 | }
131 | }
132 |
133 | /// <summary>
134 | /// Looks up a localized resource of type System.Byte[].
135 | /// </summary>
136 | internal static byte[] serializedSparse_v2_0 {
137 | get {
138 | object obj = ResourceManager.GetObject("serializedSparse_v2_0", resourceCulture);
139 | return ((byte[])(obj));
140 | }
141 | }
142 |
143 | /// <summary>
144 | /// Looks up a localized resource of type System.Byte[].
145 | /// </summary>
146 | internal static byte[] serializedSparse_v2_1 {
147 | get {
148 | object obj = ResourceManager.GetObject("serializedSparse_v2_1", resourceCulture);
149 | return ((byte[])(obj));
150 | }
151 | }
152 | }
153 | }
154 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/Resources.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
121 |
122 | resx\serializeddense_v1.0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
123 |
124 |
125 | resx\serializeddense_v2_0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
126 |
127 |
128 | resx\serializeddense_v2_1.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
129 |
130 |
131 | resx\serializeddirect_v1.0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
132 |
133 |
134 | resx\serializeddirect_v2_0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
135 |
136 |
137 | resx\serializeddirect_v2_1.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
138 |
139 |
140 | resx\serializedsparse_v1.0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
141 |
142 |
143 | resx\serializedsparse_v2_0.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
144 |
145 |
146 | resx\serializedsparse_v2_1.bin;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
147 |
148 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/cardinalityestimation.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/cardinalityestimation.snk
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddense_v1.0.bin:
--------------------------------------------------------------------------------
1 | @
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddense_v2_0.bin:
--------------------------------------------------------------------------------
1 | @
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddense_v2_1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializeddense_v2_1.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddirect_v1.0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializeddirect_v1.0.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddirect_v2_0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializeddirect_v2_0.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializeddirect_v2_1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializeddirect_v2_1.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializedsparse_v1.0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializedsparse_v1.0.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializedsparse_v2_0.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializedsparse_v2_0.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.Test/resx/serializedsparse_v2_1.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation.Test/resx/serializedsparse_v2_1.bin
--------------------------------------------------------------------------------
/CardinalityEstimation.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.8.34112.27
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CardinalityEstimation", "CardinalityEstimation\CardinalityEstimation.csproj", "{214AB5A6-DD68-432C-893A-0F6CC70D2005}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CardinalityEstimation.Test", "CardinalityEstimation.Test\CardinalityEstimation.Test.csproj", "{8029A737-1861-4FBB-A85E-BF4B50DF7135}"
9 | EndProject
10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CardinalityEstimation.Benchmark", "CardinalityEstimation.Benchmark\CardinalityEstimation.Benchmark.csproj", "{EC333959-268B-4D4E-AC16-787A2CCEC131}"
11 | EndProject
12 | Global
13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
14 | Debug|Any CPU = Debug|Any CPU
15 | Release|Any CPU = Release|Any CPU
16 | Release-Signed|Any CPU = Release-Signed|Any CPU
17 | EndGlobalSection
18 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
19 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
20 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Debug|Any CPU.Build.0 = Debug|Any CPU
21 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Release|Any CPU.ActiveCfg = Release|Any CPU
22 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Release|Any CPU.Build.0 = Release|Any CPU
23 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Release-Signed|Any CPU.ActiveCfg = Release-Signed|Any CPU
24 | {214AB5A6-DD68-432C-893A-0F6CC70D2005}.Release-Signed|Any CPU.Build.0 = Release-Signed|Any CPU
25 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
26 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Debug|Any CPU.Build.0 = Debug|Any CPU
27 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Release|Any CPU.ActiveCfg = Release|Any CPU
28 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Release|Any CPU.Build.0 = Release|Any CPU
29 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Release-Signed|Any CPU.ActiveCfg = Release|Any CPU
30 | {8029A737-1861-4FBB-A85E-BF4B50DF7135}.Release-Signed|Any CPU.Build.0 = Release|Any CPU
31 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
32 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Debug|Any CPU.Build.0 = Debug|Any CPU
33 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Release|Any CPU.ActiveCfg = Release|Any CPU
34 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Release|Any CPU.Build.0 = Release|Any CPU
35 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Release-Signed|Any CPU.ActiveCfg = Release|Any CPU
36 | {EC333959-268B-4D4E-AC16-787A2CCEC131}.Release-Signed|Any CPU.Build.0 = Release|Any CPU
37 | EndGlobalSection
38 | GlobalSection(SolutionProperties) = preSolution
39 | HideSolutionNode = FALSE
40 | EndGlobalSection
41 | GlobalSection(ExtensibilityGlobals) = postSolution
42 | SolutionGuid = {86F92B28-B929-4178-918C-7AC98437B827}
43 | EndGlobalSection
44 | EndGlobal
45 |
--------------------------------------------------------------------------------
/CardinalityEstimation.sln.DotSettings:
--------------------------------------------------------------------------------
1 |
2 | <?xml version="1.0" encoding="utf-16"?><Profile name="Default Cleanup"><AspOptimizeRegisterDirectives>False</AspOptimizeRegisterDirectives><HtmlReformatCode>False</HtmlReformatCode><CSArrangeThisQualifier>True</CSArrangeThisQualifier><CSRemoveCodeRedundancies>True</CSRemoveCodeRedundancies><CSUseAutoProperty>True</CSUseAutoProperty><CSMakeFieldReadonly>True</CSMakeFieldReadonly><CSUseVar><BehavourStyle>CAN_CHANGE_TO_EXPLICIT</BehavourStyle><LocalVariableStyle>ALWAYS_EXPLICIT</LocalVariableStyle><ForeachVariableStyle>ALWAYS_EXPLICIT</ForeachVariableStyle></CSUseVar><CSUpdateFileHeader>True</CSUpdateFileHeader><VBOptimizeImports>False</VBOptimizeImports><VBShortenReferences>False</VBShortenReferences><JsInsertSemicolon>False</JsInsertSemicolon><JsReformatCode>False</JsReformatCode><CssReformatCode>False</CssReformatCode><XMLReformatCode>False</XMLReformatCode><CSOptimizeUsings><OptimizeUsings>True</OptimizeUsings><EmbraceInRegion>False</EmbraceInRegion><RegionName></RegionName></CSOptimizeUsings><CSShortenReferences>True</CSShortenReferences><VBReformatCode>False</VBReformatCode><CSReformatCode>True</CSReformatCode><CSharpFormatDocComments>True</CSharpFormatDocComments><CSReorderTypeMembers>False</CSReorderTypeMembers><CSArrangeQualifiers>True</CSArrangeQualifiers><CSEnforceVarKeywordUsageSettings>True</CSEnforceVarKeywordUsageSettings></Profile>
3 | Default Cleanup
4 | Default Cleanup
5 | 1
6 | 3
7 | TOGETHER_SAME_LINE
8 | SEPARATE
9 | ALWAYS_ADD
10 | ALWAYS_ADD
11 | ALWAYS_ADD
12 | ALWAYS_ADD
13 | ALWAYS_ADD
14 | ALWAYS_ADD
15 | False
16 | USE_FOR_VARIABLES_IN_THIS_CLASS
17 | False
18 | 140
19 | True
20 | True
21 | False
22 | False
23 | 140
24 | False
25 | False
26 | 140
27 | True
28 | UseVarWhenEvident
29 | UseVarWhenEvident
30 | UseVarWhenEvident
31 | /*
32 | See https://github.com/Microsoft/CardinalityEstimation.
33 | The MIT License (MIT)
34 |
35 | Copyright (c) $CURRENT_YEAR$ Microsoft
36 |
37 | Permission is hereby granted, free of charge, to any person obtaining a copy
38 | of this software and associated documentation files (the "Software"), to deal
39 | in the Software without restriction, including without limitation the rights
40 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
41 | copies of the Software, and to permit persons to whom the Software is
42 | furnished to do so, subject to the following conditions:
43 |
44 | The above copyright notice and this permission notice shall be included in all
45 | copies or substantial portions of the Software.
46 |
47 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
53 | SOFTWARE.
54 | */
55 |
56 | True
57 | False
58 | True
59 | False
60 | False
61 | True
62 | Automatic property
63 | True
64 | False
65 | False
66 | False
67 | MQ
68 | $object$_On$event$
69 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
70 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
71 | <Policy Inspect="True" Prefix="I" Suffix="" Style="AaBb" />
72 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
73 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
74 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
75 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
76 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
77 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
78 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
79 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
80 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
81 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
82 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
83 | <Policy Inspect="True" Prefix="T" Suffix="" Style="AaBb" />
84 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
85 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
86 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
87 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
88 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
89 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
90 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
91 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
92 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
93 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
94 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
95 | <Policy Inspect="True" Prefix="I" Suffix="" Style="AaBb" />
96 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
97 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
98 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
99 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
100 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
101 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
102 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
103 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
104 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
105 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
106 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
107 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
108 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
109 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
110 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
111 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
112 | <Policy Inspect="True" Prefix="T" Suffix="" Style="AaBb" />
113 | $object$_On$event$
114 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
115 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
116 | <Policy Inspect="True" Prefix="I" Suffix="" Style="AaBb" />
117 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
118 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
119 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
120 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
121 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
122 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
123 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
124 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
125 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
126 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
127 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
128 | <Policy Inspect="True" Prefix="T" Suffix="" Style="AaBb" />
129 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
130 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
131 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
132 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
133 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
134 | <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" />
135 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
136 | <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb" />
137 | True
138 | True
139 | True
140 | True
--------------------------------------------------------------------------------
/CardinalityEstimation/CardinalityEstimation.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | net9.0;net8.0
4 | Debug;Release;Release-Signed
5 | true
6 | CardinalityEstimation.Signed
7 | CardinalityEstimation
8 | 1.12.0
9 | A C# library to estimate the number of unique elements in a set, in a quick and memory-efficient manner, based on the work of Flajolet et al. and Heule et al. Signed version.
10 | Oron Navon;Sagui Itay
11 |
12 | MIT
13 | https://github.com/saguiitay/CardinalityEstimation
14 | README.md
15 | Copyright © Sagui Itay 2022
16 | hyperloglog cardinality estimation loglog set c# cardinalityestimation
17 | Removed support for deprecated dotnet versions.
18 | Added support for .NET 9.0
19 | 1.12.0.0
20 | 1.12.0.0
21 | true
22 | snupkg
23 |
24 |
25 |
26 | pdbonly
27 | True
28 |
29 |
30 |
31 | cardinalityestimation.snk
32 | true
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/CardinalityEstimation/CardinalityEstimator.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation
27 | {
28 | using System;
29 | using System.Collections.Generic;
30 | using System.Text;
31 | using Hash;
32 | /// <summary>Signature of a hash function that maps a byte array to a 64-bit unsigned hash code.</summary>
33 | public delegate ulong GetHashCodeDelegate(byte[] bytes);
34 |
35 | /// <summary>
36 | /// A cardinality estimator for sets of some common types, which uses a HashSet for small cardinalities,
37 | /// LinearCounting for medium-range cardinalities and HyperLogLog for large cardinalities. Based off of the following:
38 | /// 1. Flajolet et al., "HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm",
39 | /// DMTCS proc. AH 2007.
40 | /// 2. Heule, Nunkesser and Hall 2013, "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation
41 | /// Algorithm".
42 | ///
43 | /// </summary>
44 | /// <remarks>
45 | /// 1. This implementation is not thread-safe
46 | /// 2. By default, it uses the 128-bit Murmur3 hash function.
47 | /// For legacy support, the CTOR also allows using the 64-bit Fowler/Noll/Vo-0 FNV-1a hash function.
48 | /// 3. Estimation is perfect up to 100 elements, then approximate
49 | /// </remarks>
50 | [Serializable]
51 | public class CardinalityEstimator : ICardinalityEstimator, ICardinalityEstimator, ICardinalityEstimator,
52 | ICardinalityEstimator, ICardinalityEstimator, ICardinalityEstimator, ICardinalityEstimator,
53 | ICardinalityEstimator, IEquatable
54 | {
55 |
56 | #region Private consts
57 | /// <summary>
58 | /// Max number of elements to hold in the direct representation
59 | /// </summary>
60 | private const int DirectCounterMaxElements = 100;
61 | #endregion
62 |
63 | #region Private fields
64 | /// <summary>
65 | /// Number of bits for indexing HLL sub-streams - the number of estimators is 2^bitsPerIndex
66 | /// </summary>
67 | private readonly int bitsPerIndex;
68 |
69 | /// <summary>
70 | /// Number of bits to compute the HLL estimate on
71 | /// </summary>
72 | private readonly byte bitsForHll;
73 |
74 | /// <summary>
75 | /// HLL lookup table size
76 | /// </summary>
77 | private readonly int m;
78 |
79 | /// <summary>
80 | /// Fixed bias correction factor
81 | /// </summary>
82 | private readonly double alphaM;
83 |
84 | /// <summary>
85 | /// Threshold determining whether to use LinearCounting or HyperLogLog based on an initial estimate
86 | /// </summary>
87 | private readonly double subAlgorithmSelectionThreshold;
88 |
89 | /// <summary>
90 | /// Lookup table for the dense representation
91 | /// </summary>
92 | private byte[] lookupDense;
93 |
94 | /// <summary>
95 | /// Lookup dictionary for the sparse representation
96 | /// </summary>
97 | private IDictionary lookupSparse;
98 |
99 | /// <summary>
100 | /// Max number of elements to hold in the sparse representation
101 | /// </summary>
102 | private readonly int sparseMaxElements;
103 |
104 | /// <summary>
105 | /// Indicates that the sparse representation is currently used
106 | /// </summary>
107 | private bool isSparse;
108 |
109 | /// <summary>
110 | /// Set for direct counting of elements
111 | /// </summary>
112 | private HashSet directCount;
113 |
114 | /// <summary>
115 | /// Hash function used (marked [NonSerialized], so it is not part of the serialized object state)
116 | /// </summary>
117 | [NonSerialized]
118 | private GetHashCodeDelegate hashFunction;
119 | #endregion
120 |
121 | #region Constructors
122 | /// <summary>
123 | /// Creates a new, empty instance of CardinalityEstimator
124 | /// </summary>
125 | /// <param name="b">
126 | /// Number of bits determining accuracy and memory consumption, in the range [4, 16] (higher = greater accuracy and memory usage).
127 | /// For large cardinalities, the standard error is 1.04 * 2^(-b/2), and the dense lookup table takes 2^b bytes.
128 | /// The default value of 14 typically yields 3% error or less across the entire range of cardinalities (usually much less),
129 | /// and uses up to ~16kB of memory. b=4 yields less than ~100% error and uses less than 1kB. b=16 uses up to ~64kB and usually yields 1%
130 | /// error or less. A value outside [4, 16] raises an ArgumentOutOfRangeException
131 | /// </param>
132 | /// <param name="hashFunction">Hash function to use. When null, the estimator falls back to a hash based on XxHash128</param>
133 | /// <param name="useDirectCounting">
134 | /// True if direct count should be used for up to <see cref="DirectCounterMaxElements"/> elements.
135 | /// False if direct count should be avoided and use always estimation, even for low cardinalities.
136 | /// </param>
137 | public CardinalityEstimator(GetHashCodeDelegate hashFunction = null, int b = 14, bool useDirectCounting = true)
138 | : this(hashFunction, CreateEmptyState(b, useDirectCounting))
139 | { }
140 |
141 |         /// <summary>
142 |         /// Copy constructor. The lookup tables and the direct-count set are duplicated, so the copy shares no
143 |         /// mutable collection with <paramref name="other" />. CountAdditions is not copied and starts at zero.
144 |         /// </summary>
145 |         public CardinalityEstimator(CardinalityEstimator other)
146 |         {
147 |             bitsPerIndex = other.bitsPerIndex;
148 |             bitsForHll = other.bitsForHll;
149 |             m = other.m;
150 |             alphaM = other.alphaM;
151 |             subAlgorithmSelectionThreshold = other.subAlgorithmSelectionThreshold;
152 |             sparseMaxElements = other.sparseMaxElements;
153 |             isSparse = other.isSparse;
154 |             hashFunction = other.hashFunction;
155 |             if (other.lookupDense != null)
156 |             {
157 |                 lookupDense = new byte[other.lookupDense.Length];
158 |                 Array.Copy(other.lookupDense, lookupDense, other.lookupDense.Length);
159 |             }
160 |             if (other.lookupSparse != null)
161 |             {
162 |                 lookupSparse = new Dictionary<ushort, byte>(other.lookupSparse);
163 |             }
164 |             if (other.directCount != null)
165 |             {
166 |                 directCount = new HashSet<ulong>(other.directCount, other.directCount.Comparer);
167 |             }
168 |         }
169 |
170 |         /// <summary>
171 |         /// Creates a CardinalityEstimator with the given <paramref name="state" />
172 |         /// </summary>
173 |         /// <param name="hashFunction">Hash function to use; when null, a hash based on XxHash128 is used</param>
174 |         /// <param name="state">State to start from. DirectCount and LookupSparse are copied, the LookupDense array is used as-is</param>
175 |         internal CardinalityEstimator(GetHashCodeDelegate hashFunction, CardinalityEstimatorState state)
176 |         {
177 |             bitsPerIndex = state.BitsPerIndex;
178 |             bitsForHll = (byte)(64 - bitsPerIndex);
179 |             m = (int) Math.Pow(2, bitsPerIndex);
180 |             alphaM = GetAlphaM(m);
181 |             subAlgorithmSelectionThreshold = GetSubAlgorithmSelectionThreshold(bitsPerIndex);
182 |
183 |             // Init the hash function
184 |             this.hashFunction = hashFunction;
185 |             if (this.hashFunction == null)
186 |             {
187 |                 this.hashFunction = (x) => BitConverter.ToUInt64(System.IO.Hashing.XxHash128.Hash(x));
188 |             }
189 |
190 |             // Init the direct count
191 |             directCount = state.DirectCount != null ? new HashSet<ulong>(state.DirectCount) : null;
192 |
193 |             // Init the sparse representation
194 |             isSparse = state.IsSparse;
195 |             lookupSparse = state.LookupSparse != null ? new Dictionary<ushort, byte>(state.LookupSparse) : null;
196 |             lookupDense = state.LookupDense;
197 |             CountAdditions = state.CountAdditions;
198 |
199 |             // Each element in the sparse representation takes 15 bytes, and there is some constant overhead
200 |             sparseMaxElements = Math.Max(0, (m / 15) - 10);
201 |             // If necessary, switch to the dense representation
202 |             if (sparseMaxElements <= 0)
203 |             {
204 |                 SwitchToDenseRepresentation();
205 |             }
206 |
207 |             // if DirectCount is not null, populate the HLL lookup with its elements. This allows serialization to include only directCount
208 |             if (directCount != null)
209 |             {
210 |                 // since we are re-initializing the object, we need to reset isSparse to true and sparse lookup
211 |                 isSparse = true;
212 |                 lookupSparse = new Dictionary<ushort, byte>();
213 |                 // AddElementHash also re-adds each hash to directCount; the hashes are already present, so the set
214 |                 // is not modified while it is being enumerated
215 |                 foreach (ulong element in directCount)
216 |                 {
217 |                     AddElementHash(element);
218 |                 }
219 |             }
220 |         }
221 | #endregion
222 |
223 | #region Public properties
224 | public ulong CountAdditions { get; private set; } // Incremented by every Add call; Merge adds the other estimator's value
225 | #endregion
226 |
227 | #region Public methods
228 |         /// <summary>
229 |         /// Adds a <see cref="string" /> element, hashing its UTF-8 encoding
230 |         /// </summary>
231 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
232 |         public bool Add(string element)
233 |         {
234 |             byte[] payload = Encoding.UTF8.GetBytes(element);
235 |             bool stateChanged = AddElementHash(hashFunction(payload));
236 |             CountAdditions++;
237 |             return stateChanged;
238 |         }
239 |
240 |         /// <summary>
241 |         /// Adds an <see cref="int" /> element, hashing the bytes produced by BitConverter.GetBytes
242 |         /// </summary>
243 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
244 |         public bool Add(int element)
245 |         {
246 |             byte[] payload = BitConverter.GetBytes(element);
247 |             bool stateChanged = AddElementHash(hashFunction(payload));
248 |             CountAdditions++;
249 |             return stateChanged;
250 |         }
251 |
252 |         /// <summary>
253 |         /// Adds a <see cref="uint" /> element, hashing the bytes produced by BitConverter.GetBytes
254 |         /// </summary>
255 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
256 |         public bool Add(uint element)
257 |         {
258 |             byte[] payload = BitConverter.GetBytes(element);
259 |             bool stateChanged = AddElementHash(hashFunction(payload));
260 |             CountAdditions++;
261 |             return stateChanged;
262 |         }
263 |
264 |         /// <summary>
265 |         /// Adds a <see cref="long" /> element, hashing the bytes produced by BitConverter.GetBytes
266 |         /// </summary>
267 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
268 |         public bool Add(long element)
269 |         {
270 |             byte[] payload = BitConverter.GetBytes(element);
271 |             bool stateChanged = AddElementHash(hashFunction(payload));
272 |             CountAdditions++;
273 |             return stateChanged;
274 |         }
275 |
276 |         /// <summary>
277 |         /// Adds a <see cref="ulong" /> element, hashing the bytes produced by BitConverter.GetBytes
278 |         /// </summary>
279 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
280 |         public bool Add(ulong element)
281 |         {
282 |             byte[] payload = BitConverter.GetBytes(element);
283 |             bool stateChanged = AddElementHash(hashFunction(payload));
284 |             CountAdditions++;
285 |             return stateChanged;
286 |         }
287 |
288 |         /// <summary>
289 |         /// Adds a <see cref="float" /> element, hashing the bytes produced by BitConverter.GetBytes
290 |         /// </summary>
291 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
292 |         public bool Add(float element)
293 |         {
294 |             byte[] payload = BitConverter.GetBytes(element);
295 |             bool stateChanged = AddElementHash(hashFunction(payload));
296 |             CountAdditions++;
297 |             return stateChanged;
298 |         }
299 |
300 |         /// <summary>
301 |         /// Adds a <see cref="double" /> element, hashing the bytes produced by BitConverter.GetBytes
302 |         /// </summary>
303 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
304 |         public bool Add(double element)
305 |         {
306 |             byte[] payload = BitConverter.GetBytes(element);
307 |             bool stateChanged = AddElementHash(hashFunction(payload));
308 |             CountAdditions++;
309 |             return stateChanged;
310 |         }
311 |
312 |         /// <summary>
313 |         /// Adds a byte array element
314 |         /// </summary>
315 |         /// <returns>True if the estimator's state was modified. False otherwise</returns>
316 |         public bool Add(byte[] element)
317 |         {
318 |             // No conversion step: the array is handed to the hash function as-is
319 |             bool stateChanged = AddElementHash(hashFunction(element));
320 |             CountAdditions++;
321 |             return stateChanged;
322 |         }
323 |
324 |         /// <summary>
325 |         /// Returns the estimated number of items in the estimator
326 |         /// </summary>
327 |         /// <remarks>
328 |         /// If Direct Count is enabled, and only a few items were added, the exact count is returned
329 |         /// </remarks>
330 |         public ulong Count()
331 |         {
332 |             // If only a few elements have been seen, return the exact count
333 |             if (directCount != null)
334 |             {
335 |                 return (ulong) directCount.Count;
336 |             }
337 |
338 |             double zInverse = 0;
339 |             double v = 0;
340 |
341 |             if (isSparse)
342 |             {
343 |                 // Sum 2^(-rank) over the stored entries; each sub-stream without an entry adds 2^0 = 1 and counts towards v
344 |                 foreach (KeyValuePair<ushort, byte> kvp in lookupSparse)
345 |                 {
346 |                     byte sigma = kvp.Value;
347 |                     zInverse += Math.Pow(2, -sigma);
348 |                 }
349 |                 v = m - lookupSparse.Count;
350 |                 zInverse += m - lookupSparse.Count;
351 |             }
352 |             else
353 |             {
354 |                 // Sum 2^(-rank) over the whole table and count the entries that are still zero in v
355 |                 for (var i = 0; i < m; i++)
356 |                 {
357 |                     byte sigma = lookupDense[i];
358 |                     zInverse += Math.Pow(2, -sigma);
359 |                     if (sigma == 0)
360 |                     {
361 |                         v++;
362 |                     }
363 |                 }
364 |             }
365 |
366 |             double e = alphaM*m*m/zInverse;
367 |             if (e <= 5.0*m)
368 |             {
369 |                 e = BiasCorrection.CorrectBias(e, bitsPerIndex);
370 |             }
371 |
372 |             double h;
373 |             if (v > 0)
374 |             {
375 |                 // LinearCounting estimate
376 |                 h = m*Math.Log(m/v);
377 |             }
378 |             else
379 |             {
380 |                 h = e;
381 |             }
382 |
383 |             if (h <= subAlgorithmSelectionThreshold)
384 |             {
385 |                 return (ulong) Math.Round(h);
386 |             }
387 |             return (ulong) Math.Round(e);
388 |         }
389 |
390 |         /// <summary>
391 |         /// Merges the given CardinalityEstimator instance into this one
392 |         /// </summary>
393 |         /// <param name="other">another instance of CardinalityEstimator; must be non-null and have the same lookup table size</param>
394 |         public void Merge(CardinalityEstimator other)
395 |         {
396 |             if (other == null)
397 |             {
398 |                 throw new ArgumentNullException(nameof(other));
399 |             }
400 |
401 |             if (other.m != m)
402 |             {
403 |                 throw new ArgumentOutOfRangeException(nameof(other),
404 |                     "Cannot merge CardinalityEstimator instances with different accuracy/map sizes");
405 |             }
406 |
407 |             CountAdditions += other.CountAdditions;
408 |             if (isSparse && other.isSparse)
409 |             {
410 |                 // Merge two sparse instances
411 |                 foreach (KeyValuePair<ushort, byte> kvp in other.lookupSparse)
412 |                 {
413 |                     ushort index = kvp.Key;
414 |                     byte otherRank = kvp.Value;
415 |                     lookupSparse.TryGetValue(index, out byte thisRank);
416 |                     lookupSparse[index] = Math.Max(thisRank, otherRank);
417 |                 }
418 |
419 |                 // Switch to dense if necessary
420 |                 if (lookupSparse.Count > sparseMaxElements)
421 |                 {
422 |                     SwitchToDenseRepresentation();
423 |                 }
424 |             }
425 |             else
426 |             {
427 |                 // Make sure this (target) instance is dense, then merge
428 |                 SwitchToDenseRepresentation();
429 |                 if (other.isSparse)
430 |                 {
431 |                     foreach (KeyValuePair<ushort, byte> kvp in other.lookupSparse)
432 |                     {
433 |                         ushort index = kvp.Key;
434 |                         byte rank = kvp.Value;
435 |                         lookupDense[index] = Math.Max(lookupDense[index], rank);
436 |                     }
437 |                 }
438 |                 else
439 |                 {
440 |                     for (var i = 0; i < m; i++)
441 |                     {
442 |                         lookupDense[i] = Math.Max(lookupDense[i], other.lookupDense[i]);
443 |                     }
444 |                 }
445 |             }
446 |
447 |             if (other.directCount != null)
448 |             {
449 |                 // Other instance is using direct counter. If this instance is also using direct counter, merge them.
450 |                 if (directCount != null)
451 |                 {
452 |                     directCount.UnionWith(other.directCount);
453 |                     if (directCount.Count > DirectCounterMaxElements)
454 |                     {
455 |                         directCount = null;
456 |                     }
457 |                 }
458 |             }
459 |             else
460 |             {
461 |                 // Other instance is not using direct counter, make sure this instance doesn't either
462 |                 directCount = null;
463 |             }
464 |         }
465 | #endregion
466 |
467 |         /// <summary>
468 |         /// Merges <paramref name="estimators" /> into a new <see cref="CardinalityEstimator" />.
469 |         /// </summary>
470 |         /// <param name="estimators">Instances of <see cref="CardinalityEstimator" />; null entries are skipped</param>
471 |         /// <returns>
472 |         /// A new <see cref="CardinalityEstimator" /> if there is at least one non-null <see cref="CardinalityEstimator" /> in
473 |         /// <paramref name="estimators" />; otherwise <see langword="null" />.
474 |         /// </returns>
475 |         /// <remarks>
476 |         /// The result starts as a copy of the first non-null estimator, so its accuracy and hash function are taken from it.
477 |         /// The remaining estimators are assumed to use the same parameters.
478 |         /// </remarks>
479 |         public static CardinalityEstimator Merge(IEnumerable<CardinalityEstimator> estimators)
480 |         {
481 |             if (estimators == null)
482 |             {
483 |                 return null;
484 |             }
485 |
486 |             CardinalityEstimator result = null;
487 |             foreach (CardinalityEstimator estimator in estimators)
488 |             {
489 |                 if (estimator == null)
490 |                 {
491 |                     continue;
492 |                 }
493 |
494 |                 if (result == null)
495 |                 {
496 |                     result = new CardinalityEstimator(estimator);
497 |                 }
498 |                 // The first estimator is merged into its own copy too: the copy constructor leaves CountAdditions at zero
499 |                 result.Merge(estimator);
500 |             }
501 |
502 |             return result;
503 |         }
504 |
505 | #region Private/Internal methods
506 |         /// <summary>Packs the current fields into a state object; the collections are handed over by reference, not copied</summary>
507 |         internal CardinalityEstimatorState GetState()
508 |         {
509 |             var snapshot = new CardinalityEstimatorState();
510 |             snapshot.BitsPerIndex = bitsPerIndex;
511 |             snapshot.DirectCount = directCount;
512 |             snapshot.IsSparse = isSparse;
513 |             snapshot.LookupDense = lookupDense;
514 |             snapshot.LookupSparse = lookupSparse;
515 |             snapshot.CountAdditions = CountAdditions;
516 |             return snapshot;
517 |         }
518 |
519 |         /// <summary>
520 |         /// Creates state for an empty CardinalityEstimator : DirectCount (when enabled) and LookupSparse are empty, LookupDense is null.
521 |         /// </summary>
522 |         /// <param name="b">
523 |         /// Number of bits per index, stored in <see cref="CardinalityEstimatorState.BitsPerIndex" />; must be in the range [4, 16]
524 |         /// </param>
525 |         /// <param name="useDirectCount">True if direct count should be used for up to <see cref="DirectCounterMaxElements"/> elements.
526 |         /// False if direct count should be avoided and use always estimation, even for low cardinalities.
527 |         /// </param>
528 |         private static CardinalityEstimatorState CreateEmptyState(int b, bool useDirectCount)
529 |         {
530 |             if (b < 4 || b > 16)
531 |             {
532 |                 throw new ArgumentOutOfRangeException(nameof(b), b, "Accuracy out of range, legal range is 4 <= BitsPerIndex <= 16");
533 |             }
534 |
535 |             return new CardinalityEstimatorState
536 |             {
537 |                 BitsPerIndex = b,
538 |                 DirectCount = useDirectCount ? new HashSet<ulong>() : null,
539 |                 IsSparse = true,
540 |                 LookupSparse = new Dictionary<ushort, byte>(),
541 |                 LookupDense = null,
542 |                 CountAdditions = 0,
543 |             };
544 |         }
545 |
546 |         /// <summary>
547 |         /// Looks up the threshold determining whether to use LinearCounting or HyperLogLog for an estimate.
548 |         /// Values are from the supplementary material of Heule et al.
549 |         /// </summary>
550 |         /// <param name="bits">Number of bits; only the values 4 through 18 have an entry</param>
551 |         /// <returns>The threshold for <paramref name="bits" /></returns>
552 |         private double GetSubAlgorithmSelectionThreshold(int bits)
553 |         {
554 |             return bits switch
555 |             {
556 |                 4 => 10,
557 |                 5 => 20,
558 |                 6 => 40,
559 |                 7 => 80,
560 |                 8 => 220,
561 |                 9 => 400,
562 |                 10 => 900,
563 |                 11 => 1800,
564 |                 12 => 3100,
565 |                 13 => 6500,
566 |                 14 => 11500,
567 |                 15 => 20000,
568 |                 16 => 50000,
569 |                 17 => 120000,
570 |                 18 => 350000,
571 |                 _ => throw new ArgumentOutOfRangeException(nameof(bits), "Unexpected number of bits (should never happen)"),
572 |             };
573 |         }
589 |
590 |         /// <summary>
591 |         /// Adds an element's hash code to the counted set
592 |         /// </summary>
593 |         /// <param name="hashCode">Hash code of the element to add</param>
594 |         /// <returns>True if the direct-count set, a stored rank or the representation changed. False otherwise</returns>
595 |         private bool AddElementHash(ulong hashCode)
596 |         {
597 |             bool modified = false;
598 |             if (directCount != null)
599 |             {
600 |                 // Exact phase: remember the hash itself, and give up on exact counting once the set outgrows its limit
601 |                 modified = directCount.Add(hashCode);
602 |                 if (directCount.Count > DirectCounterMaxElements)
603 |                 {
604 |                     directCount = null;
605 |                     modified = true;
606 |                 }
607 |             }
608 |
609 |             // The bits above bitsForHll pick the sub-stream; the rank is computed over the low bitsForHll bits
610 |             ushort bucket = (ushort)(hashCode >> bitsForHll);
611 |             byte rank = GetSigma(hashCode, bitsForHll);
612 |             if (!isSparse)
613 |             {
614 |                 byte current = lookupDense[bucket];
615 |                 lookupDense[bucket] = Math.Max(current, rank);
616 |                 return modified || rank > current;
617 |             }
618 |             lookupSparse.TryGetValue(bucket, out byte previous);
619 |             lookupSparse[bucket] = Math.Max(previous, rank);
620 |             modified = modified || rank > previous;
621 |             if (lookupSparse.Count > sparseMaxElements)
622 |             {
623 |                 SwitchToDenseRepresentation();
624 |                 modified = true;
625 |             }
626 |             return modified;
627 |         }
628 |
629 |         /// <summary>
630 |         /// Gets the appropriate value of alpha_M for the given <paramref name="m" />
631 |         /// </summary>
632 |         /// <param name="m">size of the lookup table</param>
633 |         /// <returns>alpha_M for bias correction</returns>
634 |         private double GetAlphaM(int m)
635 |         {
636 |             // Table sizes 16, 32 and 64 use fixed constants; every other size uses the closed-form expression
637 |             return m switch
638 |             {
639 |                 16 => 0.673,
640 |                 32 => 0.697,
641 |                 64 => 0.709,
642 |                 _ => 0.7213/(1 + (1.079 / m)),
643 |             };
644 |         }
648 |
649 |         /// <summary>
650 |         /// Returns the number of leading zeroes within the lowest <paramref name="bitsToCount" /> bits of
651 |         /// <paramref name="hash" />, plus one
652 |         /// </summary>
653 |         /// <param name="hash">Hash value to calculate the statistic on</param>
654 |         /// <param name="bitsToCount">Width, in bits, of the low-order window that is inspected</param>
655 |         /// <returns>Leading zeroes inside the window plus one; bitsToCount + 1 when the window is all zero</returns>
656 |         public static byte GetSigma(ulong hash, byte bitsToCount)
657 |         {
658 |             // C# masks the shift count of a 64-bit operand to 6 bits, so "1UL << 64" equals 1 and would give an
659 |             // empty mask; a window of 64 bits (or more) therefore has to select the whole value explicitly
660 |             ulong mask = bitsToCount >= 64 ? ulong.MaxValue : (1UL << bitsToCount) - 1;
661 |             int knownZeros = 64 - bitsToCount;
662 |
663 |             ulong masked = hash & mask;
664 |
665 |             // An all-zero window reports 64 leading zeroes, which yields bitsToCount + 1 below
666 |             var leadingZeros = (byte)ulong.LeadingZeroCount(masked);
667 |             return (byte)(leadingZeros - knownZeros + 1);
668 |         }
669 |
670 |         /// <summary>
671 |         /// Converts this estimator from the sparse to the dense representation; does nothing when it is already dense
672 |         /// </summary>
673 |         private void SwitchToDenseRepresentation()
674 |         {
675 |             if (!isSparse)
676 |             {
677 |                 return;
678 |             }
679 |
680 |             lookupDense = new byte[m];
681 |             foreach (KeyValuePair<ushort, byte> kvp in lookupSparse)
682 |             {
683 |                 int index = kvp.Key;
684 |                 lookupDense[index] = kvp.Value;
685 |             }
686 |             lookupSparse = null;
687 |             isSparse = false;
688 |         }
689 | #endregion
690 |
691 | #region IEquatable implementation
692 |         /// <summary>
693 |         /// Compares the configuration, the hash function and the lookup/direct-count contents with <paramref name="other" />.
694 |         /// CountAdditions is not part of the comparison.
695 |         /// </summary>
696 |         /// <param name="other">Estimator to compare with; null is never equal</param>
697 |         /// <returns>True when both estimators hold the same configuration and contents</returns>
698 |         public bool Equals(CardinalityEstimator other)
699 |         {
700 |             if (other == null)
701 |             {
702 |                 return false;
703 |             }
704 |
705 |             // Scalar configuration and the hash delegate must match exactly
706 |             bool sameSetup =
707 |                 bitsPerIndex == other.bitsPerIndex &&
708 |                 bitsForHll == other.bitsForHll &&
709 |                 m == other.m &&
710 |                 alphaM == other.alphaM &&
711 |                 subAlgorithmSelectionThreshold == other.subAlgorithmSelectionThreshold &&
712 |                 sparseMaxElements == other.sparseMaxElements &&
713 |                 isSparse == other.isSparse &&
714 |                 hashFunction == other.hashFunction;
715 |             if (!sameSetup)
716 |             {
717 |                 return false;
718 |             }
719 |
720 |             // Each container must be present (or absent) on both sides
721 |             if ((lookupDense == null) != (other.lookupDense == null) ||
722 |                 (lookupSparse == null) != (other.lookupSparse == null) ||
723 |                 (directCount == null) != (other.directCount == null))
724 |             {
725 |                 return false;
726 |             }
727 |
728 |             if (lookupDense != null)
729 |             {
730 |                 if (lookupDense.Length != other.lookupDense.Length)
731 |                 {
732 |                     return false;
733 |                 }
734 |                 for (int idx = 0; idx < lookupDense.Length; idx++)
735 |                 {
736 |                     if (lookupDense[idx] != other.lookupDense[idx])
737 |                     {
738 |                         return false;
739 |                     }
740 |                 }
741 |             }
742 |
743 |             if (directCount != null)
744 |             {
745 |                 if (directCount.Count != other.directCount.Count || !directCount.SetEquals(other.directCount))
746 |                 {
747 |                     return false;
748 |                 }
749 |             }
750 |
751 |             if (lookupSparse != null)
752 |             {
753 |                 if (lookupSparse.Count != other.lookupSparse.Count)
754 |                 {
755 |                     return false;
756 |                 }
757 |                 foreach (var entry in lookupSparse)
758 |                 {
759 |                     bool found = other.lookupSparse.TryGetValue(entry.Key, out var theirs);
760 |                     if (!found || theirs != entry.Value)
761 |                     {
762 |                         return false;
763 |                     }
764 |                 }
765 |             }
766 |
767 |             return true;
768 |         }
776 | #endregion
777 | }
778 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/CardinalityEstimatorSerializer.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation
27 | {
28 | using System.Collections.Generic;
29 | using System.IO;
30 | using System.Runtime.Serialization;
31 | using System.Text;
32 |
33 | using Hash;
34 |
35 | /// <summary>
36 | /// Efficient serializer for <see cref="CardinalityEstimator" />
37 | /// </summary>
38 | public class CardinalityEstimatorSerializer
39 | {
40 | /// <summary>
41 | /// Highest major version of the serialization format which this serializer can deserialize. A breaking change in the format requires a
42 | /// bump in major version, i.e. version 2.X cannot read 3.Y
43 | /// </summary>
44 | public const ushort DataFormatMajorVersion = 3;
45 |
46 | /// <summary>
47 | /// Minor version of the serialization format. A non-breaking change should be marked by a bump in minor version, i.e. version 2.2
48 | /// should be able to read version 2.3
49 | /// </summary>
50 | public const ushort DataFormatMinorVersion = 1;
51 |
52 | /// <summary>
53 | /// Serializes <paramref name="cardinalityEstimator" /> to <paramref name="stream" />, passing leaveOpen as false.
54 | /// </summary>
55 | /// <param name="stream">The stream.</param>
56 | /// <param name="cardinalityEstimator">The cardinality estimator.</param>
57 | public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator)
58 | {
59 | Serialize(stream, cardinalityEstimator, false);
60 | }
61 |
62 |         /// <summary>
63 |         /// Serializes <paramref name="cardinalityEstimator" /> to <paramref name="stream" />.
64 |         /// </summary>
65 |         /// <param name="stream">The stream.</param>
66 |         /// <param name="cardinalityEstimator">The cardinality estimator.</param>
67 |         /// <param name="leaveOpen">if set to <c>true</c> leave the stream open after serialization.</param>
68 |         public void Serialize(Stream stream, CardinalityEstimator cardinalityEstimator, bool leaveOpen)
69 |         {
70 |             // The writer is disposed when the method ends, whether Write returns or throws;
71 |             // leaveOpen is forwarded to it unchanged
72 |             using var binaryWriter = new BinaryWriter(stream, Encoding.UTF8, leaveOpen);
73 |             Write(binaryWriter, cardinalityEstimator);
74 |         }
75 |
76 |         /// <summary>
77 |         /// Writes the binary representation of <paramref name="cardinalityEstimator" /> using <paramref name="writer" />
78 |         /// </summary>
79 |         /// <param name="writer">The writer</param>
80 |         /// <param name="cardinalityEstimator">The cardinality estimator.</param>
81 |         public void Write(BinaryWriter writer, CardinalityEstimator cardinalityEstimator)
82 |         {
83 |             writer.Write(DataFormatMajorVersion);
84 |             writer.Write(DataFormatMinorVersion);
85 |
86 |             CardinalityEstimatorState data = cardinalityEstimator.GetState();
87 |
88 |             writer.Write(data.BitsPerIndex);
89 |             // Flags byte: bit 1 = sparse representation, bit 0 = direct count present
90 |             writer.Write((byte)(((data.IsSparse ? 1 : 0) << 1) + (data.DirectCount != null ? 1 : 0)));
91 |             if (data.DirectCount != null)
92 |             {
93 |                 // Only the direct-count hashes are written in this case; neither lookup table is stored
94 |                 writer.Write(data.DirectCount.Count);
95 |                 foreach (ulong element in data.DirectCount)
96 |                 {
97 |                     writer.Write(element);
98 |                 }
99 |             }
100 |             else if (data.IsSparse)
101 |             {
102 |                 writer.Write(data.LookupSparse.Count);
103 |                 foreach (KeyValuePair<ushort, byte> element in data.LookupSparse)
104 |                 {
105 |                     writer.Write(element.Key);
106 |                     writer.Write(element.Value);
107 |                 }
108 |             }
109 |             else
110 |             {
111 |                 // Length prefix followed by the raw table bytes; Write(byte[]) adds no framing of its own
112 |                 writer.Write(data.LookupDense.Length);
113 |                 writer.Write(data.LookupDense);
114 |             }
115 |
116 |             writer.Write(data.CountAdditions);
117 |             writer.Flush();
118 |         }
119 |
120 |         /// <summary>
121 |         /// Deserialize a <see cref="CardinalityEstimator" /> from the given <paramref name="stream" />
122 |         /// </summary>
123 |         /// <param name="stream">The stream.</param>
124 |         /// <param name="hashFunction">Hash function handed to Read unchanged; may be null</param>
125 |         /// <param name="leaveOpen">if set to <c>true</c> leave the stream open after deserialization.</param>
126 |         /// <returns>A new CardinalityEstimator.</returns>
127 |         public CardinalityEstimator Deserialize(Stream stream, GetHashCodeDelegate hashFunction = null, bool leaveOpen = false)
128 |         {
129 |             // The reader is disposed when this method ends, whether Read returns or throws
130 |             using var binaryReader = new BinaryReader(stream, Encoding.UTF8, leaveOpen);
131 |             return Read(binaryReader, hashFunction);
132 |         }
133 |
134 |         /// <summary>
135 |         /// Reads a <see cref="CardinalityEstimator" /> using the given <paramref name="reader" />
136 |         /// </summary>
137 |         /// <param name="reader">The reader</param>
138 |         /// <param name="hashFunction">Hash function for the result. Ignored for data older than 2.0, which always uses FNV-1a</param>
139 |         /// <returns>An instance of <see cref="CardinalityEstimator" /></returns>
140 |         /// <exception cref="SerializationException">If the data has a higher major format version than this serializer</exception>
141 |         public CardinalityEstimator Read(BinaryReader reader, GetHashCodeDelegate hashFunction = null)
142 |         {
143 |             int dataFormatMajorVersion = reader.ReadUInt16();
144 |             int dataFormatMinorVersion = reader.ReadUInt16();
145 |
146 |             AssertDataVersionCanBeRead(dataFormatMajorVersion, dataFormatMinorVersion);
147 |
148 |             if (dataFormatMajorVersion < 2)
149 |             {
150 |                 // Versions before 2.0 all used FNV-1a
151 |                 hashFunction = Fnv1A.GetHashCode;
152 |             }
153 |             else if (dataFormatMajorVersion == 2)
154 |             {
155 |                 // Version 2.x stores a hash function ID; it only applies when the caller supplied no function
156 |                 byte hashFunctionId = reader.ReadByte();
157 |                 if (hashFunction == null)
158 |                 {
159 |                     hashFunction = (hashFunctionId == 1) ? (GetHashCodeDelegate)Murmur3.GetHashCode : (GetHashCodeDelegate)Fnv1A.GetHashCode;
160 |                 }
161 |             }
162 |             // Version 3.x stores no hash function ID: the supplied function is passed on as it is (possibly null)
163 |
164 |             int bitsPerIndex = reader.ReadInt32();
165 |             byte flags = reader.ReadByte();
166 |             bool isSparse = (flags & 2) == 2;
167 |             bool isDirectCount = (flags & 1) == 1;
168 |
169 |             HashSet<ulong> directCount = null;
170 |             IDictionary<ushort, byte> lookupSparse = isSparse ? new Dictionary<ushort, byte>() : null;
171 |             byte[] lookupDense = null;
172 |
173 |             if (isDirectCount)
174 |             {
175 |                 int count = reader.ReadInt32();
176 |                 directCount = new HashSet<ulong>();
177 |
178 |                 for (var i = 0; i < count; i++)
179 |                 {
180 |                     ulong element = reader.ReadUInt64();
181 |                     directCount.Add(element);
182 |                 }
183 |             }
184 |             else if (isSparse)
185 |             {
186 |                 int count = reader.ReadInt32();
187 |
188 |                 for (var i = 0; i < count; i++)
189 |                 {
190 |                     ushort elementKey = reader.ReadUInt16();
191 |                     byte elementValue = reader.ReadByte();
192 |                     lookupSparse.Add(elementKey, elementValue);
193 |                 }
194 |             }
195 |             else
196 |             {
197 |                 int count = reader.ReadInt32();
198 |                 lookupDense = reader.ReadBytes(count);
199 |             }
200 |
201 |             // Starting with version 2.1, the serializer writes CountAdditions.
202 |             // The version is compared as a (major, minor) pair so that X.0 with X > 2 qualifies as well
203 |             ulong countAdditions = 0UL;
204 |             bool hasCountAdditions = dataFormatMajorVersion > 2 || (dataFormatMajorVersion == 2 && dataFormatMinorVersion >= 1);
205 |             if (hasCountAdditions)
206 |             {
207 |                 countAdditions = reader.ReadUInt64();
208 |             }
209 |
210 |             var data = new CardinalityEstimatorState
211 |             {
212 |                 BitsPerIndex = bitsPerIndex,
213 |                 DirectCount = directCount,
214 |                 IsSparse = isSparse,
215 |                 LookupDense = lookupDense,
216 |                 LookupSparse = lookupSparse,
217 |                 CountAdditions = countAdditions,
218 |             };
219 |
220 |             var result = new CardinalityEstimator(hashFunction, data);
221 |
222 |             return result;
223 |         }
224 |
225 |         /// <summary>
226 |         /// Verifies that data with the given major and minor version numbers can be deserialized by this serializer
227 |         /// </summary>
228 |         /// <exception cref="SerializationException">If this serializer cannot read data with the given version numbers</exception>
229 |         private static void AssertDataVersionCanBeRead(int dataFormatMajorVersion, int dataFormatMinorVersion)
230 |         {
231 |             if (dataFormatMajorVersion <= DataFormatMajorVersion)
232 |             {
233 |                 return;
234 |             }
235 |             string message = string.Format("Incompatible data format, can't deserialize data version {0}.{1} (serializer version: {2}.{3})", dataFormatMajorVersion, dataFormatMinorVersion, DataFormatMajorVersion, DataFormatMinorVersion);
236 |             throw new SerializationException(message);
237 |         }
238 | }
239 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/CardinalityEstimatorState.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation
27 | {
28 | using System.Collections.Generic;
29 | using Hash;
30 |
31 |     /// <summary>
32 |     /// Represents state of a <see cref="CardinalityEstimator" /> for serialization, see <see cref="CardinalityEstimatorSerializer" />
33 |     /// </summary>
34 |     internal class CardinalityEstimatorState
35 |     {
36 |         public int BitsPerIndex;                       // number of index bits
37 |         public HashSet<ulong> DirectCount;             // element hash codes for direct counting, or null
38 |         public bool IsSparse;                          // true when LookupSparse is the active representation
39 |         public byte[] LookupDense;                     // dense lookup table, or null
40 |         public IDictionary<ushort, byte> LookupSparse; // sparse lookup keyed by sub-stream index, or null
41 |         public ulong CountAdditions;                   // value of the estimator's CountAdditions property
42 |     }
43 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/Hash/Fnv1A.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Hash
27 | {
28 |     /// <summary>
29 |     /// Helper class that computes the 64-bit FNV-1a hash of byte arrays
30 |     /// </summary>
31 |     public class Fnv1A
32 |     {
33 |         /// <summary>
34 |         /// Computes the 64-bit FNV-1a hash of the given <paramref name="bytes" />: starting from the offset basis,
35 |         /// every input byte is XOR-ed into the running value, which is then multiplied by the FNV prime
36 |         /// </summary>
37 |         /// <param name="bytes">Bytes to compute the hash for</param>
38 |         /// <returns>The 64-bit fnv1a hash</returns>
39 |         public static ulong GetHashCode(byte[] bytes)
40 |         {
41 |             const ulong OffsetBasis = 14695981039346656037;
42 |             const ulong Prime = 0x100000001b3;
43 |
44 |             ulong digest = OffsetBasis;
45 |             for (int position = 0; position < bytes.Length; position++)
46 |             {
47 |                 // xor the bottom with the current octet, then multiply by the 64 bit FNV magic prime mod 2^64
48 |                 digest = (digest ^ bytes[position]) * Prime;
49 |             }
50 |
51 |             return digest;
52 |         }
53 |     }
57 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/Hash/Murmur3.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation.Hash
27 | {
28 |     using System;
29 |     using System.Collections.Concurrent;
30 |     using Murmur;
31 |
32 |     /// <summary>
33 |     /// Computes 64-bit hash codes using a 128-bit Murmur hash implementation
34 |     /// </summary>
35 |     public class Murmur3
36 |     {
37 |         // Hasher instances are reused across calls instead of being created per hash. Each call
38 |         // pops its own instance (or creates one), so an instance is never used by two calls at once.
39 |         private static readonly ConcurrentStack<Murmur128> pool = new ConcurrentStack<Murmur128>();
40 |
41 |         /// <summary>
42 |         /// Hashes the given bytes and returns the first 8 bytes of the digest as an unsigned 64-bit integer
43 |         /// </summary>
44 |         /// <param name="bytes">The bytes to hash</param>
45 |         /// <returns>The 64-bit value read from offset 0 of the computed hash</returns>
46 |         public static ulong GetHashCode(byte[] bytes)
47 |         {
48 |             Murmur128 murmurHash;
49 |             if (!pool.TryPop(out murmurHash))
50 |             {
51 |                 // Pool is empty: create a fresh managed hasher, preferring the x64 variant
52 |                 murmurHash = MurmurHash.Create128(managed: true, preference: AlgorithmPreference.X64);
53 |             }
54 |
55 |             byte[] result = murmurHash.ComputeHash(bytes);
56 |
57 |             // Hand the hasher back for reuse before converting the digest
58 |             pool.Push(murmurHash);
59 |             return BitConverter.ToUInt64(result, 0);
60 |         }
61 |     }
62 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/ICardinalityEstimator.cs:
--------------------------------------------------------------------------------
1 | // /*
2 | // See https://github.com/saguiitay/CardinalityEstimation.
3 | // The MIT License (MIT)
4 | //
5 | // Copyright (c) 2015 Microsoft
6 | //
7 | // Permission is hereby granted, free of charge, to any person obtaining a copy
8 | // of this software and associated documentation files (the "Software"), to deal
9 | // in the Software without restriction, including without limitation the rights
10 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | // copies of the Software, and to permit persons to whom the Software is
12 | // furnished to do so, subject to the following conditions:
13 | //
14 | // The above copyright notice and this permission notice shall be included in all
15 | // copies or substantial portions of the Software.
16 | //
17 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | // SOFTWARE.
24 | // */
25 |
26 | namespace CardinalityEstimation
27 | {
28 |     /// <summary>
29 |     /// Estimator for the number of unique elements in a set
30 |     /// </summary>
31 |     /// <typeparam name="T">The type of elements in the set</typeparam>
32 |     public interface ICardinalityEstimator<in T>
33 |     {
34 |         /// <summary>
35 |         /// Adds an element to the counted set. Elements added multiple times will be counted only once.
36 |         /// </summary>
37 |         /// <param name="element">The element to add</param>
38 |         /// <returns>
39 |         /// True if the state of the estimator has changed, false otherwise
40 |         /// </returns>
41 |         bool Add(T element);
42 |
43 |         /// <summary>
44 |         /// Returns the estimated number of unique elements in the counted set
45 |         /// </summary>
46 |         /// <returns>
47 |         /// The estimated count of unique elements
48 |         /// </returns>
49 |         ulong Count();
50 |
51 |         /// <summary>
52 |         /// Gets the number of times elements were added (including duplicates)
53 |         /// </summary>
54 |         /// <returns>
55 |         /// The number of times <see cref="Add"/> was called
56 |         /// </returns>
57 |         ulong CountAdditions { get; }
58 |     }
59 | }
--------------------------------------------------------------------------------
/CardinalityEstimation/InternalsVisible.cs:
--------------------------------------------------------------------------------
1 | using System.Runtime.CompilerServices;
2 |
3 | // Grants the CardinalityEstimation.Test assembly (identified by the public key below) access to this assembly's internal types and members
4 | [assembly: InternalsVisibleTo("CardinalityEstimation.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010081f7aadc0bf7501dedd2471fdad4b71350a761acc01141226ebef71bac6d5f077da23411616de38106126f31b1281ea7089f9269b3dd4107f92a26b623c982115c4131ce44f80b420bbf8df003be7e389902b82ac8ebc65d4b553307a4c826fc5b09308c5dcfa989bf86a1fc31621175657c8494819b630b91960e86d14837ba")]
5 |
--------------------------------------------------------------------------------
/CardinalityEstimation/cardinalityestimation.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saguiitay/CardinalityEstimation/d2be1161b87ae298af49f58e48698745db34bff9/CardinalityEstimation/cardinalityestimation.snk
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | This software uses an implementation of the Murmur3 hash algorithm provided at
2 | http://github.com/darrenkopp/murmurhash-net, which was made available under
3 | the terms of the Apache 2.0 license. A copy of the Apache 2.0 license is
4 | provided at the end of this document. The rest of this software is licensed
5 | under the MIT license, which is provided here.
6 |
7 | The MIT License (MIT)
8 |
9 | Copyright (c) 2021 Microsoft
10 |
11 | Permission is hereby granted, free of charge, to any person obtaining a copy
12 | of this software and associated documentation files (the "Software"), to deal
13 | in the Software without restriction, including without limitation the rights
14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 | copies of the Software, and to permit persons to whom the Software is
16 | furnished to do so, subject to the following conditions:
17 |
18 | The above copyright notice and this permission notice shall be included in all
19 | copies or substantial portions of the Software.
20 |
21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 | SOFTWARE.
28 |
29 |
30 |
31 | Apache License
32 |
33 | Version 2.0, January 2004
34 |
35 | http://www.apache.org/licenses/
36 |
37 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
38 |
39 | 1. Definitions.
40 |
41 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
42 |
43 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
44 |
45 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
46 |
47 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
48 |
49 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
50 |
51 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
52 |
53 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
54 |
55 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
56 |
57 | "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
58 |
59 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
60 |
61 | 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
62 |
63 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
64 |
65 | 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
66 |
67 | You must give any other recipients of the Work or Derivative Works a copy of this License; and
68 |
69 | You must cause any modified files to carry prominent notices stating that You changed the files; and
70 |
71 | You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
72 |
73 | If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
74 |
75 | 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
76 |
77 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
78 |
79 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
80 |
81 | 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
82 |
83 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CardinalityEstimation
2 |
3 | HyperLogLog-based set cardinality estimation library
4 |
5 | This library estimates the number of unique elements in a set, in a quick and memory-efficient manner. It's based on the following:
6 |
7 | 1. Flajolet et al., "HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm", DMTCS proc. AH 2007, http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
8 | 2. Heule, Nunkesser and Hall 2013, "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/pubs/archive/40671.pdf
9 |
10 | The trade-off between accuracy and memory usage is user-selectable. Typically, a cardinality estimator will give a perfect estimate of small cardinalities (up to 100 unique elements), and 97% accuracy or better (usually much better) for any cardinality up to near 2^64, while consuming several KB of memory (no more than 16KB).
11 |
12 | ## Usage
13 |
14 | Usage is very simple:
15 |
16 | ```csharp
17 | ICardinalityEstimator<string> estimator = new CardinalityEstimator<string>();
18 |
19 | estimator.Add("Alice");
20 | estimator.Add("Bob");
21 | estimator.Add("Alice");
22 | estimator.Add("George Michael");
23 |
24 | ulong numberOfUniqueElements = estimator.Count(); // will be 3
25 | ```
26 |
27 | ## Nuget Package
28 |
29 | This code is available as the NuGet package [CardinalityEstimation](https://www.nuget.org/packages/CardinalityEstimation/).
30 |
31 | To install, run the following command in the Package Manager Console:
32 |
33 | ```powershell
34 | Install-Package CardinalityEstimation
35 | ```
36 |
37 | ## Release Notes
38 |
39 | ## Keeping things friendly
40 |
41 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
42 |
--------------------------------------------------------------------------------